Ciela-Institute · arrjon · Nov 25, 2024 · Nov 25, 2024 · Nov 25, 2024 · Nov 25, 2024
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,4 @@
 numpy
-scipy
+scipy
+tqdm
+deprecation
diff --git a/src/tarp/drp.py b/src/tarp/drp.py
@@ -41,13 +41,13 @@ def _get_tarp_coverage_single(
     """
     Estimates coverage with the TARP method a single time.
 
-    Reference: `Lemos, Coogan et al 2023 <https://arxiv.org/abs/2302.03026>`_
+    Reference: `Lemos, Coogan et al. 2023 <https://arxiv.org/abs/2302.03026>`_
 
     Args:
         samples: the samples to compute the coverage of, with shape ``(n_samples, n_sims, n_dims)``.
-        theta: the true parameter values for each samples, with shape ``(n_sims, n_dims)``.
+        theta: the true parameter values for each sample, with shape ``(n_sims, n_dims)``.
         references: the reference points to use for the DRP regions, with shape
-            ``(n_references, n_sims)``, or the string ``"random"``. If the later, then
+            ``(n_references, n_sims)``, or the string ``"random"``. If the latter, then
             the reference points are chosen randomly from the unit hypercube over
             the parameter space.
         metric: the metric to use when computing the distance. Can be ``"euclidean"`` or
@@ -85,16 +85,12 @@ def _get_tarp_coverage_single(
     # Reshape theta
     theta = theta[np.newaxis, :, :]
 
-    # Normalize
-    if norm:
-        low = np.min(theta, axis=1, keepdims=True)
-        high = np.max(theta, axis=1, keepdims=True)
-        samples = (samples - low) / (high - low + 1e-10)
-        theta = (theta - low) / (high - low + 1e-10)
-
     # Generate reference points
+    references_given = False
     if isinstance(references, str) and references == "random":
-        references = np.random.uniform(low=0, high=1, size=(num_sims, num_dims))
+        references = np.random.uniform(low=0, high=1, size=(1, num_sims, num_dims))
+        if norm is False:
+            print("Warning: references are normalized but samples are not")
     else:
         assert isinstance(references, np.ndarray)  # to quiet pyright
         if references.ndim != 2:
@@ -107,15 +103,28 @@ def _get_tarp_coverage_single(
             raise ValueError(
                 "references must have the same number of columns as samples"
             )
+        references_given = True
+
+        # Reshape references
+        references = references[np.newaxis, :, :]
+
+    # Normalize
+    if norm:
+        low = np.min(theta, axis=1, keepdims=True)
+        high = np.max(theta, axis=1, keepdims=True)
+        samples = (samples - low) / (high - low + 1e-10)
+        theta = (theta - low) / (high - low + 1e-10)
+        if references_given:   # references not normalized if they are given, otherwise in [0, 1]
+            references = (references - low) / (high - low + 1e-10)
 
     # Compute distances
     if metric == "euclidean":
         samples_distances = np.sqrt(
-            np.sum((references[np.newaxis] - samples) ** 2, axis=-1)
+            np.sum((references - samples) ** 2, axis=-1)
         )
         theta_distances = np.sqrt(np.sum((references - theta) ** 2, axis=-1))
     elif metric == "manhattan":
-        samples_distances = np.sum(np.abs(references[np.newaxis] - samples), axis=-1)
+        samples_distances = np.sum(np.abs(references - samples), axis=-1)
         theta_distances = np.sum(np.abs(references - theta), axis=-1)
     else:
         raise ValueError("metric must be either 'euclidean' or 'manhattan'")
@@ -127,7 +136,7 @@ def _get_tarp_coverage_single(
     h, alpha = np.histogram(f, density=True, bins=num_alpha_bins, range=(0,1))
     dx = alpha[1] - alpha[0]
     ecp = np.cumsum(h) * dx
-    return np.concatenate([[0], ecp]), alpha
+    return np.concatenate(([0], ecp)), alpha
 
 
 def _get_tarp_coverage_bootstrap(samples: np.ndarray,
@@ -145,9 +154,9 @@ def _get_tarp_coverage_bootstrap(samples: np.ndarray,
 
     Args:
         samples: the samples to compute the coverage of, with shape ``(n_samples, n_sims, n_dims)``.
-        theta: the true parameter values for each samples, with shape ``(n_sims, n_dims)``.
+        theta: the true parameter values for each sample, with shape ``(n_sims, n_dims)``.
         references: the reference points to use for the DRP regions, with shape
-            ``(n_references, n_sims)``, or the string ``"random"``. If the later, then
+            ``(n_references, n_sims)``, or the string ``"random"``. If the latter, then
             the reference points are chosen randomly from the unit hypercube over
             the parameter space.
         metric: the metric to use when computing the distance. Can be ``"euclidean"`` or
@@ -167,25 +176,27 @@ def _get_tarp_coverage_bootstrap(samples: np.ndarray,
         num_alpha_bins = num_sims // 10
 
     boot_ecp = np.empty(shape=(num_bootstrap, num_alpha_bins+1))
+    alpha = None
     for i in tqdm(range(num_bootstrap)):
         idx = np.random.randint(low=0, high=num_sims, size=num_sims)
 
         # Sample with replacement from the full set of simulations
         boot_samples = samples[:, idx, :]
         boot_theta = theta[idx, :]
-
-        boot_ecp[i, :], alpha = _get_tarp_coverage_single(boot_samples,
-                                                          boot_theta,
-                                                          references=references,
-                                                          metric=metric,
-                                                          num_alpha_bins=num_alpha_bins,
-                                                          norm=norm,
-                                                          seed=seed)
-
-    # ecp_mean = boot_ecp.mean(axis=0)
-    # ecp_std = boot_ecp.std(axis=0)
-    # alpha_mean = boot_alpha.mean(axis=0)
-    # return ecp_mean, ecp_std, alpha_mean
+        if isinstance(references, np.ndarray):
+            boot_references = references[idx, :]  # reference might have a dependency on theta
+        else:
+            boot_references = references
+
+        boot_ecp[i, :], alpha = _get_tarp_coverage_single(
+            samples=boot_samples,
+            theta=boot_theta,
+            references=boot_references,
+            metric=metric,
+            num_alpha_bins=num_alpha_bins,
+            norm=norm,
+            seed=seed
+        )
     return boot_ecp, alpha
 
 
@@ -203,13 +214,13 @@ def get_tarp_coverage(
     """
     Estimates coverage with the TARP method.
 
-    Reference: `Lemos, Coogan et al 2023 <https://arxiv.org/abs/2302.03026>`_
+    Reference: `Lemos, Coogan et al. 2023 <https://arxiv.org/abs/2302.03026>`_
 
     Args:
         samples: the samples to compute the coverage of, with shape ``(n_samples, n_sims, n_dims)``.
-        theta: the true parameter values for each samples, with shape ``(n_sims, n_dims)``.
+        theta: the true parameter values for each sample, with shape ``(n_sims, n_dims)``.
         references: the reference points to use for the DRP regions, with shape
-            ``(n_references, n_sims)``, or the string ``"random"``. If the later, then
+            ``(n_references, n_sims)``, or the string ``"random"``. If the latter, then
             the reference points are chosen randomly from the unit hypercube over
             the parameter space.
         metric: the metric to use when computing the distance. Can be ``"euclidean"`` or

diff --git a/tests/test_drp.py b/tests/test_drp.py
@@ -1,6 +1,6 @@
 import unittest
 import numpy as np
-from tarp import get_tarp_coverage
+from src.tarp import get_tarp_coverage
 
 
 def get_test_data():
@@ -13,13 +13,14 @@ def get_test_data():
     samples = np.random.normal(
         loc=theta, scale=sigma, size=(num_samples, num_sims, num_dims)
     )
+    reference = theta + np.random.uniform(low=0, high=1, size=(num_sims, num_dims))
     theta = np.random.normal(loc=theta, scale=sigma, size=(num_sims, num_dims))
-    return samples, theta
+    return samples, theta, reference
 
 
 class TarpTest(unittest.TestCase):
     def test_single(self):
-        samples, theta = get_test_data()
+        samples, theta, _ = get_test_data()
         ecp, alpha = get_tarp_coverage(samples,
                                        theta,
                                        references="random",
@@ -29,7 +30,7 @@ def test_single(self):
         self.assertAlmostEqual(np.max(np.abs(ecp - alpha)), 0., delta=0.25)
 
     def test_norm(self):
-        samples, theta = get_test_data()
+        samples, theta, _ = get_test_data()
         ecp, alpha = get_tarp_coverage(samples,
                                        theta,
                                        references="random",
@@ -39,7 +40,7 @@ def test_norm(self):
         self.assertAlmostEqual(np.max(np.abs(ecp - alpha)), 0., delta=0.25)
 
     def test_bootstrap(self):
-        samples, theta = get_test_data()
+        samples, theta, _ = get_test_data()
         ecp, alpha = get_tarp_coverage(samples,
                                        theta,
                                        references="random",
@@ -51,6 +52,26 @@ def test_bootstrap(self):
         #self.assertAlmostEqual(np.max(np.abs(ecp_mean - alpha)/ecp_std), 0., delta=10.)
         self.assertAlmostEqual(np.max(np.abs(ecp_mean - alpha)), 0., delta=0.25)
 
+    def test_references(self):
+        samples, theta, references = get_test_data()
+        ecp, alpha = get_tarp_coverage(samples,
+                                       theta,
+                                       references=references,
+                                       metric="euclidean",
+                                       norm=False,
+                                       bootstrap=False)
+        self.assertAlmostEqual(np.max(np.abs(ecp - alpha)), 0., delta=0.25)
+
+    def test_references_norm(self):
+        samples, theta, references = get_test_data()
+        ecp, alpha = get_tarp_coverage(samples,
+                                       theta,
+                                       references=references,
+                                       metric="euclidean",
+                                       norm=True,
+                                       bootstrap=False)
+        self.assertAlmostEqual(np.max(np.abs(ecp - alpha)), 0., delta=0.25)
+
 
 if __name__ == "__main__":
     unittest.main()