Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
numpy
scipy
scipy
tqdm
deprecation
75 changes: 43 additions & 32 deletions src/tarp/drp.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ def _get_tarp_coverage_single(
"""
Estimates coverage with the TARP method a single time.

Reference: `Lemos, Coogan et al 2023 <https://arxiv.org/abs/2302.03026>`_
Reference: `Lemos, Coogan et al. 2023 <https://arxiv.org/abs/2302.03026>`_

Args:
samples: the samples to compute the coverage of, with shape ``(n_samples, n_sims, n_dims)``.
theta: the true parameter values for each samples, with shape ``(n_sims, n_dims)``.
theta: the true parameter values for each sample, with shape ``(n_sims, n_dims)``.
references: the reference points to use for the DRP regions, with shape
``(n_references, n_sims)``, or the string ``"random"``. If the later, then
``(n_references, n_sims)``, or the string ``"random"``. If the latter, then
the reference points are chosen randomly from the unit hypercube over
the parameter space.
metric: the metric to use when computing the distance. Can be ``"euclidean"`` or
Expand Down Expand Up @@ -85,16 +85,12 @@ def _get_tarp_coverage_single(
# Reshape theta
theta = theta[np.newaxis, :, :]

# Normalize
if norm:
low = np.min(theta, axis=1, keepdims=True)
high = np.max(theta, axis=1, keepdims=True)
samples = (samples - low) / (high - low + 1e-10)
theta = (theta - low) / (high - low + 1e-10)

# Generate reference points
references_given = False
if isinstance(references, str) and references == "random":
references = np.random.uniform(low=0, high=1, size=(num_sims, num_dims))
references = np.random.uniform(low=0, high=1, size=(1, num_sims, num_dims))
if norm is False:
print("Warning: references are normalized but samples are not")
else:
assert isinstance(references, np.ndarray) # to quiet pyright
if references.ndim != 2:
Expand All @@ -107,15 +103,28 @@ def _get_tarp_coverage_single(
raise ValueError(
"references must have the same number of columns as samples"
)
references_given = True

# Reshape references
references = references[np.newaxis, :, :]

# Normalize
if norm:
low = np.min(theta, axis=1, keepdims=True)
high = np.max(theta, axis=1, keepdims=True)
samples = (samples - low) / (high - low + 1e-10)
theta = (theta - low) / (high - low + 1e-10)
if references_given: # references not normalized if they are given, otherwise in [0, 1]
references = (references - low) / (high - low + 1e-10)

# Compute distances
if metric == "euclidean":
samples_distances = np.sqrt(
np.sum((references[np.newaxis] - samples) ** 2, axis=-1)
np.sum((references - samples) ** 2, axis=-1)
)
theta_distances = np.sqrt(np.sum((references - theta) ** 2, axis=-1))
elif metric == "manhattan":
samples_distances = np.sum(np.abs(references[np.newaxis] - samples), axis=-1)
samples_distances = np.sum(np.abs(references - samples), axis=-1)
theta_distances = np.sum(np.abs(references - theta), axis=-1)
else:
raise ValueError("metric must be either 'euclidean' or 'manhattan'")
Expand All @@ -127,7 +136,7 @@ def _get_tarp_coverage_single(
h, alpha = np.histogram(f, density=True, bins=num_alpha_bins, range=(0,1))
dx = alpha[1] - alpha[0]
ecp = np.cumsum(h) * dx
return np.concatenate([[0], ecp]), alpha
return np.concatenate(([0], ecp)), alpha


def _get_tarp_coverage_bootstrap(samples: np.ndarray,
Expand All @@ -145,9 +154,9 @@ def _get_tarp_coverage_bootstrap(samples: np.ndarray,

Args:
samples: the samples to compute the coverage of, with shape ``(n_samples, n_sims, n_dims)``.
theta: the true parameter values for each samples, with shape ``(n_sims, n_dims)``.
theta: the true parameter values for each sample, with shape ``(n_sims, n_dims)``.
references: the reference points to use for the DRP regions, with shape
``(n_references, n_sims)``, or the string ``"random"``. If the later, then
``(n_references, n_sims)``, or the string ``"random"``. If the latter, then
the reference points are chosen randomly from the unit hypercube over
the parameter space.
metric: the metric to use when computing the distance. Can be ``"euclidean"`` or
Expand All @@ -167,25 +176,27 @@ def _get_tarp_coverage_bootstrap(samples: np.ndarray,
num_alpha_bins = num_sims // 10

boot_ecp = np.empty(shape=(num_bootstrap, num_alpha_bins+1))
alpha = None
for i in tqdm(range(num_bootstrap)):
idx = np.random.randint(low=0, high=num_sims, size=num_sims)

# Sample with replacement from the full set of simulations
boot_samples = samples[:, idx, :]
boot_theta = theta[idx, :]

boot_ecp[i, :], alpha = _get_tarp_coverage_single(boot_samples,
boot_theta,
references=references,
metric=metric,
num_alpha_bins=num_alpha_bins,
norm=norm,
seed=seed)

# ecp_mean = boot_ecp.mean(axis=0)
# ecp_std = boot_ecp.std(axis=0)
# alpha_mean = boot_alpha.mean(axis=0)
# return ecp_mean, ecp_std, alpha_mean
if isinstance(references, np.ndarray):
boot_references = references[idx, :] # reference might have a dependency on theta
else:
boot_references = references

boot_ecp[i, :], alpha = _get_tarp_coverage_single(
samples=boot_samples,
theta=boot_theta,
references=boot_references,
metric=metric,
num_alpha_bins=num_alpha_bins,
norm=norm,
seed=seed
)
return boot_ecp, alpha


Expand All @@ -203,13 +214,13 @@ def get_tarp_coverage(
"""
Estimates coverage with the TARP method.

Reference: `Lemos, Coogan et al 2023 <https://arxiv.org/abs/2302.03026>`_
Reference: `Lemos, Coogan et al. 2023 <https://arxiv.org/abs/2302.03026>`_

Args:
samples: the samples to compute the coverage of, with shape ``(n_samples, n_sims, n_dims)``.
theta: the true parameter values for each samples, with shape ``(n_sims, n_dims)``.
theta: the true parameter values for each sample, with shape ``(n_sims, n_dims)``.
references: the reference points to use for the DRP regions, with shape
``(n_references, n_sims)``, or the string ``"random"``. If the later, then
``(n_references, n_sims)``, or the string ``"random"``. If the latter, then
the reference points are chosen randomly from the unit hypercube over
the parameter space.
metric: the metric to use when computing the distance. Can be ``"euclidean"`` or
Expand Down
31 changes: 26 additions & 5 deletions tests/test_drp.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest
import numpy as np
from tarp import get_tarp_coverage
from src.tarp import get_tarp_coverage


def get_test_data():
Expand All @@ -13,13 +13,14 @@ def get_test_data():
samples = np.random.normal(
loc=theta, scale=sigma, size=(num_samples, num_sims, num_dims)
)
reference = theta + np.random.uniform(low=0, high=1, size=(num_sims, num_dims))
theta = np.random.normal(loc=theta, scale=sigma, size=(num_sims, num_dims))
return samples, theta
return samples, theta, reference


class TarpTest(unittest.TestCase):
def test_single(self):
samples, theta = get_test_data()
samples, theta, _ = get_test_data()
ecp, alpha = get_tarp_coverage(samples,
theta,
references="random",
Expand All @@ -29,7 +30,7 @@ def test_single(self):
self.assertAlmostEqual(np.max(np.abs(ecp - alpha)), 0., delta=0.25)

def test_norm(self):
samples, theta = get_test_data()
samples, theta, _ = get_test_data()
ecp, alpha = get_tarp_coverage(samples,
theta,
references="random",
Expand All @@ -39,7 +40,7 @@ def test_norm(self):
self.assertAlmostEqual(np.max(np.abs(ecp - alpha)), 0., delta=0.25)

def test_bootstrap(self):
samples, theta = get_test_data()
samples, theta, _ = get_test_data()
ecp, alpha = get_tarp_coverage(samples,
theta,
references="random",
Expand All @@ -51,6 +52,26 @@ def test_bootstrap(self):
#self.assertAlmostEqual(np.max(np.abs(ecp_mean - alpha)/ecp_std), 0., delta=10.)
self.assertAlmostEqual(np.max(np.abs(ecp_mean - alpha)), 0., delta=0.25)

def test_references(self):
samples, theta, references = get_test_data()
ecp, alpha = get_tarp_coverage(samples,
theta,
references=references,
metric="euclidean",
norm=False,
bootstrap=False)
self.assertAlmostEqual(np.max(np.abs(ecp - alpha)), 0., delta=0.25)

def test_references_norm(self):
samples, theta, references = get_test_data()
ecp, alpha = get_tarp_coverage(samples,
theta,
references=references,
metric="euclidean",
norm=True,
bootstrap=False)
self.assertAlmostEqual(np.max(np.abs(ecp - alpha)), 0., delta=0.25)


if __name__ == "__main__":
unittest.main()