Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 18 additions & 9 deletions src/squidpy/gr/_ripley.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from numpy.random import default_rng
from scanpy import logging as logg
from scipy.spatial import ConvexHull, Delaunay
from scipy.spatial.distance import pdist
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, NearestNeighbors
from sklearn.preprocessing import LabelEncoder
from spatialdata import SpatialData

Expand Down Expand Up @@ -84,6 +83,8 @@ def ripley(
metric
Which metric to use for computing distances.
For available metrics, check out :class:`sklearn.metrics.DistanceMetric`.
For Ripley's L specifically, only metrics supported by :class:`sklearn.neighbors.KDTree`
are valid (see its ``valid_metrics`` attribute).
n_neigh
Number of neighbors to consider for the KNN graph.
n_simulations
Expand Down Expand Up @@ -146,8 +147,7 @@ def ripley(
distances, _ = tree_c.kneighbors(coordinates[cluster_idx != i, :], n_neighbors=n_neigh)
bins, obs_stats = _f_g_function(distances.squeeze(), support)
elif mode == RipleyStat.L:
distances = pdist(coord_c, metric=metric)
bins, obs_stats = _l_function(distances, support, N, area)
bins, obs_stats = _l_function(coord_c, support, N, area, metric)
else:
raise NotImplementedError(f"Mode `{mode.s!r}` is not yet implemented.")
obs_arr[i] = obs_stats
Expand All @@ -166,8 +166,7 @@ def ripley(
distances_i, _ = tree_i.kneighbors(coordinates, n_neighbors=1)
_, stats_i = _f_g_function(distances_i.squeeze(), support)
elif mode == RipleyStat.L:
distances_i = pdist(random_i, metric=metric)
_, stats_i = _l_function(distances_i, support, N, area)
_, stats_i = _l_function(random_i, support, N, area, metric)
else:
raise NotImplementedError(f"Mode `{mode.s!r}` is not yet implemented.")

Expand Down Expand Up @@ -208,10 +207,20 @@ def _f_g_function(distances: NDArrayA, support: NDArrayA) -> tuple[NDArrayA, NDA
return bins, np.concatenate((np.zeros((1,), dtype=float), fracs))


def _l_function(distances: NDArrayA, support: NDArrayA, n: int, area: float) -> tuple[NDArrayA, NDArrayA]:
n_pairs_less_than_d = (distances < support.reshape(-1, 1)).sum(axis=1)
def _l_function(points: NDArrayA, support: NDArrayA, n: int, area: float, metric: str) -> tuple[NDArrayA, NDArrayA]:
if metric not in KDTree.valid_metrics:
raise ValueError(f"Unsupported metric '{metric}'. Ripley's L supports {KDTree.valid_metrics}")
# Ripley's K(d) is the number of ordered point pairs within distance d. `two_point_correlation`
# computes exactly that (cumulatively over `support`, in a single tree pass) without
# materializing the O(m^2) pairwise distances.
tree = KDTree(points, metric=metric)
# `two_point_correlation` counts ordered pairs incl. the `m` self-matches at distance 0;
# subtracting `m` gives ordered non-self pairs.
# `dualtree=True` has been observed to be roughly 2x faster than the single-tree default.
num_points = points.shape[0]
n_ordered_pairs_less_than_d = tree.two_point_correlation(points, support, dualtree=True) - num_points
intensity = n / area
k_estimate = ((n_pairs_less_than_d * 2) / n) / intensity
k_estimate = (n_ordered_pairs_less_than_d / n) / intensity
l_estimate = np.sqrt(k_estimate / np.pi)
return support, l_estimate

Expand Down
5 changes: 5 additions & 0 deletions tests/graph/test_ripley.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
CLUSTER_KEY = "leiden"


def test_ripley_L_unsupported_metric_raises(adata_ripley: AnnData):
with pytest.raises(ValueError, match="Unsupported metric"):
ripley(adata_ripley, cluster_key=CLUSTER_KEY, mode="L", metric="cosine")


@pytest.mark.parametrize("mode", list(RipleyStat))
def test_ripley_modes(adata_ripley: AnnData, mode: RipleyStat):
adata = adata_ripley
Expand Down
Loading