From f52df9c800cac12eec2c1d710672c497fb5caa59 Mon Sep 17 00:00:00 2001 From: chiarasch Date: Mon, 9 Feb 2026 15:56:44 +0100 Subject: [PATCH 1/6] memory efficient knn --- src/cozipy/neighbors.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/cozipy/neighbors.py b/src/cozipy/neighbors.py index 8948ac8..18e4685 100644 --- a/src/cozipy/neighbors.py +++ b/src/cozipy/neighbors.py @@ -2,24 +2,20 @@ from scipy.spatial import Delaunay, cKDTree from scipy.sparse import coo_matrix - def knn_graph(coords, n_neighbors=6): """ - Lightweight KNN graph using full distance matrix (NumPy). - No sklearn dependency. Directional. + Memory-efficient KNN graph using cKDTree. + Returns COO adjacency matrix. """ - diff = coords[:, None, :] - coords[None, :, :] - dist = np.sum(diff ** 2, axis=2) - - idx = np.argsort(dist, axis=1) - neighbors = idx[:, 1:n_neighbors + 1] + tree = cKDTree(coords) + distances, indices = tree.query(coords, k=n_neighbors+1) rows = np.repeat(np.arange(coords.shape[0]), n_neighbors) - cols = neighbors.flatten() + cols = indices[:, 1:].flatten() data = np.ones(len(rows), dtype=int) - return coo_matrix((data, (rows, cols)), - shape=(coords.shape[0], coords.shape[0])) + return coo_matrix((data, (rows, cols)), shape=(coords.shape[0], coords.shape[0])) + def radius_graph(coords, radius): From 84f7b473877fdb7535d23d499e6d586451bb4d51 Mon Sep 17 00:00:00 2001 From: chiarasch Date: Mon, 9 Feb 2026 15:58:55 +0100 Subject: [PATCH 2/6] fix overflow issue in #2 --- src/cozipy/nep_cozi.py | 85 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 15 deletions(-) diff --git a/src/cozipy/nep_cozi.py b/src/cozipy/nep_cozi.py index 075e737..448fc75 100644 --- a/src/cozipy/nep_cozi.py +++ b/src/cozipy/nep_cozi.py @@ -1,4 +1,3 @@ -# nep_cozi.py import numpy as np import pandas as pd from scipy.sparse import coo_matrix @@ -10,19 +9,29 @@ def _encode_labels(labels): categories = labels.cat.categories.to_list() return codes, categories -def _compute_pair_counts_and_denominators(adj, labels_int): + +def _compute_pair_counts_and_denominators(adj, labels_int, n_types): """ Returns: counts[A, B] = total edges from A to B denom[A, B] = number of cells of type A having >=1 neighbor of type B + + The indices fed to np.bincount are calculated using 64-bit integers + to prevent overflow on large numbers of cell types (n_types). """ i, j = adj.row, adj.col labels_int = np.asarray(labels_int) - n_types = labels_int.max() + 1 + + if n_types == 0 or labels_int.size == 0: + return np.zeros((0, 0), dtype=int), np.zeros((0, 0), dtype=int) + + # Use 64-bit integers for the calculation that previously overflowed a 32-bit integer. + labels_int_64 = labels_int.astype(np.int64) + n_types_64 = np.int64(n_types) counts = np.bincount( - labels_int[i] * n_types + labels_int[j], - minlength=n_types * n_types + labels_int_64[i] * n_types_64 + labels_int_64[j], + minlength=n_types_64 * n_types_64 ).reshape(n_types, n_types) neigh_labels = labels_int[j] @@ -38,18 +47,46 @@ def _compute_pair_counts_and_denominators(adj, labels_int): return counts, denom -def nep_analysis(adj, labels, n_permutations=1000, random_state=None, return_df=True): + +def nep_analysis(adj, + labels, + n_permutations=1000, + random_state=None, + return_df=True, + fixed_type=None): """ NEP analysis with conditional ratios and z-scores. - Automatically accepts string or integer labels. + Parameters + ---------- + STILL UNDER DEVELOPMENT IF MAKES SENSE + fixed_type : str or int or None + If provided, this cell type will remain fixed in permutations while + all other cell types are shuffled. """ rng = np.random.default_rng(random_state) - + print("hello") labels_int, label_names = _encode_labels(labels) - n_types = len(label_names) - - obs_counts, obs_denom = _compute_pair_counts_and_denominators(adj, labels_int) + n_types = len(label_names) # SAFER calculation of n_types + + # Ensure n_types > 0 before proceeding + if n_types == 0: + if return_df: + # Return empty dataframes if there are no cell types + idx = [] + return { + "cond_ratio": pd.DataFrame([], index=idx, columns=idx), + "zscore": pd.DataFrame([], index=idx, columns=idx), + } + else: + return { + "cond_ratio": np.array([]).reshape(0, 0), + "zscore": np.array([]).reshape(0, 0), + } + + # Pass the safely calculated n_types + obs_counts, obs_denom = _compute_pair_counts_and_denominators( + adj, labels_int, n_types) obs_norm = obs_counts / np.maximum(obs_denom, 1) cond_ratio = np.zeros((n_types, n_types), float) @@ -58,9 +95,25 @@ def nep_analysis(adj, labels, n_permutations=1000, random_state=None, return_df= cond_ratio[A] = obs_denom[A] / max(total_A, 1) perm_norm = np.zeros((n_permutations, n_types, n_types), float) + + # convert fixed_type name -> integer index if needed + if fixed_type is not None and isinstance(fixed_type, str): + fixed_type = label_names.index(fixed_type) + for k in range(n_permutations): - perm = rng.permutation(labels_int) - c, d = _compute_pair_counts_and_denominators(adj, perm) + if fixed_type is None: + perm = rng.permutation(labels_int) + else: + # Mask cells to remain unchanged vs permuted + fixed_mask = labels_int == fixed_type + other_mask = ~fixed_mask + + # Copy original labels then permute only others + perm = labels_int.copy() + perm[other_mask] = rng.permutation(labels_int[other_mask]) + + # Pass the safely calculated n_types + c, d = _compute_pair_counts_and_denominators(adj, perm, n_types) perm_norm[k] = c / np.maximum(d, 1) expected = perm_norm.mean(axis=0) @@ -81,6 +134,7 @@ def nep_analysis(adj, labels, n_permutations=1000, random_state=None, return_df= "zscore": z, } + def run_cozi( coords, labels, @@ -88,7 +142,8 @@ def run_cozi( n_neighbors=6, radius=0.2, n_permutations=100, - random_state=None + random_state=None, + fixed_type=None ): """ Runs NEP analysis with specified neighborhood definition. @@ -105,4 +160,4 @@ def run_cozi( raise ValueError(f"Unknown neighborhood definition: {nbh_def}") # run NEP - return nep_analysis(adj, labels, n_permutations=n_permutations, random_state=random_state) + return nep_analysis(adj, labels, n_permutations=n_permutations, random_state=random_state, fixed_type=fixed_type) From 8944734dadd8e6437d09b41ce7047fcb7f3091b5 Mon Sep 17 00:00:00 2001 From: chiarasch Date: Mon, 9 Feb 2026 16:12:56 +0100 Subject: [PATCH 3/6] add changelog --- CHANGELOG.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..c307646 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,28 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +## [Unreleased] + +### Added + + +### Fixed +- NumPy / Numba compatibility issue + +--- + +## [0.1.0] - 2025-11-18 + +### Added +- COZI with knn, delaunay and radius nbh definition + +--- + +## [0.1.1] - 2026-02-09 + +### Added +- Allow fixed cell types in permutation + +### Fixed +- Overflow issue \ No newline at end of file From c26275c0d369fe110693c43f3de84f9c937a0bd3 Mon Sep 17 00:00:00 2001 From: chiarasch Date: Mon, 9 Feb 2026 16:13:32 +0100 Subject: [PATCH 4/6] delete print --- src/cozipy/nep_cozi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cozipy/nep_cozi.py b/src/cozipy/nep_cozi.py index 448fc75..5e2cf30 100644 --- a/src/cozipy/nep_cozi.py +++ b/src/cozipy/nep_cozi.py @@ -65,7 +65,6 @@ def nep_analysis(adj, all other cell types are shuffled. """ rng = np.random.default_rng(random_state) - print("hello") labels_int, label_names = _encode_labels(labels) n_types = len(label_names) # SAFER calculation of n_types From 2c0118e457012fb81184cb328706a3720b235065 Mon Sep 17 00:00:00 2001 From: chiarasch Date: Mon, 9 Feb 2026 16:16:25 +0100 Subject: [PATCH 5/6] add workflow automation --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c307646..efc0d7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ All notable changes to this project will be documented in this file. ### Added - Allow fixed cell types in permutation +- Workflow automation ### Fixed - Overflow issue \ No newline at end of file From f2f485148215ee9ad18bc1443da4448f8fba8383 Mon Sep 17 00:00:00 2001 From: chiarasch Date: Mon, 9 Feb 2026 16:29:46 +0100 Subject: [PATCH 6/6] bump version --- src/cozipy/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cozipy/__init__.py b/src/cozipy/__init__.py index 801c1b0..e563e32 100644 --- a/src/cozipy/__init__.py +++ b/src/cozipy/__init__.py @@ -1,5 +1,5 @@ from cozipy.nep_cozi import nep_analysis, run_cozi from cozipy.neighbors import knn_graph, radius_graph, delaunay_graph -__version__ = "0.1.0" +__version__ = "0.1.1" __all__ = ["nep_analysis", "run_cozi", "knn_graph", "radius_graph", "delaunay_graph"]