SchapiroLabor · chiarasch · Feb 9, 2026 · Feb 9, 2026 · Feb 9, 2026 · Feb 9, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,29 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+## [Unreleased]
+
+### Added
+
+
+### Fixed
+- NumPy / Numba compatibility issue
+
+---
+
+## [0.1.0] - 2025-11-18
+
+### Added
+- COZI with knn, delaunay and radius nbh definition
+
+---
+
+## [0.1.1] - 2026-02-09
+
+### Added
+- Allow fixed cell types in permutation
+- Workflow automation
+
+### Fixed
+- Overflow issue
diff --git a/src/cozipy/__init__.py b/src/cozipy/__init__.py
@@ -1,5 +1,5 @@
 from cozipy.nep_cozi import nep_analysis, run_cozi
 from cozipy.neighbors import knn_graph, radius_graph, delaunay_graph
 
-__version__ = "0.1.0"
+__version__ = "0.1.1"
 __all__ = ["nep_analysis", "run_cozi", "knn_graph", "radius_graph", "delaunay_graph"]
diff --git a/src/cozipy/neighbors.py b/src/cozipy/neighbors.py
@@ -2,24 +2,20 @@
 from scipy.spatial import Delaunay, cKDTree
 from scipy.sparse import coo_matrix
 
-
 def knn_graph(coords, n_neighbors=6):
     """
-    Lightweight KNN graph using full distance matrix (NumPy).
-    No sklearn dependency. Directional.
+    Memory-efficient KNN graph using cKDTree.
+    Returns COO adjacency matrix.
     """
-    diff = coords[:, None, :] - coords[None, :, :]
-    dist = np.sum(diff ** 2, axis=2)
-
-    idx = np.argsort(dist, axis=1)
-    neighbors = idx[:, 1:n_neighbors + 1]
+    tree = cKDTree(coords)
+    distances, indices = tree.query(coords, k=n_neighbors+1)  
 
     rows = np.repeat(np.arange(coords.shape[0]), n_neighbors)
-    cols = neighbors.flatten()
+    cols = indices[:, 1:].flatten()   
     data = np.ones(len(rows), dtype=int)
 
-    return coo_matrix((data, (rows, cols)),
-                      shape=(coords.shape[0], coords.shape[0]))
+    return coo_matrix((data, (rows, cols)), shape=(coords.shape[0], coords.shape[0]))
+
 
 
 def radius_graph(coords, radius):

diff --git a/src/cozipy/nep_cozi.py b/src/cozipy/nep_cozi.py
@@ -1,4 +1,3 @@
-# nep_cozi.py
 import numpy as np
 import pandas as pd
 from scipy.sparse import coo_matrix
@@ -10,19 +9,29 @@ def _encode_labels(labels):
     categories = labels.cat.categories.to_list()
     return codes, categories
 
-def _compute_pair_counts_and_denominators(adj, labels_int):
+
+def _compute_pair_counts_and_denominators(adj, labels_int, n_types):
     """
     Returns:
         counts[A, B] = total edges from A to B
         denom[A, B]  = number of cells of type A having >=1 neighbor of type B
+
+    The indices fed to np.bincount are calculated using 64-bit integers 
+    to prevent overflow on large numbers of cell types (n_types).
     """
     i, j = adj.row, adj.col
     labels_int = np.asarray(labels_int)
-    n_types = labels_int.max() + 1
+
+    if n_types == 0 or labels_int.size == 0:
+        return np.zeros((0, 0), dtype=int), np.zeros((0, 0), dtype=int)
+
+    # Use 64-bit integers for the calculation that previously overflowed a 32-bit integer.
+    labels_int_64 = labels_int.astype(np.int64)
+    n_types_64 = np.int64(n_types)
 
     counts = np.bincount(
-        labels_int[i] * n_types + labels_int[j],
-        minlength=n_types * n_types
+        labels_int_64[i] * n_types_64 + labels_int_64[j],
+        minlength=n_types_64 * n_types_64
     ).reshape(n_types, n_types)
 
     neigh_labels = labels_int[j]
@@ -38,18 +47,45 @@ def _compute_pair_counts_and_denominators(adj, labels_int):
 
     return counts, denom
 
-def nep_analysis(adj, labels, n_permutations=1000, random_state=None, return_df=True):
+
+def nep_analysis(adj,
+                 labels,
+                 n_permutations=1000,
+                 random_state=None,
+                 return_df=True,
+                 fixed_type=None):
     """
     NEP analysis with conditional ratios and z-scores.
 
-    Automatically accepts string or integer labels.
+    Parameters
+    ----------
+    STILL UNDER DEVELOPMENT IF MAKES SENSE
+    fixed_type : str or int or None
+        If provided, this cell type will remain fixed in permutations while
+        all other cell types are shuffled.
     """
     rng = np.random.default_rng(random_state)
-
     labels_int, label_names = _encode_labels(labels)
-    n_types = len(label_names)
-
-    obs_counts, obs_denom = _compute_pair_counts_and_denominators(adj, labels_int)
+    n_types = len(label_names)  # SAFER calculation of n_types
+
+    # Ensure n_types > 0 before proceeding
+    if n_types == 0:
+        if return_df:
+            # Return empty dataframes if there are no cell types
+            idx = []
+            return {
+                "cond_ratio": pd.DataFrame([], index=idx, columns=idx),
+                "zscore": pd.DataFrame([], index=idx, columns=idx),
+            }
+        else:
+            return {
+                "cond_ratio": np.array([]).reshape(0, 0),
+                "zscore": np.array([]).reshape(0, 0),
+            }
+
+    # Pass the safely calculated n_types
+    obs_counts, obs_denom = _compute_pair_counts_and_denominators(
+        adj, labels_int, n_types)
     obs_norm = obs_counts / np.maximum(obs_denom, 1)
 
     cond_ratio = np.zeros((n_types, n_types), float)
@@ -58,9 +94,25 @@ def nep_analysis(adj, labels, n_permutations=1000, random_state=None, return_df=
         cond_ratio[A] = obs_denom[A] / max(total_A, 1)
 
     perm_norm = np.zeros((n_permutations, n_types, n_types), float)
+
+    # convert fixed_type name -> integer index if needed
+    if fixed_type is not None and isinstance(fixed_type, str):
+        fixed_type = label_names.index(fixed_type)
+
     for k in range(n_permutations):
-        perm = rng.permutation(labels_int)
-        c, d = _compute_pair_counts_and_denominators(adj, perm)
+        if fixed_type is None:
+            perm = rng.permutation(labels_int)
+        else:
+            # Mask cells to remain unchanged vs permuted
+            fixed_mask = labels_int == fixed_type
+            other_mask = ~fixed_mask
+
+            # Copy original labels then permute only others
+            perm = labels_int.copy()
+            perm[other_mask] = rng.permutation(labels_int[other_mask])
+
+        # Pass the safely calculated n_types
+        c, d = _compute_pair_counts_and_denominators(adj, perm, n_types)
         perm_norm[k] = c / np.maximum(d, 1)
 
     expected = perm_norm.mean(axis=0)
@@ -81,14 +133,16 @@ def nep_analysis(adj, labels, n_permutations=1000, random_state=None, return_df=
             "zscore": z,
         }
 
+
 def run_cozi(
     coords,
     labels,
     nbh_def="knn",
     n_neighbors=6,
     radius=0.2,
     n_permutations=100,
-    random_state=None
+    random_state=None,
+    fixed_type=None
 ):
     """
     Runs NEP analysis with specified neighborhood definition.
@@ -105,4 +159,4 @@ def run_cozi(
         raise ValueError(f"Unknown neighborhood definition: {nbh_def}")
 
     # run NEP
-    return nep_analysis(adj, labels, n_permutations=n_permutations, random_state=random_state)
+    return nep_analysis(adj, labels, n_permutations=n_permutations, random_state=random_state, fixed_type=fixed_type)