Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Changelog

All notable changes to this project will be documented in this file.

## [Unreleased]

### Added


### Fixed
- NumPy / Numba compatibility issue

---

## [0.1.0] - 2025-11-18

### Added
- COZI analysis with kNN, Delaunay, and radius-based neighborhood definitions

---

## [0.1.1] - 2026-02-09

### Added
- Optional `fixed_type` argument: the given cell type keeps its labels during permutations while all other cell types are shuffled
- Workflow automation

### Fixed
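- Integer overflow when computing the `np.bincount` pair indices for a large number of cell types (indices are now computed with 64-bit integers)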
2 changes: 1 addition & 1 deletion src/cozipy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from cozipy.nep_cozi import nep_analysis, run_cozi
from cozipy.neighbors import knn_graph, radius_graph, delaunay_graph

# Package version; keep in sync with the latest released entry in CHANGELOG.md.
__version__ = "0.1.1"
# Public API re-exported at package level.
__all__ = ["nep_analysis", "run_cozi", "knn_graph", "radius_graph", "delaunay_graph"]
18 changes: 7 additions & 11 deletions src/cozipy/neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,20 @@
from scipy.spatial import Delaunay, cKDTree
from scipy.sparse import coo_matrix


def knn_graph(coords, n_neighbors=6):
    """
    Memory-efficient, directed KNN graph built with cKDTree.

    Parameters
    ----------
    coords : array-like, shape (n_cells, n_dims)
        Spatial coordinates, one row per cell.
    n_neighbors : int
        Number of neighbors per cell. Capped at ``n_cells - 1`` because a
        cell cannot have more distinct neighbors than there are other cells.

    Returns
    -------
    scipy.sparse.coo_matrix, shape (n_cells, n_cells)
        Adjacency matrix with ``adj[i, j] == 1`` when ``j`` is one of the
        nearest neighbors of ``i``. The graph is directional (not
        symmetrized) and never contains self-loops.
    """
    coords = np.asarray(coords)
    n_cells = coords.shape[0]

    # Requesting more neighbors than exist makes cKDTree pad the result with
    # the out-of-range index ``n_cells``, which coo_matrix rejects.
    k = min(int(n_neighbors), n_cells - 1)
    if k <= 0:
        return coo_matrix((n_cells, n_cells), dtype=int)

    tree = cKDTree(coords)
    # Query k + 1 hits because each point is returned as its own neighbor.
    _, indices = tree.query(coords, k=k + 1)

    # Drop each point's own index wherever it appears. With coincident
    # coordinates the point itself is not guaranteed to be in column 0, so
    # blindly slicing ``indices[:, 1:]`` could keep a self-loop.
    own = np.arange(n_cells)[:, None]
    drop = indices == own
    # If a point is missing from its own hit list (many coincident points),
    # discard the farthest hit instead so every row keeps exactly k entries.
    no_self = ~drop.any(axis=1)
    drop[no_self, -1] = True

    rows = np.repeat(np.arange(n_cells), k)
    cols = indices[~drop]  # row-major order, k entries per row
    data = np.ones(len(rows), dtype=int)

    return coo_matrix((data, (rows, cols)), shape=(n_cells, n_cells))



def radius_graph(coords, radius):
Expand Down
84 changes: 69 additions & 15 deletions src/cozipy/nep_cozi.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# nep_cozi.py
import numpy as np
import pandas as pd
from scipy.sparse import coo_matrix
Expand All @@ -10,19 +9,29 @@ def _encode_labels(labels):
categories = labels.cat.categories.to_list()
return codes, categories

def _compute_pair_counts_and_denominators(adj, labels_int):

def _compute_pair_counts_and_denominators(adj, labels_int, n_types):
"""
Returns:
counts[A, B] = total edges from A to B
denom[A, B] = number of cells of type A having >=1 neighbor of type B

The indices fed to np.bincount are calculated using 64-bit integers
to prevent overflow on large numbers of cell types (n_types).
"""
i, j = adj.row, adj.col
labels_int = np.asarray(labels_int)
n_types = labels_int.max() + 1

if n_types == 0 or labels_int.size == 0:
return np.zeros((0, 0), dtype=int), np.zeros((0, 0), dtype=int)

# Use 64-bit integers for the calculation that previously overflowed a 32-bit integer.
labels_int_64 = labels_int.astype(np.int64)
n_types_64 = np.int64(n_types)

counts = np.bincount(
labels_int[i] * n_types + labels_int[j],
minlength=n_types * n_types
labels_int_64[i] * n_types_64 + labels_int_64[j],
minlength=n_types_64 * n_types_64
).reshape(n_types, n_types)

neigh_labels = labels_int[j]
Expand All @@ -38,18 +47,45 @@ def _compute_pair_counts_and_denominators(adj, labels_int):

return counts, denom

def nep_analysis(adj, labels, n_permutations=1000, random_state=None, return_df=True):

def nep_analysis(adj,
labels,
n_permutations=1000,
random_state=None,
return_df=True,
fixed_type=None):
"""
NEP analysis with conditional ratios and z-scores.

Automatically accepts string or integer labels.
Parameters
----------
STILL UNDER DEVELOPMENT IF MAKES SENSE
fixed_type : str or int or None
If provided, this cell type will remain fixed in permutations while
all other cell types are shuffled.
"""
rng = np.random.default_rng(random_state)

labels_int, label_names = _encode_labels(labels)
n_types = len(label_names)

obs_counts, obs_denom = _compute_pair_counts_and_denominators(adj, labels_int)
n_types = len(label_names) # SAFER calculation of n_types

# Ensure n_types > 0 before proceeding
if n_types == 0:
if return_df:
# Return empty dataframes if there are no cell types
idx = []
return {
"cond_ratio": pd.DataFrame([], index=idx, columns=idx),
"zscore": pd.DataFrame([], index=idx, columns=idx),
}
else:
return {
"cond_ratio": np.array([]).reshape(0, 0),
"zscore": np.array([]).reshape(0, 0),
}

# Pass the safely calculated n_types
obs_counts, obs_denom = _compute_pair_counts_and_denominators(
adj, labels_int, n_types)
obs_norm = obs_counts / np.maximum(obs_denom, 1)

cond_ratio = np.zeros((n_types, n_types), float)
Expand All @@ -58,9 +94,25 @@ def nep_analysis(adj, labels, n_permutations=1000, random_state=None, return_df=
cond_ratio[A] = obs_denom[A] / max(total_A, 1)

perm_norm = np.zeros((n_permutations, n_types, n_types), float)

# convert fixed_type name -> integer index if needed
if fixed_type is not None and isinstance(fixed_type, str):
fixed_type = label_names.index(fixed_type)

for k in range(n_permutations):
perm = rng.permutation(labels_int)
c, d = _compute_pair_counts_and_denominators(adj, perm)
if fixed_type is None:
perm = rng.permutation(labels_int)
else:
# Mask cells to remain unchanged vs permuted
fixed_mask = labels_int == fixed_type
other_mask = ~fixed_mask

# Copy original labels then permute only others
perm = labels_int.copy()
perm[other_mask] = rng.permutation(labels_int[other_mask])

# Pass the safely calculated n_types
c, d = _compute_pair_counts_and_denominators(adj, perm, n_types)
perm_norm[k] = c / np.maximum(d, 1)

expected = perm_norm.mean(axis=0)
Expand All @@ -81,14 +133,16 @@ def nep_analysis(adj, labels, n_permutations=1000, random_state=None, return_df=
"zscore": z,
}


def run_cozi(
coords,
labels,
nbh_def="knn",
n_neighbors=6,
radius=0.2,
n_permutations=100,
random_state=None
random_state=None,
fixed_type=None
):
"""
Runs NEP analysis with specified neighborhood definition.
Expand All @@ -105,4 +159,4 @@ def run_cozi(
raise ValueError(f"Unknown neighborhood definition: {nbh_def}")

# run NEP
return nep_analysis(adj, labels, n_permutations=n_permutations, random_state=random_state)
return nep_analysis(adj, labels, n_permutations=n_permutations, random_state=random_state, fixed_type=fixed_type)