From 6ce91510d2749906cdc9fed4174e078871c1ea46 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Fri, 28 Mar 2025 15:39:37 +0100 Subject: [PATCH 01/37] mvp --- pyproject.toml | 2 + src/squidpy/__init__.py | 2 +- src/squidpy/exp/__init__.py | 11 ++ src/squidpy/exp/_feature.py | 224 ++++++++++++++++++++++++++++++++++++ 4 files changed, 238 insertions(+), 1 deletion(-) create mode 100644 src/squidpy/exp/__init__.py create mode 100644 src/squidpy/exp/_feature.py diff --git a/pyproject.toml b/pyproject.toml index 1143cab94..cd4e7a665 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,8 @@ dependencies = [ "xarray>=2024.10.0", "zarr>=2.6.1,<3.0.0", "spatialdata>=0.2.5", + "centrosome>=1.2.3", + "cp-measure>=0.1.4" ] [project.optional-dependencies] diff --git a/src/squidpy/__init__.py b/src/squidpy/__init__.py index 5fb2b848a..cfba804fd 100644 --- a/src/squidpy/__init__.py +++ b/src/squidpy/__init__.py @@ -3,7 +3,7 @@ from importlib import metadata from importlib.metadata import PackageMetadata -from squidpy import datasets, gr, im, pl, read, tl +from squidpy import datasets, exp, gr, im, pl, read, tl try: md: PackageMetadata = metadata.metadata(__name__) diff --git a/src/squidpy/exp/__init__.py b/src/squidpy/exp/__init__.py new file mode 100644 index 000000000..2dc08d915 --- /dev/null +++ b/src/squidpy/exp/__init__.py @@ -0,0 +1,11 @@ +"""Experimental module for Squidpy. + +This module contains experimental features that are still under development. +These features may change or be removed in future releases. +""" + +from __future__ import annotations + +from squidpy.exp._feature import calculate_image_features + +__all__ = ["calculate_image_features"] diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py new file mode 100644 index 000000000..42fca34a6 --- /dev/null +++ b/src/squidpy/exp/_feature.py @@ -0,0 +1,224 @@ +"""Experimental feature extraction module.""" + +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any +import warnings +import anndata as ad +import numpy as np +import pandas as pd +from cp_measure.bulk import get_core_measurements +from spatialdata._logging import logger as logg +from spatialdata import SpatialData + +from squidpy._constants._constants import ImageFeature +from squidpy._docs import d, inject_docs +from squidpy._utils import Signal, _get_n_cores, parallelize +from spatialdata.models import TableModel +__all__ = ["calculate_image_features"] + + +@d.dedent +@inject_docs(f=ImageFeature) +def calculate_image_features( + sdata: SpatialData, + labels_key: str, + image_key: str, + adata_key_added: str = "morphology", + invalid_as_zero: bool = True, + n_jobs: int | None = None, + backend: str = "loky", + show_progress_bar: bool = True, +) -> pd.DataFrame | None: + """ + Calculate features from segmentation masks using CellProfiler measurements. + + This function uses the `cp_measure` package to extract features from + segmentation masks. It supports both basic shape features and + intensity-based features if an intensity image is provided. + + Parameters + ---------- + sdata + The spatial data object containing the segmentation masks. + labels_key + Key in :attr:`spatialdata.SpatialData.labels` containing the + segmentation masks. + image_key + Key in :attr:`spatialdata.SpatialData.images` containing the + intensity image. + adata_key_added + Key to store the AnnData object in the SpatialData object. + %(parallelize)s + + Returns + ------- + A :class:`pandas.DataFrame` with the calculated features. If the image has + multiple channels, features are calculated for each channel separately and + channel names are appended to the feature names. + + Notes + ----- + This is an experimental feature that requires the `cp_measure` package + to be installed. + """ + # Get the image and labels + image = np.asarray(sdata.images[image_key].compute()) + labels = np.asarray(sdata.labels[labels_key].compute()) + + # Check if labels are empty + if labels.size == 0: + raise ValueError("Labels array is empty") + + max_label = int(labels.max()) + if max_label == 0: + raise ValueError("No cells found in labels (max label is 0)") + + # Get channel names if available + channel_names = None + if ( + hasattr(sdata.images[image_key], "coords") + and "c" in sdata.images[image_key].coords + ): + channel_names = sdata.images[image_key].coords["c"].values + + # Handle image dimensions + if image.ndim == 2: + image = image[None, :, :] # Add channel dimension + elif image.ndim != 3: + raise ValueError(f"Expected 2D or 3D image, got shape {image.shape}") + + # Check if image and labels have matching dimensions + if image.shape[1:] != labels.shape: + raise ValueError( + f"Image and labels have mismatched dimensions: " + f"image {image.shape[1:]}, labels {labels.shape}" + ) + + # Get core measurements from cp_measure + measurements = get_core_measurements() + + # Process each channel + all_features = [] + n_channels = image.shape[0] + n_jobs = _get_n_cores(n_jobs) + + for ch_idx in range(n_channels): + ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" + + logg.info( + f"Calculating features for channel '{ch_idx}' " f"using '{n_jobs}' core(s)" + ) + + ch_image = image[ch_idx] + + # Get cell IDs + cell_ids = range(1, max_label + 1) + + # Parallelize feature calculation + res = parallelize( + _calculate_image_features_helper, + collection=cell_ids, + extractor=pd.concat, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar, + )(labels, ch_image, measurements, ch_name) + + all_features.append(res) + + # Create AnnData object from results + combined_features = pd.concat(all_features, axis=1) + if invalid_as_zero: + combined_features = combined_features.replace([np.inf, -np.inf], 0) + combined_features = combined_features.fillna(0) + adata = ad.AnnData(X=combined_features) + adata.obs_names = [f"cell_{i}" for i in range(1, max_label + 1)] + adata.var_names = combined_features.columns + + adata.uns["spatialdata_attrs"] = { + "region": labels_key, + "region_key": "region", + "instance_key": "label_id", + } + adata.obs["region"] = pd.Categorical([labels_key] * len(adata)) + adata.obs["label_id"] = range(1, max_label + 1) + # adata.obs[["region", "spot_id"]] + + # Add the AnnData object to the SpatialData object + sdata.tables[adata_key_added] = TableModel.parse(adata) + + # Combine features from all channels + # return pd.concat(all_features, axis=1) + + +def _calculate_image_features_helper( + cell_ids: Sequence[int], + labels: np.ndarray, + image: np.ndarray, + measurements: dict[str, Any], + channel_name: str | None = None, + queue: Any | None = None, +) -> pd.DataFrame: + """Helper function to calculate features for a subset of cells.""" + features_list = [] + for cell_id in cell_ids: + # Get cell mask + cell_mask = (labels == cell_id).astype(np.uint8) + + # Find bounding box of the cell + y_indices, x_indices = np.where(cell_mask) + if len(y_indices) == 0: # Skip empty cells + continue + + y_min, y_max = y_indices.min(), y_indices.max() + x_min, x_max = x_indices.min(), x_indices.max() + + # Add padding to ensure we capture the full cell + pad = 5 + y_min = max(0, y_min - pad) + y_max = min(labels.shape[0], y_max + pad) + x_min = max(0, x_min - pad) + x_max = min(labels.shape[1], x_max + pad) + + # Crop both mask and image to the bounding box + cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] + image_cropped = image[y_min:y_max, x_min:x_max] + + cell_features = {} + # Calculate all available features + for name, func in measurements.items(): + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + feature_dict = func(cell_mask_cropped, image_cropped) + # Convert numpy arrays to scalars + feature_dict = { + k: ( + float(v[0]) + if isinstance(v, np.ndarray) and v.size == 1 + else v + ) + for k, v in feature_dict.items() + } + # Append channel name to feature names + feature_dict = { + f"{k}_ch{channel_name}": v for k, v in feature_dict.items() + } + cell_features.update(feature_dict) + except Exception as e: + logg.warning( + f"Failed to calculate {name} features for cell {cell_id}: " + f"{str(e)}" + ) + + features_list.append(cell_features) + + if queue is not None: + queue.put(Signal.UPDATE) + + if queue is not None: + queue.put(Signal.FINISH) + + return pd.DataFrame(features_list, index=cell_ids) From 3a82ac8e207c37a4225fca51b6555dac78fd6fe5 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Fri, 28 Mar 2025 17:46:33 +0100 Subject: [PATCH 02/37] performance improvement --- src/squidpy/exp/_feature.py | 351 ++++++++++++++++++++++++++---------- 1 file changed, 255 insertions(+), 96 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 42fca34a6..5f7d7b0b3 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -3,33 +3,215 @@ from __future__ import annotations from collections.abc import Sequence -from typing import Any +from typing import Any, Callable import warnings import anndata as ad import numpy as np import pandas as pd -from cp_measure.bulk import get_core_measurements +from cp_measure.bulk import get_core_measurements, get_correlation_measurements from spatialdata._logging import logger as logg from spatialdata import SpatialData +import xarray as xr from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, _get_n_cores, parallelize from spatialdata.models import TableModel + __all__ = ["calculate_image_features"] +def _measurement_wrapper( + func: Callable, + mask: np.ndarray, + image1: np.ndarray, + image2: np.ndarray | None = None, +) -> dict[str, Any]: + """Wrapper function to handle both core and correlation measurements. + + Parameters + ---------- + func + The measurement function to call + mask + The cell mask + image1 + First image (or only image for core measurements) + image2 + Second image for correlation measurements. If None, this is a core + measurement. + + Returns + ------- + Dictionary of feature values + """ + return func(mask, image1) if image2 is None else func(image1, image2, mask) + + +def _calculate_features_helper( + cell_ids: Sequence[int], + labels: np.ndarray, + image1: np.ndarray, + image2: np.ndarray | None, + measurements: dict[str, Any], + channel1_name: str | None = None, + channel2_name: str | None = None, + queue: Any | None = None, + verbose: bool = False, +) -> pd.DataFrame: + """Helper function to calculate features for a subset of cells.""" + features_dict = {} + + # Pre-allocate arrays for type conversion + uint8_features = [ + "radial_distribution", + "radial_zernikes", + "intensity", + "sizeshape", + "zernike", + "ferret", + ] + float_features = ["manders_fold", "rwc"] + + # Pre-compute image normalization if needed + if "texture" in measurements: + img1_min = image1.min() + img1_max = image1.max() + img1_range = img1_max - img1_min + 1e-10 + if image2 is not None: + img2_min = image2.min() + img2_max = image2.max() + img2_range = img2_max - img2_min + 1e-10 + + for cell_id in cell_ids: + # Get cell mask and find bounding box in one step + cell_mask = labels == cell_id + y_indices, x_indices = np.where(cell_mask) + if len(y_indices) == 0: # Skip empty cells + continue + + # Get bounding box with padding + y_min, y_max = y_indices.min(), y_indices.max() + x_min, x_max = x_indices.min(), x_indices.max() + pad = 5 + y_min = max(0, y_min - pad) + y_max = min(labels.shape[0], y_max + pad) + x_min = max(0, x_min - pad) + x_max = min(labels.shape[1], x_max + pad) + + # Crop all arrays at once + cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] + image1_cropped = image1[y_min:y_max, x_min:x_max] + image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max] + + # Quick shape check + if cell_mask_cropped.shape != image1_cropped.shape or ( + image2_cropped is not None + and cell_mask_cropped.shape != image2_cropped.shape + ): + if verbose: + logg.warning( + f"Shape mismatch for cell {cell_id}: " + f"mask {cell_mask_cropped.shape}, " + f"image1 {image1_cropped.shape}, " + f"image2 {image2_cropped.shape if image2_cropped is not None else 'None'}" + ) + continue + + cell_features = {} + # Calculate all available features + for name, func in measurements.items(): + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + + # Pre-convert inputs based on feature type + if name in uint8_features: + mask = cell_mask_cropped.astype(np.uint8) + img1 = image1_cropped.astype(np.uint8) + img2 = ( + None + if image2_cropped is None + else image2_cropped.astype(np.uint8) + ) + elif name == "texture": + mask = cell_mask_cropped.astype(np.uint8) + img1 = ( + image1_cropped.astype(np.float32) - img1_min + ) / img1_range + img2 = ( + None + if image2_cropped is None + else (image2_cropped.astype(np.float32) - img2_min) + / img2_range + ) + elif name in float_features: + mask = cell_mask_cropped.astype(np.float32) + img1 = image1_cropped.astype(np.float32) + img2 = ( + None + if image2_cropped is None + else image2_cropped.astype(np.float32) + ) + else: + mask = cell_mask_cropped.astype(np.float32) + img1 = image1_cropped.astype(np.float32) + img2 = ( + None + if image2_cropped is None + else image2_cropped.astype(np.float32) + ) + + feature_dict = _measurement_wrapper(func, mask, img1, img2) + + for k, v in feature_dict.items(): + if len(v) > 1: + raise ValueError(f"Feature {k} has more than one value.") + else: + feature_dict[k] = float(v[0]) + + # Append channel names efficiently + if image2 is None: + feature_dict = { + f"{k}_ch{channel1_name}": v for k, v in feature_dict.items() + } + else: + feature_dict = { + f"{k}_ch{channel1_name}_ch{channel2_name}": v + for k, v in feature_dict.items() + } + + cell_features.update(feature_dict) + except Exception as e: + if verbose: + logg.warning( + f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}" + ) + + features_dict[cell_id] = cell_features + + if queue is not None: + queue.put(Signal.UPDATE) + + if queue is not None: + queue.put(Signal.FINISH) + + return pd.DataFrame.from_dict(features_dict, orient="index") + + @d.dedent @inject_docs(f=ImageFeature) def calculate_image_features( sdata: SpatialData, labels_key: str, image_key: str, + scale: str | None = None, adata_key_added: str = "morphology", invalid_as_zero: bool = True, n_jobs: int | None = None, backend: str = "loky", show_progress_bar: bool = True, + verbose: bool = False, ) -> pd.DataFrame | None: """ Calculate features from segmentation masks using CellProfiler measurements. @@ -63,9 +245,23 @@ def calculate_image_features( This is an experimental feature that requires the `cp_measure` package to be installed. """ - # Get the image and labels - image = np.asarray(sdata.images[image_key].compute()) - labels = np.asarray(sdata.labels[labels_key].compute()) + + if ( + (isinstance(sdata.images[image_key], xr.DataTree) + or isinstance(sdata.labels[labels_key], xr.DataTree)) + and scale is None + ): + raise ValueError("When using multi-scale data, please specify the scale.") + + if scale is not None and not isinstance(scale, str): + raise ValueError("Scale must be a string.") + + if scale is not None: + image = np.asarray(sdata.images[image_key][scale].image.compute()) + labels = np.asarray(sdata.labels[labels_key][scale].image.compute()) + else: + image = np.asarray(sdata.images[image_key].compute()) + labels = np.asarray(sdata.labels[labels_key].compute()) # Check if labels are empty if labels.size == 0: @@ -97,128 +293,91 @@ def calculate_image_features( ) # Get core measurements from cp_measure - measurements = get_core_measurements() + measurements_core = get_core_measurements() + measurements_corr = get_correlation_measurements() + + # Get unique cell IDs from labels, excluding background (0) + cell_ids = np.unique(labels) + cell_ids = cell_ids[cell_ids != 0] # Process each channel all_features = [] n_channels = image.shape[0] n_jobs = _get_n_cores(n_jobs) - for ch_idx in range(n_channels): - ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" + logg.info(f"Using '{n_jobs}' core(s).") - logg.info( - f"Calculating features for channel '{ch_idx}' " f"using '{n_jobs}' core(s)" - ) + # First process core measurements for each channel + for ch_idx in range(n_channels): + logg.info(f"Calculating core features for channel '{ch_idx}'.") + ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" ch_image = image[ch_idx] - # Get cell IDs - cell_ids = range(1, max_label + 1) - - # Parallelize feature calculation res = parallelize( - _calculate_image_features_helper, + _calculate_features_helper, collection=cell_ids, extractor=pd.concat, n_jobs=n_jobs, backend=backend, show_progress_bar=show_progress_bar, - )(labels, ch_image, measurements, ch_name) + verbose=verbose, + )(labels, ch_image, None, measurements_core, ch_name) all_features.append(res) + # Then process correlation measurements between channels + for ch1_idx in range(n_channels): + for ch2_idx in range(ch1_idx + 1, n_channels): + ch1_name = ( + channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}" + ) + ch2_name = ( + channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}" + ) + + logg.info( + f"Calculating correlation features between channels " + f"'{ch1_name}' and '{ch2_name}'." + ) + + ch1_image = image[ch1_idx] + ch2_image = image[ch2_idx] + + # Parallelize feature calculation + res = parallelize( + _calculate_features_helper, + collection=cell_ids, + extractor=pd.concat, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar, + verbose=verbose, + )(labels, ch1_image, ch2_image, measurements_corr, ch1_name, ch2_name) + + all_features.append(res) + # Create AnnData object from results combined_features = pd.concat(all_features, axis=1) if invalid_as_zero: combined_features = combined_features.replace([np.inf, -np.inf], 0) combined_features = combined_features.fillna(0) + + # Ensure cell IDs are preserved in the correct order + cell_ids = sorted(combined_features.index) + combined_features = combined_features.loc[cell_ids] + adata = ad.AnnData(X=combined_features) - adata.obs_names = [f"cell_{i}" for i in range(1, max_label + 1)] + adata.obs_names = [f"cell_{i}" for i in cell_ids] adata.var_names = combined_features.columns adata.uns["spatialdata_attrs"] = { "region": labels_key, - "region_key": "region", - "instance_key": "label_id", + "region_key": "region", + "instance_key": "label_id", } adata.obs["region"] = pd.Categorical([labels_key] * len(adata)) - adata.obs["label_id"] = range(1, max_label + 1) - # adata.obs[["region", "spot_id"]] + adata.obs["label_id"] = cell_ids # Add the AnnData object to the SpatialData object sdata.tables[adata_key_added] = TableModel.parse(adata) - - # Combine features from all channels - # return pd.concat(all_features, axis=1) - - -def _calculate_image_features_helper( - cell_ids: Sequence[int], - labels: np.ndarray, - image: np.ndarray, - measurements: dict[str, Any], - channel_name: str | None = None, - queue: Any | None = None, -) -> pd.DataFrame: - """Helper function to calculate features for a subset of cells.""" - features_list = [] - for cell_id in cell_ids: - # Get cell mask - cell_mask = (labels == cell_id).astype(np.uint8) - - # Find bounding box of the cell - y_indices, x_indices = np.where(cell_mask) - if len(y_indices) == 0: # Skip empty cells - continue - - y_min, y_max = y_indices.min(), y_indices.max() - x_min, x_max = x_indices.min(), x_indices.max() - - # Add padding to ensure we capture the full cell - pad = 5 - y_min = max(0, y_min - pad) - y_max = min(labels.shape[0], y_max + pad) - x_min = max(0, x_min - pad) - x_max = min(labels.shape[1], x_max + pad) - - # Crop both mask and image to the bounding box - cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] - image_cropped = image[y_min:y_max, x_min:x_max] - - cell_features = {} - # Calculate all available features - for name, func in measurements.items(): - try: - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - feature_dict = func(cell_mask_cropped, image_cropped) - # Convert numpy arrays to scalars - feature_dict = { - k: ( - float(v[0]) - if isinstance(v, np.ndarray) and v.size == 1 - else v - ) - for k, v in feature_dict.items() - } - # Append channel name to feature names - feature_dict = { - f"{k}_ch{channel_name}": v for k, v in feature_dict.items() - } - cell_features.update(feature_dict) - except Exception as e: - logg.warning( - f"Failed to calculate {name} features for cell {cell_id}: " - f"{str(e)}" - ) - - features_list.append(cell_features) - - if queue is not None: - queue.put(Signal.UPDATE) - - if queue is not None: - queue.put(Signal.FINISH) - - return pd.DataFrame(features_list, index=cell_ids) From f734634ea86167fdeb093190ca18c28f38e1cf35 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 28 Mar 2025 16:47:29 +0000 Subject: [PATCH 03/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/exp/_feature.py | 83 ++++++++++--------------------------- 1 file changed, 23 insertions(+), 60 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 5f7d7b0b3..cf7adbea3 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -2,21 +2,22 @@ from __future__ import annotations -from collections.abc import Sequence -from typing import Any, Callable import warnings +from collections.abc import Callable, Sequence +from typing import Any + import anndata as ad import numpy as np import pandas as pd +import xarray as xr from cp_measure.bulk import get_core_measurements, get_correlation_measurements -from spatialdata._logging import logger as logg from spatialdata import SpatialData -import xarray as xr +from spatialdata._logging import logger as logg +from spatialdata.models import TableModel from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, _get_n_cores, parallelize -from spatialdata.models import TableModel __all__ = ["calculate_image_features"] @@ -106,8 +107,7 @@ def _calculate_features_helper( # Quick shape check if cell_mask_cropped.shape != image1_cropped.shape or ( - image2_cropped is not None - and cell_mask_cropped.shape != image2_cropped.shape + image2_cropped is not None and cell_mask_cropped.shape != image2_cropped.shape ): if verbose: logg.warning( @@ -129,38 +129,23 @@ def _calculate_features_helper( if name in uint8_features: mask = cell_mask_cropped.astype(np.uint8) img1 = image1_cropped.astype(np.uint8) - img2 = ( - None - if image2_cropped is None - else image2_cropped.astype(np.uint8) - ) + img2 = None if image2_cropped is None else image2_cropped.astype(np.uint8) elif name == "texture": mask = cell_mask_cropped.astype(np.uint8) - img1 = ( - image1_cropped.astype(np.float32) - img1_min - ) / img1_range + img1 = (image1_cropped.astype(np.float32) - img1_min) / img1_range img2 = ( None if image2_cropped is None - else (image2_cropped.astype(np.float32) - img2_min) - / img2_range + else (image2_cropped.astype(np.float32) - img2_min) / img2_range ) elif name in float_features: mask = cell_mask_cropped.astype(np.float32) img1 = image1_cropped.astype(np.float32) - img2 = ( - None - if image2_cropped is None - else image2_cropped.astype(np.float32) - ) + img2 = None if image2_cropped is None else image2_cropped.astype(np.float32) else: mask = cell_mask_cropped.astype(np.float32) img1 = image1_cropped.astype(np.float32) - img2 = ( - None - if image2_cropped is None - else image2_cropped.astype(np.float32) - ) + img2 = None if image2_cropped is None else image2_cropped.astype(np.float32) feature_dict = _measurement_wrapper(func, mask, img1, img2) @@ -172,21 +157,14 @@ def _calculate_features_helper( # Append channel names efficiently if image2 is None: - feature_dict = { - f"{k}_ch{channel1_name}": v for k, v in feature_dict.items() - } + feature_dict = {f"{k}_ch{channel1_name}": v for k, v in feature_dict.items()} else: - feature_dict = { - f"{k}_ch{channel1_name}_ch{channel2_name}": v - for k, v in feature_dict.items() - } + feature_dict = {f"{k}_ch{channel1_name}_ch{channel2_name}": v for k, v in feature_dict.items()} cell_features.update(feature_dict) except Exception as e: if verbose: - logg.warning( - f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}" - ) + logg.warning(f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}") features_dict[cell_id] = cell_features @@ -247,10 +225,8 @@ def calculate_image_features( """ if ( - (isinstance(sdata.images[image_key], xr.DataTree) - or isinstance(sdata.labels[labels_key], xr.DataTree)) - and scale is None - ): + isinstance(sdata.images[image_key], xr.DataTree) or isinstance(sdata.labels[labels_key], xr.DataTree) + ) and scale is None: raise ValueError("When using multi-scale data, please specify the scale.") if scale is not None and not isinstance(scale, str): @@ -273,10 +249,7 @@ def calculate_image_features( # Get channel names if available channel_names = None - if ( - hasattr(sdata.images[image_key], "coords") - and "c" in sdata.images[image_key].coords - ): + if hasattr(sdata.images[image_key], "coords") and "c" in sdata.images[image_key].coords: channel_names = sdata.images[image_key].coords["c"].values # Handle image dimensions @@ -287,10 +260,7 @@ def calculate_image_features( # Check if image and labels have matching dimensions if image.shape[1:] != labels.shape: - raise ValueError( - f"Image and labels have mismatched dimensions: " - f"image {image.shape[1:]}, labels {labels.shape}" - ) + raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}") # Get core measurements from cp_measure measurements_core = get_core_measurements() @@ -329,17 +299,10 @@ def calculate_image_features( # Then process correlation measurements between channels for ch1_idx in range(n_channels): for ch2_idx in range(ch1_idx + 1, n_channels): - ch1_name = ( - channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}" - ) - ch2_name = ( - channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}" - ) - - logg.info( - f"Calculating correlation features between channels " - f"'{ch1_name}' and '{ch2_name}'." - ) + ch1_name = channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}" + ch2_name = channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}" + + logg.info(f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'.") ch1_image = image[ch1_idx] ch2_image = image[ch2_idx] From 9fb24b407f8680cb743535c7467f0410ccde55ec Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Fri, 28 Mar 2025 22:38:35 +0100 Subject: [PATCH 04/37] added skimage features --- src/squidpy/exp/_feature.py | 457 ++++++++++++++++++++++++++++++------ 1 file changed, 384 insertions(+), 73 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 5f7d7b0b3..5ef09b20a 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -2,25 +2,199 @@ from __future__ import annotations -from collections.abc import Sequence -from typing import Any, Callable import warnings +from collections.abc import Callable, Sequence +from typing import Any + import anndata as ad +import itertools import numpy as np import pandas as pd +import xarray as xr from cp_measure.bulk import get_core_measurements, get_correlation_measurements -from spatialdata._logging import logger as logg +from scipy import ndimage +from skimage import measure from spatialdata import SpatialData -import xarray as xr +from spatialdata._logging import logger as logg +from spatialdata.models import TableModel +from skimage.measure import label from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, _get_n_cores, parallelize -from spatialdata.models import TableModel __all__ = ["calculate_image_features"] +def _get_regionprops_features( + cell_ids: Sequence[int], + labels: np.ndarray, + intensity_image: np.ndarray | None = None, + queue: Any | None = None, +) -> dict[str, float]: + """Calculate regionprops features for a cell. + + Parameters + ---------- + cell_id + The ID of the cell to process + labels + The labels array containing cell masks + intensity_image + Optional intensity image for intensity-based features + queue + Optional queue for progress tracking. If provided, will send update signals. + + Returns + ------- + Dictionary of regionprops features + """ + # Define channel-independent properties (only need mask) + mask_props = { + "area", + "area_filled", + "area_convex", + "num_pixels", + "axis_major_length", + "axis_minor_length", + "eccentricity", + "equivalent_diameter", + "extent", + "feret_diameter_max", + "solidity", + "euler_number", + "centroid", + "centroid_local", + "perimeter", + "perimeter_crofton", + "inertia_tensor", + "inertia_tensor_eigvals", + } + + # Define channel-dependent properties (need intensity image) + intensity_props = { + "intensity_max", + "intensity_mean", + "intensity_min", + "intensity_std", + } + + features = {} + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + + # labels only (channel independent) + if intensity_image is None: + for cell_id in cell_ids: + cell_mask_cropped, _, _ = _get_cell_crops( + cell_id=cell_id, + labels=labels, + ) + + if cell_mask_cropped is None: + continue + + region_prop = measure.regionprops(label_image=label(cell_mask_cropped)) + + if not region_prop: + continue + + cell_features = {} + + # Calculate regionprops features while ignoring warnings + for prop in mask_props: + try: + value = getattr(region_prop, prop) + + # Handle array-like properties + if isinstance(value, (np.ndarray, list, tuple)): + value = np.array(value) + if value.ndim == 1: + for i, v in enumerate(value): + cell_features[f"{prop}_{i}"] = float(v) + elif value.ndim == 2: + for i, j in itertools.product( + range(value.shape[0]), range(value.shape[1]) + ): + cell_features[f"{prop}_{i}x{j}"] = float( + value[i, j] + ) + else: + cell_features[prop] = value + else: + cell_features[prop] = float(value) + except Exception: + continue + + if queue is not None: + queue.put(Signal.UPDATE) + + features[cell_id] = cell_features + + # Calculate intensity-dependent properties if intensity image is provided + else: + for cell_id in cell_ids: + cell_mask_cropped, intensity_image_cropped, _ = _get_cell_crops( + cell_id=cell_id, + labels=labels, + image1=intensity_image, + ) + + if cell_mask_cropped is None: + continue + + intensity_props_obj = measure.regionprops( + label_image=label(cell_mask_cropped), + intensity_image=intensity_image_cropped, + ) + + if not intensity_props_obj: + continue + + cell_features = {} + + for prop in intensity_props: + try: + value = getattr(intensity_props_obj, prop) + + # Skip callable properties + if callable(value): + continue + + # Handle array properties + if isinstance(value, (np.ndarray, list, tuple)): + value = np.array(value) + + if value.ndim == 1: + for i, v in enumerate(value): + cell_features[f"{prop}_{i}"] = float(v) + elif value.ndim == 2: + for i, j in itertools.product( + range(value.shape[0]), range(value.shape[1]) + ): + cell_features[f"{prop}_{i}x{j}"] = float( + value[i, j] + ) + else: + cell_features[prop] = value + else: + cell_features[prop] = float(value) + + except Exception: + continue + + if queue is not None: + queue.put(Signal.UPDATE) + + features[cell_id] = cell_features + + if queue is not None: + queue.put(Signal.FINISH) + + return pd.DataFrame.from_dict(features, orient="index") + + def _measurement_wrapper( func: Callable, mask: np.ndarray, @@ -48,6 +222,79 @@ def _measurement_wrapper( return func(mask, image1) if image2 is None else func(image1, image2, mask) +def _get_cell_crops( + cell_id: int, + labels: np.ndarray, + image1: np.ndarray | None = None, + image2: np.ndarray | None = None, + pad: int = 1, + verbose: bool = False, +) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None: + """Generator function to get cropped arrays for a cell. + + Parameters + ---------- + cell_id + The ID of the cell to process + labels + The labels array containing cell masks + image1 + First image to crop + image2 + Optional second image to crop + pad + Amount of padding to add around the cell + verbose + Whether to print warning messages + + Returns + ------- + Tuple of (cell_mask_cropped, image1_cropped, image2_cropped) or None if cell is empty + """ + # Get cell mask and find bounding box in one step + cell_mask = labels == cell_id + y_indices, x_indices = np.where(cell_mask) + if len(y_indices) == 0: # Skip empty cells + return None + + # Get bounding box + y_min, y_max = y_indices.min(), y_indices.max() + x_min, x_max = x_indices.min(), x_indices.max() + + # Get image dimensions + height, width = labels.shape + + # Calculate desired padding + y_pad_min = min(pad, y_min) # How much we can pad to the top + y_pad_max = min(pad, height - y_max - 1) # How much we can pad to the bottom + x_pad_min = min(pad, x_min) # How much we can pad to the left + x_pad_max = min(pad, width - x_max - 1) # How much we can pad to the right + + # Apply symmetric padding where possible + y_min -= y_pad_min + y_max += y_pad_max + x_min -= x_pad_min + x_max += x_pad_max + + # Warn if cell is at border and padding is asymmetric + if verbose and ( + y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad + ): + logg.warning( + f"Cell {cell_id} is at image border. Padding is asymmetric: " + f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, " + f"x: {x_pad_min}/{pad} left, {x_pad_max}/{pad} right" + ) + + # Crop all arrays at once + cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] + + image1_cropped = None if image1 is None else image1[y_min:y_max, x_min:x_max] + image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max] + + return cell_mask_cropped, image1_cropped, image2_cropped + + def _calculate_features_helper( cell_ids: Sequence[int], labels: np.ndarray, @@ -84,42 +331,40 @@ def _calculate_features_helper( img2_range = img2_max - img2_min + 1e-10 for cell_id in cell_ids: - # Get cell mask and find bounding box in one step - cell_mask = labels == cell_id - y_indices, x_indices = np.where(cell_mask) - if len(y_indices) == 0: # Skip empty cells + # Get cropped arrays for this cell + result = _get_cell_crops(cell_id, labels, image1, image2, verbose=verbose) + if result is None: continue - # Get bounding box with padding - y_min, y_max = y_indices.min(), y_indices.max() - x_min, x_max = x_indices.min(), x_indices.max() - pad = 5 - y_min = max(0, y_min - pad) - y_max = min(labels.shape[0], y_max + pad) - x_min = max(0, x_min - pad) - x_max = min(labels.shape[1], x_max + pad) - - # Crop all arrays at once - cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] - image1_cropped = image1[y_min:y_max, x_min:x_max] - image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max] - - # Quick shape check - if cell_mask_cropped.shape != image1_cropped.shape or ( - image2_cropped is not None - and cell_mask_cropped.shape != image2_cropped.shape - ): + cell_mask_cropped, image1_cropped, image2_cropped = result + cell_features = {} + + # Calculate regionprops features first + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + regionprops_features = _get_regionprops_features( + cell_mask_cropped, + image1_cropped, + image1_cropped if image2 is None else None, + ) + if image2 is None: + regionprops_features = { + f"{k}_ch{channel1_name}": v for k, v in regionprops_features.items() + } + else: + regionprops_features = { + f"{k}_ch{channel1_name}_ch{channel2_name}": v + for k, v in regionprops_features.items() + } + cell_features.update(regionprops_features) + except Exception as e: if verbose: logg.warning( - f"Shape mismatch for cell {cell_id}: " - f"mask {cell_mask_cropped.shape}, " - f"image1 {image1_cropped.shape}, " - f"image2 {image2_cropped.shape if image2_cropped is not None else 'None'}" + f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}" ) - continue - cell_features = {} - # Calculate all available features + # Calculate all available cp-measure features for name, func in measurements.items(): try: with warnings.catch_warnings(): @@ -206,6 +451,7 @@ def calculate_image_features( labels_key: str, image_key: str, scale: str | None = None, + measurements: list[str] | str | None = None, adata_key_added: str = "morphology", invalid_as_zero: bool = True, n_jobs: int | None = None, @@ -247,10 +493,9 @@ def calculate_image_features( """ if ( - (isinstance(sdata.images[image_key], xr.DataTree) - or isinstance(sdata.labels[labels_key], xr.DataTree)) - and scale is None - ): + isinstance(sdata.images[image_key], xr.DataTree) + or isinstance(sdata.labels[labels_key], xr.DataTree) + ) and scale is None: raise ValueError("When using multi-scale data, please specify the scale.") if scale is not None and not isinstance(scale, str): @@ -263,6 +508,29 @@ def calculate_image_features( image = np.asarray(sdata.images[image_key].compute()) labels = np.asarray(sdata.labels[labels_key].compute()) + available_measurements = [ + "skimage:label", + "skimage:label+image", + "cpmeasure:core", + "cpmeasure:correlation", + ] + + if measurements is None: + measurements = available_measurements + + if isinstance(measurements, str): + measurements = [measurements] + + if isinstance(measurements, list): + invalid_measurements = [ + m for m in measurements if m not in available_measurements + ] + if invalid_measurements: + raise ValueError( + f"Invalid measurement(s): {invalid_measurements}, " + f"available measurements: {available_measurements}" + ) + # Check if labels are empty if labels.size == 0: raise ValueError("Labels array is empty") @@ -288,13 +556,11 @@ def calculate_image_features( # Check if image and labels have matching dimensions if image.shape[1:] != labels.shape: raise ValueError( - f"Image and labels have mismatched dimensions: " - f"image {image.shape[1:]}, labels {labels.shape}" + f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}" ) - # Get core measurements from cp_measure - measurements_core = get_core_measurements() - measurements_corr = get_correlation_measurements() + if "cpmeasure:correlation" in measurements: + measurements_corr = get_correlation_measurements() # Get unique cell IDs from labels, excluding background (0) cell_ids = np.unique(labels) @@ -307,58 +573,103 @@ def calculate_image_features( logg.info(f"Using '{n_jobs}' core(s).") - # First process core measurements for each channel - for ch_idx in range(n_channels): - logg.info(f"Calculating core features for channel '{ch_idx}'.") - - ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" - ch_image = image[ch_idx] - + if "skimage:label" in measurements: + logg.info("Calculating 'skimage' label features.") res = parallelize( - _calculate_features_helper, + _get_regionprops_features, collection=cell_ids, extractor=pd.concat, n_jobs=n_jobs, backend=backend, show_progress_bar=show_progress_bar, verbose=verbose, - )(labels, ch_image, None, measurements_core, ch_name) - + )(labels=labels, intensity_image=None) all_features.append(res) - # Then process correlation measurements between channels - for ch1_idx in range(n_channels): - for ch2_idx in range(ch1_idx + 1, n_channels): - ch1_name = ( - channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}" - ) - ch2_name = ( - channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}" - ) + # skimage features that need a mask and an image + if "skimage:label+image" in measurements: + for ch_idx in range(n_channels): - logg.info( - f"Calculating correlation features between channels " - f"'{ch1_name}' and '{ch2_name}'." + ch_name = ( + channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" ) + ch_image = image[ch_idx] - ch1_image = image[ch1_idx] - ch2_image = image[ch2_idx] - - # Parallelize feature calculation + logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.") res = parallelize( - _calculate_features_helper, + _get_regionprops_features, collection=cell_ids, extractor=pd.concat, n_jobs=n_jobs, backend=backend, show_progress_bar=show_progress_bar, verbose=verbose, - )(labels, ch1_image, ch2_image, measurements_corr, ch1_name, ch2_name) - + )(labels=labels, intensity_image=ch_image) all_features.append(res) + # cpmeasure features that need a mask and an image + if "cpmeasure:core" in measurements: + measurements_core = get_core_measurements() + + for ch_idx in range(n_channels): + + ch_name = ( + channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" + ) + ch_image = image[ch_idx] + if "cpmeasure:core" in measurements: + logg.info( + f"Calculating 'cpmeasure' core features for channel '{ch_idx}'." + ) + + res = parallelize( + _calculate_features_helper, + collection=cell_ids, + extractor=pd.concat, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar, + verbose=verbose, + )(labels, ch_image, None, measurements_core, ch_name) + all_features.append(res) + + # cpmeasure features that correlate two channels + if "cpmeasure:correlation" in measurements: + for ch1_idx in range(n_channels): + for ch2_idx in range(ch1_idx + 1, n_channels): + ch1_name = ( + channel_names[ch1_idx] + if channel_names is not None + else f"ch{ch1_idx}" + ) + ch2_name = ( + channel_names[ch2_idx] + if channel_names is not None + else f"ch{ch2_idx}" + ) + + logg.info( + f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'." + ) + + ch1_image = image[ch1_idx] + ch2_image = image[ch2_idx] + + # Parallelize feature calculation + res = parallelize( + _calculate_features_helper, + collection=cell_ids, + extractor=pd.concat, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar, + verbose=verbose, + )(labels, ch1_image, ch2_image, measurements_corr, ch1_name, ch2_name) + all_features.append(res) + # Create AnnData object from results combined_features = pd.concat(all_features, axis=1) + if invalid_as_zero: combined_features = combined_features.replace([np.inf, -np.inf], 0) combined_features = combined_features.fillna(0) From 78ed95cf5cfe19187bec595338a374f47109b928 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 28 Mar 2025 22:37:46 +0000 Subject: [PATCH 05/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/exp/_feature.py | 80 ++++++++++--------------------------- 1 file changed, 20 insertions(+), 60 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index eb2808e96..18ccf7cfd 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -2,24 +2,23 @@ from __future__ import annotations +import itertools import warnings from collections.abc import Callable, Sequence from typing import Any import anndata as ad -import itertools import numpy as np import pandas as pd import xarray as xr -import xarray as xr from cp_measure.bulk import get_core_measurements, get_correlation_measurements from scipy import ndimage from skimage import measure +from skimage.measure import label from spatialdata import SpatialData from spatialdata._logging import logger as logg from spatialdata.models import TableModel -from skimage.measure import label from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, _get_n_cores, parallelize @@ -115,12 +114,8 @@ def _get_regionprops_features( for i, v in enumerate(value): cell_features[f"{prop}_{i}"] = float(v) elif value.ndim == 2: - for i, j in itertools.product( - range(value.shape[0]), range(value.shape[1]) - ): - cell_features[f"{prop}_{i}x{j}"] = float( - value[i, j] - ) + for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): + cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) else: cell_features[prop] = value else: @@ -171,12 +166,8 @@ def _get_regionprops_features( for i, v in enumerate(value): cell_features[f"{prop}_{i}"] = float(v) elif value.ndim == 2: - for i, j in itertools.product( - range(value.shape[0]), range(value.shape[1]) - ): - cell_features[f"{prop}_{i}x{j}"] = float( - value[i, j] - ) + for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): + cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) else: cell_features[prop] = value else: @@ -278,9 +269,7 @@ def _get_cell_crops( x_max += x_pad_max # Warn if cell is at border and padding is asymmetric - if verbose and ( - y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad - ): + if verbose and (y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad): logg.warning( f"Cell {cell_id} is at image border. Padding is asymmetric: " f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, " @@ -350,20 +339,15 @@ def _calculate_features_helper( image1_cropped if image2 is None else None, ) if image2 is None: - regionprops_features = { - f"{k}_ch{channel1_name}": v for k, v in regionprops_features.items() - } + regionprops_features = {f"{k}_ch{channel1_name}": v for k, v in regionprops_features.items()} else: regionprops_features = { - f"{k}_ch{channel1_name}_ch{channel2_name}": v - for k, v in regionprops_features.items() + f"{k}_ch{channel1_name}_ch{channel2_name}": v for k, v in regionprops_features.items() } cell_features.update(regionprops_features) except Exception as e: if verbose: - logg.warning( - f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}" - ) + logg.warning(f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}") # Calculate all available cp-measure features for name, func in measurements.items(): @@ -472,8 +456,7 @@ def calculate_image_features( """ if ( - isinstance(sdata.images[image_key], xr.DataTree) - or isinstance(sdata.labels[labels_key], xr.DataTree) + isinstance(sdata.images[image_key], xr.DataTree) or isinstance(sdata.labels[labels_key], xr.DataTree) ) and scale is None: raise ValueError("When using multi-scale data, please specify the scale.") @@ -501,13 +484,10 @@ def calculate_image_features( measurements = [measurements] if isinstance(measurements, list): - invalid_measurements = [ - m for m in measurements if m not in available_measurements - ] + invalid_measurements = [m for m in measurements if m not in available_measurements] if invalid_measurements: raise ValueError( - f"Invalid measurement(s): {invalid_measurements}, " - f"available measurements: {available_measurements}" + f"Invalid measurement(s): {invalid_measurements}, available measurements: {available_measurements}" ) # Check if labels are empty @@ -531,9 +511,7 @@ def calculate_image_features( # Check if image and labels have matching dimensions if image.shape[1:] != labels.shape: - raise ValueError( - f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}" - ) + raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}") if "cpmeasure:correlation" in measurements: measurements_corr = get_correlation_measurements() @@ -565,10 +543,7 @@ def calculate_image_features( # skimage features that need a mask and an image if "skimage:label+image" in measurements: for ch_idx in range(n_channels): - - ch_name = ( - channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" - ) + ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" ch_image = image[ch_idx] logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.") @@ -588,15 +563,10 @@ def calculate_image_features( measurements_core = get_core_measurements() for ch_idx in range(n_channels): - - ch_name = ( - channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" - ) + ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" ch_image = image[ch_idx] if "cpmeasure:core" in measurements: - logg.info( - f"Calculating 'cpmeasure' core features for channel '{ch_idx}'." - ) + logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.") res = parallelize( _calculate_features_helper, @@ -613,20 +583,10 @@ def calculate_image_features( if "cpmeasure:correlation" in measurements: for ch1_idx in range(n_channels): for ch2_idx in range(ch1_idx + 1, n_channels): - ch1_name = ( - channel_names[ch1_idx] - if channel_names is not None - else f"ch{ch1_idx}" - ) - ch2_name = ( - channel_names[ch2_idx] - if channel_names is not None - else f"ch{ch2_idx}" - ) + ch1_name = channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}" + ch2_name = channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}" - logg.info( - f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'." - ) + logg.info(f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'.") ch1_image = image[ch1_idx] ch2_image = image[ch2_idx] From c865d57152ed039c8ca50b645118d8f6291ec600 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Mon, 31 Mar 2025 18:40:19 +0200 Subject: [PATCH 06/37] cleaned up a bit --- src/squidpy/exp/_feature.py | 947 ++++++++++++++++++++---------------- 1 file changed, 515 insertions(+), 432 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 18ccf7cfd..cb0e41101 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -15,7 +15,7 @@ from scipy import ndimage from skimage import measure from skimage.measure import label -from spatialdata import SpatialData +from spatialdata import SpatialData, rasterize from spatialdata._logging import logger as logg from spatialdata.models import TableModel @@ -25,402 +25,51 @@ __all__ = ["calculate_image_features"] - -def _get_regionprops_features( - cell_ids: Sequence[int], - labels: np.ndarray, - intensity_image: np.ndarray | None = None, - queue: Any | None = None, -) -> dict[str, float]: - """Calculate regionprops features for a cell. - - Parameters - ---------- - cell_id - The ID of the cell to process - labels - The labels array containing cell masks - intensity_image - Optional intensity image for intensity-based features - queue - Optional queue for progress tracking. If provided, will send update signals. - - Returns - ------- - Dictionary of regionprops features - """ - # Define channel-independent properties (only need mask) - mask_props = { - "area", - "area_filled", - "area_convex", - "num_pixels", - "axis_major_length", - "axis_minor_length", - "eccentricity", - "equivalent_diameter", - "extent", - "feret_diameter_max", - "solidity", - "euler_number", - "centroid", - "centroid_local", - "perimeter", - "perimeter_crofton", - "inertia_tensor", - "inertia_tensor_eigvals", - } - - # Define channel-dependent properties (need intensity image) - intensity_props = { - "intensity_max", - "intensity_mean", - "intensity_min", - "intensity_std", - } - - features = {} - - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - - # labels only (channel independent) - if intensity_image is None: - for cell_id in cell_ids: - cell_mask_cropped, _, _ = _get_cell_crops( - cell_id=cell_id, - labels=labels, - ) - - if cell_mask_cropped is None: - continue - - region_prop = measure.regionprops(label_image=label(cell_mask_cropped)) - - if not region_prop: - continue - - cell_features = {} - - # Calculate regionprops features while ignoring warnings - for prop in mask_props: - try: - value = getattr(region_prop, prop) - - # Handle array-like properties - if isinstance(value, (np.ndarray, list, tuple)): - value = np.array(value) - if value.ndim == 1: - for i, v in enumerate(value): - cell_features[f"{prop}_{i}"] = float(v) - elif value.ndim == 2: - for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): - cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) - else: - cell_features[prop] = value - else: - cell_features[prop] = float(value) - except Exception: - continue - - if queue is not None: - queue.put(Signal.UPDATE) - - features[cell_id] = cell_features - - # Calculate intensity-dependent properties if intensity image is provided - else: - for cell_id in cell_ids: - cell_mask_cropped, intensity_image_cropped, _ = _get_cell_crops( - cell_id=cell_id, - labels=labels, - image1=intensity_image, - ) - - if cell_mask_cropped is None: - continue - - intensity_props_obj = measure.regionprops( - label_image=label(cell_mask_cropped), - intensity_image=intensity_image_cropped, - ) - - if not intensity_props_obj: - continue - - cell_features = {} - - for prop in intensity_props: - try: - value = getattr(intensity_props_obj, prop) - - # Skip callable properties - if callable(value): - continue - - # Handle array properties - if isinstance(value, (np.ndarray, list, tuple)): - value = np.array(value) - - if value.ndim == 1: - for i, v in enumerate(value): - cell_features[f"{prop}_{i}"] = float(v) - elif value.ndim == 2: - for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): - cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) - else: - cell_features[prop] = value - else: - cell_features[prop] = float(value) - - except Exception: - continue - - if queue is not None: - queue.put(Signal.UPDATE) - - features[cell_id] = cell_features - - if queue is not None: - queue.put(Signal.FINISH) - - return pd.DataFrame.from_dict(features, orient="index") - - -def _measurement_wrapper( - func: Callable, - mask: np.ndarray, - image1: np.ndarray, - image2: np.ndarray | None = None, -) -> dict[str, Any]: - """Wrapper function to handle both core and correlation measurements. - - Parameters - ---------- - func - The measurement function to call - mask - The cell mask - image1 - First image (or only image for core measurements) - image2 - Second image for correlation measurements. If None, this is a core - measurement. - - Returns - ------- - Dictionary of feature values - """ - return func(mask, image1) if image2 is None else func(image1, image2, mask) - - -def _get_cell_crops( - cell_id: int, - labels: np.ndarray, - image1: np.ndarray | None = None, - image2: np.ndarray | None = None, - pad: int = 1, - verbose: bool = False, -) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None: - """Generator function to get cropped arrays for a cell. - - Parameters - ---------- - cell_id - The ID of the cell to process - labels - The labels array containing cell masks - image1 - First image to crop - image2 - Optional second image to crop - pad - Amount of padding to add around the cell - verbose - Whether to print warning messages - - Returns - ------- - Tuple of (cell_mask_cropped, image1_cropped, image2_cropped) or None if cell is empty - """ - # Get cell mask and find bounding box in one step - cell_mask = labels == cell_id - y_indices, x_indices = np.where(cell_mask) - if len(y_indices) == 0: # Skip empty cells - return None - - # Get bounding box - y_min, y_max = y_indices.min(), y_indices.max() - x_min, x_max = x_indices.min(), x_indices.max() - - # Get image dimensions - height, width = labels.shape - - # Calculate desired padding - y_pad_min = min(pad, y_min) # How much we can pad to the top - y_pad_max = min(pad, height - y_max - 1) # How much we can pad to the bottom - x_pad_min = min(pad, x_min) # How much we can pad to the left - x_pad_max = min(pad, width - x_max - 1) # How much we can pad to the right - - # Apply symmetric padding where possible - y_min -= y_pad_min - y_max += y_pad_max - x_min -= x_pad_min - x_max += x_pad_max - - # Warn if cell is at border and padding is asymmetric - if verbose and (y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad): - logg.warning( - f"Cell {cell_id} is at image border. Padding is asymmetric: " - f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, " - f"x: {x_pad_min}/{pad} left, {x_pad_max}/{pad} right" - ) - - # Crop all arrays at once - cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] - - image1_cropped = None if image1 is None else image1[y_min:y_max, x_min:x_max] - image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max] - - return cell_mask_cropped, image1_cropped, image2_cropped - - -def _calculate_features_helper( - cell_ids: Sequence[int], - labels: np.ndarray, - image1: np.ndarray, - image2: np.ndarray | None, - measurements: dict[str, Any], - channel1_name: str | None = None, - channel2_name: str | None = None, - queue: Any | None = None, - verbose: bool = False, -) -> pd.DataFrame: - """Helper function to calculate features for a subset of cells.""" - features_dict = {} - - # Pre-allocate arrays for type conversion - uint8_features = [ - "radial_distribution", - "radial_zernikes", - "intensity", - "sizeshape", - "zernike", - "ferret", - ] - float_features = ["manders_fold", "rwc"] - - # Pre-compute image normalization if needed - if "texture" in measurements: - img1_min = image1.min() - img1_max = image1.max() - img1_range = img1_max - img1_min + 1e-10 - if image2 is not None: - img2_min = image2.min() - img2_max = image2.max() - img2_range = img2_max - img2_min + 1e-10 - - for cell_id in cell_ids: - # Get cropped arrays for this cell - result = _get_cell_crops(cell_id, labels, image1, image2, verbose=verbose) - if result is None: - continue - - cell_mask_cropped, image1_cropped, image2_cropped = result - cell_features = {} - - # Calculate regionprops features first - try: - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - regionprops_features = _get_regionprops_features( - cell_mask_cropped, - image1_cropped, - image1_cropped if image2 is None else None, - ) - if image2 is None: - regionprops_features = {f"{k}_ch{channel1_name}": v for k, v in regionprops_features.items()} - else: - regionprops_features = { - f"{k}_ch{channel1_name}_ch{channel2_name}": v for k, v in regionprops_features.items() - } - cell_features.update(regionprops_features) - except Exception as e: - if verbose: - logg.warning(f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}") - - # Calculate all available cp-measure features - for name, func in measurements.items(): - try: - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - - # Pre-convert inputs based on feature type - if name in uint8_features: - mask = cell_mask_cropped.astype(np.uint8) - img1 = image1_cropped.astype(np.uint8) - img2 = None if image2_cropped is None else image2_cropped.astype(np.uint8) - elif name == "texture": - mask = cell_mask_cropped.astype(np.uint8) - img1 = (image1_cropped.astype(np.float32) - img1_min) / img1_range - img2 = ( - None - if image2_cropped is None - else (image2_cropped.astype(np.float32) - img2_min) / img2_range - ) - elif name in float_features: - mask = cell_mask_cropped.astype(np.float32) - img1 = image1_cropped.astype(np.float32) - img2 = None if image2_cropped is None else image2_cropped.astype(np.float32) - else: - mask = cell_mask_cropped.astype(np.float32) - img1 = image1_cropped.astype(np.float32) - img2 = None if image2_cropped is None else image2_cropped.astype(np.float32) - - feature_dict = _measurement_wrapper(func, mask, img1, img2) - - for k, v in feature_dict.items(): - if len(v) > 1: - raise ValueError(f"Feature {k} has more than one value.") - else: - feature_dict[k] = float(v[0]) - - # Append channel names efficiently - if image2 is None: - feature_dict = {f"{k}_ch{channel1_name}": v for k, v in feature_dict.items()} - else: - feature_dict = {f"{k}_ch{channel1_name}_ch{channel2_name}": v for k, v in feature_dict.items()} - - cell_features.update(feature_dict) - except Exception as e: - if verbose: - logg.warning(f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}") - - features_dict[cell_id] = cell_features - - if queue is not None: - queue.put(Signal.UPDATE) - - if queue is not None: - queue.put(Signal.FINISH) - - return pd.DataFrame.from_dict(features_dict, orient="index") +# Define constant property sets +_MASK_PROPS = { + "area", + "area_filled", + "area_convex", + "num_pixels", + "axis_major_length", + "axis_minor_length", + "eccentricity", + "equivalent_diameter", + "extent", + "feret_diameter_max", + "solidity", + "euler_number", + "centroid", + "centroid_local", + "perimeter", + "perimeter_crofton", + "inertia_tensor", + "inertia_tensor_eigvals", +} +_INTENSITY_PROPS = { + "intensity_max", + "intensity_mean", + "intensity_min", + "intensity_std", +} @d.dedent @inject_docs(f=ImageFeature) def calculate_image_features( sdata: SpatialData, - labels_key: str, image_key: str, + labels_key: str | None = None, + shapes_key: str | None = None, scale: str | None = None, measurements: list[str] | str | None = None, adata_key_added: str = "morphology", invalid_as_zero: bool = True, n_jobs: int | None = None, backend: str = "loky", - show_progress_bar: bool = True, + show_progress_bar: bool = False, # slower, needs to be optimised verbose: bool = False, + inplace: bool = True, ) -> pd.DataFrame | None: """ Calculate features from segmentation masks using CellProfiler measurements. @@ -436,6 +85,9 @@ def calculate_image_features( labels_key Key in :attr:`spatialdata.SpatialData.labels` containing the segmentation masks. + shapes_key + Key in :attr:`spatialdata.SpatialData.shapes` containing the + shape features. image_key Key in :attr:`spatialdata.SpatialData.images` containing the intensity image. @@ -455,20 +107,62 @@ def calculate_image_features( to be installed. """ + if image_key not in sdata.images.keys(): + raise ValueError( + f"Image key '{image_key}' not found, valid keys: {list(sdata.images.keys())}" + ) + + if labels_key is not None and shapes_key is not None: + raise ValueError("Use either `labels_key` or `shapes_key`, not both.") + + if labels_key is not None and labels_key not in sdata.labels.keys(): + raise ValueError( + f"Labels key '{labels_key}' not found, valid keys: {list(sdata.labels.keys())}" + ) + + if shapes_key is not None and shapes_key not in sdata.shapes.keys(): + raise ValueError( + f"Shapes key '{shapes_key}' not found, valid keys: {list(sdata.shapes.keys())}" + ) + if ( - isinstance(sdata.images[image_key], xr.DataTree) or isinstance(sdata.labels[labels_key], xr.DataTree) + isinstance(sdata.images[image_key], xr.DataTree) + or isinstance(sdata.labels[labels_key], xr.DataTree) ) and scale is None: raise ValueError("When using multi-scale data, please specify the scale.") if scale is not None and not isinstance(scale, str): raise ValueError("Scale must be a string.") - if scale is not None: - image = np.asarray(sdata.images[image_key][scale].image.compute()) - labels = np.asarray(sdata.labels[labels_key][scale].image.compute()) + image = _get_array_from_DataTree_or_DataArray(sdata.images[image_key], scale) + labels = ( + _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale) + if labels_key is not None + else None + ) + + if labels is not None and image.shape[1:] != labels.shape: + raise ValueError( + f"Image dimensions {image.shape[1:]} do not match labels dimensions {labels.shape} at scale '{scale}'" + ) + + if shapes_key is not None: + scale_str = f" (using scale '{scale}')" if scale is not None else "" + logg.info(f"Converting shapes to labels{scale_str}.") + _, max_y, max_x = image.shape + labels = np.asarray( + rasterize( + sdata.shapes[shapes_key], + ["x", "y"], + min_coordinate=[0, 0], + max_coordinate=[max_x, max_y], + target_coordinate_system="global", + target_unit_to_pixels=1.0, + return_regions_as_labels=True, + ) + ) else: - image = np.asarray(sdata.images[image_key].compute()) - labels = np.asarray(sdata.labels[labels_key].compute()) + labels = _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale) available_measurements = [ "skimage:label", @@ -484,13 +178,14 @@ def calculate_image_features( measurements = [measurements] if isinstance(measurements, list): - invalid_measurements = [m for m in measurements if m not in available_measurements] + invalid_measurements = [ + m for m in measurements if m not in available_measurements + ] if invalid_measurements: raise ValueError( f"Invalid measurement(s): {invalid_measurements}, available measurements: {available_measurements}" ) - # Check if labels are empty if labels.size == 0: raise ValueError("Labels array is empty") @@ -498,29 +193,31 @@ def calculate_image_features( if max_label == 0: raise ValueError("No cells found in labels (max label is 0)") - # Get channel names if available channel_names = None - if hasattr(sdata.images[image_key], "coords") and "c" in sdata.images[image_key].coords: + if ( + hasattr(sdata.images[image_key], "coords") + and "c" in sdata.images[image_key].coords + ): channel_names = sdata.images[image_key].coords["c"].values - # Handle image dimensions if image.ndim == 2: - image = image[None, :, :] # Add channel dimension + image = image[None, :, :] elif image.ndim != 3: raise ValueError(f"Expected 2D or 3D image, got shape {image.shape}") - # Check if image and labels have matching dimensions if image.shape[1:] != labels.shape: - raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}") + raise ValueError( + f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}" + ) if "cpmeasure:correlation" in measurements: measurements_corr = get_correlation_measurements() - # Get unique cell IDs from labels, excluding background (0) cell_ids = np.unique(labels) cell_ids = cell_ids[cell_ids != 0] + # Sort cell_ids to ensure consistent order + cell_ids = np.sort(cell_ids) - # Process each channel all_features = [] n_channels = image.shape[0] n_jobs = _get_n_cores(n_jobs) @@ -540,12 +237,12 @@ def calculate_image_features( )(labels=labels, intensity_image=None) all_features.append(res) - # skimage features that need a mask and an image if "skimage:label+image" in measurements: for ch_idx in range(n_channels): - ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" + ch_name = ( + channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" + ) ch_image = image[ch_idx] - logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.") res = parallelize( _get_regionprops_features, @@ -556,42 +253,47 @@ def calculate_image_features( show_progress_bar=show_progress_bar, verbose=verbose, )(labels=labels, intensity_image=ch_image) + # Append channel names to each feature column + res = res.rename(columns=lambda col: f"{col}_{ch_name}") all_features.append(res) - # cpmeasure features that need a mask and an image if "cpmeasure:core" in measurements: measurements_core = get_core_measurements() - for ch_idx in range(n_channels): - ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" + ch_name = ( + channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" + ) ch_image = image[ch_idx] - if "cpmeasure:core" in measurements: - logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.") - - res = parallelize( - _calculate_features_helper, - collection=cell_ids, - extractor=pd.concat, - n_jobs=n_jobs, - backend=backend, - show_progress_bar=show_progress_bar, - verbose=verbose, - )(labels, ch_image, None, measurements_core, ch_name) - all_features.append(res) + logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.") + res = parallelize( + _calculate_features_helper, + collection=cell_ids, + extractor=pd.concat, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar, + verbose=verbose, + )(labels, ch_image, None, measurements_core, ch_name) + all_features.append(res) - # cpmeasure features that correlate two channels if "cpmeasure:correlation" in measurements: for ch1_idx in range(n_channels): for ch2_idx in range(ch1_idx + 1, n_channels): - ch1_name = channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}" - ch2_name = channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}" - - logg.info(f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'.") - + ch1_name = ( + channel_names[ch1_idx] + if channel_names is not None + else f"{ch1_idx}" + ) + ch2_name = ( + channel_names[ch2_idx] + if channel_names is not None + else f"{ch2_idx}" + ) + logg.info( + f"Calculating 'cpmeasure' correlation features between channels '{ch1_name}' and '{ch2_name}'." + ) ch1_image = image[ch1_idx] ch2_image = image[ch2_idx] - - # Parallelize feature calculation res = parallelize( _calculate_features_helper, collection=cell_ids, @@ -603,7 +305,6 @@ def calculate_image_features( )(labels, ch1_image, ch2_image, measurements_corr, ch1_name, ch2_name) all_features.append(res) - # Create AnnData object from results combined_features = pd.concat(all_features, axis=1) if invalid_as_zero: @@ -611,7 +312,6 @@ def calculate_image_features( combined_features = combined_features.fillna(0) # Ensure cell IDs are preserved in the correct order - cell_ids = sorted(combined_features.index) combined_features = combined_features.loc[cell_ids] adata = ad.AnnData(X=combined_features) @@ -619,12 +319,395 @@ def calculate_image_features( adata.var_names = combined_features.columns adata.uns["spatialdata_attrs"] = { - "region": labels_key, + "region": labels_key if labels_key is not None else shapes_key, "region_key": "region", "instance_key": "label_id", } - adata.obs["region"] = pd.Categorical([labels_key] * len(adata)) + adata.obs["region"] = pd.Categorical( + [labels_key if labels_key is not None else shapes_key] * len(adata) + ) adata.obs["label_id"] = cell_ids - # Add the AnnData object to the SpatialData object - sdata.tables[adata_key_added] = TableModel.parse(adata) + if inplace: + sdata.tables[adata_key_added] = TableModel.parse(adata) + else: + return combined_features + + +def _extract_features_from_regionprops( + region_obj: Any, props: set[str], cell_id: int, skip_callable: bool = False +) -> dict[str, float]: + """Extract features from a regionprops object given a list of properties.""" + cell_features = {} + for prop in props: + try: + value = getattr(region_obj, prop) + if skip_callable and callable(value): + continue + if isinstance(value, np.ndarray | list | tuple): + value = np.array(value) + if value.ndim == 1: + for i, v in enumerate(value): + cell_features[f"{prop}_{i}"] = float(v) + elif value.ndim == 2: + for i, j in itertools.product( + range(value.shape[0]), range(value.shape[1]) + ): + cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) + else: + cell_features[prop] = value + else: + cell_features[prop] = float(value) + except Exception as e: + logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}") + continue + return cell_features + + +def _calculate_regionprops_from_crop( + cell_mask_cropped: np.ndarray, + intensity_image_cropped: np.ndarray | None, + cell_id: int, +) -> dict[str, float]: + """ + Calculate regionprops features from pre-cropped arrays. + Uses intensity-based properties if an intensity image is provided. + """ + if intensity_image_cropped is None: + region_props = measure.regionprops(label_image=label(cell_mask_cropped)) + if not region_props: + return {} + return _extract_features_from_regionprops(region_props[0], _MASK_PROPS, cell_id) + else: + region_props = measure.regionprops( + label_image=label(cell_mask_cropped), + intensity_image=intensity_image_cropped, + ) + if not region_props: + return {} + return _extract_features_from_regionprops( + region_props[0], _INTENSITY_PROPS, cell_id, skip_callable=True + ) + + +def _append_channel_names( + features: dict, channel1: str, channel2: str | None = None +) -> dict: + """Append channel name(s) to all keys in the feature dictionary.""" + if channel2 is None: + return {f"{k}_{channel1}": v for k, v in features.items()} + else: + return {f"{k}_{channel1}_{channel2}": v for k, v in features.items()} + + +def _prepare_images_for_measurement( + name: str, + cell_mask: np.ndarray, + img1: np.ndarray, + img2: np.ndarray | None, + conv_params: dict, +) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]: + """ + Convert inputs to the appropriate dtype based on the measurement type. + """ + if name in conv_params.get("uint8_features", []): + mask = cell_mask.astype(np.uint8) + image1_prepared = img1.astype(np.uint8) + image2_prepared = None if img2 is None else img2.astype(np.uint8) + elif name == "texture": + mask = cell_mask.astype(np.uint8) + image1_prepared = ( + img1.astype(np.float32) - conv_params["img1_min"] + ) / conv_params["img1_range"] + image2_prepared = ( + None + if img2 is None + else (img2.astype(np.float32) - conv_params["img2_min"]) + / conv_params["img2_range"] + ) + elif name in conv_params.get("float_features", []): + mask = cell_mask.astype(np.float32) + image1_prepared = img1.astype(np.float32) + image2_prepared = None if img2 is None else img2.astype(np.float32) + else: + mask = cell_mask.astype(np.float32) + image1_prepared = img1.astype(np.float32) + image2_prepared = None if img2 is None else img2.astype(np.float32) + return mask, image1_prepared, image2_prepared + + +def _get_cell_crops( + cell_id: int, + labels: np.ndarray, + image1: np.ndarray | None = None, + image2: np.ndarray | None = None, + pad: int = 1, + verbose: bool = False, +) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None: + """Generator function to get cropped arrays for a cell. + + Parameters + ---------- + cell_id + The ID of the cell to process + labels + The labels array containing cell masks + image1 + First image to crop + image2 + Optional second image to crop + pad + Amount of padding to add around the cell + verbose + Whether to print warning messages + + Returns + ------- + Tuple of (cell_mask_cropped, image1_cropped, image2_cropped) or None if cell is empty + """ + cell_mask = labels == cell_id + y_indices, x_indices = np.where(cell_mask) + if len(y_indices) == 0: # Skip empty cells + return None + + y_min, y_max = y_indices.min(), y_indices.max() + x_min, x_max = x_indices.min(), x_indices.max() + height, width = labels.shape + + y_pad_min = min(pad, y_min) + y_pad_max = min(pad, height - y_max - 1) + x_pad_min = min(pad, x_min) + x_pad_max = min(pad, width - x_max - 1) + + y_min -= y_pad_min + y_max += y_pad_max + x_min -= x_pad_min + x_max += x_pad_max + + if verbose and ( + y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad + ): + logg.warning( + f"Cell {cell_id} is at image border. Padding is asymmetric: " + f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, " + f"x: {x_pad_min}/{pad} left, {x_pad_max}/{pad} right" + ) + + cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] + image1_cropped = None if image1 is None else image1[y_min:y_max, x_min:x_max] + image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max] + + return cell_mask_cropped, image1_cropped, image2_cropped + + +def _get_regionprops_features( + cell_ids: Sequence[int], + labels: np.ndarray, + intensity_image: np.ndarray | None = None, + queue: Any | None = None, +) -> pd.DataFrame: + """Calculate regionprops features for each cell from the full label image.""" + # Initialize features dictionary with None values to preserve order + features = {cell_id: None for cell_id in cell_ids} + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # Process cells in order to preserve order + for cell_id in cell_ids: + crop = _get_cell_crops(cell_id, labels, image1=intensity_image) + if crop is None: + continue + cell_mask_cropped, intensity_image_cropped, _ = crop + cell_features = _calculate_regionprops_from_crop( + cell_mask_cropped, intensity_image_cropped, cell_id + ) + features[cell_id] = cell_features + if queue is not None: + queue.put(Signal.UPDATE) + if queue is not None: + queue.put(Signal.FINISH) + + # Convert to DataFrame while preserving order + df = pd.DataFrame.from_dict(features, orient="index") + # Ensure the index matches the input cell_ids order + df = df.reindex(cell_ids) + return df + + +def _measurement_wrapper( + func: Callable, + mask: np.ndarray, + image1: np.ndarray, + image2: np.ndarray | None = None, +) -> dict[str, Any]: + """Wrapper function to handle both core and correlation measurements. + + Parameters + ---------- + func + The measurement function to call + mask + The cell mask + image1 + First image (or only image for core measurements) + image2 + Second image for correlation measurements. If None, this is a core + measurement. + + Returns + ------- + Dictionary of feature values + """ + return func(mask, image1) if image2 is None else func(image1, image2, mask) + + +def _calculate_features_helper( + cell_ids: Sequence[int], + labels: np.ndarray, + image1: np.ndarray, + image2: np.ndarray | None, + measurements: dict[str, Any], + channel1_name: str | None = None, + channel2_name: str | None = None, + queue: Any | None = None, + verbose: bool = False, +) -> pd.DataFrame: + """Helper function to calculate features for a subset of cells.""" + # Initialize features dictionary with None values to preserve order + features_dict = {cell_id: None for cell_id in cell_ids} + + # Pre-allocate lists for type conversion + uint8_features = [ + "radial_distribution", + "radial_zernikes", + "intensity", + "sizeshape", + "zernike", + "ferret", + ] + float_features = ["manders_fold", "rwc"] + + # Pre-compute normalization if needed + conv_params: dict[str, Any] = { + "uint8_features": uint8_features, + "float_features": float_features, + } + if "texture" in measurements: + img1_min = image1.min() + img1_max = image1.max() + conv_params["img1_min"] = img1_min + conv_params["img1_range"] = img1_max - img1_min + 1e-10 + if image2 is not None: + img2_min = image2.min() + img2_max = image2.max() + conv_params["img2_min"] = img2_min + conv_params["img2_range"] = img2_max - img2_min + 1e-10 + + # Process cells in order to preserve order + for cell_id in cell_ids: + crop = _get_cell_crops(cell_id, labels, image1, image2, verbose=verbose) + if crop is None: + continue + cell_mask_cropped, image1_cropped, image2_cropped = crop + cell_features = {} + + # Calculate regionprops features using cached crop + try: + region_features = _calculate_regionprops_from_crop( + cell_mask_cropped, + image1_cropped if image2 is None else None, + cell_id, + ) + if image2 is None: + region_features = _append_channel_names(region_features, channel1_name) + else: + region_features = _append_channel_names( + region_features, channel1_name, channel2_name + ) + cell_features.update(region_features) + except Exception as e: + if verbose: + logg.warning( + f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}" + ) + + # Calculate cp-measure features for each measurement + for name, func in measurements.items(): + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + mask_conv, img1_conv, img2_conv = _prepare_images_for_measurement( + name, + cell_mask_cropped, + image1_cropped, + image2_cropped, + conv_params, + ) + feature_dict = _measurement_wrapper( + func, mask_conv, img1_conv, img2_conv + ) + # Ensure each feature returns a single value + for k, v in feature_dict.items(): + if len(v) > 1: + raise ValueError(f"Feature {k} has more than one value.") + else: + feature_dict[k] = float(v[0]) + if image2 is None: + feature_dict = _append_channel_names( + feature_dict, channel1_name + ) + else: + feature_dict = _append_channel_names( + feature_dict, channel1_name, channel2_name + ) + cell_features.update(feature_dict) + except Exception as e: + if verbose: + logg.warning( + f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}" + ) + + features_dict[cell_id] = cell_features + + if queue is not None: + queue.put(Signal.UPDATE) + + if queue is not None: + queue.put(Signal.FINISH) + + # Convert to DataFrame while preserving order + df = pd.DataFrame.from_dict(features_dict, orient="index") + # Ensure the index matches the input cell_ids order + df = df.reindex(cell_ids) + return df + + +def _get_array_from_DataTree_or_DataArray( + data: xr.DataTree | xr.DataArray, scale: str | None = None +) -> np.ndarray: + """ + Returns a NumPy array for the given data and scale. + If data is an xr.DataTree, it checks for the scale key and computes the image. + If data is an xr.DataArray, it computes the array (ignoring scale). + + Parameters + ---------- + data + The xarray data to convert to a NumPy array + scale + Optional scale key for DataTree data + + Returns + ------- + np.ndarray + The computed NumPy array + """ + if not isinstance(data, xr.DataTree): + return np.asarray(data.compute()) + if scale is None: + raise ValueError("Scale must be provided for DataTree data") + if scale not in data: + raise ValueError( + f"Scale '{scale}' not found. Available scales: {list(data.keys())}" + ) + return np.asarray(data[scale].image.compute()) From 0bc950764e34f3c85f993acec490840c5a298a0c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 31 Mar 2025 16:40:41 +0000 Subject: [PATCH 07/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/exp/_feature.py | 119 +++++++++--------------------------- 1 file changed, 28 insertions(+), 91 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index cb0e41101..c49ddc5c4 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -108,26 +108,19 @@ def calculate_image_features( """ if image_key not in sdata.images.keys(): - raise ValueError( - f"Image key '{image_key}' not found, valid keys: {list(sdata.images.keys())}" - ) + raise ValueError(f"Image key '{image_key}' not found, valid keys: {list(sdata.images.keys())}") if labels_key is not None and shapes_key is not None: raise ValueError("Use either `labels_key` or `shapes_key`, not both.") if labels_key is not None and labels_key not in sdata.labels.keys(): - raise ValueError( - f"Labels key '{labels_key}' not found, valid keys: {list(sdata.labels.keys())}" - ) + raise ValueError(f"Labels key '{labels_key}' not found, valid keys: {list(sdata.labels.keys())}") if shapes_key is not None and shapes_key not in sdata.shapes.keys(): - raise ValueError( - f"Shapes key '{shapes_key}' not found, valid keys: {list(sdata.shapes.keys())}" - ) + raise ValueError(f"Shapes key '{shapes_key}' not found, valid keys: {list(sdata.shapes.keys())}") if ( - isinstance(sdata.images[image_key], xr.DataTree) - or isinstance(sdata.labels[labels_key], xr.DataTree) + isinstance(sdata.images[image_key], xr.DataTree) or isinstance(sdata.labels[labels_key], xr.DataTree) ) and scale is None: raise ValueError("When using multi-scale data, please specify the scale.") @@ -135,11 +128,7 @@ def calculate_image_features( raise ValueError("Scale must be a string.") image = _get_array_from_DataTree_or_DataArray(sdata.images[image_key], scale) - labels = ( - _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale) - if labels_key is not None - else None - ) + labels = _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale) if labels_key is not None else None if labels is not None and image.shape[1:] != labels.shape: raise ValueError( @@ -178,9 +167,7 @@ def calculate_image_features( measurements = [measurements] if isinstance(measurements, list): - invalid_measurements = [ - m for m in measurements if m not in available_measurements - ] + invalid_measurements = [m for m in measurements if m not in available_measurements] if invalid_measurements: raise ValueError( f"Invalid measurement(s): {invalid_measurements}, available measurements: {available_measurements}" @@ -194,10 +181,7 @@ def calculate_image_features( raise ValueError("No cells found in labels (max label is 0)") channel_names = None - if ( - hasattr(sdata.images[image_key], "coords") - and "c" in sdata.images[image_key].coords - ): + if hasattr(sdata.images[image_key], "coords") and "c" in sdata.images[image_key].coords: channel_names = sdata.images[image_key].coords["c"].values if image.ndim == 2: @@ -206,9 +190,7 @@ def calculate_image_features( raise ValueError(f"Expected 2D or 3D image, got shape {image.shape}") if image.shape[1:] != labels.shape: - raise ValueError( - f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}" - ) + raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}") if "cpmeasure:correlation" in measurements: measurements_corr = get_correlation_measurements() @@ -239,9 +221,7 @@ def calculate_image_features( if "skimage:label+image" in measurements: for ch_idx in range(n_channels): - ch_name = ( - channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" - ) + ch_name = channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" ch_image = image[ch_idx] logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.") res = parallelize( @@ -260,9 +240,7 @@ def calculate_image_features( if "cpmeasure:core" in measurements: measurements_core = get_core_measurements() for ch_idx in range(n_channels): - ch_name = ( - channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" - ) + ch_name = channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" ch_image = image[ch_idx] logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.") res = parallelize( @@ -279,16 +257,8 @@ def calculate_image_features( if "cpmeasure:correlation" in measurements: for ch1_idx in range(n_channels): for ch2_idx in range(ch1_idx + 1, n_channels): - ch1_name = ( - channel_names[ch1_idx] - if channel_names is not None - else f"{ch1_idx}" - ) - ch2_name = ( - channel_names[ch2_idx] - if channel_names is not None - else f"{ch2_idx}" - ) + ch1_name = channel_names[ch1_idx] if channel_names is not None else f"{ch1_idx}" + ch2_name = channel_names[ch2_idx] if channel_names is not None else f"{ch2_idx}" logg.info( f"Calculating 'cpmeasure' correlation features between channels '{ch1_name}' and '{ch2_name}'." ) @@ -323,9 +293,7 @@ def calculate_image_features( "region_key": "region", "instance_key": "label_id", } - adata.obs["region"] = pd.Categorical( - [labels_key if labels_key is not None else shapes_key] * len(adata) - ) + adata.obs["region"] = pd.Categorical([labels_key if labels_key is not None else shapes_key] * len(adata)) adata.obs["label_id"] = cell_ids if inplace: @@ -350,9 +318,7 @@ def _extract_features_from_regionprops( for i, v in enumerate(value): cell_features[f"{prop}_{i}"] = float(v) elif value.ndim == 2: - for i, j in itertools.product( - range(value.shape[0]), range(value.shape[1]) - ): + for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) else: cell_features[prop] = value @@ -385,14 +351,10 @@ def _calculate_regionprops_from_crop( ) if not region_props: return {} - return _extract_features_from_regionprops( - region_props[0], _INTENSITY_PROPS, cell_id, skip_callable=True - ) + return _extract_features_from_regionprops(region_props[0], _INTENSITY_PROPS, cell_id, skip_callable=True) -def _append_channel_names( - features: dict, channel1: str, channel2: str | None = None -) -> dict: +def _append_channel_names(features: dict, channel1: str, channel2: str | None = None) -> dict: """Append channel name(s) to all keys in the feature dictionary.""" if channel2 is None: return {f"{k}_{channel1}": v for k, v in features.items()} @@ -416,14 +378,9 @@ def _prepare_images_for_measurement( image2_prepared = None if img2 is None else img2.astype(np.uint8) elif name == "texture": mask = cell_mask.astype(np.uint8) - image1_prepared = ( - img1.astype(np.float32) - conv_params["img1_min"] - ) / conv_params["img1_range"] + image1_prepared = (img1.astype(np.float32) - conv_params["img1_min"]) / conv_params["img1_range"] image2_prepared = ( - None - if img2 is None - else (img2.astype(np.float32) - conv_params["img2_min"]) - / conv_params["img2_range"] + None if img2 is None else (img2.astype(np.float32) - conv_params["img2_min"]) / conv_params["img2_range"] ) elif name in conv_params.get("float_features", []): mask = cell_mask.astype(np.float32) @@ -484,9 +441,7 @@ def _get_cell_crops( x_min -= x_pad_min x_max += x_pad_max - if verbose and ( - y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad - ): + if verbose and (y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad): logg.warning( f"Cell {cell_id} is at image border. Padding is asymmetric: " f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, " @@ -518,9 +473,7 @@ def _get_regionprops_features( if crop is None: continue cell_mask_cropped, intensity_image_cropped, _ = crop - cell_features = _calculate_regionprops_from_crop( - cell_mask_cropped, intensity_image_cropped, cell_id - ) + cell_features = _calculate_regionprops_from_crop(cell_mask_cropped, intensity_image_cropped, cell_id) features[cell_id] = cell_features if queue is not None: queue.put(Signal.UPDATE) @@ -621,15 +574,11 @@ def _calculate_features_helper( if image2 is None: region_features = _append_channel_names(region_features, channel1_name) else: - region_features = _append_channel_names( - region_features, channel1_name, channel2_name - ) + region_features = _append_channel_names(region_features, channel1_name, channel2_name) cell_features.update(region_features) except Exception as e: if verbose: - logg.warning( - f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}" - ) + logg.warning(f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}") # Calculate cp-measure features for each measurement for name, func in measurements.items(): @@ -643,9 +592,7 @@ def _calculate_features_helper( image2_cropped, conv_params, ) - feature_dict = _measurement_wrapper( - func, mask_conv, img1_conv, img2_conv - ) + feature_dict = _measurement_wrapper(func, mask_conv, img1_conv, img2_conv) # Ensure each feature returns a single value for k, v in feature_dict.items(): if len(v) > 1: @@ -653,19 +600,13 @@ def _calculate_features_helper( else: feature_dict[k] = float(v[0]) if image2 is None: - feature_dict = _append_channel_names( - feature_dict, channel1_name - ) + feature_dict = _append_channel_names(feature_dict, channel1_name) else: - feature_dict = _append_channel_names( - feature_dict, channel1_name, channel2_name - ) + feature_dict = _append_channel_names(feature_dict, channel1_name, channel2_name) cell_features.update(feature_dict) except Exception as e: if verbose: - logg.warning( - f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}" - ) + logg.warning(f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}") features_dict[cell_id] = cell_features @@ -682,9 +623,7 @@ def _calculate_features_helper( return df -def _get_array_from_DataTree_or_DataArray( - data: xr.DataTree | xr.DataArray, scale: str | None = None -) -> np.ndarray: +def _get_array_from_DataTree_or_DataArray(data: xr.DataTree | xr.DataArray, scale: str | None = None) -> np.ndarray: """ Returns a NumPy array for the given data and scale. If data is an xr.DataTree, it checks for the scale key and computes the image. @@ -707,7 +646,5 @@ def _get_array_from_DataTree_or_DataArray( if scale is None: raise ValueError("Scale must be provided for DataTree data") if scale not in data: - raise ValueError( - f"Scale '{scale}' not found. Available scales: {list(data.keys())}" - ) + raise ValueError(f"Scale '{scale}' not found. Available scales: {list(data.keys())}") return np.asarray(data[scale].image.compute()) From 60ed2234e93e08043ecac1cc2afb1e0dddae4e1c Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 1 Apr 2025 15:13:21 +0200 Subject: [PATCH 08/37] fixed off-by-one error --- src/squidpy/exp/_feature.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index cb0e41101..7c4c8b5db 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -326,7 +326,11 @@ def calculate_image_features( adata.obs["region"] = pd.Categorical( [labels_key if labels_key is not None else shapes_key] * len(adata) ) - adata.obs["label_id"] = cell_ids + # here we either use the cell_ids or the index of the shapes. Needed + # because when converting the shapes to labels, a potential index 0 + # in the shapes is set to 1 in the labels and therefore we'd otherwise + # be off-by-one in the label_id. + adata.obs["label_id"] = sdata.shapes[shapes_key].index.values if shapes_key is not None else cell_ids if inplace: sdata.tables[adata_key_added] = TableModel.parse(adata) From 9aade33593deeb90ede9175204f532ceac726ac5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 1 Apr 2025 13:14:11 +0000 Subject: [PATCH 09/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/exp/_feature.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 357b5c4bb..bee79a1a8 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -293,12 +293,10 @@ def calculate_image_features( "region_key": "region", "instance_key": "label_id", } - adata.obs["region"] = pd.Categorical( - [labels_key if labels_key is not None else shapes_key] * len(adata) - ) - # here we either use the cell_ids or the index of the shapes. Needed + adata.obs["region"] = pd.Categorical([labels_key if labels_key is not None else shapes_key] * len(adata)) + # here we either use the cell_ids or the index of the shapes. Needed # because when converting the shapes to labels, a potential index 0 - # in the shapes is set to 1 in the labels and therefore we'd otherwise + # in the shapes is set to 1 in the labels and therefore we'd otherwise # be off-by-one in the label_id. adata.obs["label_id"] = sdata.shapes[shapes_key].index.values if shapes_key is not None else cell_ids From 6d056b48f06bcd9460d664b90496dd296319b94a Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 29 Apr 2025 15:55:10 -0400 Subject: [PATCH 10/37] fixed mypy --- src/squidpy/exp/_feature.py | 90 +++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 34 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index bee79a1a8..e642fc581 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -9,6 +9,7 @@ import anndata as ad import numpy as np +import numpy.typing as npt import pandas as pd import xarray as xr from cp_measure.bulk import get_core_measurements, get_correlation_measurements @@ -53,6 +54,12 @@ "intensity_std", } +# Define array types using modern syntax +NDArray = npt.NDArray[Any] # Generic array +FloatArray = npt.NDArray[np.float32] # Float32 array +IntArray = npt.NDArray[np.int_] # Integer array +BoolArray = npt.NDArray[np.bool_] # Boolean array + @d.dedent @inject_docs(f=ImageFeature) @@ -199,6 +206,7 @@ def calculate_image_features( cell_ids = cell_ids[cell_ids != 0] # Sort cell_ids to ensure consistent order cell_ids = np.sort(cell_ids) + cell_ids_list = cell_ids.tolist() # Convert to list for parallelize all_features = [] n_channels = image.shape[0] @@ -210,7 +218,7 @@ def calculate_image_features( logg.info("Calculating 'skimage' label features.") res = parallelize( _get_regionprops_features, - collection=cell_ids, + collection=cell_ids_list, extractor=pd.concat, n_jobs=n_jobs, backend=backend, @@ -226,7 +234,7 @@ def calculate_image_features( logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.") res = parallelize( _get_regionprops_features, - collection=cell_ids, + collection=cell_ids_list, extractor=pd.concat, n_jobs=n_jobs, backend=backend, @@ -234,7 +242,7 @@ def calculate_image_features( verbose=verbose, )(labels=labels, intensity_image=ch_image) # Append channel names to each feature column - res = res.rename(columns=lambda col: f"{col}_{ch_name}") + res = res.rename(columns=lambda col, ch_name=ch_name: f"{col}_{ch_name}") all_features.append(res) if "cpmeasure:core" in measurements: @@ -245,7 +253,7 @@ def calculate_image_features( logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.") res = parallelize( _calculate_features_helper, - collection=cell_ids, + collection=cell_ids_list, extractor=pd.concat, n_jobs=n_jobs, backend=backend, @@ -266,7 +274,7 @@ def calculate_image_features( ch2_image = image[ch2_idx] res = parallelize( _calculate_features_helper, - collection=cell_ids, + collection=cell_ids_list, extractor=pd.concat, n_jobs=n_jobs, backend=backend, @@ -307,7 +315,10 @@ def calculate_image_features( def _extract_features_from_regionprops( - region_obj: Any, props: set[str], cell_id: int, skip_callable: bool = False + region_obj: Any, + props: set[str], + cell_id: int, + skip_callable: bool = False, ) -> dict[str, float]: """Extract features from a regionprops object given a list of properties.""" cell_features = {} @@ -325,18 +336,18 @@ def _extract_features_from_regionprops( for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) else: - cell_features[prop] = value + cell_features[prop] = float(value.flatten()[0]) # Convert to float else: cell_features[prop] = float(value) - except Exception as e: + except (ValueError, TypeError, AttributeError) as e: logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}") continue return cell_features def _calculate_regionprops_from_crop( - cell_mask_cropped: np.ndarray, - intensity_image_cropped: np.ndarray | None, + cell_mask_cropped: NDArray, + intensity_image_cropped: NDArray | None, cell_id: int, ) -> dict[str, float]: """ @@ -358,7 +369,11 @@ def _calculate_regionprops_from_crop( return _extract_features_from_regionprops(region_props[0], _INTENSITY_PROPS, cell_id, skip_callable=True) -def _append_channel_names(features: dict, channel1: str, channel2: str | None = None) -> dict: +def _append_channel_names( + features: dict[str, Any], + channel1: str | None, + channel2: str | None = None, +) -> dict[str, Any]: """Append channel name(s) to all keys in the feature dictionary.""" if channel2 is None: return {f"{k}_{channel1}": v for k, v in features.items()} @@ -368,11 +383,11 @@ def _append_channel_names(features: dict, channel1: str, channel2: str | None = def _prepare_images_for_measurement( name: str, - cell_mask: np.ndarray, - img1: np.ndarray, - img2: np.ndarray | None, - conv_params: dict, -) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]: + cell_mask: NDArray, + img1: NDArray, + img2: NDArray | None, + conv_params: dict[str, Any], +) -> tuple[NDArray, NDArray | None, NDArray | None]: """ Convert inputs to the appropriate dtype based on the measurement type. """ @@ -399,12 +414,12 @@ def _prepare_images_for_measurement( def _get_cell_crops( cell_id: int, - labels: np.ndarray, - image1: np.ndarray | None = None, - image2: np.ndarray | None = None, + labels: NDArray, + image1: NDArray | None = None, + image2: NDArray | None = None, pad: int = 1, verbose: bool = False, -) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None: +) -> tuple[NDArray, NDArray | None, NDArray | None] | None: """Generator function to get cropped arrays for a cell. Parameters @@ -461,13 +476,13 @@ def _get_cell_crops( def _get_regionprops_features( cell_ids: Sequence[int], - labels: np.ndarray, - intensity_image: np.ndarray | None = None, + labels: NDArray, + intensity_image: NDArray | None = None, queue: Any | None = None, ) -> pd.DataFrame: """Calculate regionprops features for each cell from the full label image.""" # Initialize features dictionary with None values to preserve order - features = {cell_id: None for cell_id in cell_ids} + features = dict.fromkeys(cell_ids, None) with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -492,10 +507,10 @@ def _get_regionprops_features( def _measurement_wrapper( - func: Callable, - mask: np.ndarray, - image1: np.ndarray, - image2: np.ndarray | None = None, + func: Callable[..., dict[str, Any]], + mask: NDArray, + image1: NDArray | None, + image2: NDArray | None = None, ) -> dict[str, Any]: """Wrapper function to handle both core and correlation measurements. @@ -515,14 +530,16 @@ def _measurement_wrapper( ------- Dictionary of feature values """ + if image1 is None: + return {} # Return empty dict if no image data return func(mask, image1) if image2 is None else func(image1, image2, mask) def _calculate_features_helper( cell_ids: Sequence[int], - labels: np.ndarray, - image1: np.ndarray, - image2: np.ndarray | None, + labels: NDArray, + image1: NDArray, + image2: NDArray | None, measurements: dict[str, Any], channel1_name: str | None = None, channel2_name: str | None = None, @@ -531,7 +548,7 @@ def _calculate_features_helper( ) -> pd.DataFrame: """Helper function to calculate features for a subset of cells.""" # Initialize features dictionary with None values to preserve order - features_dict = {cell_id: None for cell_id in cell_ids} + features_dict = dict.fromkeys(cell_ids, None) # Pre-allocate lists for type conversion uint8_features = [ @@ -580,7 +597,7 @@ def _calculate_features_helper( else: region_features = _append_channel_names(region_features, channel1_name, channel2_name) cell_features.update(region_features) - except Exception as e: + except (ValueError, TypeError, AttributeError) as e: if verbose: logg.warning(f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}") @@ -589,6 +606,8 @@ def _calculate_features_helper( try: with warnings.catch_warnings(): warnings.simplefilter("ignore") + if image1_cropped is None: + continue mask_conv, img1_conv, img2_conv = _prepare_images_for_measurement( name, cell_mask_cropped, @@ -608,7 +627,7 @@ def _calculate_features_helper( else: feature_dict = _append_channel_names(feature_dict, channel1_name, channel2_name) cell_features.update(feature_dict) - except Exception as e: + except (ValueError, TypeError, AttributeError) as e: if verbose: logg.warning(f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}") @@ -627,7 +646,10 @@ def _calculate_features_helper( return df -def _get_array_from_DataTree_or_DataArray(data: xr.DataTree | xr.DataArray, scale: str | None = None) -> np.ndarray: +def _get_array_from_DataTree_or_DataArray( + data: xr.DataTree | xr.DataArray, + scale: str | None = None, +) -> NDArray: """ Returns a NumPy array for the given data and scale. If data is an xr.DataTree, it checks for the scale key and computes the image. From 5e11b8dcb60002d03c718865f24a46f703f8168e Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 29 Apr 2025 16:33:20 -0400 Subject: [PATCH 11/37] numba --- src/squidpy/exp/_feature.py | 150 +++++++++++++++++++++++++----------- 1 file changed, 106 insertions(+), 44 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index e642fc581..8f734ba00 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -23,6 +23,8 @@ from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, _get_n_cores, parallelize +from numba import njit, prange +import os __all__ = ["calculate_image_features"] @@ -321,27 +323,63 @@ def _extract_features_from_regionprops( skip_callable: bool = False, ) -> dict[str, float]: """Extract features from a regionprops object given a list of properties.""" - cell_features = {} + # Pre-allocate arrays for common cases + max_1d_features = 10 # Adjust based on typical usage + max_2d_features = 10 # Adjust based on typical usage + + # Initialize arrays for batch processing + names_1d = np.empty(max_1d_features, dtype=object) + values_1d = np.empty(max_1d_features, dtype=float) + names_2d = np.empty(max_2d_features, dtype=object) + values_2d = np.empty(max_2d_features, dtype=float) + + # Initialize counters + count_1d = 0 + count_2d = 0 + scalar_features = {} + for prop in props: try: value = getattr(region_obj, prop) if skip_callable and callable(value): continue + if isinstance(value, np.ndarray | list | tuple): value = np.array(value) if value.ndim == 1: - for i, v in enumerate(value): - cell_features[f"{prop}_{i}"] = float(v) + # Vectorized operation for 1D arrays + n = len(value) + if count_1d + n <= max_1d_features: + indices = np.arange(n) + names_1d[count_1d:count_1d+n] = [f"{prop}_{i}" for i in indices] + values_1d[count_1d:count_1d+n] = value.astype(float) + count_1d += n elif value.ndim == 2: - for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): - cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) + # Vectorized operation for 2D arrays + rows, cols = value.shape + n = rows * cols + if count_2d + n <= max_2d_features: + i, j = np.meshgrid(range(rows), range(cols), indexing='ij') + names_2d[count_2d:count_2d+n] = [f"{prop}_{i_}_{j_}" for i_, j_ in zip(i.ravel(), j.ravel())] + values_2d[count_2d:count_2d+n] = value.ravel().astype(float) + count_2d += n else: - cell_features[prop] = float(value.flatten()[0]) # Convert to float + scalar_features[prop] = float(value.flatten()[0]) else: - cell_features[prop] = float(value) + scalar_features[prop] = float(value) + except (ValueError, TypeError, AttributeError) as e: logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}") continue + + # Combine all features + cell_features = {} + if count_1d > 0: + cell_features.update(dict(zip(names_1d[:count_1d], values_1d[:count_1d]))) + if count_2d > 0: + cell_features.update(dict(zip(names_2d[:count_2d], values_2d[:count_2d]))) + cell_features.update(scalar_features) + return cell_features @@ -412,65 +450,89 @@ def _prepare_images_for_measurement( return mask, image1_prepared, image2_prepared -def _get_cell_crops( +@njit(fastmath=True) +def _get_cell_crops_numba( cell_id: int, - labels: NDArray, - image1: NDArray | None = None, - image2: NDArray | None = None, + labels: np.ndarray, + image1: np.ndarray, + image2: np.ndarray, pad: int = 1, - verbose: bool = False, -) -> tuple[NDArray, NDArray | None, NDArray | None] | None: - """Generator function to get cropped arrays for a cell. - - Parameters - ---------- - cell_id - The ID of the cell to process - labels - The labels array containing cell masks - image1 - First image to crop - image2 - Optional second image to crop - pad - Amount of padding to add around the cell - verbose - Whether to print warning messages - - Returns - ------- - Tuple of (cell_mask_cropped, image1_cropped, image2_cropped) or None if cell is empty +) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """Numba-accelerated version of _get_cell_crops. + + Note: image1 and image2 should be passed as empty arrays (np.zeros((0,0))) if not used. """ + # Find cell boundaries cell_mask = labels == cell_id y_indices, x_indices = np.where(cell_mask) - if len(y_indices) == 0: # Skip empty cells - return None + if len(y_indices) == 0: + # Return empty arrays if no cell found + return np.zeros((0, 0), dtype=np.bool_), np.zeros((0, 0), dtype=image1.dtype), np.zeros((0, 0), dtype=image2.dtype) y_min, y_max = y_indices.min(), y_indices.max() x_min, x_max = x_indices.min(), x_indices.max() height, width = labels.shape + # Calculate padding y_pad_min = min(pad, y_min) y_pad_max = min(pad, height - y_max - 1) x_pad_min = min(pad, x_min) x_pad_max = min(pad, width - x_max - 1) + # Apply padding y_min -= y_pad_min y_max += y_pad_max x_min -= x_pad_min x_max += x_pad_max - if verbose and (y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad): - logg.warning( - f"Cell {cell_id} is at image border. Padding is asymmetric: " - f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, " - f"x: {x_pad_min}/{pad} left, {x_pad_max}/{pad} right" - ) + # Calculate crop dimensions + y_size = y_max - y_min + x_size = x_max - x_min + + # Create output arrays + cell_mask_cropped = np.zeros((y_size, x_size), dtype=np.bool_) + image1_cropped = np.zeros((y_size, x_size), dtype=image1.dtype) + image2_cropped = np.zeros((y_size, x_size), dtype=image2.dtype) + + # Copy data + for i in range(y_size): + for j in range(x_size): + cell_mask_cropped[i, j] = cell_mask[y_min + i, x_min + j] + if image1.size > 0: # Only copy if image1 is not empty + image1_cropped[i, j] = image1[y_min + i, x_min + j] + if image2.size > 0: # Only copy if image2 is not empty + image2_cropped[i, j] = image2[y_min + i, x_min + j] + + return cell_mask_cropped, image1_cropped, image2_cropped - cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] - image1_cropped = None if image1 is None else image1[y_min:y_max, x_min:x_max] - image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max] +def _get_cell_crops( + cell_id: int, + labels: NDArray, + image1: NDArray | None = None, + image2: NDArray | None = None, + pad: int = 1, + verbose: bool = False, +) -> tuple[NDArray, NDArray | None, NDArray | None] | None: + """Generator function to get cropped arrays for a cell.""" + # Create empty arrays for unused images + empty_image = np.zeros((0, 0), dtype=np.float32) + image1_np = image1 if image1 is not None else empty_image + image2_np = image2 if image2 is not None else empty_image + + # Use Numba-accelerated version + cell_mask_cropped, image1_cropped, image2_cropped = _get_cell_crops_numba( + cell_id, labels, image1_np, image2_np, pad + ) + + # Return None if no cell found + if cell_mask_cropped.size == 0: + return None + + # Convert back to None for unused images + image1_cropped = image1_cropped if image1 is not None else None + image2_cropped = image2_cropped if image2 is not None else None + return cell_mask_cropped, image1_cropped, image2_cropped From 7963e845f73972f6231ffb01df164eb3f35402ca Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 29 Apr 2025 16:39:24 -0400 Subject: [PATCH 12/37] pre-alloc types --- src/squidpy/exp/_feature.py | 89 +++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 8f734ba00..74a635d55 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -62,6 +62,18 @@ IntArray = npt.NDArray[np.int_] # Integer array BoolArray = npt.NDArray[np.bool_] # Boolean array +# Define property sets at module level for better performance +_SCALAR_PROPS = frozenset({ + "area", "area_filled", "area_convex", "num_pixels", + "axis_major_length", "axis_minor_length", "eccentricity", + "equivalent_diameter", "extent", "feret_diameter_max", + "solidity", "euler_number", "perimeter", "perimeter_crofton" +}) + +_ARRAY_1D_PROPS = frozenset({"centroid", "centroid_local"}) +_ARRAY_2D_PROPS = frozenset({"inertia_tensor"}) +_SPECIAL_PROPS = frozenset({"inertia_tensor_eigvals"}) + @d.dedent @inject_docs(f=ImageFeature) @@ -323,20 +335,7 @@ def _extract_features_from_regionprops( skip_callable: bool = False, ) -> dict[str, float]: """Extract features from a regionprops object given a list of properties.""" - # Pre-allocate arrays for common cases - max_1d_features = 10 # Adjust based on typical usage - max_2d_features = 10 # Adjust based on typical usage - - # Initialize arrays for batch processing - names_1d = np.empty(max_1d_features, dtype=object) - values_1d = np.empty(max_1d_features, dtype=float) - names_2d = np.empty(max_2d_features, dtype=object) - values_2d = np.empty(max_2d_features, dtype=float) - - # Initialize counters - count_1d = 0 - count_2d = 0 - scalar_features = {} + cell_features = {} for prop in props: try: @@ -344,42 +343,44 @@ def _extract_features_from_regionprops( if skip_callable and callable(value): continue - if isinstance(value, np.ndarray | list | tuple): - value = np.array(value) - if value.ndim == 1: - # Vectorized operation for 1D arrays - n = len(value) - if count_1d + n <= max_1d_features: - indices = np.arange(n) - names_1d[count_1d:count_1d+n] = [f"{prop}_{i}" for i in indices] - values_1d[count_1d:count_1d+n] = value.astype(float) - count_1d += n - elif value.ndim == 2: - # Vectorized operation for 2D arrays - rows, cols = value.shape - n = rows * cols - if count_2d + n <= max_2d_features: - i, j = np.meshgrid(range(rows), range(cols), indexing='ij') - names_2d[count_2d:count_2d+n] = [f"{prop}_{i_}_{j_}" for i_, j_ in zip(i.ravel(), j.ravel())] - values_2d[count_2d:count_2d+n] = value.ravel().astype(float) - count_2d += n - else: - scalar_features[prop] = float(value.flatten()[0]) + if prop in _SCALAR_PROPS: + cell_features[prop] = float(value) + elif prop in _ARRAY_1D_PROPS: + # Convert to array only once + value = np.asarray(value) + for i, v in enumerate(value): + cell_features[f"{prop}_{i}"] = float(v) + elif prop in _ARRAY_2D_PROPS: + # Convert to array only once + value = np.asarray(value) + for i in range(value.shape[0]): + for j in range(value.shape[1]): + cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) + elif prop in _SPECIAL_PROPS: + # Convert to array only once + value = np.asarray(value) + for i, v in enumerate(value): + cell_features[f"{prop}_{i}"] = float(v) else: - scalar_features[prop] = float(value) + # Fallback for any other properties + if isinstance(value, (np.ndarray, list, tuple)): + value = np.asarray(value) + if value.ndim == 1: + for i, v in enumerate(value): + cell_features[f"{prop}_{i}"] = float(v) + elif value.ndim == 2: + for i in range(value.shape[0]): + for j in range(value.shape[1]): + cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) + else: + cell_features[prop] = float(value.flatten()[0]) + else: + cell_features[prop] = float(value) except (ValueError, TypeError, AttributeError) as e: logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}") continue - # Combine all features - cell_features = {} - if count_1d > 0: - cell_features.update(dict(zip(names_1d[:count_1d], values_1d[:count_1d]))) - if count_2d > 0: - cell_features.update(dict(zip(names_2d[:count_2d], values_2d[:count_2d]))) - cell_features.update(scalar_features) - return cell_features From 6998a7d46ab9f7a30eee1f0343f3477c764e9ae2 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 29 Apr 2025 17:31:11 -0400 Subject: [PATCH 13/37] pyproject + numba --- pyproject.toml | 2 +- src/squidpy/exp/_feature.py | 84 ++++++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 26 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cd4e7a665..007118929 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,7 @@ dependencies = [ "zarr>=2.6.1,<3.0.0", "spatialdata>=0.2.5", "centrosome>=1.2.3", - "cp-measure>=0.1.4" + "cp_measure>=0.1.4" ] [project.optional-dependencies] diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 74a635d55..562286e99 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -463,46 +463,57 @@ def _get_cell_crops_numba( Note: image1 and image2 should be passed as empty arrays (np.zeros((0,0))) if not used. """ - # Find cell boundaries + # Find cell boundaries using vectorized operations cell_mask = labels == cell_id - y_indices, x_indices = np.where(cell_mask) - if len(y_indices) == 0: - # Return empty arrays if no cell found + if not np.any(cell_mask): return np.zeros((0, 0), dtype=np.bool_), np.zeros((0, 0), dtype=image1.dtype), np.zeros((0, 0), dtype=image2.dtype) + # Get non-zero indices efficiently + y_indices, x_indices = np.nonzero(cell_mask) y_min, y_max = y_indices.min(), y_indices.max() x_min, x_max = x_indices.min(), x_indices.max() + + # Get image dimensions height, width = labels.shape - # Calculate padding + # Calculate padding with boundary checks in one step y_pad_min = min(pad, y_min) y_pad_max = min(pad, height - y_max - 1) x_pad_min = min(pad, x_min) x_pad_max = min(pad, width - x_max - 1) - # Apply padding - y_min -= y_pad_min - y_max += y_pad_max - x_min -= x_pad_min - x_max += x_pad_max - - # Calculate crop dimensions - y_size = y_max - y_min - x_size = x_max - x_min + # Calculate crop dimensions with padding + y_start = y_min - y_pad_min + y_end = y_max + y_pad_max + 1 + x_start = x_min - x_pad_min + x_end = x_max + x_pad_max + 1 - # Create output arrays + # Create output arrays with exact size + y_size = y_end - y_start + x_size = x_end - x_start + + # Create cell mask crop cell_mask_cropped = np.zeros((y_size, x_size), dtype=np.bool_) - image1_cropped = np.zeros((y_size, x_size), dtype=image1.dtype) - image2_cropped = np.zeros((y_size, x_size), dtype=image2.dtype) - - # Copy data for i in range(y_size): for j in range(x_size): - cell_mask_cropped[i, j] = cell_mask[y_min + i, x_min + j] - if image1.size > 0: # Only copy if image1 is not empty - image1_cropped[i, j] = image1[y_min + i, x_min + j] - if image2.size > 0: # Only copy if image2 is not empty - image2_cropped[i, j] = image2[y_min + i, x_min + j] + cell_mask_cropped[i, j] = cell_mask[y_start + i, x_start + j] + + # Handle image crops efficiently + if image1.size > 0: + image1_cropped = np.zeros((y_size, x_size), dtype=image1.dtype) + for i in range(y_size): + for j in range(x_size): + image1_cropped[i, j] = image1[y_start + i, x_start + j] + else: + image1_cropped = np.zeros((0, 0), dtype=image1.dtype) + + if image2.size > 0: + image2_cropped = np.zeros((y_size, x_size), dtype=image2.dtype) + for i in range(y_size): + for j in range(x_size): + image2_cropped[i, j] = image2[y_start + i, x_start + j] + else: + image2_cropped = np.zeros((0, 0), dtype=image2.dtype) return cell_mask_cropped, image1_cropped, image2_cropped @@ -595,7 +606,30 @@ def _measurement_wrapper( """ if image1 is None: return {} # Return empty dict if no image data - return func(mask, image1) if image2 is None else func(image1, image2, mask) + + try: + if image2 is None: + return func(mask, image1) + else: + # Check if we have valid data for correlation + if not np.any(mask) or not np.any(image1) or not np.any(image2): + # Get feature names from a successful call to maintain structure + dummy_mask = np.ones((2, 2), dtype=bool) + dummy_img = np.ones((2, 2), dtype=image1.dtype) + feature_names = func(dummy_img, dummy_img, dummy_mask).keys() + # Return dictionary with NaN values for all features + return {name: np.nan for name in feature_names} + return func(image1, image2, mask) + except (IndexError, ValueError) as e: + # Handle cases where correlation calculation fails + if "index 0 is out of bounds" in str(e) or "size 0" in str(e): + # Get feature names from a successful call to maintain structure + dummy_mask = np.ones((2, 2), dtype=bool) + dummy_img = np.ones((2, 2), dtype=image1.dtype) + feature_names = func(dummy_img, dummy_img, dummy_mask).keys() + # Return dictionary with NaN values for all features + return {name: np.nan for name in feature_names} + raise # Re-raise other errors def _calculate_features_helper( From f7342c8320820739d00ccf272f0df1d311ef3024 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 21:31:30 +0000 Subject: [PATCH 14/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/exp/_feature.py | 62 +++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 562286e99..6cc87a007 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -3,6 +3,7 @@ from __future__ import annotations import itertools +import os import warnings from collections.abc import Callable, Sequence from typing import Any @@ -13,6 +14,7 @@ import pandas as pd import xarray as xr from cp_measure.bulk import get_core_measurements, get_correlation_measurements +from numba import njit, prange from scipy import ndimage from skimage import measure from skimage.measure import label @@ -23,8 +25,6 @@ from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, _get_n_cores, parallelize -from numba import njit, prange -import os __all__ = ["calculate_image_features"] @@ -63,12 +63,24 @@ BoolArray = npt.NDArray[np.bool_] # Boolean array # Define property sets at module level for better performance -_SCALAR_PROPS = frozenset({ - "area", "area_filled", "area_convex", "num_pixels", - "axis_major_length", "axis_minor_length", "eccentricity", - "equivalent_diameter", "extent", "feret_diameter_max", - "solidity", "euler_number", "perimeter", "perimeter_crofton" -}) +_SCALAR_PROPS = frozenset( + { + "area", + "area_filled", + "area_convex", + "num_pixels", + "axis_major_length", + "axis_minor_length", + "eccentricity", + "equivalent_diameter", + "extent", + "feret_diameter_max", + "solidity", + "euler_number", + "perimeter", + "perimeter_crofton", + } +) _ARRAY_1D_PROPS = frozenset({"centroid", "centroid_local"}) _ARRAY_2D_PROPS = frozenset({"inertia_tensor"}) @@ -336,13 +348,13 @@ def _extract_features_from_regionprops( ) -> dict[str, float]: """Extract features from a regionprops object given a list of properties.""" cell_features = {} - + for prop in props: try: value = getattr(region_obj, prop) if skip_callable and callable(value): continue - + if prop in _SCALAR_PROPS: cell_features[prop] = float(value) elif prop in _ARRAY_1D_PROPS: @@ -376,11 +388,11 @@ def _extract_features_from_regionprops( cell_features[prop] = float(value.flatten()[0]) else: cell_features[prop] = float(value) - + except (ValueError, TypeError, AttributeError) as e: logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}") continue - + return cell_features @@ -460,19 +472,23 @@ def _get_cell_crops_numba( pad: int = 1, ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """Numba-accelerated version of _get_cell_crops. - + Note: image1 and image2 should be passed as empty arrays (np.zeros((0,0))) if not used. """ # Find cell boundaries using vectorized operations cell_mask = labels == cell_id if not np.any(cell_mask): - return np.zeros((0, 0), dtype=np.bool_), np.zeros((0, 0), dtype=image1.dtype), np.zeros((0, 0), dtype=image2.dtype) + return ( + np.zeros((0, 0), dtype=np.bool_), + np.zeros((0, 0), dtype=image1.dtype), + np.zeros((0, 0), dtype=image2.dtype), + ) # Get non-zero indices efficiently y_indices, x_indices = np.nonzero(cell_mask) y_min, y_max = y_indices.min(), y_indices.max() x_min, x_max = x_indices.min(), x_indices.max() - + # Get image dimensions height, width = labels.shape @@ -491,13 +507,13 @@ def _get_cell_crops_numba( # Create output arrays with exact size y_size = y_end - y_start x_size = x_end - x_start - + # Create cell mask crop cell_mask_cropped = np.zeros((y_size, x_size), dtype=np.bool_) for i in range(y_size): for j in range(x_size): cell_mask_cropped[i, j] = cell_mask[y_start + i, x_start + j] - + # Handle image crops efficiently if image1.size > 0: image1_cropped = np.zeros((y_size, x_size), dtype=image1.dtype) @@ -506,7 +522,7 @@ def _get_cell_crops_numba( image1_cropped[i, j] = image1[y_start + i, x_start + j] else: image1_cropped = np.zeros((0, 0), dtype=image1.dtype) - + if image2.size > 0: image2_cropped = np.zeros((y_size, x_size), dtype=image2.dtype) for i in range(y_size): @@ -531,20 +547,20 @@ def _get_cell_crops( empty_image = np.zeros((0, 0), dtype=np.float32) image1_np = image1 if image1 is not None else empty_image image2_np = image2 if image2 is not None else empty_image - + # Use Numba-accelerated version cell_mask_cropped, image1_cropped, image2_cropped = _get_cell_crops_numba( cell_id, labels, image1_np, image2_np, pad ) - + # Return None if no cell found if cell_mask_cropped.size == 0: return None - + # Convert back to None for unused images image1_cropped = image1_cropped if image1 is not None else None image2_cropped = image2_cropped if image2 is not None else None - + return cell_mask_cropped, image1_cropped, image2_cropped @@ -606,7 +622,7 @@ def _measurement_wrapper( """ if image1 is None: return {} # Return empty dict if no image data - + try: if image2 is None: return func(mask, image1) From b2eb6aa5426a8992dc38ad252d6090a96cd4c00a Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 29 Apr 2025 17:35:07 -0400 Subject: [PATCH 15/37] mypy --- src/squidpy/exp/_feature.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 6cc87a007..41e0a9e86 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -375,7 +375,7 @@ def _extract_features_from_regionprops( cell_features[f"{prop}_{i}"] = float(v) else: # Fallback for any other properties - if isinstance(value, (np.ndarray, list, tuple)): + if isinstance(value, np.ndarray | list | tuple): value = np.asarray(value) if value.ndim == 1: for i, v in enumerate(value): @@ -466,11 +466,11 @@ def _prepare_images_for_measurement( @njit(fastmath=True) def _get_cell_crops_numba( cell_id: int, - labels: np.ndarray, - image1: np.ndarray, - image2: np.ndarray, + labels: npt.NDArray[np.int_], + image1: npt.NDArray[np.float32], + image2: npt.NDArray[np.float32], pad: int = 1, -) -> tuple[np.ndarray, np.ndarray, np.ndarray]: +) -> tuple[npt.NDArray[np.bool_], npt.NDArray[np.float32], npt.NDArray[np.float32]]: """Numba-accelerated version of _get_cell_crops. Note: image1 and image2 should be passed as empty arrays (np.zeros((0,0))) if not used. @@ -553,7 +553,6 @@ def _get_cell_crops( cell_id, labels, image1_np, image2_np, pad ) - # Return None if no cell found if cell_mask_cropped.size == 0: return None @@ -634,7 +633,7 @@ def _measurement_wrapper( dummy_img = np.ones((2, 2), dtype=image1.dtype) feature_names = func(dummy_img, dummy_img, dummy_mask).keys() # Return dictionary with NaN values for all features - return {name: np.nan for name in feature_names} + return dict.fromkeys(feature_names, np.nan) return func(image1, image2, mask) except (IndexError, ValueError) as e: # Handle cases where correlation calculation fails @@ -644,7 +643,7 @@ def _measurement_wrapper( dummy_img = np.ones((2, 2), dtype=image1.dtype) feature_names = func(dummy_img, dummy_img, dummy_mask).keys() # Return dictionary with NaN values for all features - return {name: np.nan for name in feature_names} + return dict.fromkeys(feature_names, np.nan) raise # Re-raise other errors From ce28cffb8c6cd7297da31c899ee92384c3b2a614 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Jan 2026 12:49:02 +0000 Subject: [PATCH 16/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pyproject.toml | 2 +- src/squidpy/experimental/__init__.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index de4b07ab2..c0de03060 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "aiohttp>=3.8.1", "anndata>=0.9", "centrosome>=1.2.3", - "cp_measure>=0.1.4", + "cp-measure>=0.1.4", "cycler>=0.11", "dask[array]>=2021.2,<=2024.11.2", "dask-image>=0.5", diff --git a/src/squidpy/experimental/__init__.py b/src/squidpy/experimental/__init__.py index b963b0d0f..6ed68ff79 100644 --- a/src/squidpy/experimental/__init__.py +++ b/src/squidpy/experimental/__init__.py @@ -6,7 +6,8 @@ from __future__ import annotations -from . import im, pl from squidpy.experimental._feature import calculate_image_features +from . import im, pl + __all__ = ["im", "pl", "calculate_image_features"] From e1327ba7c486128d25d7057d1cf7c2bd224411d9 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 27 Jan 2026 13:55:11 +0100 Subject: [PATCH 17/37] Reorganize CellProfiler features into experimental.im submodule Moved calculate_image_features from experimental._feature to experimental.im._feature to follow the existing module structure. Now accessible as squidpy.experimental.im.calculate_image_features --- src/squidpy/experimental/__init__.py | 3 +-- src/squidpy/experimental/im/__init__.py | 2 ++ src/squidpy/experimental/{ => im}/_feature.py | 0 3 files changed, 3 insertions(+), 2 deletions(-) rename src/squidpy/experimental/{ => im}/_feature.py (100%) diff --git a/src/squidpy/experimental/__init__.py b/src/squidpy/experimental/__init__.py index b963b0d0f..435cd0098 100644 --- a/src/squidpy/experimental/__init__.py +++ b/src/squidpy/experimental/__init__.py @@ -7,6 +7,5 @@ from __future__ import annotations from . import im, pl -from squidpy.experimental._feature import calculate_image_features -__all__ = ["im", "pl", "calculate_image_features"] +__all__ = ["im", "pl"] diff --git a/src/squidpy/experimental/im/__init__.py b/src/squidpy/experimental/im/__init__.py index b88006688..618bc913c 100644 --- a/src/squidpy/experimental/im/__init__.py +++ b/src/squidpy/experimental/im/__init__.py @@ -6,12 +6,14 @@ WekaParams, detect_tissue, ) +from ._feature import calculate_image_features from ._make_tiles import make_tiles, make_tiles_from_spots __all__ = [ "BackgroundDetectionParams", "FelzenszwalbParams", "WekaParams", + "calculate_image_features", "detect_tissue", "make_tiles", "make_tiles_from_spots", diff --git a/src/squidpy/experimental/_feature.py b/src/squidpy/experimental/im/_feature.py similarity index 100% rename from src/squidpy/experimental/_feature.py rename to src/squidpy/experimental/im/_feature.py From c5ab02329461017d73466f36f71076540bdd7329 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 27 Jan 2026 13:59:26 +0100 Subject: [PATCH 18/37] Remove unused imports --- src/squidpy/experimental/im/_feature.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/squidpy/experimental/im/_feature.py b/src/squidpy/experimental/im/_feature.py index 41e0a9e86..e3bd3b57d 100644 --- a/src/squidpy/experimental/im/_feature.py +++ b/src/squidpy/experimental/im/_feature.py @@ -2,8 +2,6 @@ from __future__ import annotations -import itertools -import os import warnings from collections.abc import Callable, Sequence from typing import Any @@ -14,8 +12,7 @@ import pandas as pd import xarray as xr from cp_measure.bulk import get_core_measurements, get_correlation_measurements -from numba import njit, prange -from scipy import ndimage +from numba import njit from skimage import measure from skimage.measure import label from spatialdata import SpatialData, rasterize From ab626454bec5897695d8921cf2e8ba0e603f850a Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 27 Jan 2026 14:08:58 +0100 Subject: [PATCH 19/37] Add minimal tests for calculate_image_features - Test basic feature calculation with shapes - Test copy vs inplace behavior - Test error cases for invalid keys - Uses sdata_hne fixture with skimage:label for fast execution --- .../test_calculate_image_features.py | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 tests/experimental/test_calculate_image_features.py diff --git a/tests/experimental/test_calculate_image_features.py b/tests/experimental/test_calculate_image_features.py new file mode 100644 index 000000000..2306fc3d8 --- /dev/null +++ b/tests/experimental/test_calculate_image_features.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +import pandas as pd +import pytest + +import squidpy as sq + + +class TestCalculateImageFeatures: + """Tests for calculate_image_features function.""" + + def test_calculate_features_with_shapes(self, sdata_hne): + """Test basic feature calculation with shapes.""" + # Use minimal measurements to keep test fast + sq.experimental.im.calculate_image_features( + sdata_hne, + image_key="hne", + shapes_key="spots", + measurements=["skimage:label"], + adata_key_added="morphology", + n_jobs=1, + inplace=True, + ) + + # Check that the table was added + assert "morphology" in sdata_hne.tables + adata = sdata_hne.tables["morphology"] + + # Check basic structure + assert adata.n_obs > 0 + assert adata.n_vars > 0 + + # Check that spatialdata_attrs is set + assert "spatialdata_attrs" in adata.uns + assert adata.uns["spatialdata_attrs"]["region"] == "spots" + assert adata.uns["spatialdata_attrs"]["region_key"] == "region" + assert adata.uns["spatialdata_attrs"]["instance_key"] == "label_id" + + # Check that region and label_id are in obs + assert "region" in adata.obs + assert "label_id" in adata.obs + + def test_calculate_features_copy(self, sdata_hne): + """Test that copy=False returns DataFrame.""" + result = sq.experimental.im.calculate_image_features( + sdata_hne, + image_key="hne", + shapes_key="spots", + measurements=["skimage:label"], + n_jobs=1, + inplace=False, + ) + + # Should return DataFrame when inplace=False + assert isinstance(result, pd.DataFrame) + assert result.shape[0] > 0 + assert result.shape[1] > 0 + + def test_invalid_image_key(self, sdata_hne): + """Test error when image key doesn't exist.""" + with pytest.raises(ValueError, match="Image key 'nonexistent' not found"): + sq.experimental.im.calculate_image_features( + sdata_hne, + image_key="nonexistent", + shapes_key="spots", + measurements=["skimage:label"], + ) + + def test_invalid_shapes_key(self, sdata_hne): + """Test error when shapes key doesn't exist.""" + with pytest.raises(ValueError, match="Shapes key 'nonexistent' not found"): + sq.experimental.im.calculate_image_features( + sdata_hne, + image_key="hne", + shapes_key="nonexistent", + measurements=["skimage:label"], + ) + + def test_both_labels_and_shapes_error(self, sdata_hne): + """Test error when both labels_key and shapes_key are provided.""" + with pytest.raises(ValueError, match="Use either `labels_key` or `shapes_key`, not both"): + sq.experimental.im.calculate_image_features( + sdata_hne, + image_key="hne", + labels_key="fake_labels", + shapes_key="spots", + measurements=["skimage:label"], + ) + + def test_invalid_measurement(self, sdata_hne): + """Test error with invalid measurement type.""" + with pytest.raises(ValueError, match="Invalid measurement"): + sq.experimental.im.calculate_image_features( + sdata_hne, + image_key="hne", + shapes_key="spots", + measurements=["nonexistent:measurement"], + ) From 61ef03d4b2484a0235f2463e06daaf202f2c8460 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 27 Jan 2026 14:22:00 +0100 Subject: [PATCH 20/37] Fix tests: add scale parameter for multi-scale data --- tests/experimental/test_calculate_image_features.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/experimental/test_calculate_image_features.py b/tests/experimental/test_calculate_image_features.py index 2306fc3d8..33b2e9784 100644 --- a/tests/experimental/test_calculate_image_features.py +++ b/tests/experimental/test_calculate_image_features.py @@ -16,6 +16,7 @@ def test_calculate_features_with_shapes(self, sdata_hne): sdata_hne, image_key="hne", shapes_key="spots", + scale="scale0", measurements=["skimage:label"], adata_key_added="morphology", n_jobs=1, @@ -46,6 +47,7 @@ def test_calculate_features_copy(self, sdata_hne): sdata_hne, image_key="hne", shapes_key="spots", + scale="scale0", measurements=["skimage:label"], n_jobs=1, inplace=False, @@ -94,5 +96,6 @@ def test_invalid_measurement(self, sdata_hne): sdata_hne, image_key="hne", shapes_key="spots", + scale="scale0", measurements=["nonexistent:measurement"], ) From 5ba939d9d92a98cc4315207cfa9bf4f9ca6110d2 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 27 Jan 2026 14:37:20 +0100 Subject: [PATCH 21/37] Add tests for bugs described in issue --- .../test_calculate_image_features.py | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/tests/experimental/test_calculate_image_features.py b/tests/experimental/test_calculate_image_features.py index 33b2e9784..9165d8ab3 100644 --- a/tests/experimental/test_calculate_image_features.py +++ b/tests/experimental/test_calculate_image_features.py @@ -1,7 +1,11 @@ from __future__ import annotations +import numpy as np import pandas as pd import pytest +import xarray as xr +from spatialdata import SpatialData +from spatialdata.models import Image2DModel, Labels2DModel import squidpy as sq @@ -99,3 +103,66 @@ def test_invalid_measurement(self, sdata_hne): scale="scale0", measurements=["nonexistent:measurement"], ) + + def test_with_intensity_features(self, sdata_hne): + """Test intensity-based features with multi-channel image.""" + result = sq.experimental.im.calculate_image_features( + sdata_hne, + image_key="hne", + shapes_key="spots", + scale="scale0", + measurements=["skimage:label+image"], + n_jobs=1, + inplace=False, + ) + + assert result.shape[0] > 0 + assert result.shape[1] > 0 + # Column names should include channel information + assert any("_" in col for col in result.columns) + + def test_dimension_mismatch(self): + """Test error when image and labels have mismatched dimensions.""" + rng = np.random.default_rng(42) + + # Create image: 100x100, 3 channels + image_data = rng.integers(0, 255, (3, 100, 100), dtype=np.uint8) + image_xr = xr.DataArray( + image_data, + dims=["c", "y", "x"], + coords={"c": ["R", "G", "B"]}, + ) + + # Create labels: 80x80 (different dimensions) + labels_data = rng.integers(1, 10, (80, 80), dtype=np.uint32) + labels_xr = xr.DataArray(labels_data, dims=["y", "x"]) + + sdata = SpatialData( + images={"test_img": Image2DModel.parse(image_xr)}, + labels={"test_labels": Labels2DModel.parse(labels_xr)}, + ) + + with pytest.raises(ValueError, match="do not match"): + sq.experimental.im.calculate_image_features( + sdata, + image_key="test_img", + labels_key="test_labels", + measurements=["skimage:label"], + n_jobs=1, + ) + + def test_with_progress_bar(self, sdata_hne): + """Test that progress bar can be enabled.""" + result = sq.experimental.im.calculate_image_features( + sdata_hne, + image_key="hne", + shapes_key="spots", + scale="scale0", + measurements=["skimage:label"], + show_progress_bar=True, + n_jobs=1, + inplace=False, + ) + + assert isinstance(result, pd.DataFrame) + assert result.shape[0] > 0 From 3a2a642f6295c58adffeedbd9410b1830e37a17f Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 27 Jan 2026 15:52:06 +0100 Subject: [PATCH 22/37] reduced size of sdata obj for testing --- src/squidpy/experimental/im/_feature.py | 86 +++++++++++++++--- .../test_calculate_image_features.py | 89 +++++++++++++++---- 2 files changed, 142 insertions(+), 33 deletions(-) diff --git a/src/squidpy/experimental/im/_feature.py b/src/squidpy/experimental/im/_feature.py index e3bd3b57d..3251809cb 100644 --- a/src/squidpy/experimental/im/_feature.py +++ b/src/squidpy/experimental/im/_feature.py @@ -196,11 +196,59 @@ def calculate_image_features( if isinstance(measurements, str): measurements = [measurements] - if isinstance(measurements, list): - invalid_measurements = [m for m in measurements if m not in available_measurements] - if invalid_measurements: + # Parse measurements to support individual properties: "skimage:label:area" + parsed_label_props = None + parsed_intensity_props = None + has_cpmeasure_core = False + has_cpmeasure_correlation = False + + for m in measurements: + parts = m.split(":") + if len(parts) == 3 and parts[0] == "skimage": + # Individual property: "skimage:label:area" + group, prop = parts[1], parts[2] + if group == "label": + if prop not in _MASK_PROPS: + raise ValueError( + f"Unknown skimage label property: '{prop}'. " + f"Available: {sorted(_MASK_PROPS)}" + ) + if parsed_label_props is None: + parsed_label_props = set() + parsed_label_props.add(prop) + elif group == "label+image": + if prop not in _INTENSITY_PROPS: + raise ValueError( + f"Unknown skimage intensity property: '{prop}'. " + f"Available: {sorted(_INTENSITY_PROPS)}" + ) + if parsed_intensity_props is None: + parsed_intensity_props = set() + parsed_intensity_props.add(prop) + else: + raise ValueError(f"Unknown skimage group: '{group}'. Use 'label' or 'label+image'") + elif m == "skimage:label": + # Full label group + if parsed_label_props is None: + parsed_label_props = _MASK_PROPS.copy() + else: + parsed_label_props.update(_MASK_PROPS) + elif m == "skimage:label+image": + # Full intensity group + if parsed_intensity_props is None: + parsed_intensity_props = _INTENSITY_PROPS.copy() + else: + parsed_intensity_props.update(_INTENSITY_PROPS) + elif m == "cpmeasure:core": + has_cpmeasure_core = True + elif m == "cpmeasure:correlation": + has_cpmeasure_correlation = True + elif m not in available_measurements: raise ValueError( - f"Invalid measurement(s): {invalid_measurements}, available measurements: {available_measurements}" + f"Invalid measurement: '{m}'. " + f"Available: {available_measurements}, " + f"or use 'skimage:label:property' / 'skimage:label+image:property' " + f"for individual properties" ) if labels.size == 0: @@ -222,7 +270,7 @@ def calculate_image_features( if image.shape[1:] != labels.shape: raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}") - if "cpmeasure:correlation" in measurements: + if has_cpmeasure_correlation: measurements_corr = get_correlation_measurements() cell_ids = np.unique(labels) @@ -237,7 +285,7 @@ def calculate_image_features( logg.info(f"Using '{n_jobs}' core(s).") - if "skimage:label" in measurements: + if parsed_label_props is not None: logg.info("Calculating 'skimage' label features.") res = parallelize( _get_regionprops_features, @@ -247,14 +295,14 @@ def calculate_image_features( backend=backend, show_progress_bar=show_progress_bar, verbose=verbose, - )(labels=labels, intensity_image=None) + )(labels=labels, intensity_image=None, props=parsed_label_props) all_features.append(res) - if "skimage:label+image" in measurements: + if parsed_intensity_props is not None: for ch_idx in range(n_channels): ch_name = channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" ch_image = image[ch_idx] - logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.") + logg.info(f"Calculating 'skimage' image features for channel '{ch_name}'.") res = parallelize( _get_regionprops_features, collection=cell_ids_list, @@ -263,12 +311,12 @@ def calculate_image_features( backend=backend, show_progress_bar=show_progress_bar, verbose=verbose, - )(labels=labels, intensity_image=ch_image) + )(labels=labels, intensity_image=ch_image, props=parsed_intensity_props) # Append channel names to each feature column res = res.rename(columns=lambda col, ch_name=ch_name: f"{col}_{ch_name}") all_features.append(res) - if "cpmeasure:core" in measurements: + if has_cpmeasure_core: measurements_core = get_core_measurements() for ch_idx in range(n_channels): ch_name = channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" @@ -397,6 +445,7 @@ def _calculate_regionprops_from_crop( cell_mask_cropped: NDArray, intensity_image_cropped: NDArray | None, cell_id: int, + props: set[str], ) -> dict[str, float]: """ Calculate regionprops features from pre-cropped arrays. @@ -406,7 +455,7 @@ def _calculate_regionprops_from_crop( region_props = measure.regionprops(label_image=label(cell_mask_cropped)) if not region_props: return {} - return _extract_features_from_regionprops(region_props[0], _MASK_PROPS, cell_id) + return _extract_features_from_regionprops(region_props[0], props, cell_id) else: region_props = measure.regionprops( label_image=label(cell_mask_cropped), @@ -414,7 +463,7 @@ def _calculate_regionprops_from_crop( ) if not region_props: return {} - return _extract_features_from_regionprops(region_props[0], _INTENSITY_PROPS, cell_id, skip_callable=True) + return _extract_features_from_regionprops(region_props[0], props, cell_id, skip_callable=True) def _append_channel_names( @@ -565,6 +614,7 @@ def _get_regionprops_features( labels: NDArray, intensity_image: NDArray | None = None, queue: Any | None = None, + props: set[str] | None = None, ) -> pd.DataFrame: """Calculate regionprops features for each cell from the full label image.""" # Initialize features dictionary with None values to preserve order @@ -578,7 +628,15 @@ def _get_regionprops_features( if crop is None: continue cell_mask_cropped, intensity_image_cropped, _ = crop - cell_features = _calculate_regionprops_from_crop(cell_mask_cropped, intensity_image_cropped, cell_id) + # Default to full property sets for backward compatibility + if props is None: + props = _INTENSITY_PROPS if intensity_image is not None else _MASK_PROPS + cell_features = _calculate_regionprops_from_crop( + cell_mask_cropped, + intensity_image_cropped, + cell_id, + props, + ) features[cell_id] = cell_features if queue is not None: queue.put(Signal.UPDATE) diff --git a/tests/experimental/test_calculate_image_features.py b/tests/experimental/test_calculate_image_features.py index 9165d8ab3..534991b1a 100644 --- a/tests/experimental/test_calculate_image_features.py +++ b/tests/experimental/test_calculate_image_features.py @@ -4,20 +4,36 @@ import pandas as pd import pytest import xarray as xr -from spatialdata import SpatialData +from spatialdata import SpatialData, bounding_box_query from spatialdata.models import Image2DModel, Labels2DModel import squidpy as sq +@pytest.fixture() +def sdata_hne_small(sdata_hne): + """Small subset of sdata_hne for faster tests (10-100 spots).""" + # Query a small bounding box to get 10-100 spots + # Using a 500x500 pixel box should give us a reasonable number + sdata_small = bounding_box_query( + sdata_hne, + axes=["x", "y"], + min_coordinate=[1000, 1000], + max_coordinate=[1500, 1500], + target_coordinate_system="global", + filter_table=True, + ) + return sdata_small + + class TestCalculateImageFeatures: """Tests for calculate_image_features function.""" - def test_calculate_features_with_shapes(self, sdata_hne): + def test_calculate_features_with_shapes(self, sdata_hne_small): """Test basic feature calculation with shapes.""" # Use minimal measurements to keep test fast sq.experimental.im.calculate_image_features( - sdata_hne, + sdata_hne_small, image_key="hne", shapes_key="spots", scale="scale0", @@ -28,8 +44,8 @@ def test_calculate_features_with_shapes(self, sdata_hne): ) # Check that the table was added - assert "morphology" in sdata_hne.tables - adata = sdata_hne.tables["morphology"] + assert "morphology" in sdata_hne_small.tables + adata = sdata_hne_small.tables["morphology"] # Check basic structure assert adata.n_obs > 0 @@ -45,10 +61,10 @@ def test_calculate_features_with_shapes(self, sdata_hne): assert "region" in adata.obs assert "label_id" in adata.obs - def test_calculate_features_copy(self, sdata_hne): + def test_calculate_features_copy(self, sdata_hne_small): """Test that copy=False returns DataFrame.""" result = sq.experimental.im.calculate_image_features( - sdata_hne, + sdata_hne_small, image_key="hne", shapes_key="spots", scale="scale0", @@ -62,52 +78,52 @@ def test_calculate_features_copy(self, sdata_hne): assert result.shape[0] > 0 assert result.shape[1] > 0 - def test_invalid_image_key(self, sdata_hne): + def test_invalid_image_key(self, sdata_hne_small): """Test error when image key doesn't exist.""" with pytest.raises(ValueError, match="Image key 'nonexistent' not found"): sq.experimental.im.calculate_image_features( - sdata_hne, + sdata_hne_small, image_key="nonexistent", shapes_key="spots", measurements=["skimage:label"], ) - def test_invalid_shapes_key(self, sdata_hne): + def test_invalid_shapes_key(self, sdata_hne_small): """Test error when shapes key doesn't exist.""" with pytest.raises(ValueError, match="Shapes key 'nonexistent' not found"): sq.experimental.im.calculate_image_features( - sdata_hne, + sdata_hne_small, image_key="hne", shapes_key="nonexistent", measurements=["skimage:label"], ) - def test_both_labels_and_shapes_error(self, sdata_hne): + def test_both_labels_and_shapes_error(self, sdata_hne_small): """Test error when both labels_key and shapes_key are provided.""" with pytest.raises(ValueError, match="Use either `labels_key` or `shapes_key`, not both"): sq.experimental.im.calculate_image_features( - sdata_hne, + sdata_hne_small, image_key="hne", labels_key="fake_labels", shapes_key="spots", measurements=["skimage:label"], ) - def test_invalid_measurement(self, sdata_hne): + def test_invalid_measurement(self, sdata_hne_small): """Test error with invalid measurement type.""" with pytest.raises(ValueError, match="Invalid measurement"): sq.experimental.im.calculate_image_features( - sdata_hne, + sdata_hne_small, image_key="hne", shapes_key="spots", scale="scale0", measurements=["nonexistent:measurement"], ) - def test_with_intensity_features(self, sdata_hne): + def test_with_intensity_features(self, sdata_hne_small): """Test intensity-based features with multi-channel image.""" result = sq.experimental.im.calculate_image_features( - sdata_hne, + sdata_hne_small, image_key="hne", shapes_key="spots", scale="scale0", @@ -151,10 +167,10 @@ def test_dimension_mismatch(self): n_jobs=1, ) - def test_with_progress_bar(self, sdata_hne): + def test_with_progress_bar(self, sdata_hne_small): """Test that progress bar can be enabled.""" result = sq.experimental.im.calculate_image_features( - sdata_hne, + sdata_hne_small, image_key="hne", shapes_key="spots", scale="scale0", @@ -166,3 +182,38 @@ def test_with_progress_bar(self, sdata_hne): assert isinstance(result, pd.DataFrame) assert result.shape[0] > 0 + + def test_single_mask_property(self, sdata_hne_small): + """Test selecting a single skimage mask property (area) only.""" + result = sq.experimental.im.calculate_image_features( + sdata_hne_small, + image_key="hne", + shapes_key="spots", + scale="scale0", + measurements=["skimage:label:area"], + inplace=False, + n_jobs=1, + ) + + assert isinstance(result, pd.DataFrame) + assert result.shape[0] > 0 + assert list(result.columns) == ["area"] + + def test_single_intensity_property(self, sdata_hne_small): + """Test selecting a single intensity property (mean) per channel.""" + result = sq.experimental.im.calculate_image_features( + sdata_hne_small, + image_key="hne", + shapes_key="spots", + scale="scale0", + measurements=["skimage:label+image:intensity_mean"], + inplace=False, + n_jobs=1, + ) + + # Expect one column per channel + assert isinstance(result, pd.DataFrame) + assert result.shape[0] > 0 + assert all(col.endswith(("_0", "_1", "_2")) or "_" in col for col in result.columns) + # Should not contain other intensity props + assert not any(col.startswith("intensity_max") for col in result.columns) From 4c405f7480c4cd67a63f5568721d1fa0e60ede38 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Jan 2026 14:52:38 +0000 Subject: [PATCH 23/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/experimental/im/_feature.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/squidpy/experimental/im/_feature.py b/src/squidpy/experimental/im/_feature.py index 3251809cb..427f89eb1 100644 --- a/src/squidpy/experimental/im/_feature.py +++ b/src/squidpy/experimental/im/_feature.py @@ -209,18 +209,14 @@ def calculate_image_features( group, prop = parts[1], parts[2] if group == "label": if prop not in _MASK_PROPS: - raise ValueError( - f"Unknown skimage label property: '{prop}'. " - f"Available: {sorted(_MASK_PROPS)}" - ) + raise ValueError(f"Unknown skimage label property: '{prop}'. Available: {sorted(_MASK_PROPS)}") if parsed_label_props is None: parsed_label_props = set() parsed_label_props.add(prop) elif group == "label+image": if prop not in _INTENSITY_PROPS: raise ValueError( - f"Unknown skimage intensity property: '{prop}'. " - f"Available: {sorted(_INTENSITY_PROPS)}" + f"Unknown skimage intensity property: '{prop}'. Available: {sorted(_INTENSITY_PROPS)}" ) if parsed_intensity_props is None: parsed_intensity_props = set() From 936938f5f013db610be753f10f3afdcaf449537f Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 27 Jan 2026 16:03:21 +0100 Subject: [PATCH 24/37] updated test --- .../test_calculate_image_features.py | 66 +++++++++++++++---- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/tests/experimental/test_calculate_image_features.py b/tests/experimental/test_calculate_image_features.py index 534991b1a..983b60938 100644 --- a/tests/experimental/test_calculate_image_features.py +++ b/tests/experimental/test_calculate_image_features.py @@ -12,18 +12,60 @@ @pytest.fixture() def sdata_hne_small(sdata_hne): - """Small subset of sdata_hne for faster tests (10-100 spots).""" - # Query a small bounding box to get 10-100 spots - # Using a 500x500 pixel box should give us a reasonable number - sdata_small = bounding_box_query( - sdata_hne, - axes=["x", "y"], - min_coordinate=[1000, 1000], - max_coordinate=[1500, 1500], - target_coordinate_system="global", - filter_table=True, - ) - return sdata_small + """Small subset of sdata_hne for faster tests (aim for 10–100 spots).""" + + def _subset_spots(sd: SpatialData, max_spots: int = 100): + if "spots" not in sd.shapes: + return sd + spots = sd.shapes["spots"] + try: + length = len(spots) + except Exception: # pragma: no cover - defensive + return sd + if length <= max_spots: + return sd + # Try to subset while preserving type (GeoDataFrame / array) + try: + spots_subset = spots.iloc[:max_spots] + except Exception: # pragma: no cover + spots_subset = spots[:max_spots] + return SpatialData( + images=sd.images, + labels=sd.labels, + shapes={"spots": spots_subset}, + tables=sd.tables, + ) + + # Derive a central bounding box and enlarge until we capture some spots + if "spots" not in sdata_hne.shapes: + return sdata_hne + + spots = sdata_hne.shapes["spots"] + try: + minx, miny, maxx, maxy = spots.total_bounds # type: ignore[attr-defined] + except Exception: # pragma: no cover + # Fallback: return original if bounds are unavailable + return sdata_hne + + cx, cy = (minx + maxx) / 2, (miny + maxy) / 2 + candidate_sizes = [500, 1000, 2000, 4000] + + for size in candidate_sizes: + half = size / 2 + candidate = bounding_box_query( + sdata_hne, + axes=["x", "y"], + min_coordinate=[cx - half, cy - half], + max_coordinate=[cx + half, cy + half], + target_coordinate_system="global", + filter_table=True, + ) + + if "spots" in candidate.shapes and len(candidate.shapes["spots"]) > 0: + return _subset_spots(candidate) + + # If no spots found in any candidate box, fall back to original + return _subset_spots(sdata_hne) class TestCalculateImageFeatures: From 19d0efb040ccdc52a80b74658c5fd7aef48f8dd0 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 27 Jan 2026 16:05:46 +0100 Subject: [PATCH 25/37] lint --- tests/experimental/test_calculate_image_features.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/experimental/test_calculate_image_features.py b/tests/experimental/test_calculate_image_features.py index 983b60938..d5d199895 100644 --- a/tests/experimental/test_calculate_image_features.py +++ b/tests/experimental/test_calculate_image_features.py @@ -20,14 +20,14 @@ def _subset_spots(sd: SpatialData, max_spots: int = 100): spots = sd.shapes["spots"] try: length = len(spots) - except Exception: # pragma: no cover - defensive + except TypeError: # pragma: no cover - defensive return sd if length <= max_spots: return sd # Try to subset while preserving type (GeoDataFrame / array) try: spots_subset = spots.iloc[:max_spots] - except Exception: # pragma: no cover + except AttributeError: # pragma: no cover spots_subset = spots[:max_spots] return SpatialData( images=sd.images, @@ -43,7 +43,7 @@ def _subset_spots(sd: SpatialData, max_spots: int = 100): spots = sdata_hne.shapes["spots"] try: minx, miny, maxx, maxy = spots.total_bounds # type: ignore[attr-defined] - except Exception: # pragma: no cover + except AttributeError: # pragma: no cover # Fallback: return original if bounds are unavailable return sdata_hne From 80acb8c645a6b01df3c8ade249a123b13cc751dc Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 27 Jan 2026 16:38:23 +0100 Subject: [PATCH 26/37] fixed test --- src/squidpy/experimental/im/_feature.py | 31 +++++--- .../test_calculate_image_features.py | 73 ++++++------------- 2 files changed, 44 insertions(+), 60 deletions(-) diff --git a/src/squidpy/experimental/im/_feature.py b/src/squidpy/experimental/im/_feature.py index 427f89eb1..91d9f360b 100644 --- a/src/squidpy/experimental/im/_feature.py +++ b/src/squidpy/experimental/im/_feature.py @@ -169,17 +169,23 @@ def calculate_image_features( scale_str = f" (using scale '{scale}')" if scale is not None else "" logg.info(f"Converting shapes to labels{scale_str}.") _, max_y, max_x = image.shape - labels = np.asarray( - rasterize( - sdata.shapes[shapes_key], - ["x", "y"], - min_coordinate=[0, 0], - max_coordinate=[max_x, max_y], - target_coordinate_system="global", - target_unit_to_pixels=1.0, - return_regions_as_labels=True, + try: + labels = np.asarray( + rasterize( + sdata.shapes[shapes_key], + ["x", "y"], + min_coordinate=[0, 0], + max_coordinate=[max_x, max_y], + target_coordinate_system="global", + target_unit_to_pixels=1.0, + return_regions_as_labels=True, + ) ) - ) + except ValueError as e: + raise ValueError( + "Failed to rasterize shapes; geometries may be empty or unsupported for rasterization. " + "Filter out empty/non-polygon geometries or choose a different shapes_key." + ) from e else: labels = _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale) @@ -373,7 +379,10 @@ def calculate_image_features( # because when converting the shapes to labels, a potential index 0 # in the shapes is set to 1 in the labels and therefore we'd otherwise # be off-by-one in the label_id. - adata.obs["label_id"] = sdata.shapes[shapes_key].index.values if shapes_key is not None else cell_ids + if shapes_key is not None and len(sdata.shapes[shapes_key]) == len(adata): + adata.obs["label_id"] = sdata.shapes[shapes_key].index.values + else: + adata.obs["label_id"] = cell_ids if inplace: sdata.tables[adata_key_added] = TableModel.parse(adata) diff --git a/tests/experimental/test_calculate_image_features.py b/tests/experimental/test_calculate_image_features.py index d5d199895..ba86558d6 100644 --- a/tests/experimental/test_calculate_image_features.py +++ b/tests/experimental/test_calculate_image_features.py @@ -14,58 +14,33 @@ def sdata_hne_small(sdata_hne): """Small subset of sdata_hne for faster tests (aim for 10–100 spots).""" - def _subset_spots(sd: SpatialData, max_spots: int = 100): - if "spots" not in sd.shapes: - return sd - spots = sd.shapes["spots"] - try: - length = len(spots) - except TypeError: # pragma: no cover - defensive - return sd - if length <= max_spots: - return sd - # Try to subset while preserving type (GeoDataFrame / array) - try: - spots_subset = spots.iloc[:max_spots] - except AttributeError: # pragma: no cover - spots_subset = spots[:max_spots] - return SpatialData( - images=sd.images, - labels=sd.labels, - shapes={"spots": spots_subset}, - tables=sd.tables, - ) - - # Derive a central bounding box and enlarge until we capture some spots if "spots" not in sdata_hne.shapes: return sdata_hne - spots = sdata_hne.shapes["spots"] - try: - minx, miny, maxx, maxy = spots.total_bounds # type: ignore[attr-defined] - except AttributeError: # pragma: no cover - # Fallback: return original if bounds are unavailable - return sdata_hne - - cx, cy = (minx + maxx) / 2, (miny + maxy) / 2 - candidate_sizes = [500, 1000, 2000, 4000] - - for size in candidate_sizes: - half = size / 2 - candidate = bounding_box_query( - sdata_hne, - axes=["x", "y"], - min_coordinate=[cx - half, cy - half], - max_coordinate=[cx + half, cy + half], - target_coordinate_system="global", - filter_table=True, - ) - - if "spots" in candidate.shapes and len(candidate.shapes["spots"]) > 0: - return _subset_spots(candidate) - - # If no spots found in any candidate box, fall back to original - return _subset_spots(sdata_hne) + # Crop to central tissue area (roughly the dense tissue in Visium H&E) + # Chosen from dataset bounds as seen in the provided plot. + bbox = bounding_box_query( + sdata_hne, + axes=["x", "y"], + min_coordinate=[2500, 1500], + max_coordinate=[7500, 8000], + target_coordinate_system="global", + filter_table=True, + ) + + # Ensure we keep only spots and drop empties + if "spots" in bbox.shapes: + spots = bbox.shapes["spots"] + try: + spots = spots.loc[~spots.geometry.is_empty] # type: ignore[attr-defined] + except AttributeError: + pass + # Rebuild SpatialData to use filtered spots; fall back to original if empty + if len(spots) > 0: + return SpatialData(images=bbox.images, labels=bbox.labels, shapes={"spots": spots}, tables=bbox.tables) + + # Fallback: return original sdata if crop produced no valid spots + return sdata_hne class TestCalculateImageFeatures: From a41ef20d4b3a6a2aa150c302cb8110cd74e7cc52 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Wed, 28 Jan 2026 15:07:12 +0100 Subject: [PATCH 27/37] small refactor + test speedup --- src/squidpy/experimental/im/_feature.py | 150 ++++++++++-------- .../test_calculate_image_features.py | 61 ++++--- 2 files changed, 125 insertions(+), 86 deletions(-) diff --git a/src/squidpy/experimental/im/_feature.py b/src/squidpy/experimental/im/_feature.py index 91d9f360b..80b34c55f 100644 --- a/src/squidpy/experimental/im/_feature.py +++ b/src/squidpy/experimental/im/_feature.py @@ -4,7 +4,7 @@ import warnings from collections.abc import Callable, Sequence -from typing import Any +from typing import Any, NamedTuple import anndata as ad import numpy as np @@ -84,6 +84,70 @@ _SPECIAL_PROPS = frozenset({"inertia_tensor_eigvals"}) +class ParsedMeasurements(NamedTuple): + measurements: list[str] + label_props: set[str] | None + intensity_props: set[str] | None + has_cpmeasure_core: bool + has_cpmeasure_correlation: bool + + +def _parse_measurements( + measurements: str | list[str] | None, + available_measurements: list[str], +) -> ParsedMeasurements: + """Parse and validate measurements, supporting per-property selection.""" + if measurements is None: + measurements = available_measurements + + if isinstance(measurements, str): + measurements = [measurements] + + parsed_label_props: set[str] | None = None + parsed_intensity_props: set[str] | None = None + has_cpmeasure_core = False + has_cpmeasure_correlation = False + + for m in measurements: + parts = m.split(":") + if len(parts) == 3 and parts[0] == "skimage": + group, prop = parts[1], parts[2] + if group == "label": + if prop not in _MASK_PROPS: + raise ValueError(f"Unknown skimage label property: '{prop}'. Available: {sorted(_MASK_PROPS)}") + parsed_label_props = (parsed_label_props or set()) | {prop} + elif group == "label+image": + if prop not in _INTENSITY_PROPS: + raise ValueError( + f"Unknown skimage intensity property: '{prop}'. Available: {sorted(_INTENSITY_PROPS)}" + ) + parsed_intensity_props = (parsed_intensity_props or set()) | {prop} + else: + raise ValueError(f"Unknown skimage group: '{group}'. Use 'label' or 'label+image'") + elif m == "skimage:label": + parsed_label_props = (parsed_label_props or set()) | _MASK_PROPS.copy() + elif m == "skimage:label+image": + parsed_intensity_props = (parsed_intensity_props or set()) | _INTENSITY_PROPS.copy() + elif m == "cpmeasure:core": + has_cpmeasure_core = True + elif m == "cpmeasure:correlation": + has_cpmeasure_correlation = True + elif m not in available_measurements: + raise ValueError( + f"Invalid measurement: '{m}'. " + f"Available: {available_measurements}, " + f"or use 'skimage:label:property' / 'skimage:label+image:property' for individual properties" + ) + + return ParsedMeasurements( + measurements, + parsed_label_props, + parsed_intensity_props, + has_cpmeasure_core, + has_cpmeasure_correlation, + ) + + @d.dedent @inject_docs(f=ImageFeature) def calculate_image_features( @@ -135,11 +199,18 @@ def calculate_image_features( ----- This is an experimental feature that requires the `cp_measure` package to be installed. + + Per-property selection is supported, e.g. ``"skimage:label:area"`` or + ``"skimage:label+image:intensity_mean"``. Full groups remain available via + ``"skimage:label"`` and ``"skimage:label+image"``. """ if image_key not in sdata.images.keys(): raise ValueError(f"Image key '{image_key}' not found, valid keys: {list(sdata.images.keys())}") + if labels_key is None and shapes_key is None: + raise ValueError("Provide either `labels_key` or `shapes_key`.") + if labels_key is not None and shapes_key is not None: raise ValueError("Use either `labels_key` or `shapes_key`, not both.") @@ -196,62 +267,15 @@ def calculate_image_features( "cpmeasure:correlation", ] - if measurements is None: - measurements = available_measurements - - if isinstance(measurements, str): - measurements = [measurements] - - # Parse measurements to support individual properties: "skimage:label:area" - parsed_label_props = None - parsed_intensity_props = None - has_cpmeasure_core = False - has_cpmeasure_correlation = False + parsed = _parse_measurements(measurements, available_measurements) - for m in measurements: - parts = m.split(":") - if len(parts) == 3 and parts[0] == "skimage": - # Individual property: "skimage:label:area" - group, prop = parts[1], parts[2] - if group == "label": - if prop not in _MASK_PROPS: - raise ValueError(f"Unknown skimage label property: '{prop}'. Available: {sorted(_MASK_PROPS)}") - if parsed_label_props is None: - parsed_label_props = set() - parsed_label_props.add(prop) - elif group == "label+image": - if prop not in _INTENSITY_PROPS: - raise ValueError( - f"Unknown skimage intensity property: '{prop}'. Available: {sorted(_INTENSITY_PROPS)}" - ) - if parsed_intensity_props is None: - parsed_intensity_props = set() - parsed_intensity_props.add(prop) - else: - raise ValueError(f"Unknown skimage group: '{group}'. Use 'label' or 'label+image'") - elif m == "skimage:label": - # Full label group - if parsed_label_props is None: - parsed_label_props = _MASK_PROPS.copy() - else: - parsed_label_props.update(_MASK_PROPS) - elif m == "skimage:label+image": - # Full intensity group - if parsed_intensity_props is None: - parsed_intensity_props = _INTENSITY_PROPS.copy() - else: - parsed_intensity_props.update(_INTENSITY_PROPS) - elif m == "cpmeasure:core": - has_cpmeasure_core = True - elif m == "cpmeasure:correlation": - has_cpmeasure_correlation = True - elif m not in available_measurements: - raise ValueError( - f"Invalid measurement: '{m}'. " - f"Available: {available_measurements}, " - f"or use 'skimage:label:property' / 'skimage:label+image:property' " - f"for individual properties" - ) + if ( + parsed.label_props is None + and parsed.intensity_props is None + and not parsed.has_cpmeasure_core + and not parsed.has_cpmeasure_correlation + ): + raise ValueError("No valid measurements requested") if labels.size == 0: raise ValueError("Labels array is empty") @@ -272,7 +296,7 @@ def calculate_image_features( if image.shape[1:] != labels.shape: raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}") - if has_cpmeasure_correlation: + if parsed.has_cpmeasure_correlation: measurements_corr = get_correlation_measurements() cell_ids = np.unique(labels) @@ -287,7 +311,7 @@ def calculate_image_features( logg.info(f"Using '{n_jobs}' core(s).") - if parsed_label_props is not None: + if parsed.label_props is not None: logg.info("Calculating 'skimage' label features.") res = parallelize( _get_regionprops_features, @@ -297,10 +321,10 @@ def calculate_image_features( backend=backend, show_progress_bar=show_progress_bar, verbose=verbose, - )(labels=labels, intensity_image=None, props=parsed_label_props) + )(labels=labels, intensity_image=None, props=parsed.label_props) all_features.append(res) - if parsed_intensity_props is not None: + if parsed.intensity_props is not None: for ch_idx in range(n_channels): ch_name = channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" ch_image = image[ch_idx] @@ -313,12 +337,12 @@ def calculate_image_features( backend=backend, show_progress_bar=show_progress_bar, verbose=verbose, - )(labels=labels, intensity_image=ch_image, props=parsed_intensity_props) + )(labels=labels, intensity_image=ch_image, props=parsed.intensity_props) # Append channel names to each feature column res = res.rename(columns=lambda col, ch_name=ch_name: f"{col}_{ch_name}") all_features.append(res) - if has_cpmeasure_core: + if parsed.has_cpmeasure_core: measurements_core = get_core_measurements() for ch_idx in range(n_channels): ch_name = channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" @@ -335,7 +359,7 @@ def calculate_image_features( )(labels, ch_image, None, measurements_core, ch_name) all_features.append(res) - if "cpmeasure:correlation" in measurements: + if parsed.has_cpmeasure_correlation: for ch1_idx in range(n_channels): for ch2_idx in range(ch1_idx + 1, n_channels): ch1_name = channel_names[ch1_idx] if channel_names is not None else f"{ch1_idx}" diff --git a/tests/experimental/test_calculate_image_features.py b/tests/experimental/test_calculate_image_features.py index ba86558d6..51d52ab67 100644 --- a/tests/experimental/test_calculate_image_features.py +++ b/tests/experimental/test_calculate_image_features.py @@ -4,7 +4,7 @@ import pandas as pd import pytest import xarray as xr -from spatialdata import SpatialData, bounding_box_query +from spatialdata import SpatialData from spatialdata.models import Image2DModel, Labels2DModel import squidpy as sq @@ -17,30 +17,23 @@ def sdata_hne_small(sdata_hne): if "spots" not in sdata_hne.shapes: return sdata_hne - # Crop to central tissue area (roughly the dense tissue in Visium H&E) - # Chosen from dataset bounds as seen in the provided plot. - bbox = bounding_box_query( - sdata_hne, - axes=["x", "y"], - min_coordinate=[2500, 1500], - max_coordinate=[7500, 8000], - target_coordinate_system="global", - filter_table=True, - ) + spots = sdata_hne.shapes["spots"] + try: + spots = spots.loc[~spots.geometry.is_empty] # type: ignore[attr-defined] + except AttributeError: + pass - # Ensure we keep only spots and drop empties - if "spots" in bbox.shapes: - spots = bbox.shapes["spots"] - try: - spots = spots.loc[~spots.geometry.is_empty] # type: ignore[attr-defined] - except AttributeError: - pass - # Rebuild SpatialData to use filtered spots; fall back to original if empty - if len(spots) > 0: - return SpatialData(images=bbox.images, labels=bbox.labels, shapes={"spots": spots}, tables=bbox.tables) + # Take the first ~100 spots to keep rasterization fast and non-empty + spots_subset = spots.iloc[:100] if len(spots) > 100 else spots + if len(spots_subset) == 0: + return sdata_hne - # Fallback: return original sdata if crop produced no valid spots - return sdata_hne + return SpatialData( + images=sdata_hne.images, + labels=sdata_hne.labels, + shapes={"spots": spots_subset}, + tables=sdata_hne.tables, + ) class TestCalculateImageFeatures: @@ -126,6 +119,15 @@ def test_both_labels_and_shapes_error(self, sdata_hne_small): measurements=["skimage:label"], ) + def test_missing_labels_and_shapes(self, sdata_hne_small): + """Test error when neither labels_key nor shapes_key is provided.""" + with pytest.raises(ValueError, match="Provide either `labels_key` or `shapes_key`."): + sq.experimental.im.calculate_image_features( + sdata_hne_small, + image_key="hne", + measurements=["skimage:label"], + ) + def test_invalid_measurement(self, sdata_hne_small): """Test error with invalid measurement type.""" with pytest.raises(ValueError, match="Invalid measurement"): @@ -137,6 +139,19 @@ def test_invalid_measurement(self, sdata_hne_small): measurements=["nonexistent:measurement"], ) + def test_no_valid_measurements(self, sdata_hne_small): + """Test error when no valid measurements are requested.""" + with pytest.raises(ValueError, match="No valid measurements requested"): + sq.experimental.im.calculate_image_features( + sdata_hne_small, + image_key="hne", + shapes_key="spots", + scale="scale0", + measurements=[], + n_jobs=1, + inplace=False, + ) + def test_with_intensity_features(self, sdata_hne_small): """Test intensity-based features with multi-channel image.""" result = sq.experimental.im.calculate_image_features( From 6b78f0b45f4cf0412148f46bf67433167d25a7f2 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Wed, 28 Jan 2026 15:33:55 +0100 Subject: [PATCH 28/37] added features from legacy function + tests --- src/squidpy/experimental/im/_feature.py | 110 ++++++++++++++++++ .../test_calculate_image_features.py | 48 ++++++++ 2 files changed, 158 insertions(+) diff --git a/src/squidpy/experimental/im/_feature.py b/src/squidpy/experimental/im/_feature.py index 80b34c55f..060c9e8ff 100644 --- a/src/squidpy/experimental/im/_feature.py +++ b/src/squidpy/experimental/im/_feature.py @@ -14,6 +14,7 @@ from cp_measure.bulk import get_core_measurements, get_correlation_measurements from numba import njit from skimage import measure +from skimage.feature import graycomatrix, graycoprops from skimage.measure import label from spatialdata import SpatialData, rasterize from spatialdata._logging import logger as logg @@ -90,6 +91,9 @@ class ParsedMeasurements(NamedTuple): intensity_props: set[str] | None has_cpmeasure_core: bool has_cpmeasure_correlation: bool + has_squidpy_summary: bool + has_squidpy_texture: bool + has_squidpy_color_hist: bool def _parse_measurements( @@ -107,6 +111,9 @@ def _parse_measurements( parsed_intensity_props: set[str] | None = None has_cpmeasure_core = False has_cpmeasure_correlation = False + has_squidpy_summary = False + has_squidpy_texture = False + has_squidpy_color_hist = False for m in measurements: parts = m.split(":") @@ -132,6 +139,12 @@ def _parse_measurements( has_cpmeasure_core = True elif m == "cpmeasure:correlation": has_cpmeasure_correlation = True + elif m == "squidpy:summary": + has_squidpy_summary = True + elif m == "squidpy:texture": + has_squidpy_texture = True + elif m == "squidpy:color_hist": + has_squidpy_color_hist = True elif m not in available_measurements: raise ValueError( f"Invalid measurement: '{m}'. " @@ -145,6 +158,9 @@ def _parse_measurements( parsed_intensity_props, has_cpmeasure_core, has_cpmeasure_correlation, + has_squidpy_summary, + has_squidpy_texture, + has_squidpy_color_hist, ) @@ -265,6 +281,9 @@ def calculate_image_features( "skimage:label+image", "cpmeasure:core", "cpmeasure:correlation", + "squidpy:summary", + "squidpy:texture", + "squidpy:color_hist", ] parsed = _parse_measurements(measurements, available_measurements) @@ -274,6 +293,9 @@ def calculate_image_features( and parsed.intensity_props is None and not parsed.has_cpmeasure_core and not parsed.has_cpmeasure_correlation + and not parsed.has_squidpy_summary + and not parsed.has_squidpy_texture + and not parsed.has_squidpy_color_hist ): raise ValueError("No valid measurements requested") @@ -311,6 +333,10 @@ def calculate_image_features( logg.info(f"Using '{n_jobs}' core(s).") + if parsed.has_squidpy_summary or parsed.has_squidpy_texture or parsed.has_squidpy_color_hist: + sq_feats = _compute_squidpy_channel_features(image, labels, cell_ids, channel_names, parsed) + all_features.extend(sq_feats) + if parsed.label_props is not None: logg.info("Calculating 'skimage' label features.") res = parallelize( @@ -538,6 +564,90 @@ def _prepare_images_for_measurement( return mask, image1_prepared, image2_prepared +def _get_label_bbox(labels: NDArray) -> tuple[int, int, int, int]: + """Return tight bounding box (y_min, y_max, x_min, x_max) for non-zero labels.""" + yx = np.argwhere(labels > 0) + if yx.size == 0: + return 0, labels.shape[0], 0, labels.shape[1] + y_min, x_min = yx.min(axis=0) + y_max, x_max = yx.max(axis=0) + 1 + return int(y_min), int(y_max), int(x_min), int(x_max) + + +def _compute_squidpy_channel_features( + image: NDArray, + labels: NDArray, + cell_ids: NDArray, + channel_names: NDArray | None, + parsed: ParsedMeasurements, +) -> list[pd.DataFrame]: + """Compute squidpy legacy-like features and broadcast to cells.""" + feats: list[pd.DataFrame] = [] + + # Crop to label bbox to speed computations + y_min, y_max, x_min, x_max = _get_label_bbox(labels) + img_crop = image[:, y_min:y_max, x_min:x_max] + + n_channels = img_crop.shape[0] + ch_names = channel_names if channel_names is not None else np.arange(n_channels).astype(str) + + if parsed.has_squidpy_summary: + summary_vals: dict[str, float] = {} + for ch_idx in range(n_channels): + ch = img_crop[ch_idx].astype(np.float32) + summary_vals.update( + { + f"summary_mean_{ch_names[ch_idx]}": float(np.mean(ch)), + f"summary_std_{ch_names[ch_idx]}": float(np.std(ch)), + f"summary_min_{ch_names[ch_idx]}": float(np.min(ch)), + f"summary_max_{ch_names[ch_idx]}": float(np.max(ch)), + } + ) + df = pd.DataFrame([summary_vals] * len(cell_ids), index=cell_ids) + feats.append(df) + + if parsed.has_squidpy_texture: + tex_vals: dict[str, float] = {} + # Quantize to 32 levels to keep GLCM small + quant_levels = 32 + for ch_idx in range(n_channels): + ch = img_crop[ch_idx] + # normalize to [0, quant_levels-1] + ch_norm = ch.astype(np.float32) + if ch_norm.max() > ch_norm.min(): + ch_norm = (ch_norm - ch_norm.min()) / (ch_norm.max() - ch_norm.min()) + ch_q = np.clip((ch_norm * (quant_levels - 1)).round().astype(np.uint8), 0, quant_levels - 1) + glcm = graycomatrix(ch_q, distances=[1], angles=[0], levels=quant_levels, symmetric=True, normed=True) + tex_vals.update( + { + f"texture_contrast_{ch_names[ch_idx]}": float(graycoprops(glcm, "contrast")[0, 0]), + f"texture_dissimilarity_{ch_names[ch_idx]}": float(graycoprops(glcm, "dissimilarity")[0, 0]), + f"texture_homogeneity_{ch_names[ch_idx]}": float(graycoprops(glcm, "homogeneity")[0, 0]), + f"texture_energy_{ch_names[ch_idx]}": float(graycoprops(glcm, "energy")[0, 0]), + f"texture_ASM_{ch_names[ch_idx]}": float(graycoprops(glcm, "ASM")[0, 0]), + f"texture_correlation_{ch_names[ch_idx]}": float(graycoprops(glcm, "correlation")[0, 0]), + } + ) + df = pd.DataFrame([tex_vals] * len(cell_ids), index=cell_ids) + feats.append(df) + + if parsed.has_squidpy_color_hist: + hist_vals: dict[str, float] = {} + bins = 16 + for ch_idx in range(n_channels): + ch = img_crop[ch_idx].astype(np.float32) + hist, bin_edges = np.histogram(ch, bins=bins, range=(ch.min(), ch.max() if ch.max() > ch.min() else ch.min() + 1)) + hist = hist.astype(np.float32) + hist_sum = hist.sum() + if hist_sum > 0: + hist = hist / hist_sum + hist_vals.update({f"color_hist_bin{b}_{ch_names[ch_idx]}": float(v) for b, v in enumerate(hist)}) + df = pd.DataFrame([hist_vals] * len(cell_ids), index=cell_ids) + feats.append(df) + + return feats + + @njit(fastmath=True) def _get_cell_crops_numba( cell_id: int, diff --git a/tests/experimental/test_calculate_image_features.py b/tests/experimental/test_calculate_image_features.py index 51d52ab67..14df7a401 100644 --- a/tests/experimental/test_calculate_image_features.py +++ b/tests/experimental/test_calculate_image_features.py @@ -249,3 +249,51 @@ def test_single_intensity_property(self, sdata_hne_small): assert all(col.endswith(("_0", "_1", "_2")) or "_" in col for col in result.columns) # Should not contain other intensity props assert not any(col.startswith("intensity_max") for col in result.columns) + + def test_squidpy_summary(self, sdata_hne_small): + """Test squidpy summary stats per channel.""" + result = sq.experimental.im.calculate_image_features( + sdata_hne_small, + image_key="hne", + shapes_key="spots", + scale="scale0", + measurements=["squidpy:summary"], + inplace=False, + n_jobs=1, + ) + + assert isinstance(result, pd.DataFrame) + assert result.shape[0] > 0 + assert any(col.startswith("summary_mean") for col in result.columns) + + def test_squidpy_texture(self, sdata_hne_small): + """Test squidpy texture stats per channel.""" + result = sq.experimental.im.calculate_image_features( + sdata_hne_small, + image_key="hne", + shapes_key="spots", + scale="scale0", + measurements=["squidpy:texture"], + inplace=False, + n_jobs=1, + ) + + assert isinstance(result, pd.DataFrame) + assert result.shape[0] > 0 + assert any(col.startswith("texture_contrast") for col in result.columns) + + def test_squidpy_color_hist(self, sdata_hne_small): + """Test squidpy color histogram per channel.""" + result = sq.experimental.im.calculate_image_features( + sdata_hne_small, + image_key="hne", + shapes_key="spots", + scale="scale0", + measurements=["squidpy:color_hist"], + inplace=False, + n_jobs=1, + ) + + assert isinstance(result, pd.DataFrame) + assert result.shape[0] > 0 + assert any(col.startswith("color_hist_bin") for col in result.columns) From 8da641bf1ed7a3d9d93bf62380fe23fbceeb6b39 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 28 Jan 2026 14:34:08 +0000 Subject: [PATCH 29/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/experimental/im/_feature.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/squidpy/experimental/im/_feature.py b/src/squidpy/experimental/im/_feature.py index 060c9e8ff..13c35d9d5 100644 --- a/src/squidpy/experimental/im/_feature.py +++ b/src/squidpy/experimental/im/_feature.py @@ -636,7 +636,9 @@ def _compute_squidpy_channel_features( bins = 16 for ch_idx in range(n_channels): ch = img_crop[ch_idx].astype(np.float32) - hist, bin_edges = np.histogram(ch, bins=bins, range=(ch.min(), ch.max() if ch.max() > ch.min() else ch.min() + 1)) + hist, bin_edges = np.histogram( + ch, bins=bins, range=(ch.min(), ch.max() if ch.max() > ch.min() else ch.min() + 1) + ) hist = hist.astype(np.float32) hist_sum = hist.sum() if hist_sum > 0: From baf7f39a03d56712105b2a47139657f4f91415e0 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Wed, 8 Apr 2026 00:01:11 +0200 Subject: [PATCH 30/37] Add cell-aware tiling for large image featurization Introduces _tiling.py with build_tile_specs() and extract_tile() that split a label image into overlapping tiles where each cell is assigned to exactly one tile by centroid. Non-owned cells are zeroed out so downstream processing never double-counts. Includes 31 tests: deterministic brick-pattern grid (touching and non-touching), coverage verification, and visual regression tests. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/squidpy/experimental/im/_tiling.py | 236 ++++++++++ .../TilingVisual_tile_assignment_gap.png | Bin 0 -> 20511 bytes .../TilingVisual_tile_assignment_touching.png | Bin 0 -> 9600 bytes tests/experimental/test_tiling.py | 411 ++++++++++++++++++ 4 files changed, 647 insertions(+) create mode 100644 src/squidpy/experimental/im/_tiling.py create mode 100644 tests/_images/TilingVisual_tile_assignment_gap.png create mode 100644 tests/_images/TilingVisual_tile_assignment_touching.png create mode 100644 tests/experimental/test_tiling.py diff --git a/src/squidpy/experimental/im/_tiling.py b/src/squidpy/experimental/im/_tiling.py new file mode 100644 index 000000000..6154faa44 --- /dev/null +++ b/src/squidpy/experimental/im/_tiling.py @@ -0,0 +1,236 @@ +"""Cell-aware tiling for large images. + +Splits a label image into overlapping tiles such that every cell is fully +contained in exactly one tile. Cells are assigned to tiles by centroid: +the tile whose non-overlapping base region contains the centroid owns the +cell. Non-owned cells are zeroed out in each tile's mask so that +downstream processing never double-counts. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Literal + +import numpy as np +from skimage.measure import regionprops + + +@dataclass(frozen=True) +class CellInfo: + """Centroid and bounding box for a single label.""" + + label: int + centroid_y: float + centroid_x: float + bbox_h: int # height of bounding box + bbox_w: int # width of bounding box + + +@dataclass(frozen=True) +class TileSpec: + """Specification for a single tile. + + Attributes + ---------- + base + The non-overlapping region ``(y0, x0, y1, x1)`` used for centroid + ownership. Tiles partition the image into a grid of base regions. + crop + The extended region ``(y0, x0, y1, x1)`` that includes the overlap + margin. This is the actual slice extracted from the image/labels. + owned_ids + Label IDs whose centroid falls inside ``base``. Only these labels + are kept in the tile's mask; all others are zeroed out. + """ + + base: tuple[int, int, int, int] + crop: tuple[int, int, int, int] + owned_ids: frozenset[int] = field(default_factory=frozenset) + + +def _compute_cell_info(labels: np.ndarray) -> dict[int, CellInfo]: + """Compute centroid and bounding-box size for every label. + + Parameters + ---------- + labels + 2-D integer label image where 0 is background. + + Returns + ------- + Mapping from label ID to :class:`CellInfo`. + """ + props = regionprops(labels) + info: dict[int, CellInfo] = {} + for p in props: + min_row, min_col, max_row, max_col = p.bbox + info[p.label] = CellInfo( + label=p.label, + centroid_y=p.centroid[0], + centroid_x=p.centroid[1], + bbox_h=max_row - min_row, + bbox_w=max_col - min_col, + ) + return info + + +def _auto_margin(cell_info: dict[int, CellInfo]) -> int: + """Compute the minimum margin that covers the largest cell's half-extent.""" + if not cell_info: + return 0 + max_half = max(max(c.bbox_h, c.bbox_w) for c in cell_info.values()) + # Full bbox extent: a cell's centroid can be at most half a bbox away + # from its edge, so margin = ceil(max_extent / 2) guarantees coverage. + # Add 1 pixel for safety (rounding / off-by-one). + return int(np.ceil(max_half / 2)) + 1 + + +def build_tile_specs( + labels: np.ndarray, + tile_size: int = 2048, + overlap_margin: int | Literal["auto"] = "auto", +) -> list[TileSpec]: + """Build tile specifications for a label image. + + Each tile gets a non-overlapping *base* region (for centroid ownership) + and an extended *crop* region (base + margin on each side). Every + nonzero label is assigned to exactly one tile. + + Parameters + ---------- + labels + 2-D integer label image (0 = background). + tile_size + Side length of the non-overlapping base grid cells. + overlap_margin + Pixel margin added around each base region. ``"auto"`` computes the + minimum margin from the largest cell's bounding box. + + Returns + ------- + List of :class:`TileSpec`, one per grid cell that owns at least one + label. Empty tiles (no cells) are omitted. + """ + if labels.ndim != 2: + raise ValueError(f"Expected 2-D labels, got shape {labels.shape}") + if tile_size <= 0: + raise ValueError(f"tile_size must be positive, got {tile_size}") + + H, W = labels.shape + cell_info = _compute_cell_info(labels) + + if isinstance(overlap_margin, str) and overlap_margin == "auto": + margin = _auto_margin(cell_info) + else: + margin = int(overlap_margin) + if margin < 0: + raise ValueError(f"overlap_margin must be non-negative, got {margin}") + + # Assign each cell to a base-grid cell by its centroid + cell_to_tile: dict[int, tuple[int, int]] = {} + for lid, ci in cell_info.items(): + tile_row = min(int(ci.centroid_y) // tile_size, (H - 1) // tile_size) + tile_col = min(int(ci.centroid_x) // tile_size, (W - 1) // tile_size) + cell_to_tile[lid] = (tile_row, tile_col) + + # Group cells by tile + tile_to_cells: dict[tuple[int, int], set[int]] = {} + for lid, key in cell_to_tile.items(): + tile_to_cells.setdefault(key, set()).add(lid) + + # Build specs (skip empty tiles) + n_rows = (H + tile_size - 1) // tile_size + n_cols = (W + tile_size - 1) // tile_size + + specs: list[TileSpec] = [] + for row in range(n_rows): + for col in range(n_cols): + owned = tile_to_cells.get((row, col), set()) + if not owned: + continue + + # Base region (non-overlapping) + by0 = row * tile_size + bx0 = col * tile_size + by1 = min(by0 + tile_size, H) + bx1 = min(bx0 + tile_size, W) + + # Crop region (with margin, clamped) + cy0 = max(by0 - margin, 0) + cx0 = max(bx0 - margin, 0) + cy1 = min(by1 + margin, H) + cx1 = min(bx1 + margin, W) + + specs.append( + TileSpec( + base=(by0, bx0, by1, bx1), + crop=(cy0, cx0, cy1, cx1), + owned_ids=frozenset(owned), + ) + ) + + return specs + + +def extract_tile( + image: np.ndarray, + labels: np.ndarray, + spec: TileSpec, +) -> tuple[np.ndarray, np.ndarray]: + """Extract a tile's image and mask, zeroing out non-owned cells. + + Parameters + ---------- + image + 3-D array of shape ``(C, H, W)``. + labels + 2-D integer label image of shape ``(H, W)``. + spec + Tile specification from :func:`build_tile_specs`. + + Returns + ------- + tile_image + Cropped image of shape ``(C, crop_h, crop_w)``. + tile_labels + Cropped label image with non-owned cells zeroed out. + """ + cy0, cx0, cy1, cx1 = spec.crop + tile_image = image[:, cy0:cy1, cx0:cx1] + tile_labels = labels[cy0:cy1, cx0:cx1].copy() + + # Zero out labels not owned by this tile + unique_in_crop = np.unique(tile_labels) + for lid in unique_in_crop: + if lid != 0 and lid not in spec.owned_ids: + tile_labels[tile_labels == lid] = 0 + + return tile_image, tile_labels + + +def verify_coverage( + labels: np.ndarray, + specs: list[TileSpec], +) -> None: + """Assert that tile specs provide full, non-overlapping cell coverage. + + Raises + ------ + AssertionError + If any cell is missing or assigned to more than one tile. + """ + all_label_ids = set(np.unique(labels)) + all_label_ids.discard(0) + + owned_union: set[int] = set() + for spec in specs: + overlap = owned_union & spec.owned_ids + assert not overlap, f"Cells {overlap} assigned to multiple tiles" + owned_union |= spec.owned_ids + + missing = all_label_ids - owned_union + assert not missing, f"Cells {missing} not assigned to any tile" + + extra = owned_union - all_label_ids + assert not extra, f"Tile specs reference non-existent labels {extra}" diff --git a/tests/_images/TilingVisual_tile_assignment_gap.png b/tests/_images/TilingVisual_tile_assignment_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..1fb306de3db656a50f4c07fbcf7d462d0248178a GIT binary patch literal 20511 zcmZ6yWmH=Y7c|-ymmbrOEKFUaotHAEh|GOb0!hQ;=AH>1#_?#uQoK@{i zo!yKa!SCdaob9dcoUJX4zq*1QohY+%d0I6zI&$_E+hV3%{}8p55*gI=fU@O>~y!|+qadluXH)) zf+EVL$aq%ebG4w!pY(JfY)h+B8;kaOBszEUdWd^nQOGY;$OlJUgzxWB-ispp$5_R$ z{)m2fTeFMGOtp6Hi2OE8eYM@#d3-#^{g1!)YRs)|{H_iEI~IHjixD}%=)@9EE`?>% zHwDAGNBmQ-vmp+39AN=mI!kOH3(3OoP|Bc#E^0YSRyiatBl1uqzA6+P8T4s?`4nm6 z|NE5twW5*|QCV48&GbonBR+eA^(?Q`Zq9k#fXz+g+qe38{oXR|+A7B#puppPE{TRB z7)+JrzKKF?6LWHNbCWi{FMvTNv|{9vkd(B!9j~a-aMp6fwBj&G()6&KjvXp~J|)Y% zk#5)V-7cWL^y7Y{`?^2U!rO2>&DKt`f$7=XP32Fc&A!m%mje0p^S|ulRd>0Pk;dGB zZ&=9dNjkRQH8k*D&(=nX-^TdQevj<*{1FR2f4&+wIh-jT8Xh*{=3HHGbv+UF@Zj|_ z(9#M~6ncQyX|V3T7$l3PuWXB(y;*S@-*`N#G|QfJQ!~@kBMHJF2`jDaNE5WT=K7MM zK;!f`mO{!B`g->BtrvF2uVuT=%*AG(Csz(`ZVGO00!*R%ueZmGy`TfK=XE5Wodned z-f8kCYzoP$7Sr*pNHpfQn9SanyMNKkwypBb8fMu(T&)UMyQ5;3mL;2iou7G~cGqL2 zNR2RsYOV^>ZF|C*J3`jrYY|R^-1q+k$7E$iMMR*Nf$D(|0>l{XQeT991+4Vw>FCZ^ z-TwW_%Y&aTkT+S?a!fMn4XS?mA@FiNeLtQ3+Gl&8kbrxut*=k&d)NBU=x++^LYoJB zT3TAlm0T(tf>;oG_g1uU)ZM?IWN)u`oys~oXqEHf?Ot~^d^g}>l**Pvy8MoO*<|D6 zh3fOCmt(Jzw?g3;z*B|YzmBHUmLvN)ZL{hZip zE9WZz1F67?nU2o*te8NIYw^VOcH-OMsXU9$x-D{xL!n}Jl#tJ(;p_7QbZ;AdT`onCXYB;hg13Z6Y-9rNG4(Sue4mD@%QY^v8Q*AvDPyLUtmMn- zdEfgeD%e(QE$m%G@MDOtXEhBZTwPsDYw9XVof&ErU!y?w9~Py?bsWh_G`ST~Z%#-6H z`sZ^%Wc=H0V`sH3qf2jbqX_0_PV?s$qh7?_-~W)V_QnUB9JUt3%DDe5^PPA2Jh3?K z%H`$fSF@7U!&A$pNPj|l?InBlgI#thff(jNanx!s@Q9@}F}#t>6?iYIRcrbYjoV#- z6wKEDJ2cC0U_g?~OvH&$=B(@G@y2B1{b%+`xx&w2FxdTSlzWj~s|I&-h^3&S*Z$Zt?c_6;oGETn7~0OhbXG;{HM2eW%~dJ`Y4mtn2tN%cJ6h zTq*<_Xh0%kN5^6Az*Q8!eYZUm6q)E&IgW%W|F?S3c+o3dS=|L0V2fFx z&FOy-+nBL%;n)OtFZG2P_!Gvqi!;MpyfCp|qgKD^Yni6|Uj0Iii{ zS>OT^US)OWTlTEvrQk&RQtDXl>oDS6_V&zf{a{H^tC{8Pc{$80LK$&L!8tTM_G$bs z51Z%dy4wA|*u3o~n%y^0LRM&C?xc0$Hcuu#CVRIpys~Yq;mWbbe%eKIJ<=`NWKh8& z`%6U=q53SR*5yW_-)s;zMU>h6LC4c69Qn5k&_72wjSBr>7Xw@H@?M57TM!vPp<>7V z=jGd`)fAWUv-8d8hvTew@VY_=y0@zkx4tWqiArYMCSz1m&r1o82e{)}3vOM1n^tJG z%b=}8_Unto?E=pEx$vIF_(3Lk(uwbkVSDe#M*sX8Vwo#Gu=Qq6hjlOet?y3>bZ!sc zu&+A6;UU^LV=Y`SLvP!8|kz`&m0wJq7PBNO^E;$ zMFB1U53VB3^nYRs`&x238B=5^CHQmuKEN%ML1=`YyIue4s6)Xcb~G5hk!Bb_&R)7% zsNb&G6V$__qzKa(g^=7xvF!wiNDDunt-4Y_g1y9BWlJ4Su8z8#y2n4qg%eZ?Yomm( zT+QJ#ENP8_&vKJ9{jP#dtM0y)qT#$<07^bbY~^Z`(yV3sQah^ZZ6Yn>!`ZF)Z8#lP zooHOR+ic!vze^UP?m?h{nGk-ZJ6SN0k4Mv!Urw- z5+-^HF%U^~!)*4@?WF+Q&27-K6?31X0Jyg$3*$F377B#)A-IcU0rpKNl)VCg7`(4I#|dQar^TP?vbq9a14=%}Yz0m;y{^kMemmwT zKed979tql%Yc#e4>%DXF?I4=#iNIaxG1lrmxquwoQ)m`Nidw@t3MfkzC@JXi3cyL` z79an-Kr74(HzFabfAZVi??ExBV*|f z!G0h8?#5Vd4lB$0p27GYkcj-f-pen^+`LLYrd=EET`=M29~-$8`Q8}TZlSjg)y5@K zfw2)Hvg0bmT*NwGU(Qi}WyI-+{sJ|AZw4WzjMKb+z4V-ISa(=*ur_)+rYg)MVD|1X z?FS(34K~tOs1~3HPbYrBMOQ4dHWhCFzWFo2sxO1hj&KRVt>Jz%&>Dvqmly0Mq9Ajz z$|O1!>q^UG6@b^L^i(^rr%hHQB@c2Rf+MRisPam|(?9H?hNPwI20s;t76g=o8Q$m;BpR`dk< z6e43i$W==+4DQ7VA{Ht*D_?XoNCrgaLh7Kalz6koWD^`S*KD**mtJ7uSio?9k{ZmE zpA{J;weOCH`*D+uj_r4N#-VrBG(__GBu!Pb@$gG*26?W;W8e`(wT`-n^(jB{%n1^~ z6sq4fq^Lc(@ycyf?0whRzY*gCa+zwI<%XtzI9KM6RtCm{-;2|6k(gTX&Q?4TgQWFF zPmsRYKeW|H{Ive9iuhxrnO=%^dEBSv3v>BKEAN{C7`{TNUi^>w+I3}QQ4t;qGwGDd z@x9@rHZa+9Ywh#OecaG!l0$^}*~*_>x2&JKLc99xphaKr2aL8_ZM8;oG%we){^)E_ z08QoRz|cX0Ffv3-|72!_=h*?8G!XK$x5P}UD-#_48>%j$3(0xzPw~co*$4@~f0ZXb z=(Z9P@+hoW^So>0KtV1cbxxhC%F%FR24f<=p0Rg*8?~_ZAkku)V*2PXK+@HGC$*^m z_JU_`81KNFCeD9=@^w&aB1eknJPeN>{qB!O3W$pWOI{hy_zLC2yg*n|!;k?@sif3o zg!nyl%GWcjpd(`?=1ABsx;@q}`Xskr6U$C13ty`fGndRU!Q|=iATz@SR3tO4%t6so z!D@99uaywt(6AnY~iIQp3-5iGlV;fB; z4XEfi0=H2}sqmw4Af;!ap48cOi}ijy_Dflf&^rT>FP2kV!G+O1HM8kTDOtZ}aJ`%z zJmrc0p_k5LWV1)};+m8q9Qq+DKuv&%G1P$-`x@o{Fzf6q?EdZ}@lZicnj|6wOHJ#S z^W5U5m5;T3-*J$<|BQJtdoYaH=ArCmbr*J_N1?@_btRABbSqwzAam3uo-c5xtNd0} zx)D$7A*lxaQ6I{Y9vaq+-;#AwFt*NQy#>S z{!0`7uZcra(0xvU0O^bi|FWoZ(iemS08sNT9qP$}l&i=3jdl{BWT~Qd*0S5dXKKAB zT(2%j1dG4aTleCTqY+N*vW*UELQ&%5U0N7I2Z(MF-9T<2F*kgRG_2KTX;buc)>YIS zJUm;^b!K=H$LnuOkV=*4qmAZ&|I_B$>tSl9k2c*4eSIte-S$0(?Z#2sa;8Z4(0)@q zZFC#%=*)+%Se*L132u>ER;%c5Ki$=&nfH+}d`BBUhoMHrx7!vkJJdURe;?n|{W;1v&gf6aQ7t8i>9?=#? zV%#$D)bkm15ZYdy(8rMBn59c%(SKR?NO%fgvaPGIrhe`XzJpu-7w-EH-zL_67E(k9 zp8mGJLl(aEL!N|94ZSqNx2tka9FX>}1C60s*3G`YoGzlBi024%(kJN|ln-2#B+_Wd zyYEL*RtJ9Gii1#n*=yiLp(2ScR`H$kj4R{jW3j+Q`-B%0eOa_h%|@=s+faFW09-z- zx%7IuFxQdVHE}%`D7(46%qrB1j$)G8j3Wl?$Op5u{D6O9XDC-E*1?PVV_poPXpeRw z_GzDaJd7Sxr-?Y3IVK)o6NffTuVIM5I>pu1RqN7k{Qx`6tF;AczAvyVuALFbOB3?T z$=n=r6G^uRx} z(BG%-UujTHswS$f(`uMP8;XQUZAuq|A?+F!G7PDB)B&RWZMP+WvfLp?ph#w`5_xyCd-_|%zUx~aMimQc#OMLTUV2eRs$?p63VLKKr6?9 zoAhc;XXX%hNhwJnHwnaNZ+m?F!lU$M9d#rI?rvCGd9x%>dRW$jQUVxwNsC-mbVE@D z5kRC`f=~p}ft3q%LQ5ZYdLmke;2L{_emN%_GY?;L5Gs*^x0MN{A!o64RQ+upzduNq z1RJkG;ruos9l*RBVeF4y9bpD3qP(~|iQiEP>7coC^J+yYmU%btGvbsCdEjSGcG9cs z`NS^-7CmUM`(!=jhW!-nrPKj{Vo*r1kQFwS6EQ$J%yd;qL0h|!@R3a&(CrAX8qO+G zh&7Dn2-K$I!p8d3$s@Zt4?KV9BV*>|>|nGl{0O&f!Q5|}cyImTSNL{BU%D+pNZI;l zaz5#?m_BHnYJ+Uz6cezr0SQ2~St?kPKol3H4*6P0iIVmIu>so(Q=C;CnatF0q(!Bz z^^hzVok$VE0c;<3Ei{1JU4#1El`^U7F|9(qIc+r3(SsU5MqJPUVy;5qHuB0>F{?UL z62$;j8W^sZXDsi3qV%L820elc2L1m$6o<99mUtI&VKupVPFLIBVN*XAUW1B`v@Y`F zBKSLCgz1G4Z>Jilmc0Gl(zouW(3TEh#Fm7Q6w*N3IOHsd;az-^hLm4XCT1?!aOz|{ zqkpJB4W%blx&xI%w3}1yZg5{n#YeVDdPfQ%;YuJ7)bZJNq3XTcstZBS28)gnhuf(x zgFy!|8Ud3yEp89Y%3^Wc9@5w;E!kvdD+GJ6>;LX2w_Ol1eA!vP*ck%>$cUh{Uf9I-}IHd_t;RulTE=lNoCawaG)!i@} zE}q@11IC|B68_wrI_Lb7N}R%ofL*X+iA)abOG}`?>D}8l_OK&Yk@Bga;}YJ6$$@m@lxi^ zGo?lvXR`+(yS@0}+i74i$wq>WKFU@Rzb>i7i*yLMR&4AsVP%rF@=Sk`KVR&d8~VpS@tQ_T&iOj>UMi2ubM)o?R~y>(ZS#+-yyr+DzmRMD zu)KGv8H)twB-NL_$bPNNX4>HkJy3A*YlI0Wj`}M{asKbqUn6ta|Ox z5@Xc}M+#3v@dJ(+wdXuxO2%#1du6S*wl-pufe8QUgCDDAmi|tTc~l0UwgI=1U$_VK zLgY<`+dsx?=nZM*@BjdsLxZ_sf3>JWgt5O^>#pH4Lq*>5;fbXRzYOff$G>5TYU>d` z!L(sPA+mjoptOAAz<5Ie*U`*Rp&TXMt_n&IG%fU#@{;R6yV6+SOI2M))BPSvjFS*3 z;L-eXX}%hm$i6Guz(qFarQ&mvXRPYXs4NTI92Tv!oPENv@!56 z7uzeHVIQO5k7rdmR=vRoX?5KbhxH{1i8rIjj?%*uE)@L|sh11HsJSH_sp*h|YWQ4} z^#Obi4j4Yw3MvGGq#dk)o1^g|>CYje(ri-crd5~+Y~+Y=#!Y>(n-&t2YI_nDx170? zUp0JBF$dOjIlsiFAO7Htk_hNE_D(RvZkt8S=AWgopa1L!$9 zUp<+u`J{VDdPf!Lfr(4#C#(lix^bWo>MB1W4B$BZ3nL>xo*_JNwwwlED>9< z*R#8KG(<(E4aT|Ghp;hffg5$ zC6t1y6{AQEz&HgZ>#jH%i1S+uePK)N+o*dC-Om&la^_Egg#2l%b=Gqf^@^q|u8UL_5GIw~+0+JT&aJjGQR-C%F!I(d1IAK$1gI&+-m zf_l4I*n(sY~$B`=XgN@}VBRURbywb1T zDNiWz8Ltb8cGQwH=^Zp-Y;AOjkC{R0lU>*$))yJQxh>qzd9v}7x<-gVT)V(Fx3OGt ztZ`%gv_6{banh2pZqk@+DbT`_waB71or8Lk) z-#1M0p-Ubt3v82V(&*&Hkz6KG%=G8F1lqmS8T!wqv5P<9E$^%&4N}|(92t@)+_e%y zSRss`U@}IP6!5xiAETN$MNR#dv{mvyG1#KAfyNIhNx;3@MM2QcOKD_?m&KKbh7IKd z53mk03+SknG&BY>YTB>G;zASijd9-dCS+Ku1&z~yJryJE!bglu*lJJS_nW&Ir#O5w zBhL>cs<~F^$-$?X4oEVjWnH5DzROGl)XAG`YDbN&fCr>msspFiEHQ@w>|vAYEJ)nc z=&Xiae*cr(R{oLr*VBijpS-NBQ5+a43H9QN%j$~nkcjkD){pfup>-Ct(ReQ2rQo3L zd^$m7s6(5BBBKrp7#`-Hv%b_Oyf=X%A{t*}1)yK(T zx|uR*oCy5hLY>Z~Po>4a>!|l8!ZmM8_3?HtlcjeKu^bMl>wtZg0NF>6rbYamv!Z?I z*Tb>1C(%|c&@v(T4J2zJ1g2W2N5*t8M-uaG@ISfUTwc5e)5N<+;%w}~!G<_HQyreoH8`O+uXBX9J*!g^9XMcir zbWsONWK%+G^CiiqqbmQ};+qJA421K42EWj9E2m3Te6OVp34-bJ1JwZkl#5kJ;q_lq zhxE80Zo9BQ@vw+)hdbOKlLyAS+Ehz{4{PW`>cCHRWH)!e)dBGhXzk{cH7i; z0xH}y;*Dak66LL)@~ASSsiJMy>7E*R%7U{}lrJ}iY+>Er{6={B;7R_UvqHEAb5_*c zu!d3I3zEq2qseVI zV=9f5|C)4u>q**E;#B1j`MIZOq3iO3e(7lU?PQ5Ls9zhLwQXvBt1acRJ)2@FF?sEn zR1fQLKbVWnC#)+$EYUf_Sy?JFW%RaQF1Yd~0fh{z`E|FXbX3uFRNz%rnoV0YXRFvv zh<8BTA!5IV2UEnfslTeZG7N@;tn)5uDSB-*N#DU-lw+$6V4wV$YzYzD4l5RVZh_1f zCd&)iYlX(b#1)3>mLE!{Vx2%zTKs`*KGbaQ>QH44u*cT|pv<)xDKxhqm28I~t^ylv zM~N^D2{+!wtb~o7cq3QfQ^*p4A6L-aJK>q%)5pg*wj$|p?ji;7D(fthdMS-7U!hfT zTy>!AC1dn^KZ#hY)`P&S>l&4Ar_N$}6V|2^rA4iM!ZVOxCicQ#MGkC{yr@vgne>Fot8B9Kz5sQGu6e#SOQlQSH0jWLLj|^Pi&S9$WSDDPPRTko5i#8pt5&wl zuHdd!g}6dlOacn^Y2T7{h0WjIIKt0Ym|wIeGU2OW%-{SLm0Im z3^YCx6QT|qgfliDZW)^C*E4VxuVov;x7CP?DE_Xe3ngQq!JwJElYhEziAs=u>f0;| z+igTTRPOGcB1b=qk5ViCPHptW>`Jx)BvuF)Qy@tpQO~z7g2(Uch+o9&kL!sPH*WCYi6F7V->4h$+Q?HC59>0xs5 zERzGBJCie{okr?@R7a=kiKUQkhr|pp_9hPwkoMSiUyqtAFN9m|^Z8p+&G=tSc1 zxis>h>%{(7$bznA&hNcwuZ3RQO+R<;aqLv-9x0dAGU}VNec!K1i=Xp{4YH3bxc+?w#+7WrGd$G&dG4ldT%i1)}$0!KTN7sdEQvUQU_ahKcBw9VhT`4(pcRw zr*Xkxnk@GZ(E^v!nknX@ff2V7jdIsB`0mg57pNqcb<+v~z26i*;1Lki#vsJ99J}&g zUS2Bc$v?Q%QAWWO-dAB^vRHiKAk$^+-Te)`LeRv*zR87#Xpmr;oDfmB)yHm{e%AB= z?EmB(c4!i#OtcSCD93jTp&dE@1hLS=ZKBPa3#I*;!oQ+>P2?-LEx+;aUp7TwtEfW! zQ@MTs=Xpp&MrV)heqm1i(Cr(h&K`)u=t>4-5xC)ro3uOs0{GrzNPhb!Q30N_915&6CRwDETw9uyWo^y)pvZ8@J?Gm@8cJdXUxU5U6vRRVguC4 z8GjKI3d8Vr<{J?lb)Az>NEq-R?qSK1NPud=dec^vNjdAE|5L>sg7&uiFjf>Z`4V|4 ziUs0m!EMfJHYF+pIamXH3UcbugniZClbUMBzSNiz9m+rN7T=?A4eLKVx!*$n&_vQv z6}E$*xq*{(U`bGhv%$GaCca*lt|njE9R95^)Ao+7 zF!B%3!m!ZTjzJxI-G>uAQj#D5rF8!2`3u(N?p6Y#gTE=H0 zeGl@J7jPV50TXLsA9{hGuik$>4lN4ihS*W0xjB{JgM-N>pwtTLKu2O2d?kmQCs{%% z>Ot_yQ4P{A$^x1N0xHO1Md1QDS`4A5!?$xtzmV-!I6_hIE%9{-qUT4BcQ#y9_40np zh`IyixRk;(UI+hpj&l!WfO)xqIVmM!m-|=YXR52Lk6V;pxk{wH*BxH8H&nJuITCqLm%%M$n6x!7?xdQ5>+{>i@n;Oa5`*hTq>_% zN`NC)Lz78`(IZ9~l5d_=iKF?QsOQ5qKWB-e{OVDS`qEBxTou1{%LP3^41B$y1a zQ)q6A`TjCk=2p^Au=GJgf0899b>O_Xz1QCYpN}*|-{Xy6r@B2(EOF`&%oY}$2}w6t z3qeL|cR3a*r`zdqd1Cp=2?IPTI(4Wae5Z#K+U>k!iOL$0dj3jHCk8j;LeXNOygGE_ z!F_j$HLY*M?kAo>qT}0c<>Ezl2dEsVjr6={>hNs#dq#8}Q6)ct*z}HWz4Hx;DdM$R zBhhik07osKnAw(F<9($;W#+1F_*$wU;r(n{5gnDXX!1+a_!|4qa+UmukR5k5z`+LB zjtlCsA3?+9m+_TfQBrplty}PTHyFXWzLql#sk|L@|&4<(w z@qv=&H`m%7F=aX2KhLA`|D8!W;kO-(Aczgq6MeZ{lf20DSAQQ}-H`V~rI)n##ey-2 z6hz}{C>d4uj06F^4#gwpwJaOlq*1bIO#c{_xjYUNrXx#EM$dWHF+bOq)2`%e=^WX%ibtMKc*dY@ zJslfI!n+KlZGkx@5#{{6CiY_SOTP@6s2!D+mIlDk>(zp+v9o0uwKC(ZdH9+Pv3>nM z>F_7cFGD984wo|b-o44vveooN*xDh#Kc!!ks7UI<{CbGFqbljDa{tE`F5HgIe1R5r z^@PEyBJ<*w4x1@{!0XXilnN4$QgUvgZ&17lEHRi@L8?Jy^gmK}~a0VKqU z9ivJOF+Whv-=i#?X>=5*0=6B%`~%7M_RBDcTEJBf>2C#uX1q&BCy)WArT>jgn}<4} zSRz*w@-=v>F{;f_rTQ^c5r!L<>Nkw^jqNDgi8Vg+UISg~>U7;4Fc+-V?d*T`ScmUE z97mPnI?v(lu#P6q)8ar>ft~?!FA@E=qFf8tdUS95*Q7S8-w&NN6~yyt;1I5KQhb#T zqV79bdY`M0epxf&a(=ay3vVF-P1+N2j zrTx(MH_wz;#1M(UCUwV$bPnaJJZF@ww?~f(&r_iX+kXFm*sr8ockgcJ&eS`|j`4?I zTc2Awc2-=uU!5*~;v%hSx8kr1rM~w!kxU@ASo63_IGJ~C3u<)j=m(z}j6c&;?sFYm zLqbZY0&l~wAoHsIsco__B$BW`iLJ_+|f>-dd>%wT;kaK4FjUg!^5orI<=MH6F->dw5H{A|3Ryr@Y z6icI0pE1K>faq860TzR6xS?znCuKi|Ihwg7<{O`WyDLwq&LS!b}P8~g@~nt1S2QAjK_ zI~B$xCl5v$IW2%P<8!;Zqr?VH23@olc}bi$S$p;;a~F@E(Z&dZR=LV%c^BCr^K@n^ z7+%A-__(-LEwxiq@JL^^-rZ*FYjdTazxsVY?tETse?aI_431W+n4I_{7TCz#E+<_N zTD&6-mvHy!Y3`vi8K4CZc(nKQ2KxNQ4VSRzBNlaaMuQ=smqkmkKWRkN3<6bj2)ijynM8u znF89)lFiM{Jo_1Te0oM#l^?mf;)XUcwV!6;>!mb^tIfYLKXew zf75tXoA$E260E2UtoVMmu8y6ah)--7QHK3TWNXXts68@1e&JeA)YRvtGs@dr^TiO> zxvcGLnV9-iO~WM=t3>d)N8vlRh9vs_c{?-PQ1ONIl*!ov#JXH?@i+rk#W@rmF84LdrtnwNY7c(v>6sv(IZZiTNW74 zV+5Iwt&5(M2*)?ttjCGXj2oQ|9J9Af?wkC}lDP9djA@v|h0_P1Y%JQ6XPUzJqw!Ew zM?JKRSOckMb+pnLgMbS5y{=?^YylQ`vv;i;)ItmcOy~P4e3(PHPK` zCANdj8`wNmK6%PAkdYq^Kl!3Bm62UF_t8Q|hP6($cpx|5d}(UB8CruCQzB(ty=XWx z7KFv&J5)WP`sJ7FiG+$OK(hiA8ie1G$i{Ii@014+ZW7)$+tMbDp@1sLC6{*O>u+KC zQ(y1^NXv6o^|nJC_rpY$grKH_?;TwoL5phxh7=$k!0&5&RG^wt^n7mxb~K!--oco2 ze<%ebV+aSA@x)Lp?P?8`uXdp-Q8@HM`5R&64i{pd)waCFXje^sK@a(X23jmK`OS?k zCU5vGs+DJpVUgF#i-YC9QfDP5jff)o?rK60)H3)juVGewdfxAe;lTMlph8x!#P65E9IqnmQtx7Mos}KjyY2OMQek7gbhwOar=K^jJRowD}~Qme}pkVDV5lmCe`)c|=Uf+b(L3 zY3U2tCna?GY@&RInhY5NisNE8>r{=#@d$LqKc}zj)B~sGZ)I6Yd_;blE-c zf9&_a<2dfN6MFeL-xOg`?v2 ztp81&_Ki)&_5$p8$wBHvwY4&T>ihwtGCR96OS#5~S~0*-6q526V5-f4L+ejY@E#hj^NR2 zIM<`JBW_6kFLp5ZU%c&j(Uh#EmE7WIYPHGWz{}c)SkaWDv7NNQq(v3SlpyHEUs0TL zl}I%yI?YU24hl~LKaRX13A+e`b0_wAejc!#h<38)se;5>8F4JAutYJ#QyLL0+eyk)rV5Bb`L## zs!t|Wz`cyiOv%t^ybwF*4!!u%C^f=GahsMlDX|kl`k5wLfaptuwcp^We{TUq;8T3) z-fgwkgg6G39tFf4)wcsZ3O_iW7pqhiOT2`VW@qy7MiLPH!!95RXM%Qd-Q|eTLwpjJ zd;wzY+5x8nQSX`I|O(L6O6Hn7- zVq^qkv^yWDlo@oA!aja)_UkoGOHn-H}e)_mJQ>!i)PSII-b(wDGC;%_-m z4jv;7@H!$tPpUP~svaF1Vw!BwmOzYw*9Vj;k%w}MvSx!87+e9bxRg03xD)&gc2;7J zRhAMX;!wm74kMhh%X3iNAfNhyP-kzY9UjImL zoQ-KEK*MJ+w8Rk?j_|W9^VFEs1~xkB+;CB|4F+KIQm{}ug+PGKHQrQ5r%?N#K3n^6 zy=9jMonxNXe~ZB6o0vB&4b8IM683+Y(bJC^16N$u+$sA+(`GZ?{W4pzUJdi*``A zEK~FEerLPR7uO$Pu+c=$cRBcV7&DZFZP{dbvjJ7rwqVN(6uBg?zBgyhR4!x3UIlF2 z7vC3=P-G5=FyyxS2f2g>DuJnYWc~iKRTCXNvEOW8=7 z79E1e6%65aX^_o`z1H?FFN$Mp4Hqe+SLs{-)(LK|0V$JuoXje5FluajTym%>+o}UK zQI8P8a+oXa`DHGM~R%oDN14CG!ii*-we%tKPYDQ>ODN2Zbcl(mM3 z6#9iS5%EXqt`hRGVuBzR1Sfc)h+ciJPXw@OVJ>X!S@!sw>TL7`!~>+#t*w z#;8iWly-s5j9(=~8}?lq(~imak~4qx=n2H}I-;i%D>o$La(K8;{l?WnP0Qb&Wf&l` z+*b9NS(L5Tg5qmA9#=24`glFpxbm0L0m=KFW-mEyrtpU^R%{t}V)x@RaOLSIsq(GF zFNMUtw%_v_#s46K;gIWPpJQq}X>^Yce$NkPvXdlU;Y3bY6v}u=j!vU6`!6gB^^rFl zhasj?HMp)WL^yctzi9IQ1Z!fNdxCBAO(0xUR^~F}>ISdjJwm*;0|KX#?YG z9T%xO6dnxbm}&@>Yl7Lu!_`Z0;iF5o5+nM8h#jysi2tSCEHQ%OLM#58pF8dTWmU_6 zHb0Y+`<+?nCHUvZhc9NIN?vB~^PX`k?6D(*rOYDclt|rXEr_&jjMUa>(V@5p-ZO%) z_T$02Vjn=K?!IqVczRaGc?W>BikO8~^9HNd1fO;vPTWzK#G|T8BZHMk*R#b)Znuu% zPkAGuOGQ?zRo1-lQB3q;xhfTMTkXUeYs|E{D=I!W3}WQ*VF)tWM>xWri}mjnHES1J z>oL^)GUgrr`#B}HJ|dsJf}Qt7=wqkGgcT3wkX(;N)=NiqU0Q-j(C&s}A?>lq-ei^Q zpI%K$5-f^Q8h2s7O8C+{aM6>EKU=%iVMM#DdPFXnIR zz(s!xIR(13wvvQxxf-~$vc=FRjempXz~Tb)j*cxCF?C=~WRzk_LKqx3*XFtb*}7)F zfJ8p9*#ZY+<;m;3L)$%g6GO+O zrufd|sAT_lsA^m-|F{B19=Il9voz`xU9)JZsaG}%$%v3A!9bZ1(IE;2soIG?ZO%D7HEgti*_0Wy z>5{h^_?k67Ascv{3#eX3M)WI-7v`ZYpkJ2dkX_3Uv-Bz!D$$H=*w4hh(s0TT zohJFTxy--&n{}bdgThM*D_yQZ(|ofwxkBEAr%adP8=>fLCx7@{bKnI?g9adesY0r% z0!bK@7SEF{>97l58&1&{KT80li34_qdWN8U#0-Ts#&%!34tKi=(z&RD>A+Hcedn9J zEPBtkl;_k+N4xcxXwAh0tlnf{1-_H;%tOUL2IIah^y|#h0(ge^CJY3QnO0ZRgn-=5 z$^Z1jfyJ>LWj}~A#Jdg?D0Og{tm=CpI$w2@ zax|v@-#M8h`6=d|@6fD#LG{08sy({Zu}Tb|{>#J5^}M!td+Xb3pqtBL78vHA0*tEF zkr(P#Z7uPb3GMxBQq~Qpzn85v#+8nVDt?pQWOUo2^>?$-188a+TkI_LU67PFfj|uj z<)&;OZitzP?f*>m+b*^yQx%Q5v?|x0&o+T~GFmL5CiZW>J5PQyj)*46d8%wS9vRZ~ zv4{V3poPGL8*-n`K-At+k3BbrI~GjcL3D&DK`y$Ix5>ek(BA1We$8Qi;)+tK7bG(K z&_-2_&^k2Vg^HwxLEdv%J}6b}c_+NnzEmadM+t-3&Bafdvr-oZq&BofP${;(y2@BHKOz`9NZI6Kh36JA zlk<6+{N+Qtgayiqi-TNC)Y&-)q2pWhR`o0=jQ#q-BvG5>Ot9k5N-If#S@r7K6~?%t zS*kf-Qa3X$F-g&pn$<}G#r6Mdi6l)jEsFwtrk`ddVEr-86KW#BMd(li_5x(3BTKMP z;40w~gyKWzGyZ^Z0npmUR}tchak?WU^n$&NhbOHjF zy~z!^n=3fO7y8N8LQrKC4yqfXnPe0Ivdh%J&cpyzq-^bv_;}R_$e|*;%=CkC8NK^F z5tk|NURaXaS7M0oy;L~_Pih-Aqy5cB(_ot!*PrjHraD7UgPE-S#r|OFM4T^}L>q1N z1R@9bS_e+TR%ENa9~^IqPQ;@4RPa%z!KNXNDgFcfXJ@`-^`rGc4HHdvn_Z#Sx(qa% z`uYtSRyAQs@3N9e)IC;|n%^&5ZHDKJ@Z_K4634qlOywKYc%jT+N*(gbk@9F;T--g~ zHbTrBB+l|2uA3mV5?8e^*+z;!&wU%nhdf>Ic7>sm6U$B}xXVzibkT zj>}veHqT`4F>J>00=ZIv)uW)he^Nz{Ar9X`vq{Ntt)9HbZmx#FiPcI2VR3S7*i@E1 zbwP^M<2P%W`LDzB#x+rS`H!$IX(k4(svPx)nwpxX!NQB(TAi#NjeO!|v$D;uZ7j@+ zpD{2RTE`BdiypaUl9f(Wab4e)W>=luR?jCk;Lvwi*Q;;U`eX+Kdq+c}a5qLSwS06A zq=k#Cv9WMIdsPYy!B_ZBx_qFz*{J%l*sHxajED_~c9(EmF-H5O- z)Z4uJx(vd86S`?-D9Y&k1<-*jnUYI_s8`=JJ)})TD~$_$hGbJobOu zV_aBw@EPHV!mbGYK{5}-A+@@jbpABP0&{u!J_&sNG$G(}#yJfa{8r;}t-Zy||3bjL zbHrj^dS)$3$4o(j=9EQOiLei|8!vBn@vS+a!VAhOFI8p$?fspp=~KfmX?e*Zkr z-}l^e&o$TF-}!t#@AvzqaCs)c|1m0KmqRrhyeUciiU}q4YrKojzdY?Z5BRP=>tc)1 ziG^@Q_X(~h>p!{W{L<3(Qf6?aQAtSC+)Q{t0BFXooMTZT)f|~8{+cOoZt3-( zZ3=kvU1D!*IR=b4U51KKsy}w;Mn5ksEbw3UI1W1QFIqO7S*bkxZ0T}3VqY><_sYp3 zg5Rg~J913~@2}6c(SI^y{eV?)IW9*lLLGzbO_PtIowO-1cb6Hbk5($VFTfbvUxqsz z3y`kQ4`uezA(zJSgF)kjIr{^?YfP6gyL*!iMaA;F`e4uy?5PFeF8fhT=FaS+KVNbR zna6am>K55apQj{N*%$oSObO(m+W1FNMTZ_UzV7-$GGzU9J5O2D$-ZpWlkP`6&yKC$ zsi=@6aM7yX-5~IC_VQR*ujKL7E>7!IeZ$kPU(No#%(j}G&sOb7&XBD--YCN$UgW%& zoC_hnrr^e%>>?rtr!3i%4$rV};%VbEJ{@qf>)QRgTnb_!K%ZGY3hoKj7C{s@)MIO4$r6~ag@1qx5XIalX|2ZU2 zmr`JHhd{fImz5i@8lRC~E&%G?dw8g+(1`(^)FOB_UPd(W)08yoW@tPspb&6-drhzC zg7y{O=bgN{kt3WKQin-rIIM2n%SfRQC(@YfN2vwu_Z#>l0u61i+ zA#>teJCeqV7ZC`4xb%f!A6Q1wZjdg+Iy-A;dMxq0Kqb}A5_Tbc>5?9*i&3c8rJ~05 z-5C)t9)7FOtknOnux2`~=5HsF+!6YizkjF1R3PGWGnD*Cz*^2L&61>4=waXK-+Tf# zLQO}?)p?AkOA0hr=!91N6=IzpG~9Km}hWzXGD9x$7_A1L}ptEc%H$$q0HRbIrf=a`OE^wiXLHG(i8bP5(0NhPKj zPR_+coQo4eaPG8XUje&p#YpY4c@6S;LyiU*mmF8Gv^o!KXwY}zl1PVx$(smX`J>xM z@{i}_N=JH4IkJ6vLV$j|Z*|ymu5GfdVz}`0Xc!wx*|Cy+3PFwVr zG0)4exZAg_YIgnDUFR@;L;@}${-Ilu^=W_;d0dna*Qj2X2UQM^MFL)gr*_$i=OHPf z)1Ti&CZP#;9<1MQi=y19Ypaz@ zJArlXtcy$g)l&NW zcDts7z2>^DDHIVVsQL(-n&9&HFvU8*;QSZ`KV+rojmR|2x+)f{|6YKvfTcHz<beHW!@Ii^1AK?Br$e8vy!#l0f(f0(V#Nlymd^zhS*tsY zce6ZcQ${v;)gdm{iK!Uaxc_dYsOC)y$6#o1vID$Y(?w%GG)v3zL8H1`ANNfQicM{viFmnVm`%E4jFV(Qe0UDNk16VypaViZs*UlSX@; z|MVzTFSJYH?qWU1#-SXwrwTC--aj+p=fPbKi~A%K@tk_rZd#^kw_zPw<7L)%&X>9< zn$A7GQkyk5f^ooYeY_Vys=!(hHYD{Pwh+&pW+>5b8}#F8w2Weh6%iRBTN_0?Iy!1# zY&Ulmv$NK=iO%VWP2uQL)tKFR$=Fum#wC}dxdx&`P3xe@u!#zmH-tYQL13*(1}n8e z;KmK?^xy2u_AI~7|HU}_UM(ZB=3*fqO3`LGo!*~V13PtZ(Lxr*C%3B-pfl&7^(22k zk7tG;A~x74ks)p|8|d?1&%mJE6Y&`xUd0kHS*P@MN73msh~i}be+23UWYa~ z=yLvzV%5M8j}s>tDa=<@(JK}g9!lmT(_gSi`kfsEPL}k5c4ipyW0a&>#vrr49l4uQ z{qo}+xKyTfKtY1YKukpp?->lJ%aO0i0|(1c1~vKDzyZhzz7!9B3TR~@D)>-wVW_Dt zukP2u?2k+OiQ|jy&w4K1sO)Q_w3R@g+A3Q;k&rKAb2|>=!gEm!;6sNQro1snO6ir? zN|)fZw(w@67_Kd~Cgu`OPN%BPD58W7k0&YHm3EEcowNPoUzwC!d@@^L>mc!k;wR|5&j?wo81mSglZ?zrXZA$U7h&m33c1or`w4 z2D=flP?9A2fh)|_il*B`8LQAw9vVFv(pgQXb#lBp;p=+xyCyV)WAzidy(vz0L`K4N zD;Ttvz$I-d?8!_ierW=D3O(l2?wJ|I$~4#ntlFW1ymm8|xam`k^;9#t{ttpoOUp0h z#Y}5^#sG5;`=tqZ&?3lZG=WH*qL833-#amJ!yr1Ze%^cI-Lv>*4rME1+#03?v!}Ge z?h90j7*(allseXYPHoqg*ivP^=2FM{5$Y9^s9*RjPO^@mDTH;L?2))LH3QPV2YhK> z_$_2?#el$)KO@*Mvo2F}awuQejI8Ji2wnF`W`MYp=eQKH&v0GrNjEYvY296)3)&vC zyN44ZlgYIQ8{HKE=ra*A?8t1je6{}H+~Uik6O8E?E)h{t7AzK9NTDn%pxu0@8U(n2 zu$C!UM}O6wcEnX3S9&Fze%GP95s->@A81MRIosx)nbbbVj@3=L@)LA)6<_*?h6`IS ziU~yi-2cS+Ww^|9?f2U0XxR!74n(A>-OZPaJh<;VT9S9*wNl6hTL+zH&czSreBE}NiDXTD9 zSztV2MI$_F#z*zhAtBpVG;H`K-o;B8^DC=W%me>Z&2g5>>=fQ+^tXe6;|NVa!HxZB z)YY0=gb?d(h0|XaE@-yY%WgrV{i9hR`mm9WjXQ|&7o{R09bRp=NwN?N!x;Zg@Nros zvx85UzWuQW4q!VWRrqy?YIlo?vRV@#cO!gU>KZpZ&+ILx!afT%?0>DV{rO|RzD;R` zc59kqQ@~8!85tWl{~oy>U#bid7Z>;KKiFFSaOyZ)x?fdkDt&AJFijJ~u=?AwB4`dV zI6Pc=Terl>Zd4?uoycTQRW78M$TnB|jWbJeQ7NhTHubXF+21A}Z2nb9rM9M8XoU@~ z+Be*n-?!+!0)(l>KF5ee_1`@B-vjm=0(_I;=XMJFe;IJe!Kv551>n;7c7*LXf0M^; z@9M)YsWQ4xs_Bx8`OS1M9^U-=tpOAJtgxwuFh{>69KT$BpTwB{-Mg1X)wne)ue_Tu zt9m?oG}DWxW-|1ADWHep34{v&EkOO59Fj=yYyO8q=W%xIzv9pR56HUz+;TB&{U_?w Wv$bfG)9QfLcf{DhT)$M;CHmhAy749e literal 0 HcmV?d00001 diff --git a/tests/_images/TilingVisual_tile_assignment_touching.png b/tests/_images/TilingVisual_tile_assignment_touching.png new file mode 100644 index 0000000000000000000000000000000000000000..f8b3ac98d90baa23b8cf38055030c8c29fd88291 GIT binary patch literal 9600 zcmb_iWl$Vll*OIk?hZi*m!QD{L4*51AXtE)g9o?Z4nYG1XK)EJI3##*CqQs_-{$*P zYJYCkPF43*kM?`-zH`qx_eQ9zDqv%fW5B_|VZTw7)dZf8pAR$?;4GoNlK?!4y2|Of zzH_v4^#D6t!l{5=o$MW5?QP6y-7TG6Y#bfqg4U}u!_}^@7Y~n$oS%tl6q#u-1lJl+%SqdtFzOG-*w<@Ue9v>MHhPD>-5 zJ#gnRs6+cOnk^?S4ga*5_ZrpbY*=VuXh=3k!ncWSws{${jy?2MBr&Y|soF(HODor~ z(IHL3SJZ!P`@)MfTHax(-S_Ti``yFUipAaKfgaQhe#Z0m+-7oWD$n?byI!po_UY*< zghSkQTOEs>KirgWZ)p05hofJmc9{2D&lf|RgcmO z`#3OPZ-+-s9TOgowm(j z`uH=M9St456Bz4_2!(%Yy3>4}P34*I?ap`7R}dZ{ygNy;Cq$fAu%~{kvjK+K)e*6y zjXRKhH2Eek5ntJqtsCcl##}wG3urc~05mu_c-_nG7ciK}vJOoC7B{cEsyhuuw~1FG zvr3m;Roh=L@Us8Y!(#BhoGYitim#A=Go`o}p{f6afIoWc?QYrjOcl*j2W#ZLT)&Bq zL6G^XV6b=-CJC3UkI$=E^shX8_De0vne{GPBTzk#>vfu^f#IwD+R;BDBqpna9Z@9Q zr)e3zF<4giStBPSZk6eDWJCNM4OZ3c-n4U1>4V79t84aP5DDM5_fcEe_VzaOishYP zC{@&s%j2lctD@NSOwN%F9zM1pW7Md%$~LRev);}_RI!!Owf&-zkzX;tT%mV^n%wry zf>qh6sr@s+PO9-<@A1rfdslyTJfDgt=I1g~n&Mb0=9GE3xDKD(X#5`bOwZP9HZFj*>Nm~bNA5io_r9^g4v^m%dojdO z4htdsyt(t$bE+YjNg!R{#P7y3yXXC&Q;LBZcZMAp*+U_Qat*}|_41F`p9q#!PGAa- z@6SeEx@-2pY!T<@u8VFn#`nD?0$O%vI_Ayq2l0bLLQY!E*AZ|6X|?RlE>kEa{aVX< z5{PFS=oAqsrIglT$ZJEFRgAsMS9l|G$uw+~B%VUvcHe)|Cy z(mjU8NqC%%{;dtWX~;8XLAq+fBw7*Kg^;ZUv3%otBHfLwxpp`Eo*AFCNF<>`k z)rtH0*pNob?Qben$7tgY_)1WfovLsDjJYhO%x#R!X6F^1KIIJVygx^I;g*K?W12!` z$?Jk|t7Y^>u?sVk?e9oljnffi;)vBHPbr3@yC;pkP#v>?)KKqT3ML^`kvjB@tP4_d zeRlTVO^}v;v#LrFxHX` zyWiOP2Oim$y|C8y><8Ho;$FSG)jLp&9J8R$g^d)@0|j>HD&g#Swl=ER^O1F(_Cv~& zZi@%m@+CXz=-&9*uR;Ui1WbaIRR^oH3c8E=c3#eL@Dw

YAL*9x8>^!mew(0>+%Q zi*+^xjq zt9?NUY(z@t?fMdOmcR8}^%jx4y)${6EV;yQm27rJsMF@_n8>{=$;$J-{IxJ6J~OFm z&f?SgZBqY3@D18;lZWCCDbDI}iUjS;vuG}~&9U-;teZFa2hDmFbqHNsSZ^V*O}I)j zWkged0~}?qD%l{;^8Wg>VUk3Da?*PvZBB_W3iO6*IRfDnZi>W3sXD4(1!_GwV2z0= zl}6~OJCV+g7lRX2ae~#5x;8l%eWqa@S(%aXzLZmNsxc%#?`rTlzbRq#Z8b<&9zTJt*9$;kPt%S;;P}mXD&avQ-H3+nG&UMjyWjG^s@w z$Wf?8VQvkR;Fq^S53V9hyI5yz4WhNJ$|>*2)kn+`&dX-@Y>ye%Dyl>JBWf+^ayFN0 zn)k%$sc`&`WQn3M&^j9}kG$JWQp%&Ad`GQbyUFy46CY{xi|WpmEF-+o5*H4^`Df6i zKb8@=!E8JxCn;quk>^{x8DwcHeQn)I%tX*$%OK?_Zj2^vgLLa{-_!_}Uwp9D(gE7h z*taYz8Uo-3JFxcHl_6pKyb3GYGQQT+uo-t(miZK;eh6wX#&~%E`Vk&pZXLoOueg8+ z3Vb4271pUpE9E?~WpM)SwbSBGtq*c;RGTvfQAA!wN%^#0pSYRwh5&N#^7i*b zBm(M*i#Za)syzyZF`DM=wQErhjcyoBj5gCah6anOM$LX@yF9O!^FBK1yOlA0#{P3s z`6)+OROk$kl83CuM+xbX8JJzrCRh}2yA2~;^#lhoN5T^yPdX-G;VV+O3d<1vVm}Ji z%fe9Ueft#A_K)%WcTL5ZnaEIMoB1qtXa8;bpNIrPvR_=Q5=GX;`R!Kk4u7cGp!)@p zE}Q(bFipPRUd62;g&d~;T&)URV|~;m%0^F2s*!Nmkz^B*-XSbJz{)s&e>E zbx9e$JB=*K=Tt!&sn96$_wG}Ip?vo~Brv9nshr2&WeRaLwq{kWZ)%%qgN<`oXj>SG zw4qKT7tSc|z8@xY1iEI9y8YgOoEJ#uu@lqWLSQgFMOyT&GLxF2PJHa*uxO3RZ!#-x z7^VOTyT?!xEh8ExpA}(ZT|+KrtF!%QS;j$MDa&<$coQTOTBFJ@5lXPt7=nq2h>!|9 z(LS|8PRyTKR!K<-bm8C{z+{li-?u|)AjI_a^kKi8gt6syBM}wuQXN9Jkap)yj0XF; zz_BrPhmDpNQTxRvyuFz!1$lWC!H@I7I1<-zo12@9i#t0zHTpy8CF^^bJP$FlbDvz4 zN?=7yp`8q+bvO;EAu!gt`B?r zzHK4CDfvu{<-<8tCnpy(GK`FjOv+;h_eGweT!%e2z`}gvw-*+bXi`-ywfIkqW4n^N z-FUY-ghKVx#g8RBn$_r=shm*K5eBW57Wj+Tzovv}{fo)@cd*b@KS?T-H3f*d?FPri z;c^%?;Sxt7+t}FPBEi>w7!94X?<>_V%XnB$VtdzGIb~X{W1?w*kSCAme}B^N)8=(y ztA8Di$8de>!eQ8e#h?&l|IRlUaW7fEy92jWx^_x2_-c#{Avs0L2$h%T(`#2c4JBpT z$qZBnwvsavx(=h|yW$!h-w;KE9$G!Tu+WLW%I=s6!T3n%ZjrmEWLTY2ia&~i=Bs;6 zvF`sE`uArLfNP*$>;l5aNNh56?&La7415ly?A;{rHfV5dwBj;9oy;aoH^Al93u`nF z{V)NN8g&o8X!s>!sEyp~U;|x`u2NBSx?*hrOL7SUK3f&xPc=)HzT5;5VL~Kf9>CyX za$zXJuv*|qp-G;K2d$XK4NCO2R{PceD=LkArY>GZg{V= zr3{~btvsSOKBw#--*`$PVbQUZ^c5HK^a(zGp=bLOgoF3{vQc(lVQow-j z-C;h1#c6@?+baJ)%jx;C)DyD@`g>I$n%Ax`t*@)xPA?H0muKpbL=QD+=?5jT?(g~3 zz>1Nr-zS#&hCRd?7r)74^w5=jw_A8WwU*mP!liub<&hsUnFQ~-B9OJnVO9FH5nyA5 zas=iRG5w;IRRCc6>f)kNA=5QBekLSCohd_rXHC2(To{0db}O^pu_N*Gywe&K8F2tT zgRh`QTavtVsg4EuhkU zbvM3Yr3L*X-qNpr5pMt>!t2`@O@qJ{bv?OZ7678LjXaNipYOR@cK~K9SBsLu!7q{6 z$kQUJj5 zRIMfgch-v#eH+jD@5@L@RstA=aCYLf$}%AR(D(=VKIF$Z zBW3RnnX(A1x?KJ|Q76nnK9@;r3!+U;tL6l6l;TO5H3knij zTAl{%ULLK)4h_9U3RM7inr^lUDXCm+Qy&PY2<;G7ws+l6TSt;8RZHEAo$Kxeq+T9H z)_+>S<(@Ue_p0Ac`wM+vj9sgl@#hX6aSMnocme%X-2K0Kgr29ic6KK}RvumuFu#== z%@+5xA7W)?g?)xcG~*}tJWcO~fk4BtD0q`w)bEYUFKe8iXr-X}WtFh0I0p3(aza#Y zyH}*}V5vK5m2Q{CqtVLn0NrxEfNu)1`87580RaK8ij13_=s1WmCnhFn(GURDj}0hj zKzR?givfbX-OsWgB@LxeO69$=jww4d#d>1zEgC4>^#XO-dp3B?RDdtj1QU%@ccp0} zZ+Ut$j~#dZ+z$m6)vg;>rd#zm_Y_MC3gngyD?D!v)5e?WE@;2P&%3Z??DBSv$RELa zj`bJ2yox}A?^eOf@O|q;J^aYqLVJY@$6$a0WIU5e-KUc$raik?Hv@0=$2wh7KM6M) zf!?&sln@5YloLRfxEWT?UkDl;^;y)t#qgX*?1ymI48W*!Wg~l{rtMHhaBf+q4QXT})q^X&c%91FM9}ZOEq^a7KA3)5fMux2 z)RwYziEY~;sZRlzD0#Dy_#CqP_1XZ!`wYX%5BS%p?30=Py<%cISH}ys!hQ?E@fA)M zDkd6Ek%{#-BUdHX{@JCZ!mm@uf`+>r&p~+R5c1_GEuz=%DK4}67G5ka?h9FWThZdI z-ys!YC7Jgu2W>AK={HGRB3y~{OkH^-Q`PN21HriiM+)HB6kY9WT`Wg&QbdFKDX(xEDGI(H@-#K9%TV44yCFxc_M7!P zAMy!nu=t9Kt78a=Hl(8zh1dWuoe{^|9r`Pqw%46!%zDn=EVdV+w=sSjA4qL#5@r(< zGmMOhNs-mq7;O+k+mHRe_o2~Y>8Ax)@YP5>g96m|&Rs1#FgEI9n*dW?$O?J#vkN7c zK=i;kxk`CYI+1~h!p6U%Y;Y^jndF@o_f+%%SX1fU=m~7nBg40{ipotsUej8Tab&UC z<>dUg+Svh>n0qcK+JPs_zL)3n5Z4?yk&t!JIfjR|xHzeSD1)){1!hNm%pC;{GmIW> zY!2QS)ATCAIId#qb3qbwCX!x8yPm$$&(K6?)YyL5^5?b^iP71p625nqYe(6BB>uln zAA44eUsK+k@J;QK{rxVGfnX-RWw#q3Y3TLh+YIx1d==xOPo-sSlt43uY1CFt9&x#n zjrj!(@06EPdUE~#+=hplnUw<3JIdep6o#^88DRnlJK6nFqx`CU_AS-o_{lyZ7F8KbEQ`k~KjLs2L;UI4&G$A|gp5DDrVq zM-ko>qYjfd0iM+JUPTZ^V&x1t7C@A|oV9pFO=xtiGp5H}(h+J@Sr<5`5XC$(|wLbiqbW)n%wPuQe%~bc~Hiu>7ri3Vjy+ zB2OO?l?nBkpece{By|YZs0i75MBVpEtNJYo$kno6k%@U6%0eK*09nRaY4}L-f>IbR zaWL3zZ<-DSsvZQ%c!lO75Gf5@i8=3;05pQt+{|hS@SLRx%2Z8L3V%}Gj=xtH z@#Zz7?o4N~6ymAh92qDXKhR*?(mUbyFy5F>rM^>|_3or!J`bUk{Ut6wUY;M~iu=OH zfuBFShSK?}?dN!@&v+9O64dqd2BLir++BjnWvYF)o%Lrc_l>Xn)BPc97(SUbJZ5`o zp?wq+Y*Y=4?5mwvxmTXU7ZTc3^(?CS+s&3WJks1m-WJTN=4tlGrZz3x|B>b>(W2dXAMc%a!^*8^PGcbR1`Ox zg-QHyLz(`N6EHtkCY{JiFLmS>pH!^HH=ti@bwY;1qWU$QfJN1;GY~mEGEzoK38P3g zD-tm1frr`eB9&oki(U*jtnceMXri`CB|3zlV`Gn{-qbzN$9ewtdrFR+SI!#)g6azV zeFoClB9oC;W$~>bSI#wvd7T%?(I_wz-Lp)wX){_QymSDk%jC)T6X$09! zm$ynVQqC@Gw_pNlsqbNCafWKE0^EU5q}L0Pj-Sk~;Pv%9jhoHAd?K!Y;ZL}~88cN8 zQ5&u7z7^UJ=Zy~%mmV0S3->-G^US5q<>wyt$NGAw&4txg6767U1{Ha`zpn~u9*GS8 z_1;7+5A%h*l(X~i<9zz7*iAZ>U>t(NnSJl&Er7&cfcYm&8Wr=G6-$9M9@8uaI46GH zL}ElQN&$`0v~OzpyyGG>)BD5JQ8NUf6_Mu1fn)YNNPE$B)?@VgvyCL}z3pAT&l-U% z3`4QQaI5qR`z6S1ft5=Bo-yx?>1DJ7XZ?4zZ`U2Vzq*jJUQB1+NqvdA#6cX*;v>Pi z^3o%AFFeE`n?=+@M$CC(Z>^<%7DD|*$(Z{=f)AQM;e+t&=8RQMEfqDA-hNc#pdmmY zGq~L~BAd|>%$D~G`pm?EtWNOEyafsV$=T#-$JFKpRxSn-u~yiNKpEosy*|ipbOE5= zSJ~bbu>!jgnxYZrSD!yuH_kUI9u&TO3J;4cU2?=yX# zPB3hL%fCkYa4IG43J?EiVK(h{T+O|a=%b83rQsF$ViP}m!C}ickjzw3l*2z$e)E{H z6A_jEWGXc?*i@MIhBRsY#Ni6|n`=KMzs;bUvz09J%yIMFgYWuSESPU9>G zbhc#4!LAxpssgVPR4s}^dfm{8F(c(6`zzjFaJG!%13a>#yLWAfp@hw-v;k-@AA|hrn0r++7Kx znDRm&-cFVW{BA5_xER;c*GmJv1+-SMHcpb!Z~3CiuCB&CYio{J+Nlh1vy(J6Yh+yc zVI$<*tAB*`c^W{f9(g-EJD=kUE1vM;InE$}BzV4N#N70riZn-QFsL9Ev?tBk_dfa4 zt5;`@mqRe?(71j!D%lU<4alMo>%Eng;N8Q=g$7UK+75HJ?+L#3`Dp=jNJ~KC+z72G zE9?Hn-SNd?sfCV#AxLfap^w6GwJ_DB3$Xh$AN(8~DrW@l*b|b?%>doP=IZ4o8j3}R z&}I@lKX1Uu%zS!WP{2q?Onj<}@!#^`BA@+C<#`<{!#_zZg9O}7;ad1@e;@oK8 zJVVKlJiKHf+L$pXWJJ=U6UP{dkpwY*N{f8Spxm;M=io+%niUN?H6wjwC7oi0O-UcJ zdJ$jst4r{)3C}R*qsH6BDknC4J-$M^PgcKhk0sx^>D>oVy;fy=In2W2|ActDRUo(% z{oeP|W?ReyfTBiMLdh;)EDf#sDjohz)NCQg>rdLCm@97a=89zmPYCdC|#fzp4C`7q!O`E*S4A zj+`k!iAyrWXe7MnHwt=oVi~noc;xx>xCUT7*b!s#0nh1a@$vEUc~GvCt19(?k3xjX zo&>1-B+W3tXQ4DhH2us~cz)l*fW%NZw$|sw@xvvZV?U~u+d5YaN1dF@?6+`M8MW}a ze>B9c^~Du+{8)XQ(l^7X8_B26kUAzHn^%QhT;}<}#5#D63r-Sw1TD)s_KkW)c%i7p zFOb^zmf}t-)c98yBoI(OfDqY%wSf{7QpO&?h`VwcY`y53!Cg>Q21Miqx- zKF__4EzuFUO(sO|S#~~q{Ykt3jAy9Gn65R5E+wP<8BneVS#q9(%&e-&mJtXt&Fy;b zijYZdTTKiVh?PTugeP`lW)@dTf5mquSzWe@AtBs_MujoX#sS0&uO#b_w#n0Rx=CSj zWKqu+e~*sh^#Q88u4BPfxzs#YWQaI2pW{TIZf5Y-(aMqJ8*4xb^Qb>tJJT=?5BcYb z3!k+4Pg_opy~JU{kWi1~Ux}$LsSy#`$l5q=_ManA@QyzP{GvpFuTD)tQS`wg-(m`u zHb4LJF@C3uSU*Otvbl-x$E)mP&msTCB$j6Qug*?&a_U)qdvgB?qhJU>G6{)*;h(SR z=@CgugpDywiU|eHk6;?#b3A7k7m)QBZDC=P!>ezOEA8r-w!aGm#cd~h=!;C+yy)oY zIvasp_9cN6D42(2sAp0O*lK1{*V~@%kfzb1p=yQ8;gZX{yx9htA2{@DkicN@Gb|UW z5T%{&H)ASpy5oRbpUvKejK%9mQ6(h-4hKq)O-Ki7D`G@ zWiRR-1WM>o_X7hYR8))8jltPw7nZK>ZZP182=D#!U4_A?y=8v(hkz>o$I3z5dh?Be zc*t4^$@PQ!Ewv}s8;e(0i!Kvu-C;Ld-iOT!c-VkpK9RtrTmbG4eFL;QB>x@G?C+kb zFl6z)n9`kS`n1j%)k|;O;`+wxdM#{0OIcn%RLjU{NHUg;PhNvl_406uod`gD7iJa~ zWx$KyS_9Zx3%_c)AXELUvqcdG(p!brIiz{Z2jQ4$bzQNZ;fISzNmUacB z6F}|fYM3+9vh5ldGJyK6pJR*rK_rldv=Tv1$dOfz<=f)ZMla66Tz?7l@%ycv?te*8)$;vuP*WF2UAUgE^wUuU= z(GX^A1T(8`r%DCIpCXCaozAy4v~+Y%y}Qiu3hRWyp1%NA{fQGG)BsenK+QT4WaZ>e zLNVTbSCzB1eF;3EHouDmXU?8elK|h*Y_&ymUUIuac3^2dnoPGf7rF|rEaLw(=VY+= z(6{#QPL+)}yL>P)_@%T!o2Ea11o2kHHwl9!AxOg zWsH!s*)ZRf`j#qxh`HIHmZ2O;8i32M`&Q}m-d=S)wM`_1%f|(v%XF*bXVCIvk){2= epK^^KaR{QugQ>U*qJU;7xHodDvXwGsf&T%apng&S literal 0 HcmV?d00001 diff --git a/tests/experimental/test_tiling.py b/tests/experimental/test_tiling.py new file mode 100644 index 000000000..9752c69c6 --- /dev/null +++ b/tests/experimental/test_tiling.py @@ -0,0 +1,411 @@ +"""Tests for cell-aware tiling logic. + +Uses a deterministic "brick-pattern" grid of rectangular cells on a +500×500 image. Even rows are aligned; odd rows are shifted right by +half a cell width, like bricks in a wall. The image divides into 4 +tiles of 250×250. Because cell positions are predictable we can check +*exactly* which cell lands in which tile. +""" + +from __future__ import annotations + +import matplotlib.pyplot as plt +import numpy as np +import pytest + +from tests.conftest import PlotTester, PlotTesterMeta + +from squidpy.experimental.im._tiling import ( + TileSpec, + build_tile_specs, + extract_tile, + verify_coverage, +) + +# --------------------------------------------------------------------------- +# Brick-pattern fixture +# --------------------------------------------------------------------------- + +_IMAGE_SIZE = 500 +_CELL_H = 20 +_CELL_W = 30 + + +def _make_brick_labels( + image_size: int = _IMAGE_SIZE, + cell_h: int = _CELL_H, + cell_w: int = _CELL_W, + gap: int = 10, +) -> tuple[np.ndarray, dict[int, tuple[float, float]]]: + """Create a brick-pattern label image and return centroids. + + Parameters + ---------- + image_size + Side length of the square image. + cell_h, cell_w + Height and width of each rectangular cell. + gap + Gap between cells (0 = touching). + + Returns + ------- + labels + ``(image_size, image_size)`` int32 array. + centroids + Mapping from label ID → ``(centroid_y, centroid_x)``. + """ + labels = np.zeros((image_size, image_size), dtype=np.int32) + centroids: dict[int, tuple[float, float]] = {} + + step_y = cell_h + gap + step_x = cell_w + gap + cell_id = 0 + + row_idx = 0 + y = gap // 2 # start with half-gap from top + while y + cell_h <= image_size: + # Odd rows shift right by half a cell+gap step + x_offset = (step_x // 2) if (row_idx % 2 == 1) else 0 + x = x_offset + gap // 2 + while x + cell_w <= image_size: + cell_id += 1 + labels[y : y + cell_h, x : x + cell_w] = cell_id + # Match regionprops centroid: mean of pixel indices [y, y+cell_h-1] + cy = y + (cell_h - 1) / 2.0 + cx = x + (cell_w - 1) / 2.0 + centroids[cell_id] = (cy, cx) + x += step_x + y += step_y + row_idx += 1 + + return labels, centroids + + +def _make_image(image_size: int = _IMAGE_SIZE, n_channels: int = 3) -> np.ndarray: + rng = np.random.default_rng(42) + return rng.integers(0, 255, (n_channels, image_size, image_size), dtype=np.uint8) + + +def _expected_tile_key(cy: float, cx: float, tile_size: int, image_size: int) -> tuple[int, int]: + """Which tile base-grid cell a centroid falls into.""" + max_row = (image_size - 1) // tile_size + max_col = (image_size - 1) // tile_size + row = min(int(cy) // tile_size, max_row) + col = min(int(cx) // tile_size, max_col) + return (row, col) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(params=[10, 0], ids=["gap=10", "gap=0"]) +def brick_labels(request): + """Brick-pattern labels with gap (non-touching) or without (touching).""" + gap = request.param + labels, centroids = _make_brick_labels(gap=gap) + return labels, centroids, gap + + +@pytest.fixture() +def brick_image(): + return _make_image() + + +# --------------------------------------------------------------------------- +# build_tile_specs — deterministic checks +# --------------------------------------------------------------------------- + +_TILE_SIZE = 250 # 500 / 250 = 2×2 = 4 tiles + + +class TestBuildTileSpecs: + def test_four_tiles(self, brick_labels): + """500×500 with tile_size=250 produces at most 4 tiles.""" + labels, _, _ = brick_labels + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + assert len(specs) <= 4 + + def test_full_coverage(self, brick_labels): + """Every cell is assigned to exactly one tile.""" + labels, _, _ = brick_labels + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + verify_coverage(labels, specs) + + def test_cell_assigned_to_centroid_tile(self, brick_labels): + """Each cell's tile matches the tile we predict from its centroid.""" + labels, centroids, _ = brick_labels + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + + # Build actual mapping: cell_id → tile base origin + actual: dict[int, tuple[int, int]] = {} + for spec in specs: + for lid in spec.owned_ids: + actual[lid] = (spec.base[0], spec.base[1]) + + for lid, (cy, cx) in centroids.items(): + expected_row, expected_col = _expected_tile_key(cy, cx, _TILE_SIZE, _IMAGE_SIZE) + expected_origin = (expected_row * _TILE_SIZE, expected_col * _TILE_SIZE) + assert actual[lid] == expected_origin, ( + f"Cell {lid} centroid=({cy:.1f},{cx:.1f}): " + f"expected tile origin {expected_origin}, got {actual[lid]}" + ) + + def test_no_duplicates(self, brick_labels): + """No cell ID appears in more than one tile.""" + labels, _, _ = brick_labels + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + + seen: set[int] = set() + for spec in specs: + overlap = seen & spec.owned_ids + assert not overlap, f"Duplicate cell IDs: {overlap}" + seen |= spec.owned_ids + + def test_boundary_cells_exist(self, brick_labels): + """With the brick offset, some cells straddle the y=250 or x=250 boundary.""" + labels, centroids, gap = brick_labels + # A cell straddles a boundary if its rectangle crosses y=250 or x=250 + # but its centroid is on one side + boundary_cells = [] + step_y = _CELL_H + gap + step_x = _CELL_W + gap + for lid, (cy, cx) in centroids.items(): + half_h = _CELL_H / 2.0 + half_w = _CELL_W / 2.0 + y0, y1 = cy - half_h, cy + half_h + x0, x1 = cx - half_w, cx + half_w + crosses_y = (y0 < 250 < y1) + crosses_x = (x0 < 250 < x1) + if crosses_y or crosses_x: + boundary_cells.append(lid) + + # With cell_h=20 and various gaps, we expect some boundary cells + # (the brick offset makes this likely for odd rows near y=250) + # Just verify they're all assigned somewhere + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + all_owned = set() + for s in specs: + all_owned |= s.owned_ids + for lid in boundary_cells: + assert lid in all_owned, f"Boundary cell {lid} not assigned" + + def test_crop_contains_owned_cells_fully(self, brick_labels): + """Every owned cell's rectangle fits inside its tile's crop region.""" + labels, centroids, _ = brick_labels + specs = build_tile_specs(labels, tile_size=_TILE_SIZE, overlap_margin="auto") + + for spec in specs: + cy0, cx0, cy1, cx1 = spec.crop + for lid in spec.owned_ids: + cent_y, cent_x = centroids[lid] + # Reconstruct cell pixel range from centroid + # Centroid is mean of [y, y+cell_h-1], so half-extent = (cell_h-1)/2 + cell_y0 = cent_y - (_CELL_H - 1) / 2.0 + cell_y1 = cent_y + (_CELL_H - 1) / 2.0 + cell_x0 = cent_x - (_CELL_W - 1) / 2.0 + cell_x1 = cent_x + (_CELL_W - 1) / 2.0 + assert cy0 <= cell_y0 and cell_y1 <= cy1, ( + f"Cell {lid} y-range [{cell_y0:.0f},{cell_y1:.0f}] " + f"not in crop y-range [{cy0},{cy1}]" + ) + assert cx0 <= cell_x0 and cell_x1 <= cx1, ( + f"Cell {lid} x-range [{cell_x0:.0f},{cell_x1:.0f}] " + f"not in crop x-range [{cx0},{cx1}]" + ) + + +class TestBuildTileSpecsEdgeCases: + def test_empty_labels(self): + labels = np.zeros((500, 500), dtype=np.int32) + specs = build_tile_specs(labels, tile_size=250) + assert specs == [] + verify_coverage(labels, specs) + + def test_single_cell_whole_image(self): + """One cell that fills most of the image.""" + labels = np.zeros((500, 500), dtype=np.int32) + labels[10:490, 10:490] = 1 + specs = build_tile_specs(labels, tile_size=250) + verify_coverage(labels, specs) + assert len(specs) == 1 # centroid is at ~(250,250), lands in one tile + + def test_invalid_tile_size(self): + labels = np.zeros((100, 100), dtype=np.int32) + with pytest.raises(ValueError, match="tile_size must be positive"): + build_tile_specs(labels, tile_size=0) + + def test_invalid_labels_ndim(self): + labels = np.zeros((2, 100, 100), dtype=np.int32) + with pytest.raises(ValueError, match="Expected 2-D labels"): + build_tile_specs(labels, tile_size=100) + + def test_tile_size_larger_than_image(self): + """tile_size > image → single tile.""" + labels, _ = _make_brick_labels(image_size=100, gap=5) + specs = build_tile_specs(labels, tile_size=1000) + verify_coverage(labels, specs) + assert len(specs) == 1 + + +# --------------------------------------------------------------------------- +# extract_tile +# --------------------------------------------------------------------------- + + +class TestExtractTile: + def test_non_owned_cells_zeroed(self, brick_labels, brick_image): + """Only owned cells survive in the extracted tile mask.""" + labels, _, _ = brick_labels + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + + for spec in specs: + _, tile_lbl = extract_tile(brick_image, labels, spec) + present = set(np.unique(tile_lbl)) + present.discard(0) + assert present == spec.owned_ids, ( + f"Tile base={spec.base}: expected {spec.owned_ids}, " + f"got {present}" + ) + + def test_owned_cell_pixels_preserved(self, brick_labels, brick_image): + """Pixel values for owned cells match the original labels.""" + labels, _, _ = brick_labels + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + + for spec in specs: + cy0, cx0, cy1, cx1 = spec.crop + _, tile_lbl = extract_tile(brick_image, labels, spec) + for lid in spec.owned_ids: + orig_in_crop = labels[cy0:cy1, cx0:cx1] == lid + tile_matches = tile_lbl == lid + np.testing.assert_array_equal(orig_in_crop, tile_matches) + + def test_original_labels_not_mutated(self, brick_labels, brick_image): + labels, _, _ = brick_labels + labels_copy = labels.copy() + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + for spec in specs: + extract_tile(brick_image, labels, spec) + np.testing.assert_array_equal(labels, labels_copy) + + def test_image_crop_shape(self, brick_labels, brick_image): + """Extracted image has shape (C, crop_h, crop_w).""" + labels, _, _ = brick_labels + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + for spec in specs: + tile_img, tile_lbl = extract_tile(brick_image, labels, spec) + cy0, cx0, cy1, cx1 = spec.crop + assert tile_img.shape == (3, cy1 - cy0, cx1 - cx0) + assert tile_lbl.shape == (cy1 - cy0, cx1 - cx0) + + +# --------------------------------------------------------------------------- +# End-to-end roundtrip +# --------------------------------------------------------------------------- + + +class TestEndToEnd: + def test_roundtrip_no_cells_lost(self, brick_labels, brick_image): + """Build specs → extract tiles → union of labels == all cells.""" + labels, centroids, _ = brick_labels + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + verify_coverage(labels, specs) + + recovered: set[int] = set() + for spec in specs: + _, tile_lbl = extract_tile(brick_image, labels, spec) + tile_ids = set(np.unique(tile_lbl)) + tile_ids.discard(0) + assert tile_ids == spec.owned_ids + recovered |= tile_ids + + assert recovered == set(centroids.keys()) + + def test_touching_cells_no_merge(self): + """With gap=0, adjacent cells still get distinct labels and assignments.""" + labels, centroids = _make_brick_labels(gap=0) + n_cells = len(centroids) + assert n_cells > 0 + + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + verify_coverage(labels, specs) + + # Total owned cells across all tiles == total cells + total_owned = sum(len(s.owned_ids) for s in specs) + assert total_owned == n_cells + + def test_nontouching_cells_same_result(self): + """With gap=10, same coverage guarantees hold.""" + labels, centroids = _make_brick_labels(gap=10) + n_cells = len(centroids) + assert n_cells > 0 + + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + verify_coverage(labels, specs) + + total_owned = sum(len(s.owned_ids) for s in specs) + assert total_owned == n_cells + + +# --------------------------------------------------------------------------- +# Visual test — tile assignment plot +# --------------------------------------------------------------------------- + +# Tile colors: one distinct color per tile quadrant +_TILE_COLORS = [ + (0.12, 0.47, 0.71), # blue — top-left + (1.00, 0.50, 0.05), # orange — top-right + (0.17, 0.63, 0.17), # green — bottom-left + (0.84, 0.15, 0.16), # red — bottom-right +] + + +def _plot_tile_assignment(labels, specs, title=""): + """Render each cell colored by its owning tile, with grid lines.""" + rgb = np.ones((*labels.shape, 3), dtype=np.float32) # white background + + for i, spec in enumerate(specs): + color = _TILE_COLORS[i % len(_TILE_COLORS)] + for lid in spec.owned_ids: + mask = labels == lid + rgb[mask] = color + + fig, ax = plt.subplots(1, 1, figsize=(6, 6)) + ax.imshow(rgb, origin="upper") + + # Draw tile base-grid lines + for spec in specs: + by0, bx0, by1, bx1 = spec.base + rect = plt.Rectangle( + (bx0 - 0.5, by0 - 0.5), + bx1 - bx0, + by1 - by0, + linewidth=1.5, + edgecolor="black", + facecolor="none", + linestyle="--", + ) + ax.add_patch(rect) + + ax.set_xlim(-0.5, labels.shape[1] - 0.5) + ax.set_ylim(labels.shape[0] - 0.5, -0.5) + ax.set_title(title or "Tile assignment") + ax.set_xlabel("x") + ax.set_ylabel("y") + + +class TestTilingVisual(PlotTester, metaclass=PlotTesterMeta): + def test_plot_tile_assignment_gap(self): + """Visual: brick pattern (gap=10), cells colored by tile.""" + labels, _ = _make_brick_labels(gap=10) + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + _plot_tile_assignment(labels, specs, title="Tile assignment (gap=10)") + + def test_plot_tile_assignment_touching(self): + """Visual: brick pattern (gap=0, touching), cells colored by tile.""" + labels, _ = _make_brick_labels(gap=0) + specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + _plot_tile_assignment(labels, specs, title="Tile assignment (gap=0, touching)") From 0af69aa361d0e22da45f0fce402fb2b4f5d89098 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Apr 2026 22:02:22 +0000 Subject: [PATCH 31/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/experimental/test_tiling.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/tests/experimental/test_tiling.py b/tests/experimental/test_tiling.py index 9752c69c6..15ca8d761 100644 --- a/tests/experimental/test_tiling.py +++ b/tests/experimental/test_tiling.py @@ -13,14 +13,13 @@ import numpy as np import pytest -from tests.conftest import PlotTester, PlotTesterMeta - from squidpy.experimental.im._tiling import ( TileSpec, build_tile_specs, extract_tile, verify_coverage, ) +from tests.conftest import PlotTester, PlotTesterMeta # --------------------------------------------------------------------------- # Brick-pattern fixture @@ -149,8 +148,7 @@ def test_cell_assigned_to_centroid_tile(self, brick_labels): expected_row, expected_col = _expected_tile_key(cy, cx, _TILE_SIZE, _IMAGE_SIZE) expected_origin = (expected_row * _TILE_SIZE, expected_col * _TILE_SIZE) assert actual[lid] == expected_origin, ( - f"Cell {lid} centroid=({cy:.1f},{cx:.1f}): " - f"expected tile origin {expected_origin}, got {actual[lid]}" + f"Cell {lid} centroid=({cy:.1f},{cx:.1f}): expected tile origin {expected_origin}, got {actual[lid]}" ) def test_no_duplicates(self, brick_labels): @@ -177,8 +175,8 @@ def test_boundary_cells_exist(self, brick_labels): half_w = _CELL_W / 2.0 y0, y1 = cy - half_h, cy + half_h x0, x1 = cx - half_w, cx + half_w - crosses_y = (y0 < 250 < y1) - crosses_x = (x0 < 250 < x1) + crosses_y = y0 < 250 < y1 + crosses_x = x0 < 250 < x1 if crosses_y or crosses_x: boundary_cells.append(lid) @@ -208,12 +206,10 @@ def test_crop_contains_owned_cells_fully(self, brick_labels): cell_x0 = cent_x - (_CELL_W - 1) / 2.0 cell_x1 = cent_x + (_CELL_W - 1) / 2.0 assert cy0 <= cell_y0 and cell_y1 <= cy1, ( - f"Cell {lid} y-range [{cell_y0:.0f},{cell_y1:.0f}] " - f"not in crop y-range [{cy0},{cy1}]" + f"Cell {lid} y-range [{cell_y0:.0f},{cell_y1:.0f}] not in crop y-range [{cy0},{cy1}]" ) assert cx0 <= cell_x0 and cell_x1 <= cx1, ( - f"Cell {lid} x-range [{cell_x0:.0f},{cell_x1:.0f}] " - f"not in crop x-range [{cx0},{cx1}]" + f"Cell {lid} x-range [{cell_x0:.0f},{cell_x1:.0f}] not in crop x-range [{cx0},{cx1}]" ) @@ -265,10 +261,7 @@ def test_non_owned_cells_zeroed(self, brick_labels, brick_image): _, tile_lbl = extract_tile(brick_image, labels, spec) present = set(np.unique(tile_lbl)) present.discard(0) - assert present == spec.owned_ids, ( - f"Tile base={spec.base}: expected {spec.owned_ids}, " - f"got {present}" - ) + assert present == spec.owned_ids, f"Tile base={spec.base}: expected {spec.owned_ids}, got {present}" def test_owned_cell_pixels_preserved(self, brick_labels, brick_image): """Pixel values for owned cells match the original labels.""" From d1b0b957ce5ac3723ec5d92416230573e6082073 Mon Sep 17 00:00:00 2001 From: anon Date: Thu, 14 May 2026 17:12:02 +0200 Subject: [PATCH 32/37] Finish lazy-tiling refactor and address PR #982 review feedback Wires the in-progress cp_measure.featurizer + lazy-tiling refactor onto a working _tiling.py and closes out the six open notes on the PR. _tiling.py: * build_tile_specs now takes (shape, cell_info), so it is agnostic to whether labels are in memory, dask-backed, or multiscale. * compute_cell_info is public; new compute_cell_info_multiscale (read coarsest scale, rescale to target) and compute_cell_info_tiled (stream tiles, merge boundary-spanning cells via additive accumulators). * extract_tile_lazy slices an xr.DataArray and materializes only the crop; extract_tile retained for in-memory callers. * verify_coverage takes a label_ids set. _feature.py: * Channel names: read via spatialdata.models.get_channel_names so c_coords set at parse time flow through to output column suffixes. * Progress: tqdm wrapper around joblib.Parallel(return_as='generator_unordered') + periodic logg.info('Tile {n}/{total} done (elapsed ...)') so non-TTY runs (CI, slurm) also see progress. * Alignment: _align_to_image_grid replaces the dim-mismatch raise with a coordinate-system aware crop. Identity-or-integer-pixel-translation is honored as a 1-to-1 pixel alignment; the overlap rectangle is processed and out-of-extent cells are counted, not crashed on. Non-pixel-aligned transforms either raise with a spatialdata.rasterize hint (align_mode='strict', default) or trigger materialization via spatialdata.rasterize (align_mode='rasterize') with a warning. * DropReport: per-run counter for cells dropped due to extent, partial boundary intersection, cp_measure no-data, or empty tiles. Emitted via logg.info(report.summary()) at the end of every run. Tests: 39 in test_tiling.py (was 30; new coverage for the lazy/multiscale helpers + verify_coverage edge cases), 35 in test_calculate_image_features including a TestPR982Concerns class with one regression test per open note. Co-Authored-By: Claude Opus 4.7 --- src/squidpy/experimental/im/_feature.py | 1776 +++++++++-------- src/squidpy/experimental/im/_tiling.py | 337 +++- .../test_calculate_image_features.py | 751 +++++-- tests/experimental/test_tiling.py | 200 +- 4 files changed, 1896 insertions(+), 1168 deletions(-) diff --git a/src/squidpy/experimental/im/_feature.py b/src/squidpy/experimental/im/_feature.py index 13c35d9d5..d161f6697 100644 --- a/src/squidpy/experimental/im/_feature.py +++ b/src/squidpy/experimental/im/_feature.py @@ -1,984 +1,1020 @@ -"""Experimental feature extraction module.""" +"""Experimental feature extraction module. + +Extracts per-cell features from segmentation masks using cp_measure, +scikit-image regionprops, and squidpy-specific metrics. Large images +are automatically tiled so that each tile is processed independently. +""" from __future__ import annotations +import time import warnings -from collections.abc import Callable, Sequence -from typing import Any, NamedTuple +from dataclasses import dataclass, field, fields +from typing import Any, Literal, NamedTuple import anndata as ad import numpy as np -import numpy.typing as npt import pandas as pd import xarray as xr -from cp_measure.bulk import get_core_measurements, get_correlation_measurements -from numba import njit +from cp_measure.featurizer import featurize, make_featurizer_config +from joblib import Parallel, delayed from skimage import measure from skimage.feature import graycomatrix, graycoprops -from skimage.measure import label from spatialdata import SpatialData, rasterize from spatialdata._logging import logger as logg -from spatialdata.models import TableModel +from spatialdata.models import TableModel, get_channel_names +from spatialdata.transformations import get_transformation +from tqdm.auto import tqdm + +from squidpy.experimental.im._tiling import ( + build_tile_specs, + compute_cell_info, + compute_cell_info_multiscale, + compute_cell_info_tiled, + extract_tile_lazy, +) -from squidpy._constants._constants import ImageFeature -from squidpy._docs import d, inject_docs -from squidpy._utils import Signal, _get_n_cores, parallelize +# --------------------------------------------------------------------------- +# Drop accounting +# --------------------------------------------------------------------------- -__all__ = ["calculate_image_features"] -# Define constant property sets -_MASK_PROPS = { - "area", - "area_filled", - "area_convex", - "num_pixels", - "axis_major_length", - "axis_minor_length", - "eccentricity", - "equivalent_diameter", - "extent", - "feret_diameter_max", - "solidity", - "euler_number", - "centroid", - "centroid_local", - "perimeter", - "perimeter_crofton", - "inertia_tensor", - "inertia_tensor_eigvals", -} -_INTENSITY_PROPS = { - "intensity_max", - "intensity_mean", - "intensity_min", - "intensity_std", -} +@dataclass +class DropReport: + """Counters for cells that were excluded during a featurization run. + + Emitted once at the end of ``calculate_image_features`` so users know + why their cell count shrank. + """ -# Define array types using modern syntax -NDArray = npt.NDArray[Any] # Generic array -FloatArray = npt.NDArray[np.float32] # Float32 array -IntArray = npt.NDArray[np.int_] # Integer array -BoolArray = npt.NDArray[np.bool_] # Boolean array + outside_image_extent: int = 0 + partial_at_image_boundary: int = 0 + cp_measure_no_data: int = 0 + empty_tile_drop: int = 0 + other: dict[str, int] = field(default_factory=dict) + + def summary(self) -> str: + lines = ["Cell drop report:"] + for f in fields(self): + v = getattr(self, f.name) + if isinstance(v, int) and v > 0: + lines.append(f" {f.name}: {v}") + elif isinstance(v, dict): + for k, vv in v.items(): + if vv: + lines.append(f" {k}: {vv}") + if len(lines) == 1: + return "Cell drop report: no cells dropped." + return "\n".join(lines) -# Define property sets at module level for better performance -_SCALAR_PROPS = frozenset( + +__all__ = ["calculate_image_features"] + +# --------------------------------------------------------------------------- +# Skimage property sets +# --------------------------------------------------------------------------- + +_MASK_PROPS = frozenset( { "area", "area_filled", "area_convex", - "num_pixels", "axis_major_length", "axis_minor_length", "eccentricity", - "equivalent_diameter", + "equivalent_diameter_area", "extent", "feret_diameter_max", "solidity", "euler_number", + "centroid", + "centroid_local", "perimeter", "perimeter_crofton", + "inertia_tensor", + "inertia_tensor_eigvals", + } +) +_INTENSITY_PROPS = frozenset( + { + "intensity_max", + "intensity_mean", + "intensity_min", + "intensity_std", } ) -_ARRAY_1D_PROPS = frozenset({"centroid", "centroid_local"}) -_ARRAY_2D_PROPS = frozenset({"inertia_tensor"}) -_SPECIAL_PROPS = frozenset({"inertia_tensor_eigvals"}) - - -class ParsedMeasurements(NamedTuple): - measurements: list[str] - label_props: set[str] | None - intensity_props: set[str] | None - has_cpmeasure_core: bool - has_cpmeasure_correlation: bool - has_squidpy_summary: bool - has_squidpy_texture: bool - has_squidpy_color_hist: bool - - -def _parse_measurements( - measurements: str | list[str] | None, - available_measurements: list[str], -) -> ParsedMeasurements: - """Parse and validate measurements, supporting per-property selection.""" - if measurements is None: - measurements = available_measurements - - if isinstance(measurements, str): - measurements = [measurements] - - parsed_label_props: set[str] | None = None - parsed_intensity_props: set[str] | None = None - has_cpmeasure_core = False - has_cpmeasure_correlation = False - has_squidpy_summary = False - has_squidpy_texture = False - has_squidpy_color_hist = False - - for m in measurements: - parts = m.split(":") - if len(parts) == 3 and parts[0] == "skimage": - group, prop = parts[1], parts[2] - if group == "label": - if prop not in _MASK_PROPS: - raise ValueError(f"Unknown skimage label property: '{prop}'. Available: {sorted(_MASK_PROPS)}") - parsed_label_props = (parsed_label_props or set()) | {prop} - elif group == "label+image": - if prop not in _INTENSITY_PROPS: - raise ValueError( - f"Unknown skimage intensity property: '{prop}'. Available: {sorted(_INTENSITY_PROPS)}" - ) - parsed_intensity_props = (parsed_intensity_props or set()) | {prop} - else: - raise ValueError(f"Unknown skimage group: '{group}'. Use 'label' or 'label+image'") - elif m == "skimage:label": - parsed_label_props = (parsed_label_props or set()) | _MASK_PROPS.copy() - elif m == "skimage:label+image": - parsed_intensity_props = (parsed_intensity_props or set()) | _INTENSITY_PROPS.copy() - elif m == "cpmeasure:core": - has_cpmeasure_core = True - elif m == "cpmeasure:correlation": - has_cpmeasure_correlation = True - elif m == "squidpy:summary": - has_squidpy_summary = True - elif m == "squidpy:texture": - has_squidpy_texture = True - elif m == "squidpy:color_hist": - has_squidpy_color_hist = True - elif m not in available_measurements: +# cp_measure feature name → make_featurizer_config keyword(s) +_CPMEASURE_FLAGS: dict[str, dict[str, bool]] = { + "cpmeasure:intensity": {"intensity": True}, + "cpmeasure:sizeshape": {"sizeshape": True}, + "cpmeasure:texture": {"texture": True}, + "cpmeasure:granularity": {"granularity": True}, + "cpmeasure:zernike": {"zernike": True}, + "cpmeasure:feret": {"feret": True}, + "cpmeasure:radial": {"radial_distribution": True, "radial_zernikes": True}, + "cpmeasure:correlation": { + "correlation_pearson": True, + "correlation_costes": True, + "correlation_manders_fold": True, + "correlation_rwc": True, + }, + "cpmeasure:correlation_pearson": {"correlation_pearson": True}, + "cpmeasure:correlation_costes": {"correlation_costes": True}, + "cpmeasure:correlation_manders_fold": {"correlation_manders_fold": True}, + "cpmeasure:correlation_rwc": {"correlation_rwc": True}, +} + +# All known top-level feature group names (used for validation) +_ALL_FEATURES = ( + set(_CPMEASURE_FLAGS.keys()) + | {"skimage:label", "skimage:label+image"} + | {"squidpy:summary", "squidpy:texture", "squidpy:color_hist"} +) + + +# --------------------------------------------------------------------------- +# Feature parsing +# --------------------------------------------------------------------------- + + +class _ParsedFeatures(NamedTuple): + cp_flags: dict[str, bool] | None # kwargs for make_featurizer_config + skimage_label_props: frozenset[str] | None + skimage_intensity_props: frozenset[str] | None + squidpy_summary: bool + squidpy_texture: bool + squidpy_color_hist: bool + + +def _parse_features(features: list[str] | str | None) -> _ParsedFeatures: + """Parse user-facing feature names into structured config.""" + if features is None: + # Default: all cp_measure features + return _ParsedFeatures( + cp_flags={}, # empty dict → all defaults (all True) + skimage_label_props=None, + skimage_intensity_props=None, + squidpy_summary=False, + squidpy_texture=False, + squidpy_color_hist=False, + ) + + if isinstance(features, str): + features = [features] + + cp_flags: dict[str, bool] = {} + has_any_cp = False + label_props: set[str] | None = None + intensity_props: set[str] | None = None + sq_summary = False + sq_texture = False + sq_color_hist = False + + for f in features: + # cp_measure features + if f in _CPMEASURE_FLAGS: + has_any_cp = True + cp_flags.update(_CPMEASURE_FLAGS[f]) + + # skimage group-level + elif f == "skimage:label": + label_props = set(_MASK_PROPS) + elif f == "skimage:label+image": + intensity_props = set(_INTENSITY_PROPS) + + # skimage fine-grained: "skimage:label:prop" or "skimage:label+image:prop" + elif f.startswith("skimage:label:"): + prop = f.split(":", 2)[2] + if prop not in _MASK_PROPS: + raise ValueError(f"Unknown skimage label property: '{prop}'. Available: {sorted(_MASK_PROPS)}") + label_props = (label_props or set()) | {prop} + elif f.startswith("skimage:label+image:"): + prop = f.split(":", 2)[2] + if prop not in _INTENSITY_PROPS: + raise ValueError(f"Unknown skimage intensity property: '{prop}'. Available: {sorted(_INTENSITY_PROPS)}") + intensity_props = (intensity_props or set()) | {prop} + + # squidpy features + elif f == "squidpy:summary": + sq_summary = True + elif f == "squidpy:texture": + sq_texture = True + elif f == "squidpy:color_hist": + sq_color_hist = True + + else: raise ValueError( - f"Invalid measurement: '{m}'. " - f"Available: {available_measurements}, " - f"or use 'skimage:label:property' / 'skimage:label+image:property' for individual properties" + f"Unknown feature: '{f}'. Available top-level features: {sorted(_ALL_FEATURES)}, " + f"or use 'skimage:label:property' / 'skimage:label+image:property' for individual properties." ) - return ParsedMeasurements( - measurements, - parsed_label_props, - parsed_intensity_props, - has_cpmeasure_core, - has_cpmeasure_correlation, - has_squidpy_summary, - has_squidpy_texture, - has_squidpy_color_hist, + return _ParsedFeatures( + cp_flags=cp_flags if has_any_cp else None, + skimage_label_props=frozenset(label_props) if label_props else None, + skimage_intensity_props=frozenset(intensity_props) if intensity_props else None, + squidpy_summary=sq_summary, + squidpy_texture=sq_texture, + squidpy_color_hist=sq_color_hist, ) -@d.dedent -@inject_docs(f=ImageFeature) -def calculate_image_features( - sdata: SpatialData, - image_key: str, - labels_key: str | None = None, - shapes_key: str | None = None, - scale: str | None = None, - measurements: list[str] | str | None = None, - adata_key_added: str = "morphology", - invalid_as_zero: bool = True, - n_jobs: int | None = None, - backend: str = "loky", - show_progress_bar: bool = False, # slower, needs to be optimised - verbose: bool = False, - inplace: bool = True, -) -> pd.DataFrame | None: +def _has_any_features(parsed: _ParsedFeatures) -> bool: + return ( + parsed.cp_flags is not None + or parsed.skimage_label_props is not None + or parsed.skimage_intensity_props is not None + or parsed.squidpy_summary + or parsed.squidpy_texture + or parsed.squidpy_color_hist + ) + + +# --------------------------------------------------------------------------- +# cp_measure config builder +# --------------------------------------------------------------------------- + + +def _build_cp_config(cp_flags: dict[str, bool], channel_names: list[str]) -> dict: + """Build a cp_measure featurizer config from parsed flags. + + When ``cp_flags`` is empty (the default-all case), every feature is + enabled. Otherwise, only the explicitly requested features are turned on. """ - Calculate features from segmentation masks using CellProfiler measurements. + if not cp_flags: + # All defaults (everything True) + return make_featurizer_config(channel_names) + + # Start with everything off, then enable requested features + all_off = { + "intensity": False, + "texture": False, + "granularity": False, + "radial_distribution": False, + "radial_zernikes": False, + "sizeshape": False, + "zernike": False, + "feret": False, + "correlation_pearson": False, + "correlation_costes": False, + "correlation_manders_fold": False, + "correlation_rwc": False, + } + all_off.update(cp_flags) + return make_featurizer_config(channel_names, **all_off) + + +# --------------------------------------------------------------------------- +# Per-tile feature computation +# --------------------------------------------------------------------------- - This function uses the `cp_measure` package to extract features from - segmentation masks. It supports both basic shape features and - intensity-based features if an intensity image is provided. + +def _relabel_contiguous(labels: np.ndarray) -> tuple[np.ndarray, dict[int, int]]: + """Relabel a mask to contiguous IDs 1..N, returning the new mask and a mapping. + + Returns + ------- + relabeled + Label image with contiguous IDs. + new_to_orig + Mapping from new contiguous ID → original label ID. + """ + unique_ids = np.unique(labels) + unique_ids = unique_ids[unique_ids != 0] + new_to_orig: dict[int, int] = {} + relabeled = np.zeros_like(labels) + for new_id, orig_id in enumerate(unique_ids, start=1): + relabeled[labels == orig_id] = new_id + new_to_orig[new_id] = int(orig_id) + return relabeled, new_to_orig + + +def _featurize_tile( + tile_image: np.ndarray, + tile_labels: np.ndarray, + parsed: _ParsedFeatures, + channel_names: list[str], +) -> pd.DataFrame: + """Compute all requested features for a single tile. Parameters ---------- - sdata - The spatial data object containing the segmentation masks. - labels_key - Key in :attr:`spatialdata.SpatialData.labels` containing the - segmentation masks. - shapes_key - Key in :attr:`spatialdata.SpatialData.shapes` containing the - shape features. - image_key - Key in :attr:`spatialdata.SpatialData.images` containing the - intensity image. - adata_key_added - Key to store the AnnData object in the SpatialData object. - %(parallelize)s + tile_image + ``(C, H, W)`` image tile. + tile_labels + ``(H, W)`` label tile with only owned cells. + parsed + Parsed feature configuration. + channel_names + Channel names for column naming. Returns ------- - A :class:`pandas.DataFrame` with the calculated features. If the image has - multiple channels, features are calculated for each channel separately and - channel names are appended to the feature names. - - Notes - ----- - This is an experimental feature that requires the `cp_measure` package - to be installed. - - Per-property selection is supported, e.g. ``"skimage:label:area"`` or - ``"skimage:label+image:intensity_mean"``. Full groups remain available via - ``"skimage:label"`` and ``"skimage:label+image"``. + DataFrame indexed by cell label ID with one column per feature. """ + cell_ids = np.unique(tile_labels) + cell_ids = cell_ids[cell_ids != 0] + if len(cell_ids) == 0: + return pd.DataFrame() + + parts: list[pd.DataFrame] = [] + + # --- cp_measure features --- + if parsed.cp_flags is not None: + cp_config = _build_cp_config(parsed.cp_flags, channel_names) + # Relabel to contiguous IDs (1..N) — cp_measure assumes dense labels + # internally and will index-error on sparse IDs like [1, 37, 82]. + contiguous_labels, orig_ids = _relabel_contiguous(tile_labels) + masks_3d = contiguous_labels[np.newaxis, :, :] # (1, H, W) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + data, columns, rows = featurize(tile_image, masks_3d, cp_config) + if data.shape[0] > 0: + # Map contiguous labels back to original IDs. + # rows may include labels that produced no data (cp_measure bug); + # use only the first data.shape[0] rows. + # TODO: switch to return_as="anndata" once afermg/cp_measure#38 + # fixes the rows/data length mismatch. + row_labels = [orig_ids[r[2]] for r in rows[: data.shape[0]]] + cp_df = pd.DataFrame(data, index=row_labels, columns=columns) + parts.append(cp_df) + + # --- skimage regionprops --- + if parsed.skimage_label_props is not None or parsed.skimage_intensity_props is not None: + df = _compute_skimage_features( + tile_labels, tile_image, parsed.skimage_label_props, parsed.skimage_intensity_props, channel_names + ) + if not df.empty: + parts.append(df) - if image_key not in sdata.images.keys(): - raise ValueError(f"Image key '{image_key}' not found, valid keys: {list(sdata.images.keys())}") + # --- squidpy per-cell features --- + if parsed.squidpy_summary or parsed.squidpy_texture or parsed.squidpy_color_hist: + df = _compute_squidpy_per_cell(tile_labels, tile_image, parsed, channel_names) + if not df.empty: + parts.append(df) - if labels_key is None and shapes_key is None: - raise ValueError("Provide either `labels_key` or `shapes_key`.") + if not parts: + return pd.DataFrame(index=cell_ids) - if labels_key is not None and shapes_key is not None: - raise ValueError("Use either `labels_key` or `shapes_key`, not both.") + combined = pd.concat(parts, axis=1) + combined = combined.reindex(cell_ids) + return combined - if labels_key is not None and labels_key not in sdata.labels.keys(): - raise ValueError(f"Labels key '{labels_key}' not found, valid keys: {list(sdata.labels.keys())}") - if shapes_key is not None and shapes_key not in sdata.shapes.keys(): - raise ValueError(f"Shapes key '{shapes_key}' not found, valid keys: {list(sdata.shapes.keys())}") +# --------------------------------------------------------------------------- +# skimage regionprops +# --------------------------------------------------------------------------- - if ( - isinstance(sdata.images[image_key], xr.DataTree) or isinstance(sdata.labels[labels_key], xr.DataTree) - ) and scale is None: - raise ValueError("When using multi-scale data, please specify the scale.") - if scale is not None and not isinstance(scale, str): - raise ValueError("Scale must be a string.") +def _regionprops_to_row(region: Any, props: frozenset[str]) -> dict[str, float]: + """Extract scalar features from a single regionprops object.""" + row: dict[str, float] = {} + for prop in props: + try: + value = getattr(region, prop) + arr = np.asarray(value) + if arr.ndim == 0: + row[prop] = float(arr) + elif arr.ndim == 1: + for i, v in enumerate(arr): + row[f"{prop}_{i}"] = float(v) + elif arr.ndim == 2: + for i in range(arr.shape[0]): + for j in range(arr.shape[1]): + row[f"{prop}_{i}x{j}"] = float(arr[i, j]) + except (ValueError, TypeError, AttributeError): + continue + return row - image = _get_array_from_DataTree_or_DataArray(sdata.images[image_key], scale) - labels = _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale) if labels_key is not None else None - if labels is not None and image.shape[1:] != labels.shape: - raise ValueError( - f"Image dimensions {image.shape[1:]} do not match labels dimensions {labels.shape} at scale '{scale}'" - ) +def _compute_skimage_features( + labels: np.ndarray, + image: np.ndarray, + label_props: frozenset[str] | None, + intensity_props: frozenset[str] | None, + channel_names: list[str], +) -> pd.DataFrame: + """Compute skimage regionprops features for all cells in a tile.""" + parts: list[pd.DataFrame] = [] - if shapes_key is not None: - scale_str = f" (using scale '{scale}')" if scale is not None else "" - logg.info(f"Converting shapes to labels{scale_str}.") - _, max_y, max_x = image.shape - try: - labels = np.asarray( - rasterize( - sdata.shapes[shapes_key], - ["x", "y"], - min_coordinate=[0, 0], - max_coordinate=[max_x, max_y], - target_coordinate_system="global", - target_unit_to_pixels=1.0, - return_regions_as_labels=True, - ) - ) - except ValueError as e: - raise ValueError( - "Failed to rasterize shapes; geometries may be empty or unsupported for rasterization. " - "Filter out empty/non-polygon geometries or choose a different shapes_key." - ) from e - else: - labels = _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale) - - available_measurements = [ - "skimage:label", - "skimage:label+image", - "cpmeasure:core", - "cpmeasure:correlation", - "squidpy:summary", - "squidpy:texture", - "squidpy:color_hist", - ] - - parsed = _parse_measurements(measurements, available_measurements) - - if ( - parsed.label_props is None - and parsed.intensity_props is None - and not parsed.has_cpmeasure_core - and not parsed.has_cpmeasure_correlation - and not parsed.has_squidpy_summary - and not parsed.has_squidpy_texture - and not parsed.has_squidpy_color_hist - ): - raise ValueError("No valid measurements requested") + if label_props is not None: + regions = measure.regionprops(labels) + rows = {r.label: _regionprops_to_row(r, label_props) for r in regions} + parts.append(pd.DataFrame.from_dict(rows, orient="index")) - if labels.size == 0: - raise ValueError("Labels array is empty") + if intensity_props is not None: + for ch_idx, ch_name in enumerate(channel_names): + regions = measure.regionprops(labels, intensity_image=image[ch_idx]) + rows = {r.label: _regionprops_to_row(r, intensity_props) for r in regions} + df = pd.DataFrame.from_dict(rows, orient="index") + df = df.rename(columns=lambda c, _ch=ch_name: f"{c}_{_ch}") + parts.append(df) - max_label = int(labels.max()) - if max_label == 0: - raise ValueError("No cells found in labels (max label is 0)") + if not parts: + return pd.DataFrame() + return pd.concat(parts, axis=1) - channel_names = None - if hasattr(sdata.images[image_key], "coords") and "c" in sdata.images[image_key].coords: - channel_names = sdata.images[image_key].coords["c"].values - if image.ndim == 2: - image = image[None, :, :] - elif image.ndim != 3: - raise ValueError(f"Expected 2D or 3D image, got shape {image.shape}") +# --------------------------------------------------------------------------- +# squidpy per-cell features +# --------------------------------------------------------------------------- - if image.shape[1:] != labels.shape: - raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}") - if parsed.has_cpmeasure_correlation: - measurements_corr = get_correlation_measurements() +def _compute_squidpy_per_cell( + labels: np.ndarray, + image: np.ndarray, + parsed: _ParsedFeatures, + channel_names: list[str], +) -> pd.DataFrame: + """Compute squidpy features per cell within a tile.""" + regions = measure.regionprops(labels) + n_channels = image.shape[0] + rows: dict[int, dict[str, float]] = {} - cell_ids = np.unique(labels) - cell_ids = cell_ids[cell_ids != 0] - # Sort cell_ids to ensure consistent order - cell_ids = np.sort(cell_ids) - cell_ids_list = cell_ids.tolist() # Convert to list for parallelize + for region in regions: + lid = region.label + bbox = region.bbox # (min_row, min_col, max_row, max_col) + cell_features: dict[str, float] = {} + + # Extract cell's bounding box from image + img_crop = image[:, bbox[0] : bbox[2], bbox[1] : bbox[3]] + mask_crop = labels[bbox[0] : bbox[2], bbox[1] : bbox[3]] == lid - all_features = [] - n_channels = image.shape[0] - n_jobs = _get_n_cores(n_jobs) - - logg.info(f"Using '{n_jobs}' core(s).") - - if parsed.has_squidpy_summary or parsed.has_squidpy_texture or parsed.has_squidpy_color_hist: - sq_feats = _compute_squidpy_channel_features(image, labels, cell_ids, channel_names, parsed) - all_features.extend(sq_feats) - - if parsed.label_props is not None: - logg.info("Calculating 'skimage' label features.") - res = parallelize( - _get_regionprops_features, - collection=cell_ids_list, - extractor=pd.concat, - n_jobs=n_jobs, - backend=backend, - show_progress_bar=show_progress_bar, - verbose=verbose, - )(labels=labels, intensity_image=None, props=parsed.label_props) - all_features.append(res) - - if parsed.intensity_props is not None: - for ch_idx in range(n_channels): - ch_name = channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" - ch_image = image[ch_idx] - logg.info(f"Calculating 'skimage' image features for channel '{ch_name}'.") - res = parallelize( - _get_regionprops_features, - collection=cell_ids_list, - extractor=pd.concat, - n_jobs=n_jobs, - backend=backend, - show_progress_bar=show_progress_bar, - verbose=verbose, - )(labels=labels, intensity_image=ch_image, props=parsed.intensity_props) - # Append channel names to each feature column - res = res.rename(columns=lambda col, ch_name=ch_name: f"{col}_{ch_name}") - all_features.append(res) - - if parsed.has_cpmeasure_core: - measurements_core = get_core_measurements() for ch_idx in range(n_channels): - ch_name = channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" - ch_image = image[ch_idx] - logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.") - res = parallelize( - _calculate_features_helper, - collection=cell_ids_list, - extractor=pd.concat, - n_jobs=n_jobs, - backend=backend, - show_progress_bar=show_progress_bar, - verbose=verbose, - )(labels, ch_image, None, measurements_core, ch_name) - all_features.append(res) - - if parsed.has_cpmeasure_correlation: - for ch1_idx in range(n_channels): - for ch2_idx in range(ch1_idx + 1, n_channels): - ch1_name = channel_names[ch1_idx] if channel_names is not None else f"{ch1_idx}" - ch2_name = channel_names[ch2_idx] if channel_names is not None else f"{ch2_idx}" - logg.info( - f"Calculating 'cpmeasure' correlation features between channels '{ch1_name}' and '{ch2_name}'." - ) - ch1_image = image[ch1_idx] - ch2_image = image[ch2_idx] - res = parallelize( - _calculate_features_helper, - collection=cell_ids_list, - extractor=pd.concat, - n_jobs=n_jobs, - backend=backend, - show_progress_bar=show_progress_bar, - verbose=verbose, - )(labels, ch1_image, ch2_image, measurements_corr, ch1_name, ch2_name) - all_features.append(res) - - combined_features = pd.concat(all_features, axis=1) + ch_name = channel_names[ch_idx] + ch_crop = img_crop[ch_idx].astype(np.float32) + masked_vals = ch_crop[mask_crop] - if invalid_as_zero: - combined_features = combined_features.replace([np.inf, -np.inf], 0) - combined_features = combined_features.fillna(0) + if len(masked_vals) == 0: + continue - # Ensure cell IDs are preserved in the correct order - combined_features = combined_features.loc[cell_ids] + if parsed.squidpy_summary: + cell_features[f"summary_mean_{ch_name}"] = float(np.mean(masked_vals)) + cell_features[f"summary_std_{ch_name}"] = float(np.std(masked_vals)) + cell_features[f"summary_min_{ch_name}"] = float(np.min(masked_vals)) + cell_features[f"summary_max_{ch_name}"] = float(np.max(masked_vals)) - adata = ad.AnnData(X=combined_features) - adata.obs_names = [f"cell_{i}" for i in cell_ids] - adata.var_names = combined_features.columns + if parsed.squidpy_texture: + cell_features.update(_glcm_features(ch_crop, mask_crop, ch_name)) - adata.uns["spatialdata_attrs"] = { - "region": labels_key if labels_key is not None else shapes_key, - "region_key": "region", - "instance_key": "label_id", - } - adata.obs["region"] = pd.Categorical([labels_key if labels_key is not None else shapes_key] * len(adata)) - # here we either use the cell_ids or the index of the shapes. Needed - # because when converting the shapes to labels, a potential index 0 - # in the shapes is set to 1 in the labels and therefore we'd otherwise - # be off-by-one in the label_id. - if shapes_key is not None and len(sdata.shapes[shapes_key]) == len(adata): - adata.obs["label_id"] = sdata.shapes[shapes_key].index.values - else: - adata.obs["label_id"] = cell_ids + if parsed.squidpy_color_hist: + cell_features.update(_histogram_features(masked_vals, ch_name)) - if inplace: - sdata.tables[adata_key_added] = TableModel.parse(adata) + rows[lid] = cell_features + + return pd.DataFrame.from_dict(rows, orient="index") + + +def _glcm_features(channel_crop: np.ndarray, mask: np.ndarray, ch_name: str) -> dict[str, float]: + """GLCM texture features for a single channel within a cell's bbox.""" + quant_levels = 32 + ch = channel_crop.copy() + # Zero out non-cell pixels so they don't affect GLCM + ch[~mask] = 0 + ch_min, ch_max = float(ch[mask].min()), float(ch[mask].max()) + if ch_max > ch_min: + ch = (ch - ch_min) / (ch_max - ch_min) else: - return combined_features + ch = np.zeros_like(ch) + ch_q = np.clip((ch * (quant_levels - 1)).round().astype(np.uint8), 0, quant_levels - 1) + ch_q[~mask] = 0 + try: + glcm = graycomatrix(ch_q, distances=[1], angles=[0], levels=quant_levels, symmetric=True, normed=True) + return { + f"texture_contrast_{ch_name}": float(graycoprops(glcm, "contrast")[0, 0]), + f"texture_dissimilarity_{ch_name}": float(graycoprops(glcm, "dissimilarity")[0, 0]), + f"texture_homogeneity_{ch_name}": float(graycoprops(glcm, "homogeneity")[0, 0]), + f"texture_energy_{ch_name}": float(graycoprops(glcm, "energy")[0, 0]), + f"texture_ASM_{ch_name}": float(graycoprops(glcm, "ASM")[0, 0]), + f"texture_correlation_{ch_name}": float(graycoprops(glcm, "correlation")[0, 0]), + } + except (ValueError, IndexError): + return {} + + +def _histogram_features(masked_vals: np.ndarray, ch_name: str, bins: int = 16) -> dict[str, float]: + """Per-cell intensity histogram features.""" + lo, hi = float(masked_vals.min()), float(masked_vals.max()) + hist, _ = np.histogram(masked_vals, bins=bins, range=(lo, hi if hi > lo else lo + 1)) + hist = hist.astype(np.float32) + hist_sum = hist.sum() + if hist_sum > 0: + hist = hist / hist_sum + return {f"color_hist_bin{b}_{ch_name}": float(v) for b, v in enumerate(hist)} + + +# --------------------------------------------------------------------------- +# Input preparation (lazy — returns xarray DataArrays, not numpy) +# --------------------------------------------------------------------------- + + +# --------------------------------------------------------------------------- +# Coordinate-system aware alignment +# --------------------------------------------------------------------------- + + +def _shared_coordinate_system(sdata: SpatialData, image_key: str, labels_key: str) -> str: + img_t = get_transformation(sdata.images[image_key], get_all=True) + lbl_t = get_transformation(sdata.labels[labels_key], get_all=True) + shared = set(img_t) & set(lbl_t) + if not shared: + raise ValueError( + f"Image '{image_key}' and labels '{labels_key}' share no coordinate " + f"system (image: {sorted(img_t)}, labels: {sorted(lbl_t)})." + ) + return "global" if "global" in shared else sorted(shared)[0] -def _extract_features_from_regionprops( - region_obj: Any, - props: set[str], - cell_id: int, - skip_callable: bool = False, -) -> dict[str, float]: - """Extract features from a regionprops object given a list of properties.""" - cell_features = {} - for prop in props: - try: - value = getattr(region_obj, prop) - if skip_callable and callable(value): - continue +def _relative_affine(sdata: SpatialData, image_key: str, labels_key: str, cs: str) -> np.ndarray: + """Return the 3x3 affine that maps labels-pixel-coords to image-pixel-coords. + + Uses ``(x, y)`` axis order to match :mod:`spatialdata` convention. + """ + t_img = get_transformation(sdata.images[image_key], to_coordinate_system=cs) + t_lbl = get_transformation(sdata.labels[labels_key], to_coordinate_system=cs) + # image_pixel <- global <- labels_pixel + m_img_to_global = t_img.to_affine_matrix(input_axes=("x", "y"), output_axes=("x", "y")) + m_lbl_to_global = t_lbl.to_affine_matrix(input_axes=("x", "y"), output_axes=("x", "y")) + m_global_to_img = np.linalg.inv(m_img_to_global) + return m_global_to_img @ m_lbl_to_global - if prop in _SCALAR_PROPS: - cell_features[prop] = float(value) - elif prop in _ARRAY_1D_PROPS: - # Convert to array only once - value = np.asarray(value) - for i, v in enumerate(value): - cell_features[f"{prop}_{i}"] = float(v) - elif prop in _ARRAY_2D_PROPS: - # Convert to array only once - value = np.asarray(value) - for i in range(value.shape[0]): - for j in range(value.shape[1]): - cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) - elif prop in _SPECIAL_PROPS: - # Convert to array only once - value = np.asarray(value) - for i, v in enumerate(value): - cell_features[f"{prop}_{i}"] = float(v) - else: - # Fallback for any other properties - if isinstance(value, np.ndarray | list | tuple): - value = np.asarray(value) - if value.ndim == 1: - for i, v in enumerate(value): - cell_features[f"{prop}_{i}"] = float(v) - elif value.ndim == 2: - for i in range(value.shape[0]): - for j in range(value.shape[1]): - cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) - else: - cell_features[prop] = float(value.flatten()[0]) - else: - cell_features[prop] = float(value) - - except (ValueError, TypeError, AttributeError) as e: - logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}") - continue - return cell_features +def _is_close_identity(m: np.ndarray, atol: float = 1e-6) -> bool: + return bool(np.allclose(m, np.eye(m.shape[0]), atol=atol)) -def _calculate_regionprops_from_crop( - cell_mask_cropped: NDArray, - intensity_image_cropped: NDArray | None, - cell_id: int, - props: set[str], -) -> dict[str, float]: +def _decompose_pixel_translation(m: np.ndarray, atol: float = 1e-6) -> tuple[int, int] | None: + """If ``m`` is identity-plus-integer-translation, return ``(tx, ty)``; else None. + + ``m`` is a 3x3 affine in (x, y) axis order. """ - Calculate regionprops features from pre-cropped arrays. - Uses intensity-based properties if an intensity image is provided. + rotscale = m[:2, :2] + if not np.allclose(rotscale, np.eye(2), atol=atol): + return None + tx, ty = float(m[0, 2]), float(m[1, 2]) + if not (abs(tx - round(tx)) < atol and abs(ty - round(ty)) < atol): + return None + return int(round(tx)), int(round(ty)) + + +def _align_to_image_grid( + sdata: SpatialData, + image_key: str, + labels_key: str, + image_da: xr.DataArray, + labels_da: xr.DataArray, + align_mode: Literal["strict", "rasterize"], + drop_report: DropReport, +) -> tuple[xr.DataArray, xr.DataArray]: + """Crop image and labels to their pixel-grid overlap, honoring transforms. + + See module docstring of concern 3 fix for full semantics. Mutates + ``drop_report`` to count cells dropped because they fall outside the + overlap rectangle. """ - if intensity_image_cropped is None: - region_props = measure.regionprops(label_image=label(cell_mask_cropped)) - if not region_props: - return {} - return _extract_features_from_regionprops(region_props[0], props, cell_id) + cs = _shared_coordinate_system(sdata, image_key, labels_key) + m = _relative_affine(sdata, image_key, labels_key, cs) + + # Integer-pixel offset of labels relative to image. (tx, ty) means + # labels pixel (0, 0) lands at image pixel (tx, ty) in (x, y) order. + tx: int + ty: int + if _is_close_identity(m): + tx, ty = 0, 0 else: - region_props = measure.regionprops( - label_image=label(cell_mask_cropped), - intensity_image=intensity_image_cropped, + decomposed = _decompose_pixel_translation(m) + if decomposed is not None: + tx, ty = decomposed + else: + if align_mode == "strict": + raise ValueError( + f"Labels not aligned to image pixel grid in coordinate system '{cs}'. " + f"Relative affine (x,y) =\n{m}\n" + f"Pass align_mode='rasterize' to resample labels onto the image grid " + f"(via spatialdata.rasterize), or pre-align with spatialdata.rasterize " + f"in your pipeline." + ) + # align_mode == "rasterize": materialize labels onto image grid. + logg.warning( + "align_mode='rasterize' triggered: resampling labels onto the image grid " + "via spatialdata.rasterize. This materializes labels in memory; lazy " + "behavior is lost for this run." + ) + img_h = int(image_da.sizes["y"]) + img_w = int(image_da.sizes["x"]) + rasterized = rasterize( + sdata.labels[labels_key], + ["x", "y"], + min_coordinate=[0, 0], + max_coordinate=[img_w, img_h], + target_coordinate_system=cs, + target_unit_to_pixels=1.0, + return_regions_as_labels=True, + ) + if isinstance(rasterized, xr.DataArray): + labels_da = rasterized + else: + labels_da = xr.DataArray(np.asarray(rasterized), dims=["y", "x"]) + tx, ty = 0, 0 + + # Determine overlap rectangle in image-pixel coords. + img_h = int(image_da.sizes["y"]) + img_w = int(image_da.sizes["x"]) + lbl_h = int(labels_da.sizes.get("y", labels_da.shape[-2])) + lbl_w = int(labels_da.sizes.get("x", labels_da.shape[-1])) + + # Labels pixel (i_y, i_x) in label coords maps to image pixel (i_y+ty, i_x+tx). + img_y0 = max(0, ty) + img_x0 = max(0, tx) + img_y1 = min(img_h, lbl_h + ty) + img_x1 = min(img_w, lbl_w + tx) + if img_y1 <= img_y0 or img_x1 <= img_x0: + raise ValueError(f"Image '{image_key}' and labels '{labels_key}' do not overlap in coordinate system '{cs}'.") + + lbl_y0 = img_y0 - ty + lbl_x0 = img_x0 - tx + lbl_y1 = img_y1 - ty + lbl_x1 = img_x1 - tx + + image_crop = image_da.isel(y=slice(img_y0, img_y1), x=slice(img_x0, img_x1)) + labels_crop = labels_da.isel(y=slice(lbl_y0, lbl_y1), x=slice(lbl_x0, lbl_x1)) + + # Count cells that fall (partially) outside the labels_crop. + cells_inside, cells_partial, cells_outside = _classify_dropped_cells(labels_da, lbl_y0, lbl_x0, lbl_y1, lbl_x1) + if cells_outside or cells_partial: + drop_report.outside_image_extent += cells_outside + drop_report.partial_at_image_boundary += cells_partial + warnings.warn( + f"Dropping {cells_outside} cells outside the image extent and " + f"{cells_partial} cells partially outside. See end-of-run drop report.", + UserWarning, + stacklevel=2, ) - if not region_props: - return {} - return _extract_features_from_regionprops(region_props[0], props, cell_id, skip_callable=True) - - -def _append_channel_names( - features: dict[str, Any], - channel1: str | None, - channel2: str | None = None, -) -> dict[str, Any]: - """Append channel name(s) to all keys in the feature dictionary.""" - if channel2 is None: - return {f"{k}_{channel1}": v for k, v in features.items()} - else: - return {f"{k}_{channel1}_{channel2}": v for k, v in features.items()} + + return image_crop, labels_crop -def _prepare_images_for_measurement( - name: str, - cell_mask: NDArray, - img1: NDArray, - img2: NDArray | None, - conv_params: dict[str, Any], -) -> tuple[NDArray, NDArray | None, NDArray | None]: +def _classify_dropped_cells( + labels_da: xr.DataArray, + y0: int, + x0: int, + y1: int, + x1: int, +) -> tuple[int, int, int]: + """Return ``(fully_inside, partially_inside, fully_outside)`` cell counts. + + A cell whose entire bounding box is inside the crop window is counted as + fully inside. If only part of its bounding box is inside, it is + partially inside. If the cell does not appear inside the crop window at + all, it is fully outside. """ - Convert inputs to the appropriate dtype based on the measurement type. + lbl_h = int(labels_da.sizes.get("y", labels_da.shape[-2])) + lbl_w = int(labels_da.sizes.get("x", labels_da.shape[-1])) + # Skip the work entirely when nothing is cropped away. + if y0 <= 0 and x0 <= 0 and y1 >= lbl_h and x1 >= lbl_w: + return 0, 0, 0 + + arr = labels_da.values + if arr.ndim > 2: + arr = arr.squeeze() + inside = arr[y0:y1, x0:x1] + ids_total = {int(v) for v in np.unique(arr) if v != 0} + ids_inside_any = {int(v) for v in np.unique(inside) if v != 0} + + fully_outside = len(ids_total - ids_inside_any) + + # Find which "inside_any" cells are only partial: their bbox in `arr` + # extends outside the crop window. + partial = 0 + for lid in ids_inside_any: + ys, xs = np.where(arr == lid) + if ys.min() < y0 or ys.max() >= y1 or xs.min() < x0 or xs.max() >= x1: + partial += 1 + fully_inside = len(ids_inside_any) - partial + return fully_inside, partial, fully_outside + + +def _resolve_da(node: xr.DataTree | xr.DataArray, scale: str | None) -> xr.DataArray: + """Get a DataArray from a DataTree or single-scale element (stays lazy).""" + if not isinstance(node, xr.DataTree): + return node + if scale is None: + raise ValueError("Scale must be provided for DataTree data.") + if scale not in node: + raise ValueError(f"Scale '{scale}' not found. Available: {list(node.keys())}") + return node[scale].ds["image"] + + +def _validate_inputs( + sdata: SpatialData, + image_key: str, + labels_key: str | None, + shapes_key: str | None, + scale: str | None, +) -> None: + """Run all input validation checks (no data loading).""" + if image_key not in sdata.images: + raise ValueError(f"Image key '{image_key}' not found, valid keys: {list(sdata.images.keys())}") + if labels_key is None and shapes_key is None: + raise ValueError("Provide either `labels_key` or `shapes_key`.") + if labels_key is not None and shapes_key is not None: + raise ValueError("Use either `labels_key` or `shapes_key`, not both.") + if labels_key is not None and labels_key not in sdata.labels: + raise ValueError(f"Labels key '{labels_key}' not found, valid keys: {list(sdata.labels.keys())}") + if shapes_key is not None and shapes_key not in sdata.shapes: + raise ValueError(f"Shapes key '{shapes_key}' not found, valid keys: {list(sdata.shapes.keys())}") + if labels_key is not None and isinstance(sdata.labels[labels_key], xr.DataTree) and scale is None: + raise ValueError("When using multi-scale labels, please specify the scale.") + if isinstance(sdata.images[image_key], xr.DataTree) and scale is None: + raise ValueError("When using multi-scale images, please specify the scale.") + + +def _prepare_lazy( + sdata: SpatialData, + image_key: str, + labels_key: str | None, + shapes_key: str | None, + scale: str | None, + channels: list[str] | list[int] | None, + align_mode: Literal["strict", "rasterize"], + drop_report: DropReport, +) -> tuple[xr.DataArray, xr.DataArray, list[str]]: + """Return lazy (dask-backed) image and labels DataArrays, plus channel names. + + Does NOT call ``.compute()`` — arrays stay lazy for on-demand tile reads. + For the shapes→labels path, labels are materialized (rasterize returns + an in-memory array) but wrapped in a DataArray for a uniform interface. """ - if name in conv_params.get("uint8_features", []): - mask = cell_mask.astype(np.uint8) - image1_prepared = img1.astype(np.uint8) - image2_prepared = None if img2 is None else img2.astype(np.uint8) - elif name == "texture": - mask = cell_mask.astype(np.uint8) - image1_prepared = (img1.astype(np.float32) - conv_params["img1_min"]) / conv_params["img1_range"] - image2_prepared = ( - None if img2 is None else (img2.astype(np.float32) - conv_params["img2_min"]) / conv_params["img2_range"] - ) - elif name in conv_params.get("float_features", []): - mask = cell_mask.astype(np.float32) - image1_prepared = img1.astype(np.float32) - image2_prepared = None if img2 is None else img2.astype(np.float32) - else: - mask = cell_mask.astype(np.float32) - image1_prepared = img1.astype(np.float32) - image2_prepared = None if img2 is None else img2.astype(np.float32) - return mask, image1_prepared, image2_prepared - - -def _get_label_bbox(labels: NDArray) -> tuple[int, int, int, int]: - """Return tight bounding box (y_min, y_max, x_min, x_max) for non-zero labels.""" - yx = np.argwhere(labels > 0) - if yx.size == 0: - return 0, labels.shape[0], 0, labels.shape[1] - y_min, x_min = yx.min(axis=0) - y_max, x_max = yx.max(axis=0) + 1 - return int(y_min), int(y_max), int(x_min), int(x_max) - - -def _compute_squidpy_channel_features( - image: NDArray, - labels: NDArray, - cell_ids: NDArray, - channel_names: NDArray | None, - parsed: ParsedMeasurements, -) -> list[pd.DataFrame]: - """Compute squidpy legacy-like features and broadcast to cells.""" - feats: list[pd.DataFrame] = [] - - # Crop to label bbox to speed computations - y_min, y_max, x_min, x_max = _get_label_bbox(labels) - img_crop = image[:, y_min:y_max, x_min:x_max] - - n_channels = img_crop.shape[0] - ch_names = channel_names if channel_names is not None else np.arange(n_channels).astype(str) - - if parsed.has_squidpy_summary: - summary_vals: dict[str, float] = {} - for ch_idx in range(n_channels): - ch = img_crop[ch_idx].astype(np.float32) - summary_vals.update( - { - f"summary_mean_{ch_names[ch_idx]}": float(np.mean(ch)), - f"summary_std_{ch_names[ch_idx]}": float(np.std(ch)), - f"summary_min_{ch_names[ch_idx]}": float(np.min(ch)), - f"summary_max_{ch_names[ch_idx]}": float(np.max(ch)), - } - ) - df = pd.DataFrame([summary_vals] * len(cell_ids), index=cell_ids) - feats.append(df) + _validate_inputs(sdata, image_key, labels_key, shapes_key, scale) - if parsed.has_squidpy_texture: - tex_vals: dict[str, float] = {} - # Quantize to 32 levels to keep GLCM small - quant_levels = 32 - for ch_idx in range(n_channels): - ch = img_crop[ch_idx] - # normalize to [0, quant_levels-1] - ch_norm = ch.astype(np.float32) - if ch_norm.max() > ch_norm.min(): - ch_norm = (ch_norm - ch_norm.min()) / (ch_norm.max() - ch_norm.min()) - ch_q = np.clip((ch_norm * (quant_levels - 1)).round().astype(np.uint8), 0, quant_levels - 1) - glcm = graycomatrix(ch_q, distances=[1], angles=[0], levels=quant_levels, symmetric=True, normed=True) - tex_vals.update( - { - f"texture_contrast_{ch_names[ch_idx]}": float(graycoprops(glcm, "contrast")[0, 0]), - f"texture_dissimilarity_{ch_names[ch_idx]}": float(graycoprops(glcm, "dissimilarity")[0, 0]), - f"texture_homogeneity_{ch_names[ch_idx]}": float(graycoprops(glcm, "homogeneity")[0, 0]), - f"texture_energy_{ch_names[ch_idx]}": float(graycoprops(glcm, "energy")[0, 0]), - f"texture_ASM_{ch_names[ch_idx]}": float(graycoprops(glcm, "ASM")[0, 0]), - f"texture_correlation_{ch_names[ch_idx]}": float(graycoprops(glcm, "correlation")[0, 0]), - } - ) - df = pd.DataFrame([tex_vals] * len(cell_ids), index=cell_ids) - feats.append(df) + # Image DataArray (lazy) + image_da = _resolve_da(sdata.images[image_key], scale) + if "c" not in image_da.dims: + image_da = image_da.expand_dims("c") - if parsed.has_squidpy_color_hist: - hist_vals: dict[str, float] = {} - bins = 16 - for ch_idx in range(n_channels): - ch = img_crop[ch_idx].astype(np.float32) - hist, bin_edges = np.histogram( - ch, bins=bins, range=(ch.min(), ch.max() if ch.max() > ch.min() else ch.min() + 1) + # Labels DataArray (lazy for labels_key, materialized for shapes_key) + if labels_key is not None: + labels_da = _resolve_da(sdata.labels[labels_key], scale) + else: + logg.info("Converting shapes to labels.") + img_shape = {d: image_da.sizes[d] for d in ("y", "x")} + try: + labels_result = rasterize( + sdata.shapes[shapes_key], + ["x", "y"], + min_coordinate=[0, 0], + max_coordinate=[img_shape["x"], img_shape["y"]], + target_coordinate_system="global", + target_unit_to_pixels=1.0, + return_regions_as_labels=True, ) - hist = hist.astype(np.float32) - hist_sum = hist.sum() - if hist_sum > 0: - hist = hist / hist_sum - hist_vals.update({f"color_hist_bin{b}_{ch_names[ch_idx]}": float(v) for b, v in enumerate(hist)}) - df = pd.DataFrame([hist_vals] * len(cell_ids), index=cell_ids) - feats.append(df) - - return feats - - -@njit(fastmath=True) -def _get_cell_crops_numba( - cell_id: int, - labels: npt.NDArray[np.int_], - image1: npt.NDArray[np.float32], - image2: npt.NDArray[np.float32], - pad: int = 1, -) -> tuple[npt.NDArray[np.bool_], npt.NDArray[np.float32], npt.NDArray[np.float32]]: - """Numba-accelerated version of _get_cell_crops. - - Note: image1 and image2 should be passed as empty arrays (np.zeros((0,0))) if not used. - """ - # Find cell boundaries using vectorized operations - cell_mask = labels == cell_id - if not np.any(cell_mask): - return ( - np.zeros((0, 0), dtype=np.bool_), - np.zeros((0, 0), dtype=image1.dtype), - np.zeros((0, 0), dtype=image2.dtype), + except ValueError as e: + raise ValueError( + "Failed to rasterize shapes; geometries may be empty or unsupported. " + "Filter out empty/non-polygon geometries or choose a different shapes_key." + ) from e + if isinstance(labels_result, xr.DataArray): + labels_da = labels_result + else: + labels_da = xr.DataArray(np.asarray(labels_result), dims=["y", "x"]) + + # Align labels to image pixel grid via SpatialData transformations. + # For the shapes_key path, rasterize already targets the image grid, so + # the transforms are identity and this is a cheap no-op. + if labels_key is not None: + image_da, labels_da = _align_to_image_grid( + sdata, image_key, labels_key, image_da, labels_da, align_mode, drop_report ) - # Get non-zero indices efficiently - y_indices, x_indices = np.nonzero(cell_mask) - y_min, y_max = y_indices.min(), y_indices.max() - x_min, x_max = x_indices.min(), x_indices.max() - - # Get image dimensions - height, width = labels.shape - - # Calculate padding with boundary checks in one step - y_pad_min = min(pad, y_min) - y_pad_max = min(pad, height - y_max - 1) - x_pad_min = min(pad, x_min) - x_pad_max = min(pad, width - x_max - 1) - - # Calculate crop dimensions with padding - y_start = y_min - y_pad_min - y_end = y_max + y_pad_max + 1 - x_start = x_min - x_pad_min - x_end = x_max + x_pad_max + 1 - - # Create output arrays with exact size - y_size = y_end - y_start - x_size = x_end - x_start - - # Create cell mask crop - cell_mask_cropped = np.zeros((y_size, x_size), dtype=np.bool_) - for i in range(y_size): - for j in range(x_size): - cell_mask_cropped[i, j] = cell_mask[y_start + i, x_start + j] - - # Handle image crops efficiently - if image1.size > 0: - image1_cropped = np.zeros((y_size, x_size), dtype=image1.dtype) - for i in range(y_size): - for j in range(x_size): - image1_cropped[i, j] = image1[y_start + i, x_start + j] + # Resolve channel names through spatialdata's canonical accessor so we + # honor c_coords set at parse time. Always cast to str. + all_ch = [str(v) for v in get_channel_names(sdata.images[image_key])] + if len(all_ch) != image_da.sizes["c"]: + # Multiscale element where get_channel_names may report from a + # different scale than image_da. Fall back to positional naming. + all_ch = [str(i) for i in range(image_da.sizes["c"])] + + ch_names: list[str] + if channels is not None: + selected_idx: list[int] = [] + ch_names = [] + for ch in channels: + if isinstance(ch, int): + if ch < 0 or ch >= len(all_ch): + raise ValueError(f"Channel index {ch} out of range [0, {len(all_ch)}).") + selected_idx.append(ch) + ch_names.append(all_ch[ch]) + else: + ch_str = str(ch) + if ch_str not in all_ch: + raise ValueError(f"Channel '{ch}' not found. Available: {all_ch}") + selected_idx.append(all_ch.index(ch_str)) + ch_names.append(ch_str) + image_da = image_da.isel(c=selected_idx) else: - image1_cropped = np.zeros((0, 0), dtype=image1.dtype) + ch_names = all_ch - if image2.size > 0: - image2_cropped = np.zeros((y_size, x_size), dtype=image2.dtype) - for i in range(y_size): - for j in range(x_size): - image2_cropped[i, j] = image2[y_start + i, x_start + j] - else: - image2_cropped = np.zeros((0, 0), dtype=image2.dtype) - - return cell_mask_cropped, image1_cropped, image2_cropped - - -def _get_cell_crops( - cell_id: int, - labels: NDArray, - image1: NDArray | None = None, - image2: NDArray | None = None, - pad: int = 1, - verbose: bool = False, -) -> tuple[NDArray, NDArray | None, NDArray | None] | None: - """Generator function to get cropped arrays for a cell.""" - # Create empty arrays for unused images - empty_image = np.zeros((0, 0), dtype=np.float32) - image1_np = image1 if image1 is not None else empty_image - image2_np = image2 if image2 is not None else empty_image - - # Use Numba-accelerated version - cell_mask_cropped, image1_cropped, image2_cropped = _get_cell_crops_numba( - cell_id, labels, image1_np, image2_np, pad - ) + return image_da, labels_da, ch_names - if cell_mask_cropped.size == 0: - return None - # Convert back to None for unused images - image1_cropped = image1_cropped if image1 is not None else None - image2_cropped = image2_cropped if image2 is not None else None +def _compute_centroids( + sdata: SpatialData, + labels_key: str | None, + labels_da: xr.DataArray, + scale: str | None, +) -> dict: + """Compute cell centroids using the most efficient strategy available.""" + # Multiscale labels → use coarsest scale + if labels_key is not None and isinstance(sdata.labels[labels_key], xr.DataTree): + logg.info("Computing centroids from coarse scale.") + return compute_cell_info_multiscale(sdata.labels[labels_key], target_scale=scale or "scale0") + + # Small enough to fit in memory → direct regionprops + n_pixels = labels_da.sizes.get("y", 1) * labels_da.sizes.get("x", 1) + if n_pixels <= 4096 * 4096: + lbl_np = labels_da.values + if lbl_np.ndim > 2: + lbl_np = lbl_np.squeeze() + return compute_cell_info(lbl_np) + + # Large single-scale → tiled centroid computation + logg.info("Computing centroids in tiled mode (large single-scale labels).") + return compute_cell_info_tiled(labels_da) + + +# --------------------------------------------------------------------------- +# Main function +# --------------------------------------------------------------------------- - return cell_mask_cropped, image1_cropped, image2_cropped +def calculate_image_features( + sdata: SpatialData, + image_key: str, + labels_key: str | None = None, + shapes_key: str | None = None, + scale: str | None = None, + channels: list[str] | list[int] | None = None, + features: list[str] | str | None = None, + tile_size: int = 2048, + overlap_margin: int | Literal["auto"] = "auto", + align_mode: Literal["strict", "rasterize"] = "strict", + adata_key_added: str = "morphology", + invalid_as_zero: bool = True, + n_jobs: int = 1, + inplace: bool = True, +) -> ad.AnnData | None: + """ + Calculate per-cell features from segmentation masks. -def _get_regionprops_features( - cell_ids: Sequence[int], - labels: NDArray, - intensity_image: NDArray | None = None, - queue: Any | None = None, - props: set[str] | None = None, -) -> pd.DataFrame: - """Calculate regionprops features for each cell from the full label image.""" - # Initialize features dictionary with None values to preserve order - features = dict.fromkeys(cell_ids, None) - - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - # Process cells in order to preserve order - for cell_id in cell_ids: - crop = _get_cell_crops(cell_id, labels, image1=intensity_image) - if crop is None: - continue - cell_mask_cropped, intensity_image_cropped, _ = crop - # Default to full property sets for backward compatibility - if props is None: - props = _INTENSITY_PROPS if intensity_image is not None else _MASK_PROPS - cell_features = _calculate_regionprops_from_crop( - cell_mask_cropped, - intensity_image_cropped, - cell_id, - props, - ) - features[cell_id] = cell_features - if queue is not None: - queue.put(Signal.UPDATE) - if queue is not None: - queue.put(Signal.FINISH) - - # Convert to DataFrame while preserving order - df = pd.DataFrame.from_dict(features, orient="index") - # Ensure the index matches the input cell_ids order - df = df.reindex(cell_ids) - return df - - -def _measurement_wrapper( - func: Callable[..., dict[str, Any]], - mask: NDArray, - image1: NDArray | None, - image2: NDArray | None = None, -) -> dict[str, Any]: - """Wrapper function to handle both core and correlation measurements. + Uses `cp_measure `_ for + CellProfiler-derived features, scikit-image ``regionprops`` for + morphological/intensity features, and squidpy-specific per-cell + metrics (summary statistics, GLCM texture, colour histograms). + + Large images are automatically tiled into ``tile_size × tile_size`` + chunks with overlap so that every cell is fully contained in exactly + one tile. Parameters ---------- - func - The measurement function to call - mask - The cell mask - image1 - First image (or only image for core measurements) - image2 - Second image for correlation measurements. If None, this is a core - measurement. + sdata + SpatialData object. + image_key + Key in ``sdata.images``. + labels_key + Key in ``sdata.labels`` with segmentation masks. + shapes_key + Key in ``sdata.shapes`` (rasterized to labels internally). + scale + Scale level for multi-scale data. + channels + Subset of channels to use. ``None`` uses all channels. + features + Which features to compute. Accepts a list of strings: + + - ``"cpmeasure:intensity"``, ``"cpmeasure:sizeshape"``, + ``"cpmeasure:texture"``, ``"cpmeasure:granularity"``, + ``"cpmeasure:zernike"``, ``"cpmeasure:feret"``, + ``"cpmeasure:radial"``, ``"cpmeasure:correlation"`` + - ``"skimage:label"`` (all mask props), ``"skimage:label:area"`` + (single prop), ``"skimage:label+image"`` (all intensity props), + ``"skimage:label+image:intensity_mean"`` (single prop) + - ``"squidpy:summary"``, ``"squidpy:texture"``, + ``"squidpy:color_hist"`` + + ``None`` enables all cp_measure features. + tile_size + Side length of the tiling grid (pixels). + overlap_margin + Overlap around each tile to capture boundary cells. + ``"auto"`` computes the minimum from the largest cell's bounding box. + align_mode + How to handle image/labels coordinate-system alignment when their + pixel grids do not match. + + * ``"strict"`` (default): require the relative transform between + image and labels to be identity or an integer-pixel translation. + Raise otherwise with a hint pointing to :func:`spatialdata.rasterize`. + * ``"rasterize"``: silently resample labels onto the image pixel + grid using :func:`spatialdata.rasterize` when the transforms are + not pixel-aligned. Logs a warning because this materializes the + full label grid in memory. + adata_key_added + Key under which to store the result in ``sdata.tables``. + invalid_as_zero + Replace ``inf`` and ``NaN`` values with zero. + n_jobs + Number of parallel jobs for tile processing. + inplace + If ``True``, store result in ``sdata.tables``. Otherwise return it. Returns ------- - Dictionary of feature values + :class:`~anndata.AnnData` when ``inplace=False``, otherwise ``None``. """ - if image1 is None: - return {} # Return empty dict if no image data + # --- Parse & validate --- + parsed = _parse_features(features) + if not _has_any_features(parsed): + raise ValueError("No valid features requested.") - try: - if image2 is None: - return func(mask, image1) + drop_report = DropReport() + + image_da, labels_da, channel_names = _prepare_lazy( + sdata, image_key, labels_key, shapes_key, scale, channels, align_mode, drop_report + ) + + # --- Warmup: compute centroids without materializing full arrays --- + cell_info = _compute_centroids(sdata, labels_key, labels_da, scale) + if not cell_info: + logg.info(drop_report.summary()) + raise ValueError("No cells found in labels (all zeros).") + + H = int(labels_da.sizes.get("y", labels_da.shape[-2])) + W = int(labels_da.sizes.get("x", labels_da.shape[-1])) + + # --- Tile --- + specs = build_tile_specs((H, W), cell_info, tile_size=tile_size, overlap_margin=overlap_margin) + total_tiles = len(specs) + logg.info(f"Processing {total_tiles} tiles ({tile_size}x{tile_size}, margin={overlap_margin}).") + + # --- Process tiles (each worker materializes only its own ~2k x 2k crop) --- + def _process_one(spec): + tile_img, tile_lbl = extract_tile_lazy(image_da, labels_da, spec) + return _featurize_tile(tile_img, tile_lbl, parsed, channel_names) + + log_every = max(1, total_tiles // 10) + start_t = time.monotonic() + tile_dfs: list[pd.DataFrame] = [] + results_iter = Parallel(n_jobs=n_jobs, prefer="threads", return_as="generator_unordered")( + delayed(_process_one)(spec) for spec in specs + ) + for done, df in enumerate( + tqdm(results_iter, total=total_tiles, desc="Featurizing tiles", unit="tile"), + start=1, + ): + if df.empty: + drop_report.empty_tile_drop += 1 else: - # Check if we have valid data for correlation - if not np.any(mask) or not np.any(image1) or not np.any(image2): - # Get feature names from a successful call to maintain structure - dummy_mask = np.ones((2, 2), dtype=bool) - dummy_img = np.ones((2, 2), dtype=image1.dtype) - feature_names = func(dummy_img, dummy_img, dummy_mask).keys() - # Return dictionary with NaN values for all features - return dict.fromkeys(feature_names, np.nan) - return func(image1, image2, mask) - except (IndexError, ValueError) as e: - # Handle cases where correlation calculation fails - if "index 0 is out of bounds" in str(e) or "size 0" in str(e): - # Get feature names from a successful call to maintain structure - dummy_mask = np.ones((2, 2), dtype=bool) - dummy_img = np.ones((2, 2), dtype=image1.dtype) - feature_names = func(dummy_img, dummy_img, dummy_mask).keys() - # Return dictionary with NaN values for all features - return dict.fromkeys(feature_names, np.nan) - raise # Re-raise other errors - - -def _calculate_features_helper( - cell_ids: Sequence[int], - labels: NDArray, - image1: NDArray, - image2: NDArray | None, - measurements: dict[str, Any], - channel1_name: str | None = None, - channel2_name: str | None = None, - queue: Any | None = None, - verbose: bool = False, -) -> pd.DataFrame: - """Helper function to calculate features for a subset of cells.""" - # Initialize features dictionary with None values to preserve order - features_dict = dict.fromkeys(cell_ids, None) - - # Pre-allocate lists for type conversion - uint8_features = [ - "radial_distribution", - "radial_zernikes", - "intensity", - "sizeshape", - "zernike", - "ferret", - ] - float_features = ["manders_fold", "rwc"] - - # Pre-compute normalization if needed - conv_params: dict[str, Any] = { - "uint8_features": uint8_features, - "float_features": float_features, + tile_dfs.append(df) + if done == 1 or done == total_tiles or done % log_every == 0: + elapsed = time.monotonic() - start_t + logg.info(f"Tile {done}/{total_tiles} done (elapsed {elapsed:.1f}s).") + + if not tile_dfs: + logg.info(drop_report.summary()) + raise ValueError("No features computed for any tile.") + + combined = pd.concat(tile_dfs, axis=0) + + # --- Post-process --- + if invalid_as_zero: + combined = combined.replace([np.inf, -np.inf], 0).fillna(0) + + # Sort by cell label for deterministic output + combined = combined.sort_index() + + # --- Build AnnData --- + adata = ad.AnnData(X=combined.values.astype(np.float32)) + adata.obs_names = [f"cell_{i}" for i in combined.index] + adata.var_names = list(combined.columns) + + region_key_value = labels_key if labels_key is not None else shapes_key + adata.uns["spatialdata_attrs"] = { + "region": region_key_value, + "region_key": "region", + "instance_key": "label_id", } - if "texture" in measurements: - img1_min = image1.min() - img1_max = image1.max() - conv_params["img1_min"] = img1_min - conv_params["img1_range"] = img1_max - img1_min + 1e-10 - if image2 is not None: - img2_min = image2.min() - img2_max = image2.max() - conv_params["img2_min"] = img2_min - conv_params["img2_range"] = img2_max - img2_min + 1e-10 - - # Process cells in order to preserve order - for cell_id in cell_ids: - crop = _get_cell_crops(cell_id, labels, image1, image2, verbose=verbose) - if crop is None: - continue - cell_mask_cropped, image1_cropped, image2_cropped = crop - cell_features = {} + adata.obs["region"] = pd.Categorical([region_key_value] * len(adata)) - # Calculate regionprops features using cached crop - try: - region_features = _calculate_regionprops_from_crop( - cell_mask_cropped, - image1_cropped if image2 is None else None, - cell_id, - ) - if image2 is None: - region_features = _append_channel_names(region_features, channel1_name) - else: - region_features = _append_channel_names(region_features, channel1_name, channel2_name) - cell_features.update(region_features) - except (ValueError, TypeError, AttributeError) as e: - if verbose: - logg.warning(f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}") - - # Calculate cp-measure features for each measurement - for name, func in measurements.items(): - try: - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - if image1_cropped is None: - continue - mask_conv, img1_conv, img2_conv = _prepare_images_for_measurement( - name, - cell_mask_cropped, - image1_cropped, - image2_cropped, - conv_params, - ) - feature_dict = _measurement_wrapper(func, mask_conv, img1_conv, img2_conv) - # Ensure each feature returns a single value - for k, v in feature_dict.items(): - if len(v) > 1: - raise ValueError(f"Feature {k} has more than one value.") - else: - feature_dict[k] = float(v[0]) - if image2 is None: - feature_dict = _append_channel_names(feature_dict, channel1_name) - else: - feature_dict = _append_channel_names(feature_dict, channel1_name, channel2_name) - cell_features.update(feature_dict) - except (ValueError, TypeError, AttributeError) as e: - if verbose: - logg.warning(f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}") - - features_dict[cell_id] = cell_features - - if queue is not None: - queue.put(Signal.UPDATE) - - if queue is not None: - queue.put(Signal.FINISH) - - # Convert to DataFrame while preserving order - df = pd.DataFrame.from_dict(features_dict, orient="index") - # Ensure the index matches the input cell_ids order - df = df.reindex(cell_ids) - return df - - -def _get_array_from_DataTree_or_DataArray( - data: xr.DataTree | xr.DataArray, - scale: str | None = None, -) -> NDArray: - """ - Returns a NumPy array for the given data and scale. - If data is an xr.DataTree, it checks for the scale key and computes the image. - If data is an xr.DataArray, it computes the array (ignoring scale). + if shapes_key is not None and len(sdata.shapes[shapes_key]) == len(adata): + adata.obs["label_id"] = sdata.shapes[shapes_key].index.values + else: + adata.obs["label_id"] = combined.index.values - Parameters - ---------- - data - The xarray data to convert to a NumPy array - scale - Optional scale key for DataTree data + logg.info(drop_report.summary()) - Returns - ------- - np.ndarray - The computed NumPy array - """ - if not isinstance(data, xr.DataTree): - return np.asarray(data.compute()) - if scale is None: - raise ValueError("Scale must be provided for DataTree data") - if scale not in data: - raise ValueError(f"Scale '{scale}' not found. Available scales: {list(data.keys())}") - return np.asarray(data[scale].image.compute()) + if inplace: + sdata.tables[adata_key_added] = TableModel.parse(adata) + return None + return adata diff --git a/src/squidpy/experimental/im/_tiling.py b/src/squidpy/experimental/im/_tiling.py index 6154faa44..b3d906576 100644 --- a/src/squidpy/experimental/im/_tiling.py +++ b/src/squidpy/experimental/im/_tiling.py @@ -5,6 +5,15 @@ the tile whose non-overlapping base region contains the centroid owns the cell. Non-owned cells are zeroed out in each tile's mask so that downstream processing never double-counts. + +Two parallel APIs are exposed: + +* In-memory: ``compute_cell_info(labels) -> dict`` + ``extract_tile``. +* Lazy / xarray-backed: ``compute_cell_info_multiscale``, + ``compute_cell_info_tiled``, ``extract_tile_lazy``. + +``build_tile_specs`` takes only ``(shape, cell_info)``, so it is agnostic +to whether the labels are in memory, dask-backed, or multiscale. """ from __future__ import annotations @@ -13,8 +22,21 @@ from typing import Literal import numpy as np +import xarray as xr from skimage.measure import regionprops +__all__ = [ + "CellInfo", + "TileSpec", + "build_tile_specs", + "compute_cell_info", + "compute_cell_info_multiscale", + "compute_cell_info_tiled", + "extract_tile", + "extract_tile_lazy", + "verify_coverage", +] + @dataclass(frozen=True) class CellInfo: @@ -23,8 +45,8 @@ class CellInfo: label: int centroid_y: float centroid_x: float - bbox_h: int # height of bounding box - bbox_w: int # width of bounding box + bbox_h: int # height of bounding box (pixels) + bbox_w: int # width of bounding box (pixels) @dataclass(frozen=True) @@ -34,10 +56,10 @@ class TileSpec: Attributes ---------- base - The non-overlapping region ``(y0, x0, y1, x1)`` used for centroid + Non-overlapping region ``(y0, x0, y1, x1)`` used for centroid ownership. Tiles partition the image into a grid of base regions. crop - The extended region ``(y0, x0, y1, x1)`` that includes the overlap + Extended region ``(y0, x0, y1, x1)`` that includes the overlap margin. This is the actual slice extracted from the image/labels. owned_ids Label IDs whose centroid falls inside ``base``. Only these labels @@ -49,7 +71,12 @@ class TileSpec: owned_ids: frozenset[int] = field(default_factory=frozenset) -def _compute_cell_info(labels: np.ndarray) -> dict[int, CellInfo]: +# --------------------------------------------------------------------------- +# Cell info — in-memory +# --------------------------------------------------------------------------- + + +def compute_cell_info(labels: np.ndarray) -> dict[int, CellInfo]: """Compute centroid and bounding-box size for every label. Parameters @@ -61,64 +88,222 @@ def _compute_cell_info(labels: np.ndarray) -> dict[int, CellInfo]: ------- Mapping from label ID to :class:`CellInfo`. """ + if labels.ndim != 2: + raise ValueError(f"Expected 2-D labels, got shape {labels.shape}") props = regionprops(labels) info: dict[int, CellInfo] = {} for p in props: min_row, min_col, max_row, max_col = p.bbox info[p.label] = CellInfo( label=p.label, - centroid_y=p.centroid[0], - centroid_x=p.centroid[1], + centroid_y=float(p.centroid[0]), + centroid_x=float(p.centroid[1]), bbox_h=max_row - min_row, bbox_w=max_col - min_col, ) return info +# --------------------------------------------------------------------------- +# Cell info — multiscale (read coarse pyramid level, scale back to target) +# --------------------------------------------------------------------------- + + +def _pick_coarsest_scale(label_tree: xr.DataTree) -> str: + """Return the coarsest scale key in a multiscale DataTree.""" + scales = sorted(label_tree.keys(), key=lambda s: int(s.replace("scale", ""))) + return scales[-1] + + +def _scale_dims(node: xr.DataTree | xr.DataArray) -> tuple[int, int]: + """Return (H, W) of a single scale level.""" + if isinstance(node, xr.DataTree): + # spatialdata stores the array under .ds["image"] + da = node.ds["image"] + else: + da = node + return int(da.sizes["y"]), int(da.sizes["x"]) + + +def compute_cell_info_multiscale( + label_tree: xr.DataTree, + target_scale: str, +) -> dict[int, CellInfo]: + """Compute cell info from the coarsest scale, rescaled to target scale. + + Reading the coarsest scale avoids materializing the full-res labels + just to find centroids. + + Parameters + ---------- + label_tree + Multi-scale labels (e.g. ``sdata.labels[key]``). + target_scale + Scale level whose pixel grid the returned centroids/bbox refer to. + + Returns + ------- + Cell info dict, in ``target_scale`` pixel coordinates. + """ + if target_scale not in label_tree: + raise ValueError(f"target_scale '{target_scale}' not found in DataTree. Available: {list(label_tree.keys())}") + + coarsest = _pick_coarsest_scale(label_tree) + if coarsest == target_scale: + labels_arr = label_tree[coarsest].ds["image"].values + if labels_arr.ndim > 2: + labels_arr = labels_arr.squeeze() + return compute_cell_info(labels_arr) + + coarse_h, coarse_w = _scale_dims(label_tree[coarsest]) + target_h, target_w = _scale_dims(label_tree[target_scale]) + + sy = target_h / coarse_h + sx = target_w / coarse_w + + labels_arr = label_tree[coarsest].ds["image"].values + if labels_arr.ndim > 2: + labels_arr = labels_arr.squeeze() + coarse_info = compute_cell_info(labels_arr) + + rescaled: dict[int, CellInfo] = {} + for lid, ci in coarse_info.items(): + rescaled[lid] = CellInfo( + label=ci.label, + centroid_y=ci.centroid_y * sy, + centroid_x=ci.centroid_x * sx, + bbox_h=int(np.ceil(ci.bbox_h * sy)), + bbox_w=int(np.ceil(ci.bbox_w * sx)), + ) + return rescaled + + +# --------------------------------------------------------------------------- +# Cell info — tiled (single-scale large labels, no full materialization) +# --------------------------------------------------------------------------- + + +def compute_cell_info_tiled( + labels_da: xr.DataArray, + chunk: int = 4096, +) -> dict[int, CellInfo]: + """Compute cell info by tile-streaming the labels array. + + Accumulates pixel sums + bbox per label across non-overlapping tiles. + Cells that span tile boundaries are merged correctly because the per- + label statistics are additive. + + Parameters + ---------- + labels_da + Lazy/eager 2-D xarray DataArray of integer labels. + chunk + Tile side length for streaming reads. + + Returns + ------- + Cell info dict in ``labels_da``'s native pixel grid. + """ + if labels_da.ndim > 2: + labels_da = labels_da.squeeze() + if labels_da.ndim != 2: + raise ValueError(f"Expected 2-D labels, got shape {labels_da.shape}") + + H, W = int(labels_da.sizes["y"]), int(labels_da.sizes["x"]) + + # Per-label accumulators + area: dict[int, int] = {} + sum_y: dict[int, float] = {} + sum_x: dict[int, float] = {} + min_y: dict[int, int] = {} + min_x: dict[int, int] = {} + max_y: dict[int, int] = {} + max_x: dict[int, int] = {} + + for y0 in range(0, H, chunk): + y1 = min(y0 + chunk, H) + for x0 in range(0, W, chunk): + x1 = min(x0 + chunk, W) + tile = labels_da.isel(y=slice(y0, y1), x=slice(x0, x1)).values + if tile.ndim > 2: + tile = tile.squeeze() + ids = np.unique(tile) + ids = ids[ids != 0] + if ids.size == 0: + continue + for lid in ids: + lid_int = int(lid) + ys, xs = np.where(tile == lid) + # Convert tile-local coords to global + ys_g = ys + y0 + xs_g = xs + x0 + area[lid_int] = area.get(lid_int, 0) + ys.size + sum_y[lid_int] = sum_y.get(lid_int, 0.0) + float(ys_g.sum()) + sum_x[lid_int] = sum_x.get(lid_int, 0.0) + float(xs_g.sum()) + ymin, ymax = int(ys_g.min()), int(ys_g.max()) + xmin, xmax = int(xs_g.min()), int(xs_g.max()) + min_y[lid_int] = min(min_y.get(lid_int, ymin), ymin) + min_x[lid_int] = min(min_x.get(lid_int, xmin), xmin) + max_y[lid_int] = max(max_y.get(lid_int, ymax), ymax) + max_x[lid_int] = max(max_x.get(lid_int, xmax), xmax) + + info: dict[int, CellInfo] = {} + for lid_int, a in area.items(): + info[lid_int] = CellInfo( + label=lid_int, + centroid_y=sum_y[lid_int] / a, + centroid_x=sum_x[lid_int] / a, + bbox_h=max_y[lid_int] - min_y[lid_int] + 1, + bbox_w=max_x[lid_int] - min_x[lid_int] + 1, + ) + return info + + +# --------------------------------------------------------------------------- +# Tile specification +# --------------------------------------------------------------------------- + + def _auto_margin(cell_info: dict[int, CellInfo]) -> int: """Compute the minimum margin that covers the largest cell's half-extent.""" if not cell_info: return 0 - max_half = max(max(c.bbox_h, c.bbox_w) for c in cell_info.values()) - # Full bbox extent: a cell's centroid can be at most half a bbox away - # from its edge, so margin = ceil(max_extent / 2) guarantees coverage. - # Add 1 pixel for safety (rounding / off-by-one). - return int(np.ceil(max_half / 2)) + 1 + max_extent = max(max(c.bbox_h, c.bbox_w) for c in cell_info.values()) + return int(np.ceil(max_extent / 2)) + 1 def build_tile_specs( - labels: np.ndarray, + shape: tuple[int, int], + cell_info: dict[int, CellInfo], tile_size: int = 2048, overlap_margin: int | Literal["auto"] = "auto", ) -> list[TileSpec]: - """Build tile specifications for a label image. + """Build tile specifications from precomputed cell info. - Each tile gets a non-overlapping *base* region (for centroid ownership) - and an extended *crop* region (base + margin on each side). Every - nonzero label is assigned to exactly one tile. + The new ``(shape, cell_info)`` signature makes this agnostic to label + materialization — caller supplies dims and centroids, this function + just partitions. Parameters ---------- - labels - 2-D integer label image (0 = background). + shape + ``(H, W)`` of the labels array. + cell_info + Output of :func:`compute_cell_info` (or one of its variants). tile_size Side length of the non-overlapping base grid cells. overlap_margin - Pixel margin added around each base region. ``"auto"`` computes the - minimum margin from the largest cell's bounding box. + Pixel margin added around each base region. ``"auto"`` computes + the minimum margin from the largest cell's bounding box. Returns ------- List of :class:`TileSpec`, one per grid cell that owns at least one - label. Empty tiles (no cells) are omitted. + label. Empty tiles are omitted. """ - if labels.ndim != 2: - raise ValueError(f"Expected 2-D labels, got shape {labels.shape}") if tile_size <= 0: raise ValueError(f"tile_size must be positive, got {tile_size}") - - H, W = labels.shape - cell_info = _compute_cell_info(labels) + H, W = shape if isinstance(overlap_margin, str) and overlap_margin == "auto": margin = _auto_margin(cell_info) @@ -130,18 +315,16 @@ def build_tile_specs( # Assign each cell to a base-grid cell by its centroid cell_to_tile: dict[int, tuple[int, int]] = {} for lid, ci in cell_info.items(): - tile_row = min(int(ci.centroid_y) // tile_size, (H - 1) // tile_size) - tile_col = min(int(ci.centroid_x) // tile_size, (W - 1) // tile_size) + tile_row = min(int(ci.centroid_y) // tile_size, max((H - 1) // tile_size, 0)) + tile_col = min(int(ci.centroid_x) // tile_size, max((W - 1) // tile_size, 0)) cell_to_tile[lid] = (tile_row, tile_col) - # Group cells by tile tile_to_cells: dict[tuple[int, int], set[int]] = {} for lid, key in cell_to_tile.items(): tile_to_cells.setdefault(key, set()).add(lid) - # Build specs (skip empty tiles) - n_rows = (H + tile_size - 1) // tile_size - n_cols = (W + tile_size - 1) // tile_size + n_rows = max((H + tile_size - 1) // tile_size, 1) + n_cols = max((W + tile_size - 1) // tile_size, 1) specs: list[TileSpec] = [] for row in range(n_rows): @@ -149,19 +332,14 @@ def build_tile_specs( owned = tile_to_cells.get((row, col), set()) if not owned: continue - - # Base region (non-overlapping) by0 = row * tile_size bx0 = col * tile_size by1 = min(by0 + tile_size, H) bx1 = min(bx0 + tile_size, W) - - # Crop region (with margin, clamped) cy0 = max(by0 - margin, 0) cx0 = max(bx0 - margin, 0) cy1 = min(by1 + margin, H) cx1 = min(bx1 + margin, W) - specs.append( TileSpec( base=(by0, bx0, by1, bx1), @@ -169,16 +347,30 @@ def build_tile_specs( owned_ids=frozenset(owned), ) ) - return specs +# --------------------------------------------------------------------------- +# Tile extraction +# --------------------------------------------------------------------------- + + +def _zero_non_owned(tile_labels: np.ndarray, owned: frozenset[int]) -> np.ndarray: + """Return a copy of ``tile_labels`` with non-owned labels set to 0.""" + out = tile_labels.copy() + unique_in_crop = np.unique(out) + for lid in unique_in_crop: + if lid != 0 and int(lid) not in owned: + out[out == lid] = 0 + return out + + def extract_tile( image: np.ndarray, labels: np.ndarray, spec: TileSpec, ) -> tuple[np.ndarray, np.ndarray]: - """Extract a tile's image and mask, zeroing out non-owned cells. + """Extract a tile's image and mask from in-memory arrays. Parameters ---------- @@ -187,7 +379,7 @@ def extract_tile( labels 2-D integer label image of shape ``(H, W)``. spec - Tile specification from :func:`build_tile_specs`. + Tile specification. Returns ------- @@ -198,39 +390,70 @@ def extract_tile( """ cy0, cx0, cy1, cx1 = spec.crop tile_image = image[:, cy0:cy1, cx0:cx1] - tile_labels = labels[cy0:cy1, cx0:cx1].copy() + tile_labels = _zero_non_owned(labels[cy0:cy1, cx0:cx1], spec.owned_ids) + return tile_image, tile_labels - # Zero out labels not owned by this tile - unique_in_crop = np.unique(tile_labels) - for lid in unique_in_crop: - if lid != 0 and lid not in spec.owned_ids: - tile_labels[tile_labels == lid] = 0 +def extract_tile_lazy( + image_da: xr.DataArray, + labels_da: xr.DataArray, + spec: TileSpec, +) -> tuple[np.ndarray, np.ndarray]: + """Extract a tile by lazy slicing then materializing only the crop. + + Parameters + ---------- + image_da + Lazy DataArray of shape ``(C, H, W)``. + labels_da + Lazy 2-D DataArray of labels. + spec + Tile specification. + + Returns + ------- + tile_image + Numpy ``(C, crop_h, crop_w)``. + tile_labels + Numpy ``(crop_h, crop_w)`` with non-owned cells zeroed out. + """ + cy0, cx0, cy1, cx1 = spec.crop + tile_image = image_da.isel(y=slice(cy0, cy1), x=slice(cx0, cx1)).values + tile_labels_raw = labels_da.isel(y=slice(cy0, cy1), x=slice(cx0, cx1)).values + if tile_labels_raw.ndim > 2: + tile_labels_raw = tile_labels_raw.squeeze() + tile_labels = _zero_non_owned(tile_labels_raw, spec.owned_ids) return tile_image, tile_labels -def verify_coverage( - labels: np.ndarray, - specs: list[TileSpec], -) -> None: +# --------------------------------------------------------------------------- +# Coverage verification +# --------------------------------------------------------------------------- + + +def verify_coverage(label_ids: set[int], specs: list[TileSpec]) -> None: """Assert that tile specs provide full, non-overlapping cell coverage. + Parameters + ---------- + label_ids + Set of all expected nonzero label IDs. + specs + Tile specifications. + Raises ------ AssertionError - If any cell is missing or assigned to more than one tile. + If any cell is missing, duplicated, or unknown. """ - all_label_ids = set(np.unique(labels)) - all_label_ids.discard(0) - owned_union: set[int] = set() for spec in specs: overlap = owned_union & spec.owned_ids assert not overlap, f"Cells {overlap} assigned to multiple tiles" owned_union |= spec.owned_ids - missing = all_label_ids - owned_union + missing = label_ids - owned_union assert not missing, f"Cells {missing} not assigned to any tile" - extra = owned_union - all_label_ids + extra = owned_union - label_ids assert not extra, f"Tile specs reference non-existent labels {extra}" diff --git a/tests/experimental/test_calculate_image_features.py b/tests/experimental/test_calculate_image_features.py index 14df7a401..7da9afb73 100644 --- a/tests/experimental/test_calculate_image_features.py +++ b/tests/experimental/test_calculate_image_features.py @@ -1,5 +1,12 @@ +"""Tests for calculate_image_features. + +Uses a small synthetic SpatialData (200×200 image, ~20 cells) so tests +run in seconds without downloading real data. +""" + from __future__ import annotations +import anndata as ad import numpy as np import pandas as pd import pytest @@ -11,289 +18,625 @@ @pytest.fixture() -def sdata_hne_small(sdata_hne): - """Small subset of sdata_hne for faster tests (aim for 10–100 spots).""" - - if "spots" not in sdata_hne.shapes: - return sdata_hne +def sdata_synthetic(): + """Synthetic SpatialData with a small 3-channel image and ~20 rectangular cells.""" + rng = np.random.default_rng(42) + H, W, C = 200, 200, 3 + + image_data = rng.integers(0, 255, (C, H, W), dtype=np.uint8) + image_xr = xr.DataArray( + image_data, + dims=["c", "y", "x"], + coords={"c": ["R", "G", "B"]}, + ) - spots = sdata_hne.shapes["spots"] - try: - spots = spots.loc[~spots.geometry.is_empty] # type: ignore[attr-defined] - except AttributeError: - pass + # Place ~20 rectangular cells in a grid (non-overlapping, 30×30 each) + labels_data = np.zeros((H, W), dtype=np.int32) + cell_id = 0 + for y in range(10, H - 30, 40): + for x in range(10, W - 30, 40): + cell_id += 1 + labels_data[y : y + 30, x : x + 30] = cell_id - # Take the first ~100 spots to keep rasterization fast and non-empty - spots_subset = spots.iloc[:100] if len(spots) > 100 else spots - if len(spots_subset) == 0: - return sdata_hne + labels_xr = xr.DataArray(labels_data, dims=["y", "x"]) return SpatialData( - images=sdata_hne.images, - labels=sdata_hne.labels, - shapes={"spots": spots_subset}, - tables=sdata_hne.tables, + images={"test_img": Image2DModel.parse(image_xr)}, + labels={"test_labels": Labels2DModel.parse(labels_xr)}, ) class TestCalculateImageFeatures: """Tests for calculate_image_features function.""" - def test_calculate_features_with_shapes(self, sdata_hne_small): - """Test basic feature calculation with shapes.""" - # Use minimal measurements to keep test fast + # --- Basic functionality --- + + def test_skimage_label_inplace(self, sdata_synthetic): + """Inplace stores AnnData in sdata.tables.""" sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - shapes_key="spots", - scale="scale0", - measurements=["skimage:label"], + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["skimage:label"], adata_key_added="morphology", - n_jobs=1, inplace=True, ) - # Check that the table was added - assert "morphology" in sdata_hne_small.tables - adata = sdata_hne_small.tables["morphology"] - - # Check basic structure + assert "morphology" in sdata_synthetic.tables + adata = sdata_synthetic.tables["morphology"] assert adata.n_obs > 0 assert adata.n_vars > 0 - - # Check that spatialdata_attrs is set assert "spatialdata_attrs" in adata.uns - assert adata.uns["spatialdata_attrs"]["region"] == "spots" - assert adata.uns["spatialdata_attrs"]["region_key"] == "region" - assert adata.uns["spatialdata_attrs"]["instance_key"] == "label_id" - - # Check that region and label_id are in obs + assert adata.uns["spatialdata_attrs"]["region"] == "test_labels" assert "region" in adata.obs assert "label_id" in adata.obs - def test_calculate_features_copy(self, sdata_hne_small): - """Test that copy=False returns DataFrame.""" + def test_not_inplace_returns_anndata(self, sdata_synthetic): result = sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - shapes_key="spots", - scale="scale0", - measurements=["skimage:label"], - n_jobs=1, + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["skimage:label"], inplace=False, ) + assert isinstance(result, ad.AnnData) + assert result.n_obs > 0 + assert result.n_vars > 0 - # Should return DataFrame when inplace=False - assert isinstance(result, pd.DataFrame) - assert result.shape[0] > 0 - assert result.shape[1] > 0 + # --- Feature sources --- + + def test_skimage_label_properties(self, sdata_synthetic): + """skimage:label produces mask-only morphological features.""" + result = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["skimage:label"], + inplace=False, + ) + assert "area" in result.var_names + + def test_skimage_label_single_property(self, sdata_synthetic): + """Fine-grained: skimage:label:area → only area column.""" + result = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["skimage:label:area"], + inplace=False, + ) + assert list(result.var_names) == ["area"] - def test_invalid_image_key(self, sdata_hne_small): - """Test error when image key doesn't exist.""" + def test_skimage_intensity(self, sdata_synthetic): + """skimage:label+image produces per-channel intensity features.""" + result = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["skimage:label+image"], + inplace=False, + ) + assert result.n_vars > 0 + assert any("_" in col for col in result.var_names) + + def test_skimage_intensity_single_property(self, sdata_synthetic): + """Fine-grained: only intensity_mean per channel.""" + result = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["skimage:label+image:intensity_mean"], + inplace=False, + ) + assert all(col.startswith("intensity_mean_") for col in result.var_names) + assert not any(col.startswith("intensity_max") for col in result.var_names) + + def test_cpmeasure_sizeshape(self, sdata_synthetic): + result = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["cpmeasure:sizeshape"], + inplace=False, + ) + assert isinstance(result, ad.AnnData) + assert result.n_obs > 0 + assert any("Area" in col for col in result.var_names) + + def test_cpmeasure_intensity(self, sdata_synthetic): + result = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["cpmeasure:intensity"], + inplace=False, + ) + assert isinstance(result, ad.AnnData) + assert result.n_obs > 0 + # intensity features are per-channel, so column names contain "__" + assert any("__" in col for col in result.var_names) + + def test_cpmeasure_correlation(self, sdata_synthetic): + result = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["cpmeasure:correlation"], + inplace=False, + ) + assert isinstance(result, ad.AnnData) + assert result.n_obs > 0 + + def test_cpmeasure_default_all(self, sdata_synthetic): + """features=None enables all cp_measure features.""" + result = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + inplace=False, + ) + assert isinstance(result, ad.AnnData) + assert result.n_obs > 0 + assert result.n_vars > 50 + + def test_squidpy_summary(self, sdata_synthetic): + result = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["squidpy:summary"], + inplace=False, + ) + assert isinstance(result, ad.AnnData) + assert result.n_obs > 0 + assert any(col.startswith("summary_mean") for col in result.var_names) + + def test_squidpy_texture(self, sdata_synthetic): + result = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["squidpy:texture"], + inplace=False, + ) + assert isinstance(result, ad.AnnData) + assert result.n_obs > 0 + assert any(col.startswith("texture_contrast") for col in result.var_names) + + def test_squidpy_color_hist(self, sdata_synthetic): + result = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["squidpy:color_hist"], + inplace=False, + ) + assert isinstance(result, ad.AnnData) + assert result.n_obs > 0 + assert any(col.startswith("color_hist_bin") for col in result.var_names) + + # --- Mixed sources --- + + def test_mixed_cpmeasure_and_skimage(self, sdata_synthetic): + result = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["cpmeasure:sizeshape", "skimage:label:area"], + inplace=False, + ) + assert "area" in result.var_names + assert any("Area" in col for col in result.var_names) + + # --- Validation errors --- + + def test_invalid_image_key(self, sdata_synthetic): with pytest.raises(ValueError, match="Image key 'nonexistent' not found"): sq.experimental.im.calculate_image_features( - sdata_hne_small, + sdata_synthetic, image_key="nonexistent", - shapes_key="spots", - measurements=["skimage:label"], + labels_key="test_labels", + features=["skimage:label"], ) - def test_invalid_shapes_key(self, sdata_hne_small): - """Test error when shapes key doesn't exist.""" - with pytest.raises(ValueError, match="Shapes key 'nonexistent' not found"): + def test_invalid_labels_key(self, sdata_synthetic): + with pytest.raises(ValueError, match="Labels key 'nonexistent' not found"): sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - shapes_key="nonexistent", - measurements=["skimage:label"], + sdata_synthetic, + image_key="test_img", + labels_key="nonexistent", + features=["skimage:label"], ) - def test_both_labels_and_shapes_error(self, sdata_hne_small): - """Test error when both labels_key and shapes_key are provided.""" - with pytest.raises(ValueError, match="Use either `labels_key` or `shapes_key`, not both"): + def test_both_labels_and_shapes_error(self, sdata_synthetic): + with pytest.raises(ValueError, match="Use either"): sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - labels_key="fake_labels", - shapes_key="spots", - measurements=["skimage:label"], + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + shapes_key="fake", + features=["skimage:label"], ) - def test_missing_labels_and_shapes(self, sdata_hne_small): - """Test error when neither labels_key nor shapes_key is provided.""" - with pytest.raises(ValueError, match="Provide either `labels_key` or `shapes_key`."): + def test_missing_labels_and_shapes(self, sdata_synthetic): + with pytest.raises(ValueError, match="Provide either"): sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - measurements=["skimage:label"], + sdata_synthetic, + image_key="test_img", + features=["skimage:label"], ) - def test_invalid_measurement(self, sdata_hne_small): - """Test error with invalid measurement type.""" - with pytest.raises(ValueError, match="Invalid measurement"): + def test_invalid_feature(self, sdata_synthetic): + with pytest.raises(ValueError, match="Unknown feature"): sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - shapes_key="spots", - scale="scale0", - measurements=["nonexistent:measurement"], + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["nonexistent:measurement"], ) - def test_no_valid_measurements(self, sdata_hne_small): - """Test error when no valid measurements are requested.""" - with pytest.raises(ValueError, match="No valid measurements requested"): + def test_no_valid_features(self, sdata_synthetic): + with pytest.raises(ValueError, match="No valid features requested"): sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - shapes_key="spots", - scale="scale0", - measurements=[], - n_jobs=1, + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=[], inplace=False, ) - def test_with_intensity_features(self, sdata_hne_small): - """Test intensity-based features with multi-channel image.""" - result = sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - shapes_key="spots", - scale="scale0", - measurements=["skimage:label+image"], - n_jobs=1, - inplace=False, - ) - - assert result.shape[0] > 0 - assert result.shape[1] > 0 - # Column names should include channel information - assert any("_" in col for col in result.columns) - - def test_dimension_mismatch(self): - """Test error when image and labels have mismatched dimensions.""" + def test_dimension_mismatch_identity_succeeds_with_overlap(self, capsys): + """Image and labels have different dims but identity transforms; + we crop to the overlap rectangle and process the cells that fall inside. + Regression for the PR #982 'fails despite alignment transform' note. + """ rng = np.random.default_rng(42) - - # Create image: 100x100, 3 channels - image_data = rng.integers(0, 255, (3, 100, 100), dtype=np.uint8) image_xr = xr.DataArray( - image_data, + rng.integers(0, 255, (3, 200, 200), dtype=np.uint8), dims=["c", "y", "x"], coords={"c": ["R", "G", "B"]}, ) + labels_arr = np.zeros((100, 100), dtype=np.int32) + labels_arr[10:40, 10:40] = 1 + labels_arr[60:90, 60:90] = 2 + labels_xr = xr.DataArray(labels_arr, dims=["y", "x"]) + sdata = SpatialData( + images={"img": Image2DModel.parse(image_xr)}, + labels={"lbl": Labels2DModel.parse(labels_xr)}, + ) - # Create labels: 80x80 (different dimensions) - labels_data = rng.integers(1, 10, (80, 80), dtype=np.uint32) - labels_xr = xr.DataArray(labels_data, dims=["y", "x"]) + adata = sq.experimental.im.calculate_image_features( + sdata, + image_key="img", + labels_key="lbl", + features=["skimage:label"], + inplace=False, + ) + assert adata is not None and adata.n_obs == 2 + captured = capsys.readouterr() + # No cells dropped: labels (100, 100) sits fully inside image (200, 200) at origin + assert "Cell drop report" in captured.out + + # --- Channel selection --- + + def test_channel_selection_by_name(self, sdata_synthetic): + """Selecting a single channel reduces feature columns.""" + result_all = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + features=["skimage:label+image:intensity_mean"], + inplace=False, + ) + # Image2DModel.parse converts channel coords to integers [0,1,2] + result_one = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + channels=["0"], + features=["skimage:label+image:intensity_mean"], + inplace=False, + ) + # All channels → 3 columns; one channel → 1 column + assert result_all.n_vars == 3 + assert result_one.n_vars == 1 + assert "intensity_mean_0" in result_one.var_names - sdata = SpatialData( - images={"test_img": Image2DModel.parse(image_xr)}, - labels={"test_labels": Labels2DModel.parse(labels_xr)}, + def test_channel_selection_by_index(self, sdata_synthetic): + """Channel selection by integer index.""" + result = sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + channels=[0], + features=["squidpy:summary"], + inplace=False, ) + assert result.n_obs > 0 + # Only channel 0 features + assert all("_0" in col for col in result.var_names) - with pytest.raises(ValueError, match="do not match"): + def test_channel_selection_invalid(self, sdata_synthetic): + with pytest.raises(ValueError, match="Channel 'DAPI' not found"): sq.experimental.im.calculate_image_features( - sdata, + sdata_synthetic, image_key="test_img", labels_key="test_labels", - measurements=["skimage:label"], - n_jobs=1, + channels=["DAPI"], + features=["skimage:label"], ) - def test_with_progress_bar(self, sdata_hne_small): - """Test that progress bar can be enabled.""" - result = sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - shapes_key="spots", - scale="scale0", - measurements=["skimage:label"], - show_progress_bar=True, - n_jobs=1, - inplace=False, + # --- Tiled vs non-tiled equivalence --- + + def test_tiled_vs_single_tile_equivalence(self, sdata_synthetic): + """Tile-invariant features should be identical whether we tile or not. + + Position-dependent features (centroid, perimeter_crofton) are expected + to differ across tile boundaries, so we test with ``area`` and + ``squidpy:summary`` which depend only on the cell's pixel values. + """ + kw = { + "image_key": "test_img", + "labels_key": "test_labels", + "features": ["skimage:label:area", "squidpy:summary"], + "inplace": False, + "invalid_as_zero": True, + } + # Single tile (tile_size >= image → no tiling) + result_single = sq.experimental.im.calculate_image_features(sdata_synthetic, tile_size=1000, **kw) + # Multiple tiles (tile_size=100 → 4 tiles on 200×200) + result_tiled = sq.experimental.im.calculate_image_features(sdata_synthetic, tile_size=100, **kw) + + # Same cells, same features + assert result_single.n_obs == result_tiled.n_obs + assert set(result_single.var_names) == set(result_tiled.var_names) + + # Align columns and rows for comparison + common_cols = list(result_single.var_names) + df_single = pd.DataFrame(result_single.X, index=result_single.obs["label_id"].values, columns=common_cols) + df_tiled = pd.DataFrame( + result_tiled[:, common_cols].X, index=result_tiled.obs["label_id"].values, columns=common_cols + ) + df_single = df_single.sort_index() + df_tiled = df_tiled.sort_index() + + np.testing.assert_array_equal(df_single.index, df_tiled.index) + np.testing.assert_allclose(df_single.values, df_tiled.values, rtol=1e-5, atol=1e-5) + + # --- Parallelization --- + + def test_n_jobs_produces_same_result(self, sdata_synthetic): + """n_jobs>1 produces the same result as n_jobs=1.""" + kw = { + "image_key": "test_img", + "labels_key": "test_labels", + "features": ["skimage:label:area"], + "inplace": False, + } + result_seq = sq.experimental.im.calculate_image_features(sdata_synthetic, n_jobs=1, **kw) + result_par = sq.experimental.im.calculate_image_features(sdata_synthetic, n_jobs=2, **kw) + + assert result_seq.n_obs == result_par.n_obs + np.testing.assert_array_equal( + result_seq.X[np.argsort(result_seq.obs["label_id"].values)], + result_par.X[np.argsort(result_par.obs["label_id"].values)], ) - assert isinstance(result, pd.DataFrame) - assert result.shape[0] > 0 - def test_single_mask_property(self, sdata_hne_small): - """Test selecting a single skimage mask property (area) only.""" - result = sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - shapes_key="spots", - scale="scale0", - measurements=["skimage:label:area"], +# --------------------------------------------------------------------------- +# Per-PR-#982-concern regression tests +# --------------------------------------------------------------------------- + + +def _toy_sdata( + image_shape: tuple[int, int] = (200, 200), + n_channels: int = 3, + channel_names: list[str] | None = None, + labels_shape: tuple[int, int] | None = None, + labels_translation: tuple[float, float] | None = None, + labels_scale: tuple[float, float] | None = None, + label_ids: list[int] | None = None, +) -> SpatialData: + """Build a synthetic SpatialData with controllable label/image transforms.""" + from spatialdata.transformations import Scale, Translation, set_transformation + + rng = np.random.default_rng(0) + H, W = image_shape + image_data = rng.integers(0, 255, (n_channels, H, W), dtype=np.uint8) + image_xr = xr.DataArray(image_data, dims=["c", "y", "x"]) + + LH, LW = labels_shape if labels_shape is not None else image_shape + labels_data = np.zeros((LH, LW), dtype=np.int32) + ids = label_ids if label_ids is not None else list(range(1, 6)) + cell_h, cell_w = max(LH // 8, 4), max(LW // 8, 4) + for i, lid in enumerate(ids): + row = i // 3 + col = i % 3 + y0 = 10 + row * (cell_h + 6) + x0 = 10 + col * (cell_w + 6) + if y0 + cell_h > LH or x0 + cell_w > LW: + continue + labels_data[y0 : y0 + cell_h, x0 : x0 + cell_w] = lid + labels_xr = xr.DataArray(labels_data, dims=["y", "x"]) + + img_el = ( + Image2DModel.parse(image_xr, c_coords=channel_names) + if channel_names is not None + else Image2DModel.parse(image_xr) + ) + lbl_el = Labels2DModel.parse(labels_xr) + + if labels_translation is not None: + ty, tx = labels_translation + set_transformation(lbl_el, Translation([tx, ty], axes=("x", "y")), "global") + if labels_scale is not None: + sy, sx = labels_scale + set_transformation(lbl_el, Scale([sx, sy], axes=("x", "y")), "global") + + return SpatialData(images={"img": img_el}, labels={"lbl": lbl_el}) + + +class TestPR982Concerns: + """Regression tests for the six open concerns on PR #982.""" + + # -- Concern 1: channel names are str-typed in output columns -- + + def test_concern1_channel_str_names_in_columns(self): + sdata = _toy_sdata(channel_names=["DAPI", "CD3", "CD8"]) + adata = sq.experimental.im.calculate_image_features( + sdata, + image_key="img", + labels_key="lbl", + features=["squidpy:summary"], inplace=False, - n_jobs=1, ) + cols = list(adata.var_names) + assert any("_DAPI" in c for c in cols) + assert any("_CD3" in c for c in cols) + assert any("_CD8" in c for c in cols) + # Make sure the numeric-fallback names did not slip in: + assert not any(c.endswith("_0") or c.endswith("_1") or c.endswith("_2") for c in cols) - assert isinstance(result, pd.DataFrame) - assert result.shape[0] > 0 - assert list(result.columns) == ["area"] + # -- Concern 2: progress logs are emitted -- - def test_single_intensity_property(self, sdata_hne_small): - """Test selecting a single intensity property (mean) per channel.""" - result = sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - shapes_key="spots", - scale="scale0", - measurements=["skimage:label+image:intensity_mean"], + def test_concern2_progress_log_emitted(self, capsys): + sdata = _toy_sdata() + sq.experimental.im.calculate_image_features( + sdata, + image_key="img", + labels_key="lbl", + features=["skimage:label:area"], + tile_size=80, # forces >1 tile on 200x200 + inplace=False, + ) + captured = capsys.readouterr() + import re + + # spatialdata's logger renders via rich and injects ANSI escapes + # between tokens, so the digits in "Tile 1/9" are wrapped. + ansi_re = re.compile(r"\x1b\[[0-9;]*m") + plain = ansi_re.sub("", captured.out) + assert re.search(r"Tile \d+/\d+", plain), f"no progress log in:\n{plain}" + + # -- Concern 3 (a): identity transforms, dim mismatch -> overlap path -- + + def test_concern3_identity_dim_mismatch_uses_overlap(self, capsys): + # labels (100, 100) sits inside image (200, 200) at the origin under Identity. + sdata = _toy_sdata(image_shape=(200, 200), labels_shape=(100, 100)) + adata = sq.experimental.im.calculate_image_features( + sdata, + image_key="img", + labels_key="lbl", + features=["skimage:label:area"], inplace=False, - n_jobs=1, ) + captured = capsys.readouterr() + assert "Cell drop report" in captured.out + # All cells fit inside the (100, 100) labels image, so none are dropped: + assert adata.n_obs > 0 - # Expect one column per channel - assert isinstance(result, pd.DataFrame) - assert result.shape[0] > 0 - assert all(col.endswith(("_0", "_1", "_2")) or "_" in col for col in result.columns) - # Should not contain other intensity props - assert not any(col.startswith("intensity_max") for col in result.columns) + # -- Concern 3 (b): integer translation -> overlap is offset -- - def test_squidpy_summary(self, sdata_hne_small): - """Test squidpy summary stats per channel.""" - result = sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - shapes_key="spots", - scale="scale0", - measurements=["squidpy:summary"], + def test_concern3_integer_translation(self): + sdata = _toy_sdata( + image_shape=(200, 200), + labels_shape=(100, 100), + labels_translation=(50.0, 0.0), + ) + adata = sq.experimental.im.calculate_image_features( + sdata, + image_key="img", + labels_key="lbl", + features=["skimage:label:area"], inplace=False, - n_jobs=1, ) + assert adata.n_obs > 0 - assert isinstance(result, pd.DataFrame) - assert result.shape[0] > 0 - assert any(col.startswith("summary_mean") for col in result.columns) + # -- Concern 3 (c): non-integer scale, strict -> raises with hint -- - def test_squidpy_texture(self, sdata_hne_small): - """Test squidpy texture stats per channel.""" - result = sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - shapes_key="spots", - scale="scale0", - measurements=["squidpy:texture"], - inplace=False, - n_jobs=1, + def test_concern3_strict_raises_on_non_integer_scale(self): + sdata = _toy_sdata( + image_shape=(200, 200), + labels_shape=(100, 100), + labels_scale=(1.7, 1.7), ) + with pytest.raises(ValueError, match="spatialdata.rasterize"): + sq.experimental.im.calculate_image_features( + sdata, + image_key="img", + labels_key="lbl", + features=["skimage:label:area"], + align_mode="strict", + inplace=False, + ) - assert isinstance(result, pd.DataFrame) - assert result.shape[0] > 0 - assert any(col.startswith("texture_contrast") for col in result.columns) + # -- Concern 3 (d): align_mode='rasterize' resamples and proceeds -- - def test_squidpy_color_hist(self, sdata_hne_small): - """Test squidpy color histogram per channel.""" - result = sq.experimental.im.calculate_image_features( - sdata_hne_small, - image_key="hne", - shapes_key="spots", - scale="scale0", - measurements=["squidpy:color_hist"], + def test_concern3_rasterize_path_succeeds(self, capsys): + sdata = _toy_sdata( + image_shape=(200, 200), + labels_shape=(100, 100), + labels_scale=(1.7, 1.7), + ) + adata = sq.experimental.im.calculate_image_features( + sdata, + image_key="img", + labels_key="lbl", + features=["skimage:label:area"], + align_mode="rasterize", + inplace=False, + ) + assert adata.n_obs > 0 + captured = capsys.readouterr() + # rasterize path emits a warning about materialization + assert "rasterize" in captured.out.lower() or "rasterize" in captured.err.lower() + + # -- Concern 4: channel subset selection -- + + def test_concern4_channel_subset_by_index(self): + sdata = _toy_sdata(n_channels=4, channel_names=["c0", "c1", "c2", "c3"]) + adata = sq.experimental.im.calculate_image_features( + sdata, + image_key="img", + labels_key="lbl", + features=["squidpy:summary"], + channels=[0, 2], inplace=False, - n_jobs=1, ) + cols = list(adata.var_names) + assert any("_c0" in c for c in cols) + assert any("_c2" in c for c in cols) + assert not any("_c1" in c for c in cols) + assert not any("_c3" in c for c in cols) - assert isinstance(result, pd.DataFrame) - assert result.shape[0] > 0 - assert any(col.startswith("color_hist_bin") for col in result.columns) + # -- Concern 5: spatialdata_attrs on output table -- + + def test_concern5_spatialdata_attrs_present(self): + sdata = _toy_sdata() + sq.experimental.im.calculate_image_features( + sdata, + image_key="img", + labels_key="lbl", + features=["skimage:label:area"], + inplace=True, + adata_key_added="morphology", + ) + attrs = sdata.tables["morphology"].uns["spatialdata_attrs"] + assert "region" in attrs + assert "region_key" in attrs + assert "instance_key" in attrs + assert attrs["region"] == "lbl" + + # -- Concern 6: non-contiguous label IDs survive cp_measure roundtrip -- + + def test_concern6_non_contiguous_label_ids(self): + sdata = _toy_sdata(label_ids=[1, 37, 82]) + adata = sq.experimental.im.calculate_image_features( + sdata, + image_key="img", + labels_key="lbl", + features=["skimage:label:area"], + inplace=False, + ) + observed = set(adata.obs["label_id"].astype(int).tolist()) + assert {1, 37, 82}.issubset(observed) diff --git a/tests/experimental/test_tiling.py b/tests/experimental/test_tiling.py index 15ca8d761..e8428093f 100644 --- a/tests/experimental/test_tiling.py +++ b/tests/experimental/test_tiling.py @@ -12,11 +12,15 @@ import matplotlib.pyplot as plt import numpy as np import pytest +import xarray as xr from squidpy.experimental.im._tiling import ( - TileSpec, build_tile_specs, + compute_cell_info, + compute_cell_info_multiscale, + compute_cell_info_tiled, extract_tile, + extract_tile_lazy, verify_coverage, ) from tests.conftest import PlotTester, PlotTesterMeta @@ -95,6 +99,22 @@ def _expected_tile_key(cy: float, cx: float, tile_size: int, image_size: int) -> return (row, col) +_TILE_SIZE = 250 # 500 / 250 = 2×2 = 4 tiles + + +def _specs_from_labels(labels, tile_size=_TILE_SIZE, overlap_margin="auto"): + """Convenience: compute cell info + build tile specs from a numpy label array.""" + cell_info = compute_cell_info(labels) + return build_tile_specs(labels.shape, cell_info, tile_size=tile_size, overlap_margin=overlap_margin) + + +def _label_ids(labels): + """All nonzero label IDs as a set.""" + ids = set(np.unique(labels).tolist()) + ids.discard(0) + return ids + + # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @@ -117,26 +137,24 @@ def brick_image(): # build_tile_specs — deterministic checks # --------------------------------------------------------------------------- -_TILE_SIZE = 250 # 500 / 250 = 2×2 = 4 tiles - class TestBuildTileSpecs: def test_four_tiles(self, brick_labels): """500×500 with tile_size=250 produces at most 4 tiles.""" labels, _, _ = brick_labels - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) assert len(specs) <= 4 def test_full_coverage(self, brick_labels): """Every cell is assigned to exactly one tile.""" labels, _, _ = brick_labels - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) - verify_coverage(labels, specs) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) + verify_coverage(_label_ids(labels), specs) def test_cell_assigned_to_centroid_tile(self, brick_labels): """Each cell's tile matches the tile we predict from its centroid.""" labels, centroids, _ = brick_labels - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) # Build actual mapping: cell_id → tile base origin actual: dict[int, tuple[int, int]] = {} @@ -154,7 +172,7 @@ def test_cell_assigned_to_centroid_tile(self, brick_labels): def test_no_duplicates(self, brick_labels): """No cell ID appears in more than one tile.""" labels, _, _ = brick_labels - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) seen: set[int] = set() for spec in specs: @@ -168,8 +186,6 @@ def test_boundary_cells_exist(self, brick_labels): # A cell straddles a boundary if its rectangle crosses y=250 or x=250 # but its centroid is on one side boundary_cells = [] - step_y = _CELL_H + gap - step_x = _CELL_W + gap for lid, (cy, cx) in centroids.items(): half_h = _CELL_H / 2.0 half_w = _CELL_W / 2.0 @@ -183,7 +199,7 @@ def test_boundary_cells_exist(self, brick_labels): # With cell_h=20 and various gaps, we expect some boundary cells # (the brick offset makes this likely for odd rows near y=250) # Just verify they're all assigned somewhere - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) all_owned = set() for s in specs: all_owned |= s.owned_ids @@ -193,7 +209,7 @@ def test_boundary_cells_exist(self, brick_labels): def test_crop_contains_owned_cells_fully(self, brick_labels): """Every owned cell's rectangle fits inside its tile's crop region.""" labels, centroids, _ = brick_labels - specs = build_tile_specs(labels, tile_size=_TILE_SIZE, overlap_margin="auto") + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE, overlap_margin="auto") for spec in specs: cy0, cx0, cy1, cx1 = spec.crop @@ -216,33 +232,27 @@ def test_crop_contains_owned_cells_fully(self, brick_labels): class TestBuildTileSpecsEdgeCases: def test_empty_labels(self): labels = np.zeros((500, 500), dtype=np.int32) - specs = build_tile_specs(labels, tile_size=250) + specs = _specs_from_labels(labels, tile_size=250) assert specs == [] - verify_coverage(labels, specs) + verify_coverage(_label_ids(labels), specs) def test_single_cell_whole_image(self): """One cell that fills most of the image.""" labels = np.zeros((500, 500), dtype=np.int32) labels[10:490, 10:490] = 1 - specs = build_tile_specs(labels, tile_size=250) - verify_coverage(labels, specs) + specs = _specs_from_labels(labels, tile_size=250) + verify_coverage(_label_ids(labels), specs) assert len(specs) == 1 # centroid is at ~(250,250), lands in one tile def test_invalid_tile_size(self): - labels = np.zeros((100, 100), dtype=np.int32) with pytest.raises(ValueError, match="tile_size must be positive"): - build_tile_specs(labels, tile_size=0) - - def test_invalid_labels_ndim(self): - labels = np.zeros((2, 100, 100), dtype=np.int32) - with pytest.raises(ValueError, match="Expected 2-D labels"): - build_tile_specs(labels, tile_size=100) + build_tile_specs((100, 100), {}, tile_size=0) def test_tile_size_larger_than_image(self): """tile_size > image → single tile.""" labels, _ = _make_brick_labels(image_size=100, gap=5) - specs = build_tile_specs(labels, tile_size=1000) - verify_coverage(labels, specs) + specs = _specs_from_labels(labels, tile_size=1000) + verify_coverage(_label_ids(labels), specs) assert len(specs) == 1 @@ -255,7 +265,7 @@ class TestExtractTile: def test_non_owned_cells_zeroed(self, brick_labels, brick_image): """Only owned cells survive in the extracted tile mask.""" labels, _, _ = brick_labels - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) for spec in specs: _, tile_lbl = extract_tile(brick_image, labels, spec) @@ -266,7 +276,7 @@ def test_non_owned_cells_zeroed(self, brick_labels, brick_image): def test_owned_cell_pixels_preserved(self, brick_labels, brick_image): """Pixel values for owned cells match the original labels.""" labels, _, _ = brick_labels - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) for spec in specs: cy0, cx0, cy1, cx1 = spec.crop @@ -279,7 +289,7 @@ def test_owned_cell_pixels_preserved(self, brick_labels, brick_image): def test_original_labels_not_mutated(self, brick_labels, brick_image): labels, _, _ = brick_labels labels_copy = labels.copy() - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) for spec in specs: extract_tile(brick_image, labels, spec) np.testing.assert_array_equal(labels, labels_copy) @@ -287,7 +297,7 @@ def test_original_labels_not_mutated(self, brick_labels, brick_image): def test_image_crop_shape(self, brick_labels, brick_image): """Extracted image has shape (C, crop_h, crop_w).""" labels, _, _ = brick_labels - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) for spec in specs: tile_img, tile_lbl = extract_tile(brick_image, labels, spec) cy0, cx0, cy1, cx1 = spec.crop @@ -304,8 +314,8 @@ class TestEndToEnd: def test_roundtrip_no_cells_lost(self, brick_labels, brick_image): """Build specs → extract tiles → union of labels == all cells.""" labels, centroids, _ = brick_labels - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) - verify_coverage(labels, specs) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) + verify_coverage(_label_ids(labels), specs) recovered: set[int] = set() for spec in specs: @@ -323,8 +333,8 @@ def test_touching_cells_no_merge(self): n_cells = len(centroids) assert n_cells > 0 - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) - verify_coverage(labels, specs) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) + verify_coverage(_label_ids(labels), specs) # Total owned cells across all tiles == total cells total_owned = sum(len(s.owned_ids) for s in specs) @@ -336,8 +346,8 @@ def test_nontouching_cells_same_result(self): n_cells = len(centroids) assert n_cells > 0 - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) - verify_coverage(labels, specs) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) + verify_coverage(_label_ids(labels), specs) total_owned = sum(len(s.owned_ids) for s in specs) assert total_owned == n_cells @@ -390,15 +400,131 @@ def _plot_tile_assignment(labels, specs, title=""): ax.set_ylabel("y") +# --------------------------------------------------------------------------- +# Lazy / multiscale helpers +# --------------------------------------------------------------------------- + + +def _make_multiscale_tree(labels: np.ndarray, n_scales: int = 3) -> xr.DataTree: + """Build a tiny multiscale DataTree by integer-downsampling.""" + scales: dict[str, xr.DataTree] = {} + for i in range(n_scales): + step = 2**i + sub = labels[::step, ::step] + ds = xr.Dataset({"image": xr.DataArray(sub, dims=("y", "x"))}) + scales[f"scale{i}"] = xr.DataTree(ds) + return xr.DataTree.from_dict(scales) + + +class TestComputeCellInfoMultiscale: + def test_target_is_coarsest_matches_eager(self): + labels, _ = _make_brick_labels(gap=10) + tree = _make_multiscale_tree(labels, n_scales=3) + # scale2 is coarsest. Target it -> use that scale directly. + info_ms = compute_cell_info_multiscale(tree, target_scale="scale2") + info_eager = compute_cell_info(tree["scale2"].ds["image"].values) + assert set(info_ms.keys()) == set(info_eager.keys()) + for lid in info_ms: + assert info_ms[lid].centroid_y == pytest.approx(info_eager[lid].centroid_y, abs=0.5) + assert info_ms[lid].centroid_x == pytest.approx(info_eager[lid].centroid_x, abs=0.5) + + def test_rescale_to_finer(self): + labels, _ = _make_brick_labels(gap=10) + tree = _make_multiscale_tree(labels, n_scales=3) + info_ms = compute_cell_info_multiscale(tree, target_scale="scale0") + info_eager = compute_cell_info(labels) + # Centroids should be close (within ~1 px due to coarse-scale quantization) + assert set(info_ms.keys()) == set(info_eager.keys()) + for lid in info_ms: + assert info_ms[lid].centroid_y == pytest.approx(info_eager[lid].centroid_y, abs=4.0) + assert info_ms[lid].centroid_x == pytest.approx(info_eager[lid].centroid_x, abs=4.0) + + def test_missing_target_raises(self): + labels, _ = _make_brick_labels(gap=10) + tree = _make_multiscale_tree(labels, n_scales=2) + with pytest.raises(ValueError, match="not found in DataTree"): + compute_cell_info_multiscale(tree, target_scale="scale99") + + +class TestComputeCellInfoTiled: + def test_matches_eager_no_cell_spans_tiles(self): + labels, _ = _make_brick_labels(gap=10) # cells are 20x30, well below chunk + labels_da = xr.DataArray(labels, dims=("y", "x")) + info_tiled = compute_cell_info_tiled(labels_da, chunk=128) + info_eager = compute_cell_info(labels) + assert set(info_tiled.keys()) == set(info_eager.keys()) + for lid in info_eager: + assert info_tiled[lid].centroid_y == pytest.approx(info_eager[lid].centroid_y, abs=1e-6) + assert info_tiled[lid].centroid_x == pytest.approx(info_eager[lid].centroid_x, abs=1e-6) + assert info_tiled[lid].bbox_h == info_eager[lid].bbox_h + assert info_tiled[lid].bbox_w == info_eager[lid].bbox_w + + def test_matches_eager_cells_span_tile_boundary(self): + # A 100x100 cell crossing chunk boundary at 50. + labels = np.zeros((200, 200), dtype=np.int32) + labels[30:130, 30:130] = 1 + labels_da = xr.DataArray(labels, dims=("y", "x")) + info_tiled = compute_cell_info_tiled(labels_da, chunk=50) + info_eager = compute_cell_info(labels) + assert set(info_tiled.keys()) == set(info_eager.keys()) + for lid in info_eager: + assert info_tiled[lid].centroid_y == pytest.approx(info_eager[lid].centroid_y, abs=1e-6) + assert info_tiled[lid].centroid_x == pytest.approx(info_eager[lid].centroid_x, abs=1e-6) + assert info_tiled[lid].bbox_h == info_eager[lid].bbox_h + assert info_tiled[lid].bbox_w == info_eager[lid].bbox_w + + def test_empty_labels(self): + labels = np.zeros((100, 100), dtype=np.int32) + labels_da = xr.DataArray(labels, dims=("y", "x")) + assert compute_cell_info_tiled(labels_da, chunk=32) == {} + + +class TestExtractTileLazy: + def test_matches_eager(self, brick_labels, brick_image): + labels, _, _ = brick_labels + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) + labels_da = xr.DataArray(labels, dims=("y", "x")) + image_da = xr.DataArray(brick_image, dims=("c", "y", "x")) + for spec in specs: + img_e, lbl_e = extract_tile(brick_image, labels, spec) + img_l, lbl_l = extract_tile_lazy(image_da, labels_da, spec) + np.testing.assert_array_equal(img_e, img_l) + np.testing.assert_array_equal(lbl_e, lbl_l) + + +class TestVerifyCoverage: + def test_detects_duplicate(self): + spec_a = build_tile_specs((100, 100), {1: _make_ci(1, 25, 25)}, tile_size=50) + spec_b = build_tile_specs((100, 100), {1: _make_ci(1, 25, 25)}, tile_size=50) + with pytest.raises(AssertionError, match="multiple tiles"): + verify_coverage({1}, spec_a + spec_b) + + def test_detects_missing(self): + specs = build_tile_specs((100, 100), {}, tile_size=50) + with pytest.raises(AssertionError, match="not assigned"): + verify_coverage({42}, specs) + + def test_detects_extra(self): + specs = build_tile_specs((100, 100), {1: _make_ci(1, 25, 25)}, tile_size=50, overlap_margin=0) + with pytest.raises(AssertionError, match="non-existent"): + verify_coverage(set(), specs) + + +def _make_ci(label: int, cy: float, cx: float, h: int = 4, w: int = 4): + from squidpy.experimental.im._tiling import CellInfo + + return CellInfo(label=label, centroid_y=cy, centroid_x=cx, bbox_h=h, bbox_w=w) + + class TestTilingVisual(PlotTester, metaclass=PlotTesterMeta): def test_plot_tile_assignment_gap(self): """Visual: brick pattern (gap=10), cells colored by tile.""" labels, _ = _make_brick_labels(gap=10) - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) _plot_tile_assignment(labels, specs, title="Tile assignment (gap=10)") def test_plot_tile_assignment_touching(self): """Visual: brick pattern (gap=0, touching), cells colored by tile.""" labels, _ = _make_brick_labels(gap=0) - specs = build_tile_specs(labels, tile_size=_TILE_SIZE) + specs = _specs_from_labels(labels, tile_size=_TILE_SIZE) _plot_tile_assignment(labels, specs, title="Tile assignment (gap=0, touching)") From 9848a6f288062b56fbfb9c0f801f59e39f4735fe Mon Sep 17 00:00:00 2001 From: anon Date: Thu, 14 May 2026 17:21:21 +0200 Subject: [PATCH 33/37] Simplify hot paths and reuse existing utilities * compute_cell_info_tiled: replace per-id np.where with scipy.ndimage.find_objects and np.bincount sums. One vectorized pass per tile instead of O(n_cells) scans. * _zero_non_owned: replace per-id rewrite loop with np.isin + np.where. * _classify_dropped_cells: drop the full-array .values + per-cell np.where; use compute_cell_info_tiled bboxes for inside/partial/outside classification, so the full label array is no longer materialized. * CellInfo: add bbox_y0/bbox_x0 fields so callers can do bbox math without reconstructing from the centroid (which is area-weighted, not bbox-centered). * _relabel_contiguous: replaced by skimage.segmentation.relabel_sequential. * _align_to_image_grid: flatten nested if/else with elif chain; extract _rasterize_to_image_grid so the shapes-key path and the align_mode='rasterize' path no longer duplicate the rasterize call. * DropReport: empty_tile_drop -> empty_tiles (the counter increments per tile). Co-Authored-By: Claude Opus 4.7 --- src/squidpy/experimental/im/_feature.py | 143 +++++++++--------------- src/squidpy/experimental/im/_tiling.py | 58 ++++++---- 2 files changed, 92 insertions(+), 109 deletions(-) diff --git a/src/squidpy/experimental/im/_feature.py b/src/squidpy/experimental/im/_feature.py index d161f6697..aeb6c0e11 100644 --- a/src/squidpy/experimental/im/_feature.py +++ b/src/squidpy/experimental/im/_feature.py @@ -20,6 +20,7 @@ from joblib import Parallel, delayed from skimage import measure from skimage.feature import graycomatrix, graycoprops +from skimage.segmentation import relabel_sequential from spatialdata import SpatialData, rasterize from spatialdata._logging import logger as logg from spatialdata.models import TableModel, get_channel_names @@ -50,7 +51,7 @@ class DropReport: outside_image_extent: int = 0 partial_at_image_boundary: int = 0 cp_measure_no_data: int = 0 - empty_tile_drop: int = 0 + empty_tiles: int = 0 other: dict[str, int] = field(default_factory=dict) def summary(self) -> str: @@ -269,26 +270,6 @@ def _build_cp_config(cp_flags: dict[str, bool], channel_names: list[str]) -> dic # --------------------------------------------------------------------------- -def _relabel_contiguous(labels: np.ndarray) -> tuple[np.ndarray, dict[int, int]]: - """Relabel a mask to contiguous IDs 1..N, returning the new mask and a mapping. - - Returns - ------- - relabeled - Label image with contiguous IDs. - new_to_orig - Mapping from new contiguous ID → original label ID. - """ - unique_ids = np.unique(labels) - unique_ids = unique_ids[unique_ids != 0] - new_to_orig: dict[int, int] = {} - relabeled = np.zeros_like(labels) - for new_id, orig_id in enumerate(unique_ids, start=1): - relabeled[labels == orig_id] = new_id - new_to_orig[new_id] = int(orig_id) - return relabeled, new_to_orig - - def _featurize_tile( tile_image: np.ndarray, tile_labels: np.ndarray, @@ -322,20 +303,15 @@ def _featurize_tile( # --- cp_measure features --- if parsed.cp_flags is not None: cp_config = _build_cp_config(parsed.cp_flags, channel_names) - # Relabel to contiguous IDs (1..N) — cp_measure assumes dense labels - # internally and will index-error on sparse IDs like [1, 37, 82]. - contiguous_labels, orig_ids = _relabel_contiguous(tile_labels) - masks_3d = contiguous_labels[np.newaxis, :, :] # (1, H, W) + # cp_measure assumes dense 1..N IDs and index-errors on sparse IDs. + contiguous_labels, _, inverse = relabel_sequential(tile_labels) + masks_3d = contiguous_labels[np.newaxis, :, :] with warnings.catch_warnings(): warnings.simplefilter("ignore") data, columns, rows = featurize(tile_image, masks_3d, cp_config) if data.shape[0] > 0: - # Map contiguous labels back to original IDs. - # rows may include labels that produced no data (cp_measure bug); - # use only the first data.shape[0] rows. - # TODO: switch to return_as="anndata" once afermg/cp_measure#38 - # fixes the rows/data length mismatch. - row_labels = [orig_ids[r[2]] for r in rows[: data.shape[0]]] + # cp_measure may return more rows than data; trim and remap. + row_labels = [int(inverse[r[2]]) for r in rows[: data.shape[0]]] cp_df = pd.DataFrame(data, index=row_labels, columns=columns) parts.append(cp_df) @@ -540,6 +516,28 @@ def _relative_affine(sdata: SpatialData, image_key: str, labels_key: str, cs: st return m_global_to_img @ m_lbl_to_global +def _rasterize_to_image_grid(element: Any, image_da: xr.DataArray, cs: str) -> xr.DataArray: + """Rasterize a spatialdata element onto an image DataArray's pixel grid.""" + logg.warning( + f"Materializing element onto image grid via spatialdata.rasterize in '{cs}'. " + f"Lazy behavior is lost for this run." + ) + img_h = int(image_da.sizes["y"]) + img_w = int(image_da.sizes["x"]) + result = rasterize( + element, + ["x", "y"], + min_coordinate=[0, 0], + max_coordinate=[img_w, img_h], + target_coordinate_system=cs, + target_unit_to_pixels=1.0, + return_regions_as_labels=True, + ) + if isinstance(result, xr.DataArray): + return result + return xr.DataArray(np.asarray(result), dims=["y", "x"]) + + def _is_close_identity(m: np.ndarray, atol: float = 1e-6) -> bool: return bool(np.allclose(m, np.eye(m.shape[0]), atol=atol)) @@ -578,45 +576,21 @@ def _align_to_image_grid( # Integer-pixel offset of labels relative to image. (tx, ty) means # labels pixel (0, 0) lands at image pixel (tx, ty) in (x, y) order. - tx: int - ty: int if _is_close_identity(m): tx, ty = 0, 0 + elif (decomposed := _decompose_pixel_translation(m)) is not None: + tx, ty = decomposed + elif align_mode == "strict": + raise ValueError( + f"Labels not aligned to image pixel grid in coordinate system '{cs}'. " + f"Relative affine (x,y) =\n{m}\n" + f"Pass align_mode='rasterize' to resample labels onto the image grid " + f"(via spatialdata.rasterize), or pre-align with spatialdata.rasterize " + f"in your pipeline." + ) else: - decomposed = _decompose_pixel_translation(m) - if decomposed is not None: - tx, ty = decomposed - else: - if align_mode == "strict": - raise ValueError( - f"Labels not aligned to image pixel grid in coordinate system '{cs}'. " - f"Relative affine (x,y) =\n{m}\n" - f"Pass align_mode='rasterize' to resample labels onto the image grid " - f"(via spatialdata.rasterize), or pre-align with spatialdata.rasterize " - f"in your pipeline." - ) - # align_mode == "rasterize": materialize labels onto image grid. - logg.warning( - "align_mode='rasterize' triggered: resampling labels onto the image grid " - "via spatialdata.rasterize. This materializes labels in memory; lazy " - "behavior is lost for this run." - ) - img_h = int(image_da.sizes["y"]) - img_w = int(image_da.sizes["x"]) - rasterized = rasterize( - sdata.labels[labels_key], - ["x", "y"], - min_coordinate=[0, 0], - max_coordinate=[img_w, img_h], - target_coordinate_system=cs, - target_unit_to_pixels=1.0, - return_regions_as_labels=True, - ) - if isinstance(rasterized, xr.DataArray): - labels_da = rasterized - else: - labels_da = xr.DataArray(np.asarray(rasterized), dims=["y", "x"]) - tx, ty = 0, 0 + labels_da = _rasterize_to_image_grid(sdata.labels[labels_key], image_da, cs) + tx, ty = 0, 0 # Determine overlap rectangle in image-pixel coords. img_h = int(image_da.sizes["y"]) @@ -664,34 +638,27 @@ def _classify_dropped_cells( ) -> tuple[int, int, int]: """Return ``(fully_inside, partially_inside, fully_outside)`` cell counts. - A cell whose entire bounding box is inside the crop window is counted as - fully inside. If only part of its bounding box is inside, it is - partially inside. If the cell does not appear inside the crop window at - all, it is fully outside. + Uses per-cell bounding boxes computed via tile-streamed reads so the + full label array is never materialized. """ lbl_h = int(labels_da.sizes.get("y", labels_da.shape[-2])) lbl_w = int(labels_da.sizes.get("x", labels_da.shape[-1])) - # Skip the work entirely when nothing is cropped away. if y0 <= 0 and x0 <= 0 and y1 >= lbl_h and x1 >= lbl_w: return 0, 0, 0 - arr = labels_da.values - if arr.ndim > 2: - arr = arr.squeeze() - inside = arr[y0:y1, x0:x1] - ids_total = {int(v) for v in np.unique(arr) if v != 0} - ids_inside_any = {int(v) for v in np.unique(inside) if v != 0} - - fully_outside = len(ids_total - ids_inside_any) - - # Find which "inside_any" cells are only partial: their bbox in `arr` - # extends outside the crop window. + cell_info = compute_cell_info_tiled(labels_da) + fully_inside = 0 partial = 0 - for lid in ids_inside_any: - ys, xs = np.where(arr == lid) - if ys.min() < y0 or ys.max() >= y1 or xs.min() < x0 or xs.max() >= x1: + fully_outside = 0 + for ci in cell_info.values(): + by0, bx0 = ci.bbox_y0, ci.bbox_x0 + by1, bx1 = by0 + ci.bbox_h, bx0 + ci.bbox_w + if by1 <= y0 or by0 >= y1 or bx1 <= x0 or bx0 >= x1: + fully_outside += 1 + elif by0 >= y0 and by1 <= y1 and bx0 >= x0 and bx1 <= x1: + fully_inside += 1 + else: partial += 1 - fully_inside = len(ids_inside_any) - partial return fully_inside, partial, fully_outside @@ -974,7 +941,7 @@ def _process_one(spec): start=1, ): if df.empty: - drop_report.empty_tile_drop += 1 + drop_report.empty_tiles += 1 else: tile_dfs.append(df) if done == 1 or done == total_tiles or done % log_every == 0: diff --git a/src/squidpy/experimental/im/_tiling.py b/src/squidpy/experimental/im/_tiling.py index b3d906576..2cc6b4bde 100644 --- a/src/squidpy/experimental/im/_tiling.py +++ b/src/squidpy/experimental/im/_tiling.py @@ -23,6 +23,7 @@ import numpy as np import xarray as xr +from scipy import ndimage as ndi from skimage.measure import regionprops __all__ = [ @@ -47,6 +48,8 @@ class CellInfo: centroid_x: float bbox_h: int # height of bounding box (pixels) bbox_w: int # width of bounding box (pixels) + bbox_y0: int = 0 # top-left y of bounding box + bbox_x0: int = 0 # top-left x of bounding box @dataclass(frozen=True) @@ -100,6 +103,8 @@ def compute_cell_info(labels: np.ndarray) -> dict[int, CellInfo]: centroid_x=float(p.centroid[1]), bbox_h=max_row - min_row, bbox_w=max_col - min_col, + bbox_y0=min_row, + bbox_x0=min_col, ) return info @@ -174,6 +179,8 @@ def compute_cell_info_multiscale( centroid_x=ci.centroid_x * sx, bbox_h=int(np.ceil(ci.bbox_h * sy)), bbox_w=int(np.ceil(ci.bbox_w * sx)), + bbox_y0=int(np.floor(ci.bbox_y0 * sy)), + bbox_x0=int(np.floor(ci.bbox_x0 * sx)), ) return rescaled @@ -211,7 +218,6 @@ def compute_cell_info_tiled( H, W = int(labels_da.sizes["y"]), int(labels_da.sizes["x"]) - # Per-label accumulators area: dict[int, int] = {} sum_y: dict[int, float] = {} sum_x: dict[int, float] = {} @@ -227,21 +233,30 @@ def compute_cell_info_tiled( tile = labels_da.isel(y=slice(y0, y1), x=slice(x0, x1)).values if tile.ndim > 2: tile = tile.squeeze() - ids = np.unique(tile) - ids = ids[ids != 0] - if ids.size == 0: + flat = tile.ravel() + counts = np.bincount(flat) + if counts.size <= 1 or not counts[1:].any(): continue - for lid in ids: - lid_int = int(lid) - ys, xs = np.where(tile == lid) - # Convert tile-local coords to global - ys_g = ys + y0 - xs_g = xs + x0 - area[lid_int] = area.get(lid_int, 0) + ys.size - sum_y[lid_int] = sum_y.get(lid_int, 0.0) + float(ys_g.sum()) - sum_x[lid_int] = sum_x.get(lid_int, 0.0) + float(xs_g.sum()) - ymin, ymax = int(ys_g.min()), int(ys_g.max()) - xmin, xmax = int(xs_g.min()), int(xs_g.max()) + + yy, xx = np.mgrid[y0 : y0 + tile.shape[0], x0 : x0 + tile.shape[1]] + sums_y = np.bincount(flat, weights=yy.ravel(), minlength=counts.size) + sums_x = np.bincount(flat, weights=xx.ravel(), minlength=counts.size) + slices = ndi.find_objects(tile) + + for lid_int, count in enumerate(counts): + if lid_int == 0 or count == 0: + continue + area[lid_int] = area.get(lid_int, 0) + int(count) + sum_y[lid_int] = sum_y.get(lid_int, 0.0) + float(sums_y[lid_int]) + sum_x[lid_int] = sum_x.get(lid_int, 0.0) + float(sums_x[lid_int]) + sl = slices[lid_int - 1] + if sl is None: + continue + y_slice, x_slice = sl + ymin = y0 + y_slice.start + ymax = y0 + y_slice.stop - 1 + xmin = x0 + x_slice.start + xmax = x0 + x_slice.stop - 1 min_y[lid_int] = min(min_y.get(lid_int, ymin), ymin) min_x[lid_int] = min(min_x.get(lid_int, xmin), xmin) max_y[lid_int] = max(max_y.get(lid_int, ymax), ymax) @@ -255,6 +270,8 @@ def compute_cell_info_tiled( centroid_x=sum_x[lid_int] / a, bbox_h=max_y[lid_int] - min_y[lid_int] + 1, bbox_w=max_x[lid_int] - min_x[lid_int] + 1, + bbox_y0=min_y[lid_int], + bbox_x0=min_x[lid_int], ) return info @@ -357,12 +374,11 @@ def build_tile_specs( def _zero_non_owned(tile_labels: np.ndarray, owned: frozenset[int]) -> np.ndarray: """Return a copy of ``tile_labels`` with non-owned labels set to 0.""" - out = tile_labels.copy() - unique_in_crop = np.unique(out) - for lid in unique_in_crop: - if lid != 0 and int(lid) not in owned: - out[out == lid] = 0 - return out + if not owned: + return np.zeros_like(tile_labels) + owned_arr = np.fromiter(owned, dtype=tile_labels.dtype, count=len(owned)) + keep = np.isin(tile_labels, owned_arr) + return np.where(keep, tile_labels, 0) def extract_tile( From d649597990cd97b2983df130a8bd764412cbbea4 Mon Sep 17 00:00:00 2001 From: anon Date: Tue, 26 May 2026 23:09:13 +0200 Subject: [PATCH 34/37] Hoist cp_measure config build out of per-tile worker; bump pin * _featurize_tile: accept a pre-built cp_config and drop the per-tile _build_cp_config rebuild. Config is now constructed once in calculate_image_features and reused across every tile (matters on 100kx100k images with thousands of tiles). * pyproject: cp-measure>=0.1.4 -> >=0.1.19 to pick up the granularity correctness fix (#44, #47), 3D-only feature filtering (#35), and static typing (#45). No upper cap left in place; bump when upstream ships a breaking release. Co-Authored-By: Claude Opus 4.7 --- pyproject.toml | 2 +- src/squidpy/experimental/im/_feature.py | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1c2870d58..35d0af91f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "aiohttp>=3.8.1", "anndata>=0.9", "centrosome>=1.2.3", - "cp-measure>=0.1.4", + "cp-measure>=0.1.19", "cycler>=0.11", "dask[array]>=2021.2", "dask-image>=0.5", diff --git a/src/squidpy/experimental/im/_feature.py b/src/squidpy/experimental/im/_feature.py index aeb6c0e11..6903ba4bf 100644 --- a/src/squidpy/experimental/im/_feature.py +++ b/src/squidpy/experimental/im/_feature.py @@ -275,6 +275,7 @@ def _featurize_tile( tile_labels: np.ndarray, parsed: _ParsedFeatures, channel_names: list[str], + cp_config: dict | None, ) -> pd.DataFrame: """Compute all requested features for a single tile. @@ -288,6 +289,10 @@ def _featurize_tile( Parsed feature configuration. channel_names Channel names for column naming. + cp_config + Pre-built cp_measure featurizer config, or ``None`` when no cp_measure + features are requested. Built once by the caller and reused across + every tile. Returns ------- @@ -301,8 +306,7 @@ def _featurize_tile( parts: list[pd.DataFrame] = [] # --- cp_measure features --- - if parsed.cp_flags is not None: - cp_config = _build_cp_config(parsed.cp_flags, channel_names) + if cp_config is not None: # cp_measure assumes dense 1..N IDs and index-errors on sparse IDs. contiguous_labels, _, inverse = relabel_sequential(tile_labels) masks_3d = contiguous_labels[np.newaxis, :, :] @@ -925,10 +929,13 @@ def calculate_image_features( total_tiles = len(specs) logg.info(f"Processing {total_tiles} tiles ({tile_size}x{tile_size}, margin={overlap_margin}).") + # Build cp_measure config once; the same dict is reused for every tile. + cp_config = _build_cp_config(parsed.cp_flags, channel_names) if parsed.cp_flags is not None else None + # --- Process tiles (each worker materializes only its own ~2k x 2k crop) --- def _process_one(spec): tile_img, tile_lbl = extract_tile_lazy(image_da, labels_da, spec) - return _featurize_tile(tile_img, tile_lbl, parsed, channel_names) + return _featurize_tile(tile_img, tile_lbl, parsed, channel_names, cp_config) log_every = max(1, total_tiles // 10) start_t = time.monotonic() From 5bba27201ef98c0430d8a7346e194d073790437d Mon Sep 17 00:00:00 2001 From: anon Date: Wed, 27 May 2026 01:25:29 +0200 Subject: [PATCH 35/37] Address review feedback: cap cp-measure pin, soften private signature * pyproject: cp-measure>=0.1.19,<0.2 -- pre-1.0 dep, cap upper bound so a future 0.2.x release doesn't silently break installs. * _featurize_tile: cp_config is now keyword-only with a default of None and falls back to _build_cp_config when not supplied. Preserves the pre-hoist call signature for direct (test/notebook) callers while the caller-built reuse path in calculate_image_features still skips the per-tile rebuild. Co-Authored-By: Claude Opus 4.7 --- pyproject.toml | 2 +- src/squidpy/experimental/im/_feature.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 35d0af91f..381d5794b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "aiohttp>=3.8.1", "anndata>=0.9", "centrosome>=1.2.3", - "cp-measure>=0.1.19", + "cp-measure>=0.1.19,<0.2", "cycler>=0.11", "dask[array]>=2021.2", "dask-image>=0.5", diff --git a/src/squidpy/experimental/im/_feature.py b/src/squidpy/experimental/im/_feature.py index 6903ba4bf..0fc8049b8 100644 --- a/src/squidpy/experimental/im/_feature.py +++ b/src/squidpy/experimental/im/_feature.py @@ -275,7 +275,8 @@ def _featurize_tile( tile_labels: np.ndarray, parsed: _ParsedFeatures, channel_names: list[str], - cp_config: dict | None, + *, + cp_config: dict | None = None, ) -> pd.DataFrame: """Compute all requested features for a single tile. @@ -290,9 +291,10 @@ def _featurize_tile( channel_names Channel names for column naming. cp_config - Pre-built cp_measure featurizer config, or ``None`` when no cp_measure - features are requested. Built once by the caller and reused across - every tile. + Pre-built cp_measure featurizer config. When ``None`` (default), the + config is built locally from ``parsed.cp_flags``. ``calculate_image_features`` + builds it once and reuses it across tiles; direct callers can rely on + the fallback. Returns ------- @@ -306,6 +308,8 @@ def _featurize_tile( parts: list[pd.DataFrame] = [] # --- cp_measure features --- + if cp_config is None and parsed.cp_flags is not None: + cp_config = _build_cp_config(parsed.cp_flags, channel_names) if cp_config is not None: # cp_measure assumes dense 1..N IDs and index-errors on sparse IDs. contiguous_labels, _, inverse = relabel_sequential(tile_labels) @@ -935,7 +939,7 @@ def calculate_image_features( # --- Process tiles (each worker materializes only its own ~2k x 2k crop) --- def _process_one(spec): tile_img, tile_lbl = extract_tile_lazy(image_da, labels_da, spec) - return _featurize_tile(tile_img, tile_lbl, parsed, channel_names, cp_config) + return _featurize_tile(tile_img, tile_lbl, parsed, channel_names, cp_config=cp_config) log_every = max(1, total_tiles // 10) start_t = time.monotonic() From 3a26b3dc5a7fea7856dd1be750c5199b4a7dd37e Mon Sep 17 00:00:00 2001 From: anon Date: Wed, 27 May 2026 02:36:11 +0200 Subject: [PATCH 36/37] Rename compute_cell_info_tiled `chunk` -> `chunk_size` Aligns with main's tl/_tiling_qc.py tests that already call ``compute_cell_info_tiled(labels_da, chunk_size=...)`` and with the numpy/dask convention. Internal body and our own test in tests/experimental/test_tiling.py updated accordingly. Co-Authored-By: Claude Opus 4.7 --- src/squidpy/experimental/im/_tiling.py | 12 ++++++------ tests/experimental/test_tiling.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/squidpy/experimental/im/_tiling.py b/src/squidpy/experimental/im/_tiling.py index 575cd711e..dbdd2b745 100644 --- a/src/squidpy/experimental/im/_tiling.py +++ b/src/squidpy/experimental/im/_tiling.py @@ -193,7 +193,7 @@ def compute_cell_info_multiscale( def compute_cell_info_tiled( labels_da: xr.DataArray, - chunk: int = 4096, + chunk_size: int = 4096, ) -> dict[int, CellInfo]: """Compute cell info by tile-streaming the labels array. @@ -205,7 +205,7 @@ def compute_cell_info_tiled( ---------- labels_da Lazy/eager 2-D xarray DataArray of integer labels. - chunk + chunk_size Tile side length for streaming reads. Returns @@ -227,10 +227,10 @@ def compute_cell_info_tiled( max_y: dict[int, int] = {} max_x: dict[int, int] = {} - for y0 in range(0, H, chunk): - y1 = min(y0 + chunk, H) - for x0 in range(0, W, chunk): - x1 = min(x0 + chunk, W) + for y0 in range(0, H, chunk_size): + y1 = min(y0 + chunk_size, H) + for x0 in range(0, W, chunk_size): + x1 = min(x0 + chunk_size, W) tile = labels_da.isel(y=slice(y0, y1), x=slice(x0, x1)).values if tile.ndim > 2: tile = tile.squeeze() diff --git a/tests/experimental/test_tiling.py b/tests/experimental/test_tiling.py index e8428093f..d8cf01b4a 100644 --- a/tests/experimental/test_tiling.py +++ b/tests/experimental/test_tiling.py @@ -450,7 +450,7 @@ class TestComputeCellInfoTiled: def test_matches_eager_no_cell_spans_tiles(self): labels, _ = _make_brick_labels(gap=10) # cells are 20x30, well below chunk labels_da = xr.DataArray(labels, dims=("y", "x")) - info_tiled = compute_cell_info_tiled(labels_da, chunk=128) + info_tiled = compute_cell_info_tiled(labels_da, chunk_size=128) info_eager = compute_cell_info(labels) assert set(info_tiled.keys()) == set(info_eager.keys()) for lid in info_eager: @@ -464,7 +464,7 @@ def test_matches_eager_cells_span_tile_boundary(self): labels = np.zeros((200, 200), dtype=np.int32) labels[30:130, 30:130] = 1 labels_da = xr.DataArray(labels, dims=("y", "x")) - info_tiled = compute_cell_info_tiled(labels_da, chunk=50) + info_tiled = compute_cell_info_tiled(labels_da, chunk_size=50) info_eager = compute_cell_info(labels) assert set(info_tiled.keys()) == set(info_eager.keys()) for lid in info_eager: @@ -476,7 +476,7 @@ def test_matches_eager_cells_span_tile_boundary(self): def test_empty_labels(self): labels = np.zeros((100, 100), dtype=np.int32) labels_da = xr.DataArray(labels, dims=("y", "x")) - assert compute_cell_info_tiled(labels_da, chunk=32) == {} + assert compute_cell_info_tiled(labels_da, chunk_size=32) == {} class TestExtractTileLazy: From a8dadcf6f1588b2031933b04d8bfa525d197ac60 Mon Sep 17 00:00:00 2001 From: anon Date: Wed, 27 May 2026 03:11:48 +0200 Subject: [PATCH 37/37] Restrict channels= to names; reject integer indices Convention in the scverse ecosystem is to address channels by name only. Passing an int now raises TypeError; passing a non-existent name still raises ValueError as before. A channel whose name happens to be the string "0" is still accepted -- the check discriminates on Python type (isinstance(ch, str)), not on the string contents. * _prepare_lazy: type hint list[str] | list[int] | None -> list[str] | None and add an isinstance(ch, str) guard before lookup. * calculate_image_features: same type hint update. * Docstring clarified that integer indices are not accepted. * test_channel_selection_by_index renamed to test_channel_selection_rejects_int and now asserts TypeError. * test_concern4_channel_subset_by_index renamed to ..._by_name and passes ["c0", "c2"] instead of [0, 2]. Co-Authored-By: Claude Opus 4.7 --- src/squidpy/experimental/im/_feature.py | 26 ++++++++--------- .../test_calculate_image_features.py | 28 +++++++++---------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/squidpy/experimental/im/_feature.py b/src/squidpy/experimental/im/_feature.py index 0fc8049b8..bf84b9360 100644 --- a/src/squidpy/experimental/im/_feature.py +++ b/src/squidpy/experimental/im/_feature.py @@ -711,7 +711,7 @@ def _prepare_lazy( labels_key: str | None, shapes_key: str | None, scale: str | None, - channels: list[str] | list[int] | None, + channels: list[str] | None, align_mode: Literal["strict", "rasterize"], drop_report: DropReport, ) -> tuple[xr.DataArray, xr.DataArray, list[str]]: @@ -775,17 +775,15 @@ def _prepare_lazy( selected_idx: list[int] = [] ch_names = [] for ch in channels: - if isinstance(ch, int): - if ch < 0 or ch >= len(all_ch): - raise ValueError(f"Channel index {ch} out of range [0, {len(all_ch)}).") - selected_idx.append(ch) - ch_names.append(all_ch[ch]) - else: - ch_str = str(ch) - if ch_str not in all_ch: - raise ValueError(f"Channel '{ch}' not found. Available: {all_ch}") - selected_idx.append(all_ch.index(ch_str)) - ch_names.append(ch_str) + if not isinstance(ch, str): + raise TypeError( + f"channels must contain strings (channel names); got {type(ch).__name__} {ch!r}. " + f"Available channel names: {all_ch}." + ) + if ch not in all_ch: + raise ValueError(f"Channel '{ch}' not found. Available: {all_ch}") + selected_idx.append(all_ch.index(ch)) + ch_names.append(ch) image_da = image_da.isel(c=selected_idx) else: ch_names = all_ch @@ -864,7 +862,9 @@ def calculate_image_features( scale Scale level for multi-scale data. channels - Subset of channels to use. ``None`` uses all channels. + Subset of channel names to use, matching those returned by + :func:`spatialdata.models.get_channel_names`. ``None`` uses all + channels. Integer indices are not accepted -- always pass names. features Which features to compute. Accepts a list of strings: diff --git a/tests/experimental/test_calculate_image_features.py b/tests/experimental/test_calculate_image_features.py index 7da9afb73..4a7e50e41 100644 --- a/tests/experimental/test_calculate_image_features.py +++ b/tests/experimental/test_calculate_image_features.py @@ -342,19 +342,17 @@ def test_channel_selection_by_name(self, sdata_synthetic): assert result_one.n_vars == 1 assert "intensity_mean_0" in result_one.var_names - def test_channel_selection_by_index(self, sdata_synthetic): - """Channel selection by integer index.""" - result = sq.experimental.im.calculate_image_features( - sdata_synthetic, - image_key="test_img", - labels_key="test_labels", - channels=[0], - features=["squidpy:summary"], - inplace=False, - ) - assert result.n_obs > 0 - # Only channel 0 features - assert all("_0" in col for col in result.var_names) + def test_channel_selection_rejects_int(self, sdata_synthetic): + """Integer channel indices are no longer accepted -- names only.""" + with pytest.raises(TypeError, match="channels must contain strings"): + sq.experimental.im.calculate_image_features( + sdata_synthetic, + image_key="test_img", + labels_key="test_labels", + channels=[0], # int, not str -- should fail validation + features=["squidpy:summary"], + inplace=False, + ) def test_channel_selection_invalid(self, sdata_synthetic): with pytest.raises(ValueError, match="Channel 'DAPI' not found"): @@ -593,14 +591,14 @@ def test_concern3_rasterize_path_succeeds(self, capsys): # -- Concern 4: channel subset selection -- - def test_concern4_channel_subset_by_index(self): + def test_concern4_channel_subset_by_name(self): sdata = _toy_sdata(n_channels=4, channel_names=["c0", "c1", "c2", "c3"]) adata = sq.experimental.im.calculate_image_features( sdata, image_key="img", labels_key="lbl", features=["squidpy:summary"], - channels=[0, 2], + channels=["c0", "c2"], inplace=False, ) cols = list(adata.var_names)