From 7d4b180a1a7b46dc34d177056bd75a70b341030b Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Sat, 11 Apr 2026 00:09:05 +0200 Subject: [PATCH] Add tiling QC metric for detecting tile-boundary segmentation artifacts Cells segmented in tiles get cut at tile borders, producing fragments with artificially straight edges. This adds: - `sq.experimental.tl.calculate_tiling_qc`: per-cell scoring via collinearity-based straight-edge detection (max_straight_edge_ratio, cardinal_alignment_score, cut_score). Scores stored in .obs of a QC AnnData table linked to the labels element via spatialdata_attrs. Algorithm parameters recorded in .uns["tiling_qc"]. - `sq.experimental.pl.tiling_qc`: diagnostic plot via spatialdata-plot (renders labels coloured by score; tile grid emerges from the data). - `extract_labels_tile_lazy` in _tiling.py for labels-only tile extraction without materialising an image. - Test fixture with 400x400 dask-backed ellipsoid cells cut by a 3x3 tile grid, with ground-truth cut/intact classification. - 35 tests (unit, integration, visual regression). Co-Authored-By: Claude Opus 4.6 (1M context) --- src/squidpy/experimental/__init__.py | 4 +- src/squidpy/experimental/im/_tiling.py | 263 ++++++++-- src/squidpy/experimental/pl/__init__.py | 4 +- src/squidpy/experimental/pl/_tiling_qc.py | 75 +++ src/squidpy/experimental/tl/__init__.py | 5 + src/squidpy/experimental/tl/_tiling_qc.py | 475 ++++++++++++++++++ .../TilingQCVisual_tiling_qc_cut_score.png | Bin 0 -> 7213 bytes ...QCVisual_tiling_qc_straight_edge_ratio.png | Bin 0 -> 7213 bytes tests/experimental/conftest.py | 213 ++++++++ tests/experimental/test_tiling_qc.py | 350 +++++++++++++ 10 files changed, 1351 insertions(+), 38 deletions(-) create mode 100644 src/squidpy/experimental/pl/_tiling_qc.py create mode 100644 src/squidpy/experimental/tl/__init__.py create mode 100644 src/squidpy/experimental/tl/_tiling_qc.py create mode 100644 tests/_images/TilingQCVisual_tiling_qc_cut_score.png create mode 100644 tests/_images/TilingQCVisual_tiling_qc_straight_edge_ratio.png create mode 100644 tests/experimental/conftest.py create mode 100644 tests/experimental/test_tiling_qc.py diff --git a/src/squidpy/experimental/__init__.py b/src/squidpy/experimental/__init__.py index 435cd0098..5f4c695ab 100644 --- a/src/squidpy/experimental/__init__.py +++ b/src/squidpy/experimental/__init__.py @@ -6,6 +6,6 @@ from __future__ import annotations -from . import im, pl +from . import im, pl, tl -__all__ = ["im", "pl"] +__all__ = ["im", "pl", "tl"] diff --git a/src/squidpy/experimental/im/_tiling.py b/src/squidpy/experimental/im/_tiling.py index 6154faa44..259bc2319 100644 --- a/src/squidpy/experimental/im/_tiling.py +++ b/src/squidpy/experimental/im/_tiling.py @@ -5,6 +5,9 @@ the tile whose non-overlapping base region contains the centroid owns the cell. Non-owned cells are zeroed out in each tile's mask so that downstream processing never double-counts. + +All functions accept pre-computed centroid dicts and image shapes — they +never materialize the full image or label array. """ from __future__ import annotations @@ -13,6 +16,7 @@ from typing import Literal import numpy as np +import xarray as xr from skimage.measure import regionprops @@ -49,8 +53,13 @@ class TileSpec: owned_ids: frozenset[int] = field(default_factory=frozenset) -def _compute_cell_info(labels: np.ndarray) -> dict[int, CellInfo]: - """Compute centroid and bounding-box size for every label. +# --------------------------------------------------------------------------- +# Centroid computation +# --------------------------------------------------------------------------- + + +def compute_cell_info(labels: np.ndarray) -> dict[int, CellInfo]: + """Compute centroid and bounding-box size for every label from a numpy array. Parameters ---------- @@ -75,32 +84,148 @@ def _compute_cell_info(labels: np.ndarray) -> dict[int, CellInfo]: return info +def compute_cell_info_multiscale( + labels_node: xr.DataTree, + target_scale: str = "scale0", +) -> dict[int, CellInfo]: + """Compute centroids using the coarsest scale of a multiscale label pyramid. + + Reads only the smallest resolution, then scales coordinates to *target_scale*. + """ + available = list(labels_node.keys()) + if not available: + return {} + + # Pick coarsest scale (highest numeric suffix) + def _scale_idx(k: str) -> int: + num = "".join(c for c in k if c.isdigit()) + return int(num) if num else 0 + + coarsest = max(available, key=_scale_idx) + coarse_da = labels_node[coarsest].ds["image"] + coarse_labels = np.asarray(coarse_da.values).squeeze() + + if coarse_labels.ndim != 2: + raise ValueError(f"Expected 2-D labels at scale {coarsest}, got shape {coarse_labels.shape}") + + # Compute scale factors to target resolution + target_da = labels_node[target_scale].ds["image"] + target_h, target_w = target_da.sizes.get("y", target_da.shape[-2]), target_da.sizes.get("x", target_da.shape[-1]) + coarse_h, coarse_w = coarse_labels.shape + scale_y = target_h / coarse_h + scale_x = target_w / coarse_w + + props = regionprops(coarse_labels) + return { + p.label: CellInfo( + label=p.label, + centroid_y=p.centroid[0] * scale_y, + centroid_x=p.centroid[1] * scale_x, + bbox_h=int(np.ceil((p.bbox[2] - p.bbox[0]) * scale_y)), + bbox_w=int(np.ceil((p.bbox[3] - p.bbox[1]) * scale_x)), + ) + for p in props + } + + +def compute_cell_info_tiled( + labels_da: xr.DataArray, + chunk_size: int = 4096, +) -> dict[int, CellInfo]: + """Compute centroids by reading label tiles — never materializes the full array. + + For cells spanning multiple chunks, centroids are computed as + area-weighted means of per-chunk centroids. + + Parameters + ---------- + labels_da + 2-D (y, x) dask-backed xarray DataArray. + chunk_size + Size of chunks to read at a time. + """ + H = int(labels_da.sizes.get("y", labels_da.shape[-2])) + W = int(labels_da.sizes.get("x", labels_da.shape[-1])) + + # Per-label accumulators: [sum_y*area, sum_x*area, total_area, min_y, max_y, min_x, max_x] + stats: dict[int, list[float]] = {} + + for y0 in range(0, H, chunk_size): + y1 = min(y0 + chunk_size, H) + for x0 in range(0, W, chunk_size): + x1 = min(x0 + chunk_size, W) + chunk = labels_da.isel(y=slice(y0, y1), x=slice(x0, x1)).values + if chunk.ndim > 2: + chunk = chunk.squeeze() + + for p in regionprops(chunk): + lid = p.label + cy_global = p.centroid[0] + y0 + cx_global = p.centroid[1] + x0 + area = p.area + min_row = p.bbox[0] + y0 + max_row = p.bbox[2] + y0 + min_col = p.bbox[1] + x0 + max_col = p.bbox[3] + x0 + + if lid not in stats: + stats[lid] = [cy_global * area, cx_global * area, area, min_row, max_row, min_col, max_col] + else: + s = stats[lid] + s[0] += cy_global * area + s[1] += cx_global * area + s[2] += area + s[3] = min(s[3], min_row) + s[4] = max(s[4], max_row) + s[5] = min(s[5], min_col) + s[6] = max(s[6], max_col) + + result: dict[int, CellInfo] = {} + for lid, s in stats.items(): + if lid == 0: + continue + result[lid] = CellInfo( + label=lid, + centroid_y=s[0] / s[2], + centroid_x=s[1] / s[2], + bbox_h=int(s[4] - s[3]), + bbox_w=int(s[6] - s[5]), + ) + return result + + +# --------------------------------------------------------------------------- +# Tile spec building +# --------------------------------------------------------------------------- + + def _auto_margin(cell_info: dict[int, CellInfo]) -> int: """Compute the minimum margin that covers the largest cell's half-extent.""" if not cell_info: return 0 max_half = max(max(c.bbox_h, c.bbox_w) for c in cell_info.values()) - # Full bbox extent: a cell's centroid can be at most half a bbox away - # from its edge, so margin = ceil(max_extent / 2) guarantees coverage. + # Centroid can be at most half a bbox away from the cell's edge. # Add 1 pixel for safety (rounding / off-by-one). return int(np.ceil(max_half / 2)) + 1 def build_tile_specs( - labels: np.ndarray, + image_shape: tuple[int, int], + cell_info: dict[int, CellInfo], tile_size: int = 2048, overlap_margin: int | Literal["auto"] = "auto", ) -> list[TileSpec]: - """Build tile specifications for a label image. + """Build tile specifications from pre-computed centroids. - Each tile gets a non-overlapping *base* region (for centroid ownership) - and an extended *crop* region (base + margin on each side). Every - nonzero label is assigned to exactly one tile. + No pixel data is needed — only the image dimensions and centroid dict. Parameters ---------- - labels - 2-D integer label image (0 = background). + image_shape + ``(H, W)`` of the full-resolution image/labels. + cell_info + Pre-computed centroids from :func:`compute_cell_info`, + :func:`compute_cell_info_multiscale`, or :func:`compute_cell_info_tiled`. tile_size Side length of the non-overlapping base grid cells. overlap_margin @@ -112,14 +237,10 @@ def build_tile_specs( List of :class:`TileSpec`, one per grid cell that owns at least one label. Empty tiles (no cells) are omitted. """ - if labels.ndim != 2: - raise ValueError(f"Expected 2-D labels, got shape {labels.shape}") + H, W = image_shape if tile_size <= 0: raise ValueError(f"tile_size must be positive, got {tile_size}") - H, W = labels.shape - cell_info = _compute_cell_info(labels) - if isinstance(overlap_margin, str) and overlap_margin == "auto": margin = _auto_margin(cell_info) else: @@ -150,13 +271,11 @@ def build_tile_specs( if not owned: continue - # Base region (non-overlapping) by0 = row * tile_size bx0 = col * tile_size by1 = min(by0 + tile_size, H) bx1 = min(bx0 + tile_size, W) - # Crop region (with margin, clamped) cy0 = max(by0 - margin, 0) cx0 = max(bx0 - margin, 0) cy1 = min(by1 + margin, H) @@ -173,56 +292,130 @@ def build_tile_specs( return specs +# --------------------------------------------------------------------------- +# Tile extraction +# --------------------------------------------------------------------------- + + def extract_tile( image: np.ndarray, labels: np.ndarray, spec: TileSpec, ) -> tuple[np.ndarray, np.ndarray]: - """Extract a tile's image and mask, zeroing out non-owned cells. + """Extract a tile from numpy arrays, zeroing out non-owned cells. Parameters ---------- image - 3-D array of shape ``(C, H, W)``. + ``(C, H, W)`` numpy array. labels - 2-D integer label image of shape ``(H, W)``. + ``(H, W)`` numpy label array. spec - Tile specification from :func:`build_tile_specs`. + Tile specification. Returns ------- - tile_image - Cropped image of shape ``(C, crop_h, crop_w)``. - tile_labels - Cropped label image with non-owned cells zeroed out. + tile_image, tile_labels """ cy0, cx0, cy1, cx1 = spec.crop tile_image = image[:, cy0:cy1, cx0:cx1] tile_labels = labels[cy0:cy1, cx0:cx1].copy() + _zero_non_owned(tile_labels, spec.owned_ids) + return tile_image, tile_labels - # Zero out labels not owned by this tile - unique_in_crop = np.unique(tile_labels) - for lid in unique_in_crop: - if lid != 0 and lid not in spec.owned_ids: - tile_labels[tile_labels == lid] = 0 +def extract_tile_lazy( + image_da: xr.DataArray, + labels_da: xr.DataArray, + spec: TileSpec, +) -> tuple[np.ndarray, np.ndarray]: + """Extract a tile from dask-backed xarray arrays. + + Materializes only the tile's crop region (~2k×2k), not the full image. + + Parameters + ---------- + image_da + ``(c, y, x)`` dask-backed DataArray. + labels_da + ``(y, x)`` dask-backed DataArray. + spec + Tile specification. + + Returns + ------- + tile_image + ``(C, crop_h, crop_w)`` numpy array. + tile_labels + ``(crop_h, crop_w)`` numpy array with non-owned cells zeroed. + """ + cy0, cx0, cy1, cx1 = spec.crop + tile_image = image_da.isel(y=slice(cy0, cy1), x=slice(cx0, cx1)).values + tile_labels = labels_da.isel(y=slice(cy0, cy1), x=slice(cx0, cx1)).values.copy() + if tile_labels.ndim > 2: + tile_labels = tile_labels.squeeze() + _zero_non_owned(tile_labels, spec.owned_ids) return tile_image, tile_labels +def extract_labels_tile_lazy( + labels_da: xr.DataArray, + spec: TileSpec, +) -> np.ndarray: + """Extract a labels-only tile from a dask-backed DataArray. + + Like :func:`extract_tile_lazy` but skips the image entirely. + Materializes only the crop region. + + Parameters + ---------- + labels_da + ``(y, x)`` dask-backed DataArray. + spec + Tile specification. + + Returns + ------- + ``(crop_h, crop_w)`` numpy array with non-owned cells zeroed. + """ + cy0, cx0, cy1, cx1 = spec.crop + tile_labels = labels_da.isel(y=slice(cy0, cy1), x=slice(cx0, cx1)).values.copy() + if tile_labels.ndim > 2: + tile_labels = tile_labels.squeeze() + _zero_non_owned(tile_labels, spec.owned_ids) + return tile_labels + + +def _zero_non_owned(tile_labels: np.ndarray, owned_ids: frozenset[int]) -> None: + """Zero out labels not in *owned_ids* (in-place).""" + for lid in np.unique(tile_labels): + if lid != 0 and lid not in owned_ids: + tile_labels[tile_labels == lid] = 0 + + +# --------------------------------------------------------------------------- +# Coverage verification +# --------------------------------------------------------------------------- + + def verify_coverage( - labels: np.ndarray, + all_label_ids: set[int], specs: list[TileSpec], ) -> None: """Assert that tile specs provide full, non-overlapping cell coverage. + Parameters + ---------- + all_label_ids + Set of all nonzero label IDs expected in the image. + specs + Tile specifications to verify. + Raises ------ AssertionError If any cell is missing or assigned to more than one tile. """ - all_label_ids = set(np.unique(labels)) - all_label_ids.discard(0) - owned_union: set[int] = set() for spec in specs: overlap = owned_union & spec.owned_ids diff --git a/src/squidpy/experimental/pl/__init__.py b/src/squidpy/experimental/pl/__init__.py index 4d21ee850..bee5417ae 100644 --- a/src/squidpy/experimental/pl/__init__.py +++ b/src/squidpy/experimental/pl/__init__.py @@ -1,3 +1,5 @@ from __future__ import annotations -__all__ = [] +from ._tiling_qc import tiling_qc + +__all__ = ["tiling_qc"] diff --git a/src/squidpy/experimental/pl/_tiling_qc.py b/src/squidpy/experimental/pl/_tiling_qc.py new file mode 100644 index 000000000..c17f33448 --- /dev/null +++ b/src/squidpy/experimental/pl/_tiling_qc.py @@ -0,0 +1,75 @@ +"""Diagnostic plot for tiling segmentation QC.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from spatialdata import SpatialData + +__all__ = ["tiling_qc"] + + +def tiling_qc( + sdata: SpatialData, + labels_key: str, + qc_key: str | None = None, + score_col: str = "cut_score", + cmap: str = "Reds", + figsize: tuple[float, float] | None = None, + **kwargs: Any, +) -> None: + """Plot labels coloured by their tiling-artifact score. + + Uses :mod:`spatialdata_plot` to render the label element coloured + by the chosen QC score from the linked table. If tile-boundary + artifacts are present the tile grid emerges as lines of + high-scoring cells. + + Parameters + ---------- + sdata + SpatialData object (must contain the QC table). + labels_key + Key in ``sdata.labels`` with the segmentation mask. + qc_key + Key in ``sdata.tables`` with the QC AnnData. Defaults to + ``"{labels_key}_qc"``. + score_col + Which ``.obs`` column to colour by. One of + ``"cut_score"``, ``"max_straight_edge_ratio"``, + ``"cardinal_alignment_score"``. + cmap + Matplotlib colormap name. + figsize + Figure size passed to :meth:`spatialdata.SpatialData.pl.show`. + **kwargs + Forwarded to :meth:`spatialdata.SpatialData.pl.render_labels`. + """ + import spatialdata_plot # noqa: F401 — registers accessor + + table_key = qc_key if qc_key is not None else f"{labels_key}_qc" + if table_key not in sdata.tables: + raise ValueError( + f"QC table '{table_key}' not found in sdata.tables. " + f"Run calculate_tiling_qc(sdata, labels_key='{labels_key}') first." + ) + + adata = sdata.tables[table_key] + if score_col not in adata.obs.columns: + raise ValueError( + f"Score column '{score_col}' not found in .obs. " + f"Available: {[c for c in adata.obs.columns if c not in ('region', 'label_id')]}" + ) + + show_kwargs: dict[str, Any] = {} + if figsize is not None: + show_kwargs["figsize"] = figsize + + sdata.pl.render_labels( + element=labels_key, + color=score_col, + table_name=table_key, + cmap=cmap, + **kwargs, + ).pl.show(**show_kwargs) diff --git a/src/squidpy/experimental/tl/__init__.py b/src/squidpy/experimental/tl/__init__.py new file mode 100644 index 000000000..e4e529771 --- /dev/null +++ b/src/squidpy/experimental/tl/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from ._tiling_qc import calculate_tiling_qc + +__all__ = ["calculate_tiling_qc"] diff --git a/src/squidpy/experimental/tl/_tiling_qc.py b/src/squidpy/experimental/tl/_tiling_qc.py new file mode 100644 index 000000000..92af45200 --- /dev/null +++ b/src/squidpy/experimental/tl/_tiling_qc.py @@ -0,0 +1,475 @@ +"""QC metrics for detecting tile-boundary segmentation artifacts. + +Cells cut by tile borders during segmentation have characteristic +straight edges that natural cell boundaries never produce. This module +computes per-cell metrics that quantify this artifact: + +- **max_straight_edge_ratio**: length of the longest straight contour + segment normalised by the cell's equivalent diameter. +- **cardinal_alignment_score**: how closely that segment aligns with + 0° or 90° (axis-aligned tile borders). +- **cut_score**: product of the two, combining evidence from shape and + orientation. + +All heavy computation is done per-tile via the tiling infrastructure +in :mod:`squidpy.experimental.im._tiling`, so this scales to +100k×100k images without materialising the full array. +""" + +from __future__ import annotations + +from typing import Literal + +import anndata as ad +import numpy as np +import pandas as pd +import xarray as xr +from joblib import Parallel, delayed +from skimage.measure import find_contours, regionprops +from spatialdata import SpatialData +from spatialdata._logging import logger as logg +from spatialdata.models import TableModel + +from squidpy.experimental.im._tiling import ( + build_tile_specs, + compute_cell_info, + compute_cell_info_multiscale, + compute_cell_info_tiled, + extract_labels_tile_lazy, +) + +__all__ = ["calculate_tiling_qc"] + +# Minimum cell area in pixels — smaller cells produce noisy contours +_MIN_CELL_AREA = 20 + +# Default perpendicular distance tolerance for collinearity (pixels). +# Points within this distance of the start→end line are considered +# part of the same straight segment. 0.75 px works well for +# sub-pixel contours from marching squares. +_DEFAULT_DISTANCE_TOL = 0.75 + + +# --------------------------------------------------------------------------- +# Core geometry +# --------------------------------------------------------------------------- + + +def _longest_collinear_segment( + contour: np.ndarray, + distance_tol: float = _DEFAULT_DISTANCE_TOL, +) -> tuple[float, float]: + """Find the longest collinear run of contour points. + + Uses a two-pointer approach: for each start index, extend the end + index as long as all intermediate points are within + ``distance_tol`` of the line from start to end. Early exit when + remaining points cannot beat the current best. + + Parameters + ---------- + contour + ``(N, 2)`` array of ``(row, col)`` contour coordinates. + distance_tol + Maximum perpendicular distance (pixels) from the start→end + line for a point to be considered part of the straight segment. + + Returns + ------- + run_length + Euclidean length of the longest straight segment (pixels). + run_angle + Angle (radians, ``[-π, π]``) of that segment. + """ + n = len(contour) + if n < 3: + return 0.0, 0.0 + + best_len = 0.0 + best_angle = 0.0 + + # Precompute cumulative arc length for early-exit bound + diffs_all = np.diff(contour, axis=0) + seg_lengths = np.sqrt((diffs_all**2).sum(axis=1)) + cum_arc = np.zeros(n, dtype=np.float64) + cum_arc[1:] = np.cumsum(seg_lengths) + total_arc = cum_arc[-1] + + for start in range(n - 2): + # Upper bound: arc length from start to end of contour + remaining_arc = total_arc - cum_arc[start] + if remaining_arc <= best_len: + break # no start from here or later can beat best + + for end in range(start + 2, n): + p0 = contour[start] + p1 = contour[end] + d = p1 - p0 + seg_len = np.sqrt(d[0] ** 2 + d[1] ** 2) + if seg_len < 1e-12: + continue + + # Perpendicular distances of all intermediate points + intermediates = contour[start + 1 : end] + rel = intermediates - p0 + cross = np.abs(d[0] * rel[:, 1] - d[1] * rel[:, 0]) + max_perp = cross.max() / seg_len + + if max_perp > distance_tol: + break + + if seg_len > best_len: + best_len = seg_len + best_angle = float(np.arctan2(d[0], d[1])) + + return best_len, best_angle + + +def _cardinal_alignment(angle: float) -> float: + """Score how close an angle is to a cardinal direction (0° or 90°). + + Returns a value in ``[0, 1]`` where 1 means perfectly axis-aligned + and 0 means maximally diagonal (45°). + """ + # Normalise angle to [0, π) — direction, not orientation + a = abs(angle) % np.pi + + # Distance to nearest cardinal: 0, π/2, π + dist = min(a, abs(a - np.pi / 2), abs(a - np.pi)) + + # Map [0, π/4] → [1, 0] + return float(1.0 - dist / (np.pi / 4)) + + +def _straight_edge_metrics( + contour: np.ndarray, + cell_area: float, + distance_tol: float = _DEFAULT_DISTANCE_TOL, +) -> tuple[float, float, float]: + """Compute straight-edge metrics for a single cell contour. + + Parameters + ---------- + contour + ``(N, 2)`` contour coordinates from :func:`skimage.measure.find_contours`. + cell_area + Area of the cell in pixels (for normalisation). + distance_tol + Perpendicular distance tolerance for collinearity (pixels). + + Returns + ------- + straight_edge_ratio + Longest collinear segment / equivalent diameter. + cardinal_score + Cardinal alignment of the longest straight segment. + cut_score + Product of the two. + """ + eq_diam = np.sqrt(4 * cell_area / np.pi) + if eq_diam == 0: + return 0.0, 0.0, 0.0 + + run_length, run_angle = _longest_collinear_segment(contour, distance_tol) + straight_ratio = run_length / eq_diam + cardinal = _cardinal_alignment(run_angle) + cut_score = straight_ratio * cardinal + + return float(straight_ratio), float(cardinal), float(cut_score) + + +# --------------------------------------------------------------------------- +# Per-tile scoring +# --------------------------------------------------------------------------- + + +def _score_tile( + tile_labels: np.ndarray, + distance_tol: float = _DEFAULT_DISTANCE_TOL, + min_area: int = _MIN_CELL_AREA, + downsample: int = 1, +) -> pd.DataFrame: + """Compute tiling QC metrics for all cells in a numpy label tile. + + Parameters + ---------- + tile_labels + ``(H, W)`` label array (background = 0, owned cells only). + distance_tol + Perpendicular distance tolerance for collinearity (pixels). + min_area + Cells smaller than this (in pixels at analysis resolution) + are skipped and get NaN values. + downsample + Factor by which to downsample each cell's bounding-box crop + before contour extraction. ``1`` = full resolution, ``2`` = + half, etc. Straight edges are scale-invariant so moderate + downsampling (2–4x) is safe and much faster for large cells. + + Returns + ------- + DataFrame with columns ``max_straight_edge_ratio``, + ``cardinal_alignment_score``, ``cut_score``, indexed by cell label. + """ + regions = regionprops(tile_labels) + if not regions: + return pd.DataFrame( + columns=["max_straight_edge_ratio", "cardinal_alignment_score", "cut_score"], + dtype=float, + ) + + rows: dict[int, dict[str, float]] = {} + + for region in regions: + lid = region.label + area = region.area + + if area < min_area * (downsample**2): + rows[lid] = { + "max_straight_edge_ratio": np.nan, + "cardinal_alignment_score": np.nan, + "cut_score": np.nan, + } + continue + + # Extract bbox crop for efficient contour finding. + # Pad with 1px of zeros so find_contours can trace cells + # that touch the crop edge (e.g., cells filling their bbox). + min_row, min_col, max_row, max_col = region.bbox + crop = (tile_labels[min_row:max_row, min_col:max_col] == lid).astype(np.float32) + crop = np.pad(crop, 1, mode="constant", constant_values=0) + + # Optionally downsample for speed + if downsample > 1: + crop = crop[::downsample, ::downsample] + + contours = find_contours(crop, 0.5) + if not contours: + rows[lid] = { + "max_straight_edge_ratio": np.nan, + "cardinal_alignment_score": np.nan, + "cut_score": np.nan, + } + continue + + # Use the longest contour (exterior boundary) + contour = max(contours, key=len) + + # Scale area to analysis resolution for consistent normalisation + analysis_area = area / (downsample**2) if downsample > 1 else area + + ser, cas, cs = _straight_edge_metrics(contour, analysis_area, distance_tol) + + rows[lid] = { + "max_straight_edge_ratio": ser, + "cardinal_alignment_score": cas, + "cut_score": cs, + } + + return pd.DataFrame.from_dict(rows, orient="index") + + +# --------------------------------------------------------------------------- +# Centroid computation (shared logic with _feature.py) +# --------------------------------------------------------------------------- + + +def _compute_centroids_for_labels( + sdata: SpatialData, + labels_key: str, + labels_da: xr.DataArray, + scale: str | None, +) -> dict: + """Compute cell centroids using the most efficient strategy available.""" + if isinstance(sdata.labels[labels_key], xr.DataTree): + logg.info("Computing centroids from coarse scale.") + return compute_cell_info_multiscale( + sdata.labels[labels_key], target_scale=scale or "scale0" + ) + + n_pixels = labels_da.sizes.get("y", 1) * labels_da.sizes.get("x", 1) + if n_pixels <= 4096 * 4096: + lbl_np = labels_da.values + if lbl_np.ndim > 2: + lbl_np = lbl_np.squeeze() + return compute_cell_info(lbl_np) + + logg.info("Computing centroids in tiled mode (large single-scale labels).") + return compute_cell_info_tiled(labels_da) + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +_METHOD_KEY = "tiling_qc" + + +def calculate_tiling_qc( + sdata: SpatialData, + labels_key: str, + scale: str | None = None, + tile_size: int = 2048, + overlap_margin: int | Literal["auto"] = "auto", + distance_tol: float = _DEFAULT_DISTANCE_TOL, + min_area: int = _MIN_CELL_AREA, + downsample: int = 1, + n_jobs: int = 1, + adata_key_added: str | None = None, + inplace: bool = True, +) -> ad.AnnData | None: + """Score cells for tile-boundary segmentation artifacts. + + Computes per-cell metrics that detect artificially straight edges + caused by tiled segmentation. Large images are processed via the + same tiling infrastructure as + :func:`~squidpy.experimental.im.calculate_image_features`. + + Results are stored in a QC table (default + ``sdata.tables["{labels_key}_qc"]``). Scores live in ``.obs``; + the ``.X`` matrix is empty. Algorithm parameters are recorded in + ``.uns["tiling_qc"]``. + + Parameters + ---------- + sdata + SpatialData object. + labels_key + Key in ``sdata.labels`` with segmentation masks. + scale + Scale level for multi-scale labels. + tile_size + Side length of the tiling grid (pixels). + overlap_margin + Overlap around each tile. ``"auto"`` computes the minimum from + the largest cell's bounding box. + distance_tol + Maximum perpendicular distance (pixels) from the fitted line + for a contour point to be considered part of a straight + segment. Default 0.75 px. + min_area + Cells smaller than this (pixels) are skipped (NaN scores). + downsample + Factor by which to downsample each cell's bounding-box crop + before contour extraction. Straightness is scale-invariant, + so ``2``–``4`` is safe and much faster on large cells. + n_jobs + Number of parallel jobs for tile processing. + adata_key_added + Key under which to store the result in ``sdata.tables``. + Defaults to ``"{labels_key}_qc"``. + inplace + If ``True``, store result in ``sdata.tables``. Otherwise + return the AnnData directly. + + Returns + ------- + :class:`~anndata.AnnData` when ``inplace=False``, otherwise ``None``. + The AnnData ``.obs`` contains three scores per cell: + + - ``max_straight_edge_ratio``: longest collinear boundary segment / + equivalent diameter. + - ``cardinal_alignment_score``: axis-alignment of that segment + (1 = cardinal, 0 = diagonal). + - ``cut_score``: product of the two. + """ + # --- Validate --- + if labels_key not in sdata.labels: + raise ValueError( + f"Labels key '{labels_key}' not found, valid keys: {list(sdata.labels.keys())}" + ) + + # --- Resolve labels DataArray (stays lazy) --- + labels_node = sdata.labels[labels_key] + if isinstance(labels_node, xr.DataTree): + if scale is None: + raise ValueError("When using multi-scale labels, please specify the scale.") + labels_da = labels_node[scale].ds["image"] + else: + labels_da = labels_node + + # --- Compute centroids --- + cell_info = _compute_centroids_for_labels(sdata, labels_key, labels_da, scale) + if not cell_info: + raise ValueError("No cells found in labels (all zeros).") + + H = int(labels_da.sizes.get("y", labels_da.shape[-2])) + W = int(labels_da.sizes.get("x", labels_da.shape[-1])) + + # --- Build tile specs --- + specs = build_tile_specs( + (H, W), cell_info, tile_size=tile_size, overlap_margin=overlap_margin + ) + logg.info( + f"Tiling QC: {len(specs)} tiles ({tile_size}x{tile_size}, " + f"margin={overlap_margin}, downsample={downsample}x)." + ) + + # --- Process tiles (labels only — no image needed) --- + def _process_one(spec, idx): + tile_lbl = extract_labels_tile_lazy(labels_da, spec) + logg.debug(f"Tiling QC tile {idx + 1}/{len(specs)}: {len(spec.owned_ids)} cells.") + return _score_tile(tile_lbl, distance_tol=distance_tol, min_area=min_area, downsample=downsample) + + results = Parallel(n_jobs=n_jobs, prefer="threads")( + delayed(_process_one)(spec, i) for i, spec in enumerate(specs) + ) + tile_dfs = [df for df in results if not df.empty] + + if not tile_dfs: + raise ValueError("No cells scored — labels may be empty or all below min_area.") + + combined = pd.concat(tile_dfs, axis=0).sort_index() + + # Sanity: each cell should appear in exactly one tile + if combined.index.duplicated().any(): + dups = combined.index[combined.index.duplicated()].unique().tolist() + raise RuntimeError( + f"Duplicate cell IDs across tiles — tile ownership may be broken. " + f"Duplicates: {dups}" + ) + + # --- Build AnnData (scores in .obs, empty .X) --- + n_cells = len(combined) + adata = ad.AnnData( + X=np.empty((n_cells, 0), dtype=np.float32), + ) + adata.obs_names = [f"cell_{i}" for i in combined.index] + + # Spatialdata linking + adata.obs["region"] = pd.Categorical([labels_key] * n_cells) + adata.obs["label_id"] = combined.index.values + adata.uns["spatialdata_attrs"] = { + "region": labels_key, + "region_key": "region", + "instance_key": "label_id", + } + + # QC scores in obs + for col in combined.columns: + adata.obs[col] = combined[col].values + + # Centroids (already computed without materialising the full array) + adata.obs["centroid_y"] = np.array( + [cell_info[lid].centroid_y for lid in combined.index] + ) + adata.obs["centroid_x"] = np.array( + [cell_info[lid].centroid_x for lid in combined.index] + ) + + # Algorithm parameters in uns + adata.uns[_METHOD_KEY] = { + "scale": scale, + "tile_size": tile_size, + "overlap_margin": overlap_margin, + "distance_tol": distance_tol, + "min_area": min_area, + "downsample": downsample, + } + + if inplace: + table_key = adata_key_added if adata_key_added is not None else f"{labels_key}_qc" + sdata.tables[table_key] = TableModel.parse(adata) + return None + return adata diff --git a/tests/_images/TilingQCVisual_tiling_qc_cut_score.png b/tests/_images/TilingQCVisual_tiling_qc_cut_score.png new file mode 100644 index 0000000000000000000000000000000000000000..5097a44fffbbd03ffad27fbcf6816d9e0654f1d6 GIT binary patch literal 7213 zcmV+|9Ma>7P)005u}1^@s6i_d2*0000wbVXQnQ*UN; zcVTj608L?ZaBOdMY-wU3c4cyNX>V>bE-^4JI4mG&baZfYIxjD6VRUe8Z***FVlHoT zXD`89buV3H5*5wDSX*VEG_~x5$)B_JZ zpiVg91hsPIN;PiWIJITV7S-O~u8utNNHu8CAT?pa1ohEJAF0{1XRF=2cdKWgeO5jB zhAAeL^w{BIlX3a|bG%|(40!F}+B}>$@Wy{pkrAyU;0|)f6#~*)O&6_t*J@wR6 z+N%Be=b!3=3og*_Iq$sl)O+u}r$&w(sjj~IYBhN9VD;gLAEtd8nLAEke^see z(vOyw7JaOuYpc5R&O6oKy?fR0;louC^*R*A5F7z_+;NAx z<(6C2amO83CEc`X)3gO-SjO`8>(@`MS+hnx^w2|k$el7}idw#WxmvYqmHOk4KQuXy zIp&zOPbD*GDFV^%cy#fgmkUJWOyHB4!s;#X}O`JHf&Yv&P)e(t+ z_uO-jcBXNz$BY@H-hKDo#%Y5o>sCGf`RAXjUw-*T`!`1% zaYX#BERak5{rBJMth3HiC!KVX=j;3T?~g{n0Rsl8ZQHh~{{8#w*`6JMB{VT?*f5nA zI3aM@VTToMQ0cZo;-4<*u~?R7g$3Q6=;>JUGxc@_nhUL2A|J%twQE;(nGfM7 zQg@>1%|JpqFV3YI&Znse*wN8(2>Aei|Ni^$s%OuhxxZkX3ppY@vy_Qwqg0CD1LFHx zljoTcghwNOr>O`iM6(4B;NZc7T2+AtF|XjfPz#A`v2Wi#eZ%$Z*Do?Tf~+Wg_1ka1 z>AVzKn@5mv^wCG_)PO9)w^iSN|Gk`XD|KGQ7U)7^W4=p#36Lcy$6Mo>r zF+}Il3es+fAmlRz1HF6qb|#<@j~f;8VnMN_@_EPy$t79JRL1PtvqzIKi^nl-e_nkWBlvAooj3}U!?2a?R9S!lr z2pB}-q+quJ0y6YM8XLQI?b0L+J1@S2?`1e8uLQEuCr2H1ls?Am92Xgu{agG`|0PX8 z+EcR{=HiPlR@Ys3owj-me=olHqCO7E&;*(UtOs;n5c%Txel7qPSwyr+TT(yK`TqU; zoh4I3Kob}SjzLHvpWp9eNOENXH*DCTF2DS8?L$CDNC$rY`RD%#3{5>?R1KO~Si5sg za2L5)3W2o2_Prn=uQh4{;ME4mFR7e1N~MxhTR0Ca9{pgB7Faco;UjzXpLvYm3F_lU zz(o-2OTg1kJ56WtV*zK*oT=V^`)&P<*^kTm0Xm;fgoJ)@8GgNB`(9WId?=13*h2&g+R=h&7mCfiIoi_VVED5CNeQM#{&3SiF$_1AYwz%B>W=2AEFQ* zsR!LFlMO~7`Cr=3Y#5Sz`I@}1mT*r}$V98;4!42jtF`lFXb%FzpI7WLZg3u6=g=#{ z3J>q&JMgP`nBT{1xJq8+JNDRPGfTeASU^ChQr<#>sQk|bs;Be#4u&|d5QzAMNd#yi z2*bT;1^L~+ef!2TBNr=+C1gTk2B5P{zDyC2j$$aL(?DDohB_?7+aQ|@7{++7OkSj2 z^;}5tW3fh%Lo8M)Msi3yr67`6%&a3b1Qf&_qvN7*;k`)cHH)+b5diYkYfbnp--DHE zWGu2JXF`YU-y&8p)qF%DA*(w0lMnz4!7+Q1!gv1q>#zU$Zf4DXGwNn1NSCvKUVspU zO&|jP9K^wTVNqQDMBc-&7qpl!!><<_xmwee(V7<-lWBn<2@itTtUWV4x&(7U9{i%N zgxp^ZER*Q$>ID|jZ&vI|@B&RnK#36s1(p56EVa&>?cX98Udw7sfpGlic1LP8e*Ab{ zaC^?2IeMsL_@f9nWoB2bSfMZY+_`hLPA~~zm6$}Zj*A~?`f0ot=aQOCSj6WOtC+^n zT`rfas~gQac{zlD2p6Az`e_~1gor3F9((LDwRrJjwR-hxT@n8E*I(B|9^|CWfMa7A2# z)Ouj5U9u=htrcZU$V$J;%1s`Gxvqj#-`fc?ZQs8A5Dg&16CHxauBi0hc$7WF1rV{B z>dO0uVFnHysGV;#f%rqn@wxLTQpp{*V8Mb|1gVWcGqjap97p@@<-j=GP!O=e z5TbeEV8$mBwiHeh`{P^^EC|>@h;rM{%Js@{zAljUcC-Bi!J`&d$^-$q5Jd6K;*)lI zVHP3nWi^P-H>WeSF&=z6QqD57? zR;|b?@dT~~eXwwl6`7hB@NM6hSKA$HkOFia4-bv^WKbmZpp;KF8o2o^ADKz|oTEQai?*5RuaTqDlEQP;}1 zM+XcTpytn?uWLoa@=>EksV!T!sGD!TSPPO!KFQpqc9^@aB{v9qh{W2q#;KTE!MK|rfAmY?NIGAFmQGP5MeZ3-5SKmK_A zTF??EL!{R%7xNg#bc95UHOM7BZ0; zrNpyjSxFSXSUIdDzbgqpJZHsG_kv&BXgB_kj~?7 zCqBk7jPD4rWHMW}FO=-1;Wex?DM>2S;ulaZ8Jo^XXh?$KwAODZQfAU8LCO&`fF z0aM_ZYO=ozRIW)CEz6)eAnZ0`UgeR6PS)$GMRaaf5 zw`6s#j_O*Vl1O}zb1YTwMZoG8jD;v(8Z=iOoVWnIUnVd-b7RyS8}n;}#Q4nD_w3oD zt##b-xI7i8tP-%TtxZ28NF6a^MAgECRY!FNpAU;qnIQx&3f2%(@_>lk$=t;*RYHRx zj=H@f04bx8A&SrH731rytng$ZAeZr8;U9aHo3>sbT4TjdGazUaYL_8|+Fxg5jpC zA(l<7vzJBVyXjX`+TSN6JyTX_WJ!`6A2)#;#f!{&M3sPcHB=k*R!60ynXQ_EW2yX} z=B&(>c(yO+1yM?KPWDFIlAZ~XIDbjVD*61jgsEgQD3BW6Ocb*+lg&*S2BPIPM^+%h zPQSPz1eIK2JLVWg(0TCSK|L|_dkrEs!|qLqUwygjqgtLUAt=lvVA!m62dB=8Y$^yY zS$cUuJ{iyexM~h%!i;$(Ln4!YO=*Xb<@uldW1N2?rdkrai;XGrBwhftR%tonY97mdO;B5Q`_IYt&|k zJ$m%Wd|}HdXpY;c4oHC52E$;~p4ft%f2v({RB)xfc zsWQ0{WppzVFHjH2nlG7VY}q!^}q35onFX-SBMB{x~UV94MhDqC z=bBI?>6uAbzJnos!Dx}?{E^&QKp{?$YM6`fw|k`t*|Qiw5X;77MyfD)Wx|P2fQu>{ z*x2n7O<2fKo+Eh8L485D!AR2ubbl^O8XB2nZkp7S8NkE)o`@o();#a}v*kd{V|&3a87~ zq*CTBZmZH+sS+kc=EiU>8Inijl7K)ih#4@BD*)_QmB8ypfE;n{yU|%G8EjfnNY71% zn_&J%lyl8;23^B%D>{`9lm{0yE7eGYzaFqZx8G_h7Ebi_*I%#oe>5Bm!|@khcp;J?RaPpzHb$ZIq%K}om#zE?DcOi) z4>p&4V2Eih?n@$6nEZ{;WVwV#m4KWGf>g})g%@6^rCC(Pnm&EH+Prylpdi&t4~X9+ zC39_XH}gIcY~sxDk5dQ7L_xu94?k!DF) zCXTk319l`Acv)jvcb2km-1KHW>8unYcOu~W_3PD{XP&7(`|L9<$sap*>>&cwAVI3F z2jp&b-96kTU>p@$5CcSMLuaLMd(dbgH$&J?nIJ2hW=GNk(&#Bursx>jwbx#&zW@Gv zHFM@n?W<8QdcuSWzJk=SN&`XQz4dN-oCa#GTIz+CN?0c-xGv$JAY^r-;Y7eoF1bWM zy1F3M1tQ5Z*PQFw&b6hbCDa#=GV9F~vKt_35X@DES_mT*N==^)GV-0&h^ZAld-kku zwrHaWL`Q%uYq`z_d2dpi!$2C@xS0zsL%(Y{ND4=KO&FL^)#J;SD)}b_qD>GAtH-)Z zP#tT{IZDtxD}-){1>_{{-M+mbS^tfMlsh9NWQIA!=fdicfbV(}1g*i=QV-BB=f+Sh zH?LIas)u~`$_h6MzmpZK+7JSY8pW^1vA{*KQV+Z;+S`yVtXyVnVRie=^woMskHcb0 z1(n&geT@F_THZ?o4f}OEF=(?qUTdUdElXQrD+MA>bwSw0%F{$)-@bjmK4()*LL{?{ zu#V(X*xI^Af!~s3r7%Dm*Wp*BSzfLM$9X->N+yX?rJwys#?4uee;EfF{>%1Gk4fwa z6110x3E2+JwLMZj!G`PslEql&;UQKz?J^!s8-@v+5J>PhhOokyE?2u>S*D+}ftuV2nE4muZ7Lg4BB2Cv&0|qwW-g!0BKZX74J*5W zvRWhML$VN%_7tR!A3t8*b=O_GMj&?yJErp*CEM_Qo_OMk#FQ3vqP_G5@M}08mVo5a z!7^#_)6HciXj@BKrim;N&Pk980l6snOXr_|zV?SGQpt|#l=HP2YfY`aI15r|&z`N4hD#^fi8|6*wd`1wb>g`|;*^bW z8*Eocd`zMdwPxOxvfi0tc_or3ApvB3MrOwhr%Yhdj`Z7*YOeUMe^&}?Ms1_tY*yl* zjoM4tYgtcrcCU4^lj%61ti9SIO}h$}EJ2B%$_1c9m{bu$@mjx41LC`xTY$AxeApS8*R?Nzj!H_EMvZW%Y^%2nh*xM;(qcPe4wZ4z*{)bhPLRgSNQGXURg{W{EZ0 zg*D5S<5+qo4KD38#j8>_&UGI-87UY(dB_TpOPw1@_4Fel6UL~u2s2GUz{zlD)5+oh z(-Cx>d7^%y7?*6aBtZMTs=C_JDA%^uB3>1Ns%bf;NC3nMODPa>gRGU?fY4FP0%zI> zv}dUdr98Zf0eN*E7A5W6Sl5|r5|HIaFSypX5$Tzq%qwaYhRj>QC{~hTh>kRel&s8b z!z^338aBg;6H$COAz@g$gVXP-CV|YDdAO9(O&XGs=C@SWH-QRPGLo2WkcnQ@K9k{? zo1KXjH@=)>mn2|bthB6e3%YgdrU{V5o^(=TSbRZ#QzkF9kl!qEY#xh(f`ADRb6hM5 z8};bXqZ&w0YN8}$i!(G;kL4~1m@tF@S(zNAjY9~?I6ivxX!Y>J533a`R#dmbLJzoV z)hd0A_spF;R}CFHG`&_AXc`=1JK3@o;ejj_J@UvS>h#l3SF2aA))ldyd+s^4bm>yH zc=6)&T3nzxE#UIy%hiYxBeYYBeh>`fGLWx#ucB5SXtL@U_=oQ~mq**Sf%uKmJ&qbIv(c{onB6!?nZA@Jk@mYlA;X zn|6}RrcImlbDHi!(T4wnSZq*W8je*7_{JM==;!(K=j-aI5OUV6S=y>mE1Fr_)TvW- z3~9lF1)hRb`|q?+v>`if2m-?9mU0ri>J=#9gKVr*73i{vB@RfsW_x?PUi@!sYf}>^ zPE<*N?IXzG!*884XHMc_qm3Iks)Y*|>Ln=>*JWp=B-`+9%=^34ikd@sixw>^fAGNv<#FT2l@A;^P~N$7 zXZgk(Z)`{#R(}ewBAs^GvSrol;Ih9%=_#M00000NkvXXu0mjfT56yb literal 0 HcmV?d00001 diff --git a/tests/_images/TilingQCVisual_tiling_qc_straight_edge_ratio.png b/tests/_images/TilingQCVisual_tiling_qc_straight_edge_ratio.png new file mode 100644 index 0000000000000000000000000000000000000000..5097a44fffbbd03ffad27fbcf6816d9e0654f1d6 GIT binary patch literal 7213 zcmV+|9Ma>7P)005u}1^@s6i_d2*0000wbVXQnQ*UN; zcVTj608L?ZaBOdMY-wU3c4cyNX>V>bE-^4JI4mG&baZfYIxjD6VRUe8Z***FVlHoT zXD`89buV3H5*5wDSX*VEG_~x5$)B_JZ zpiVg91hsPIN;PiWIJITV7S-O~u8utNNHu8CAT?pa1ohEJAF0{1XRF=2cdKWgeO5jB zhAAeL^w{BIlX3a|bG%|(40!F}+B}>$@Wy{pkrAyU;0|)f6#~*)O&6_t*J@wR6 z+N%Be=b!3=3og*_Iq$sl)O+u}r$&w(sjj~IYBhN9VD;gLAEtd8nLAEke^see z(vOyw7JaOuYpc5R&O6oKy?fR0;louC^*R*A5F7z_+;NAx z<(6C2amO83CEc`X)3gO-SjO`8>(@`MS+hnx^w2|k$el7}idw#WxmvYqmHOk4KQuXy zIp&zOPbD*GDFV^%cy#fgmkUJWOyHB4!s;#X}O`JHf&Yv&P)e(t+ z_uO-jcBXNz$BY@H-hKDo#%Y5o>sCGf`RAXjUw-*T`!`1% zaYX#BERak5{rBJMth3HiC!KVX=j;3T?~g{n0Rsl8ZQHh~{{8#w*`6JMB{VT?*f5nA zI3aM@VTToMQ0cZo;-4<*u~?R7g$3Q6=;>JUGxc@_nhUL2A|J%twQE;(nGfM7 zQg@>1%|JpqFV3YI&Znse*wN8(2>Aei|Ni^$s%OuhxxZkX3ppY@vy_Qwqg0CD1LFHx zljoTcghwNOr>O`iM6(4B;NZc7T2+AtF|XjfPz#A`v2Wi#eZ%$Z*Do?Tf~+Wg_1ka1 z>AVzKn@5mv^wCG_)PO9)w^iSN|Gk`XD|KGQ7U)7^W4=p#36Lcy$6Mo>r zF+}Il3es+fAmlRz1HF6qb|#<@j~f;8VnMN_@_EPy$t79JRL1PtvqzIKi^nl-e_nkWBlvAooj3}U!?2a?R9S!lr z2pB}-q+quJ0y6YM8XLQI?b0L+J1@S2?`1e8uLQEuCr2H1ls?Am92Xgu{agG`|0PX8 z+EcR{=HiPlR@Ys3owj-me=olHqCO7E&;*(UtOs;n5c%Txel7qPSwyr+TT(yK`TqU; zoh4I3Kob}SjzLHvpWp9eNOENXH*DCTF2DS8?L$CDNC$rY`RD%#3{5>?R1KO~Si5sg za2L5)3W2o2_Prn=uQh4{;ME4mFR7e1N~MxhTR0Ca9{pgB7Faco;UjzXpLvYm3F_lU zz(o-2OTg1kJ56WtV*zK*oT=V^`)&P<*^kTm0Xm;fgoJ)@8GgNB`(9WId?=13*h2&g+R=h&7mCfiIoi_VVED5CNeQM#{&3SiF$_1AYwz%B>W=2AEFQ* zsR!LFlMO~7`Cr=3Y#5Sz`I@}1mT*r}$V98;4!42jtF`lFXb%FzpI7WLZg3u6=g=#{ z3J>q&JMgP`nBT{1xJq8+JNDRPGfTeASU^ChQr<#>sQk|bs;Be#4u&|d5QzAMNd#yi z2*bT;1^L~+ef!2TBNr=+C1gTk2B5P{zDyC2j$$aL(?DDohB_?7+aQ|@7{++7OkSj2 z^;}5tW3fh%Lo8M)Msi3yr67`6%&a3b1Qf&_qvN7*;k`)cHH)+b5diYkYfbnp--DHE zWGu2JXF`YU-y&8p)qF%DA*(w0lMnz4!7+Q1!gv1q>#zU$Zf4DXGwNn1NSCvKUVspU zO&|jP9K^wTVNqQDMBc-&7qpl!!><<_xmwee(V7<-lWBn<2@itTtUWV4x&(7U9{i%N zgxp^ZER*Q$>ID|jZ&vI|@B&RnK#36s1(p56EVa&>?cX98Udw7sfpGlic1LP8e*Ab{ zaC^?2IeMsL_@f9nWoB2bSfMZY+_`hLPA~~zm6$}Zj*A~?`f0ot=aQOCSj6WOtC+^n zT`rfas~gQac{zlD2p6Az`e_~1gor3F9((LDwRrJjwR-hxT@n8E*I(B|9^|CWfMa7A2# z)Ouj5U9u=htrcZU$V$J;%1s`Gxvqj#-`fc?ZQs8A5Dg&16CHxauBi0hc$7WF1rV{B z>dO0uVFnHysGV;#f%rqn@wxLTQpp{*V8Mb|1gVWcGqjap97p@@<-j=GP!O=e z5TbeEV8$mBwiHeh`{P^^EC|>@h;rM{%Js@{zAljUcC-Bi!J`&d$^-$q5Jd6K;*)lI zVHP3nWi^P-H>WeSF&=z6QqD57? zR;|b?@dT~~eXwwl6`7hB@NM6hSKA$HkOFia4-bv^WKbmZpp;KF8o2o^ADKz|oTEQai?*5RuaTqDlEQP;}1 zM+XcTpytn?uWLoa@=>EksV!T!sGD!TSPPO!KFQpqc9^@aB{v9qh{W2q#;KTE!MK|rfAmY?NIGAFmQGP5MeZ3-5SKmK_A zTF??EL!{R%7xNg#bc95UHOM7BZ0; zrNpyjSxFSXSUIdDzbgqpJZHsG_kv&BXgB_kj~?7 zCqBk7jPD4rWHMW}FO=-1;Wex?DM>2S;ulaZ8Jo^XXh?$KwAODZQfAU8LCO&`fF z0aM_ZYO=ozRIW)CEz6)eAnZ0`UgeR6PS)$GMRaaf5 zw`6s#j_O*Vl1O}zb1YTwMZoG8jD;v(8Z=iOoVWnIUnVd-b7RyS8}n;}#Q4nD_w3oD zt##b-xI7i8tP-%TtxZ28NF6a^MAgECRY!FNpAU;qnIQx&3f2%(@_>lk$=t;*RYHRx zj=H@f04bx8A&SrH731rytng$ZAeZr8;U9aHo3>sbT4TjdGazUaYL_8|+Fxg5jpC zA(l<7vzJBVyXjX`+TSN6JyTX_WJ!`6A2)#;#f!{&M3sPcHB=k*R!60ynXQ_EW2yX} z=B&(>c(yO+1yM?KPWDFIlAZ~XIDbjVD*61jgsEgQD3BW6Ocb*+lg&*S2BPIPM^+%h zPQSPz1eIK2JLVWg(0TCSK|L|_dkrEs!|qLqUwygjqgtLUAt=lvVA!m62dB=8Y$^yY zS$cUuJ{iyexM~h%!i;$(Ln4!YO=*Xb<@uldW1N2?rdkrai;XGrBwhftR%tonY97mdO;B5Q`_IYt&|k zJ$m%Wd|}HdXpY;c4oHC52E$;~p4ft%f2v({RB)xfc zsWQ0{WppzVFHjH2nlG7VY}q!^}q35onFX-SBMB{x~UV94MhDqC z=bBI?>6uAbzJnos!Dx}?{E^&QKp{?$YM6`fw|k`t*|Qiw5X;77MyfD)Wx|P2fQu>{ z*x2n7O<2fKo+Eh8L485D!AR2ubbl^O8XB2nZkp7S8NkE)o`@o();#a}v*kd{V|&3a87~ zq*CTBZmZH+sS+kc=EiU>8Inijl7K)ih#4@BD*)_QmB8ypfE;n{yU|%G8EjfnNY71% zn_&J%lyl8;23^B%D>{`9lm{0yE7eGYzaFqZx8G_h7Ebi_*I%#oe>5Bm!|@khcp;J?RaPpzHb$ZIq%K}om#zE?DcOi) z4>p&4V2Eih?n@$6nEZ{;WVwV#m4KWGf>g})g%@6^rCC(Pnm&EH+Prylpdi&t4~X9+ zC39_XH}gIcY~sxDk5dQ7L_xu94?k!DF) zCXTk319l`Acv)jvcb2km-1KHW>8unYcOu~W_3PD{XP&7(`|L9<$sap*>>&cwAVI3F z2jp&b-96kTU>p@$5CcSMLuaLMd(dbgH$&J?nIJ2hW=GNk(&#Bursx>jwbx#&zW@Gv zHFM@n?W<8QdcuSWzJk=SN&`XQz4dN-oCa#GTIz+CN?0c-xGv$JAY^r-;Y7eoF1bWM zy1F3M1tQ5Z*PQFw&b6hbCDa#=GV9F~vKt_35X@DES_mT*N==^)GV-0&h^ZAld-kku zwrHaWL`Q%uYq`z_d2dpi!$2C@xS0zsL%(Y{ND4=KO&FL^)#J;SD)}b_qD>GAtH-)Z zP#tT{IZDtxD}-){1>_{{-M+mbS^tfMlsh9NWQIA!=fdicfbV(}1g*i=QV-BB=f+Sh zH?LIas)u~`$_h6MzmpZK+7JSY8pW^1vA{*KQV+Z;+S`yVtXyVnVRie=^woMskHcb0 z1(n&geT@F_THZ?o4f}OEF=(?qUTdUdElXQrD+MA>bwSw0%F{$)-@bjmK4()*LL{?{ zu#V(X*xI^Af!~s3r7%Dm*Wp*BSzfLM$9X->N+yX?rJwys#?4uee;EfF{>%1Gk4fwa z6110x3E2+JwLMZj!G`PslEql&;UQKz?J^!s8-@v+5J>PhhOokyE?2u>S*D+}ftuV2nE4muZ7Lg4BB2Cv&0|qwW-g!0BKZX74J*5W zvRWhML$VN%_7tR!A3t8*b=O_GMj&?yJErp*CEM_Qo_OMk#FQ3vqP_G5@M}08mVo5a z!7^#_)6HciXj@BKrim;N&Pk980l6snOXr_|zV?SGQpt|#l=HP2YfY`aI15r|&z`N4hD#^fi8|6*wd`1wb>g`|;*^bW z8*Eocd`zMdwPxOxvfi0tc_or3ApvB3MrOwhr%Yhdj`Z7*YOeUMe^&}?Ms1_tY*yl* zjoM4tYgtcrcCU4^lj%61ti9SIO}h$}EJ2B%$_1c9m{bu$@mjx41LC`xTY$AxeApS8*R?Nzj!H_EMvZW%Y^%2nh*xM;(qcPe4wZ4z*{)bhPLRgSNQGXURg{W{EZ0 zg*D5S<5+qo4KD38#j8>_&UGI-87UY(dB_TpOPw1@_4Fel6UL~u2s2GUz{zlD)5+oh z(-Cx>d7^%y7?*6aBtZMTs=C_JDA%^uB3>1Ns%bf;NC3nMODPa>gRGU?fY4FP0%zI> zv}dUdr98Zf0eN*E7A5W6Sl5|r5|HIaFSypX5$Tzq%qwaYhRj>QC{~hTh>kRel&s8b z!z^338aBg;6H$COAz@g$gVXP-CV|YDdAO9(O&XGs=C@SWH-QRPGLo2WkcnQ@K9k{? zo1KXjH@=)>mn2|bthB6e3%YgdrU{V5o^(=TSbRZ#QzkF9kl!qEY#xh(f`ADRb6hM5 z8};bXqZ&w0YN8}$i!(G;kL4~1m@tF@S(zNAjY9~?I6ivxX!Y>J533a`R#dmbLJzoV z)hd0A_spF;R}CFHG`&_AXc`=1JK3@o;ejj_J@UvS>h#l3SF2aA))ldyd+s^4bm>yH zc=6)&T3nzxE#UIy%hiYxBeYYBeh>`fGLWx#ucB5SXtL@U_=oQ~mq**Sf%uKmJ&qbIv(c{onB6!?nZA@Jk@mYlA;X zn|6}RrcImlbDHi!(T4wnSZq*W8je*7_{JM==;!(K=j-aI5OUV6S=y>mE1Fr_)TvW- z3~9lF1)hRb`|q?+v>`if2m-?9mU0ri>J=#9gKVr*73i{vB@RfsW_x?PUi@!sYf}>^ zPE<*N?IXzG!*884XHMc_qm3Iks)Y*|>Ln=>*JWp=B-`+9%=^34ikd@sixw>^fAGNv<#FT2l@A;^P~N$7 zXZgk(Z)`{#R(}ewBAs^GvSrol;Ih9%=_#M00000NkvXXu0mjfT56yb literal 0 HcmV?d00001 diff --git a/tests/experimental/conftest.py b/tests/experimental/conftest.py new file mode 100644 index 000000000..4e33b9c3b --- /dev/null +++ b/tests/experimental/conftest.py @@ -0,0 +1,213 @@ +"""Shared fixtures for experimental tests. + +Provides synthetic SpatialData objects for testing segmentation QC metrics. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field + +import dask.array as da +import numpy as np +import pytest +import xarray as xr +from scipy import ndimage +from skimage.draw import ellipse +from spatialdata import SpatialData +from spatialdata.models import Image2DModel, Labels2DModel + +# --------------------------------------------------------------------------- +# Tile-boundary QC fixture +# --------------------------------------------------------------------------- + +_IMAGE_SIZE = 400 +_TILE_BORDERS = (133, 267) # 3×3 grid on 400 px → borders at 133, 267 +_BORDER_GAP = 2 # pixels zeroed at each tile border +_CELL_GAP = 2 # minimum gap between any two cells +_N_CELLS_TARGET = 40 +_SEMI_AXIS_RANGE = (8, 20) # semi-axis lengths in pixels + + +@dataclass +class TileBoundaryGroundTruth: + """Ground-truth metadata for the tile-boundary fixture.""" + + cut_cell_ids: frozenset[int] = field(default_factory=frozenset) + intact_cell_ids: frozenset[int] = field(default_factory=frozenset) + original_n_cells: int = 0 + tile_borders_y: tuple[int, ...] = _TILE_BORDERS + tile_borders_x: tuple[int, ...] = _TILE_BORDERS + + +def _place_ellipsoids( + shape: tuple[int, int], + n_target: int, + semi_range: tuple[int, int], + cell_gap: int, + rng: np.random.Generator, +) -> np.ndarray: + """Place non-overlapping ellipsoids via rejection sampling. + + Returns an ``(H, W)`` int32 label array with IDs 1..N. + """ + H, W = shape + labels = np.zeros(shape, dtype=np.int32) + cell_id = 0 + max_attempts = n_target * 20 # allow generous retries + + for _ in range(max_attempts): + if cell_id >= n_target: + break + + # Random ellipse parameters + cy = rng.integers(semi_range[1] + cell_gap, H - semi_range[1] - cell_gap) + cx = rng.integers(semi_range[1] + cell_gap, W - semi_range[1] - cell_gap) + r_radius = rng.integers(semi_range[0], semi_range[1] + 1) + c_radius = rng.integers(semi_range[0], semi_range[1] + 1) + angle = rng.uniform(0, np.pi) + + # Rasterise candidate ellipse + rr, cc = ellipse(cy, cx, r_radius, c_radius, shape=shape, rotation=angle) + if len(rr) == 0: + continue + + # Check for overlap (including gap buffer) + # Dilate existing labels by cell_gap and check candidate pixels + if cell_gap > 0: + occupied = ndimage.binary_dilation( + labels > 0, + iterations=cell_gap, + ) + else: + occupied = labels > 0 + + if occupied[rr, cc].any(): + continue + + cell_id += 1 + labels[rr, cc] = cell_id + + return labels + + +def _apply_tile_cuts( + labels: np.ndarray, + borders_y: tuple[int, ...], + borders_x: tuple[int, ...], + gap: int, +) -> np.ndarray: + """Zero out pixels along tile borders to simulate segmentation seams. + + For each border coordinate, a stripe of width ``gap`` centred on the + border is erased. + """ + out = labels.copy() + half = gap // 2 + + for by in borders_y: + out[by - half : by - half + gap, :] = 0 + for bx in borders_x: + out[:, bx - half : bx - half + gap] = 0 + + return out + + +def _relabel_and_track( + original: np.ndarray, + cut: np.ndarray, +) -> tuple[np.ndarray, frozenset[int], frozenset[int]]: + """Relabel connected components after cutting and classify fragments. + + Returns + ------- + relabelled + New label array with unique IDs for each fragment. + cut_ids + Fragment IDs that came from an original cell that was split. + intact_ids + Fragment IDs from cells that remained whole. + """ + # Relabel connected components (each fragment gets a new ID) + relabelled, n_fragments = ndimage.label(cut > 0) + + # Map each fragment back to its original cell ID + # For each new fragment, find which original cell(s) it overlaps with + cut_ids: set[int] = set() + intact_ids: set[int] = set() + + # Build reverse mapping: original_id → set of fragment_ids + orig_to_fragments: dict[int, set[int]] = {} + for frag_id in range(1, n_fragments + 1): + frag_mask = relabelled == frag_id + orig_ids_in_frag = set(np.unique(original[frag_mask])) - {0} + + for oid in orig_ids_in_frag: + orig_to_fragments.setdefault(oid, set()).add(frag_id) + + # Classify: if an original cell maps to >1 fragment, all its fragments are "cut" + for _orig_id, frag_set in orig_to_fragments.items(): + if len(frag_set) > 1: + cut_ids.update(frag_set) + else: + intact_ids.update(frag_set) + + return relabelled, frozenset(cut_ids), frozenset(intact_ids) + + +def make_tile_boundary_sdata() -> tuple[SpatialData, TileBoundaryGroundTruth]: + """Build a 400x400 SpatialData with ellipsoid cells cut by a 3x3 tile grid. + + Returns a tuple of ``(sdata, ground_truth)`` where ``ground_truth`` + contains the sets of cut and intact cell IDs for test assertions. + + The labels are dask-backed to exercise lazy codepaths. + """ + rng = np.random.default_rng(42) + + # 1. Place ellipsoids + original_labels = _place_ellipsoids( + shape=(_IMAGE_SIZE, _IMAGE_SIZE), + n_target=_N_CELLS_TARGET, + semi_range=_SEMI_AXIS_RANGE, + cell_gap=_CELL_GAP, + rng=rng, + ) + n_original = len(np.unique(original_labels)) - 1 # exclude background + + # 2. Apply tile cuts (zero out 2px stripes at borders) + cut_labels = _apply_tile_cuts( + original_labels, + borders_y=_TILE_BORDERS, + borders_x=_TILE_BORDERS, + gap=_BORDER_GAP, + ) + + # 3. Relabel fragments and track ground truth + relabelled, cut_ids, intact_ids = _relabel_and_track(original_labels, cut_labels) + + # 4. Wrap as dask array (chunks matching ~tile size for realistic access) + dask_labels = da.from_array(relabelled, chunks=(200, 200)) + labels_xr = xr.DataArray(dask_labels, dims=["y", "x"]) + + # 5. Dummy image for API compatibility + image_data = rng.integers(0, 255, (3, _IMAGE_SIZE, _IMAGE_SIZE), dtype=np.uint8) + image_xr = xr.DataArray(image_data, dims=["c", "y", "x"], coords={"c": ["R", "G", "B"]}) + + sdata = SpatialData( + images={"image": Image2DModel.parse(image_xr)}, + labels={"labels": Labels2DModel.parse(labels_xr)}, + ) + + ground_truth = TileBoundaryGroundTruth( + cut_cell_ids=cut_ids, + intact_cell_ids=intact_ids, + original_n_cells=n_original, + ) + + return sdata, ground_truth + + +@pytest.fixture() +def sdata_tile_boundary() -> tuple[SpatialData, TileBoundaryGroundTruth]: + """Fixture wrapper around :func:`make_tile_boundary_sdata`.""" + return make_tile_boundary_sdata() diff --git a/tests/experimental/test_tiling_qc.py b/tests/experimental/test_tiling_qc.py new file mode 100644 index 000000000..700f590a9 --- /dev/null +++ b/tests/experimental/test_tiling_qc.py @@ -0,0 +1,350 @@ +"""Tests for tiling segmentation QC metrics.""" + +from __future__ import annotations + +import numpy as np +import pytest + +from squidpy.experimental.pl._tiling_qc import tiling_qc +from squidpy.experimental.tl._tiling_qc import ( + _cardinal_alignment, + _longest_collinear_segment, + _score_tile, + _straight_edge_metrics, + calculate_tiling_qc, +) +from tests.conftest import PlotTester, PlotTesterMeta + +# --------------------------------------------------------------------------- +# Unit tests for geometry helpers +# --------------------------------------------------------------------------- + + +class TestLongestCollinearSegment: + """Tests for _longest_collinear_segment.""" + + def test_perfectly_straight_horizontal(self): + """A horizontal line should be detected as fully straight.""" + contour = np.array([[0.0, i] for i in range(20)]) + length, angle = _longest_collinear_segment(contour) + assert length == pytest.approx(19.0, abs=0.1) + # angle of horizontal line (row=0, col increasing) -> arctan2(0, 1) = 0 + assert abs(angle) < 0.1 + + def test_perfectly_straight_vertical(self): + """A vertical line should be detected as fully straight.""" + contour = np.array([[i, 0.0] for i in range(15)]) + length, angle = _longest_collinear_segment(contour) + assert length == pytest.approx(14.0, abs=0.1) + assert abs(angle - np.pi / 2) < 0.1 + + def test_staircase_is_straight(self): + """A pixel staircase (alternating cardinal/diagonal steps) is collinear.""" + contour = np.array([ + [0.0, 0.5], + [0.5, 1.0], + [0.5, 2.0], + [1.0, 2.5], + [1.0, 3.5], + [1.5, 4.0], + ]) + length, _ = _longest_collinear_segment(contour, distance_tol=0.75) + assert length > 3.0 + + def test_circle_has_short_segments(self): + """A circle should have no long straight segments.""" + t = np.linspace(0, 2 * np.pi, 60, endpoint=False) + contour = np.column_stack([10 * np.sin(t), 10 * np.cos(t)]) + length, _ = _longest_collinear_segment(contour) + assert length < 10.0 + + def test_too_few_points(self): + """Contours with fewer than 3 points return zero.""" + length, angle = _longest_collinear_segment(np.array([[0, 0], [1, 1]])) + assert length == 0.0 + + def test_empty_contour(self): + length, angle = _longest_collinear_segment(np.array([]).reshape(0, 2)) + assert length == 0.0 + + +class TestCardinalAlignment: + """Tests for _cardinal_alignment.""" + + def test_horizontal(self): + assert _cardinal_alignment(0.0) == pytest.approx(1.0) + + def test_vertical(self): + assert _cardinal_alignment(np.pi / 2) == pytest.approx(1.0) + + def test_diagonal(self): + assert _cardinal_alignment(np.pi / 4) == pytest.approx(0.0) + + def test_negative_angle(self): + assert _cardinal_alignment(-np.pi / 2) == pytest.approx(1.0) + + def test_near_pi(self): + assert _cardinal_alignment(np.pi - 0.01) == pytest.approx(1.0, abs=0.05) + + +class TestStraightEdgeMetrics: + """Tests for _straight_edge_metrics.""" + + def test_output_range(self): + contour = np.array([[0.0, i] for i in range(10)]) + ser, cas, cs = _straight_edge_metrics(contour, cell_area=50.0) + assert ser >= 0 + assert 0 <= cas <= 1.0 + assert cs >= 0 + + def test_zero_area(self): + contour = np.array([[0.0, i] for i in range(10)]) + ser, cas, cs = _straight_edge_metrics(contour, cell_area=0.0) + assert ser == 0.0 + assert cas == 0.0 + assert cs == 0.0 + + +# --------------------------------------------------------------------------- +# Per-tile scoring +# --------------------------------------------------------------------------- + + +class TestScoreTile: + """Tests for _score_tile.""" + + def test_empty_labels(self): + labels = np.zeros((50, 50), dtype=np.int32) + df = _score_tile(labels) + assert df.empty + assert list(df.columns) == [ + "max_straight_edge_ratio", + "cardinal_alignment_score", + "cut_score", + ] + + def test_single_cell(self): + from skimage.draw import disk + labels = np.zeros((50, 50), dtype=np.int32) + rr, cc = disk((25, 25), 15, shape=(50, 50)) + labels[rr, cc] = 1 + df = _score_tile(labels) + assert len(df) == 1 + assert df.index[0] == 1 + assert not df.isna().any().any() + + def test_cell_below_min_area_gets_nan(self): + labels = np.zeros((50, 50), dtype=np.int32) + labels[10, 10] = 1 + df = _score_tile(labels, min_area=5) + assert len(df) == 1 + assert df.iloc[0].isna().all() + + def test_rectangle_has_high_straight_ratio(self): + labels = np.zeros((50, 50), dtype=np.int32) + labels[5:45, 5:15] = 1 + df = _score_tile(labels) + assert df.loc[1, "max_straight_edge_ratio"] > 1.0 + + def test_downsample(self): + from skimage.draw import disk + labels = np.zeros((100, 100), dtype=np.int32) + rr, cc = disk((50, 50), 30, shape=(100, 100)) + labels[rr, cc] = 1 + df1 = _score_tile(labels, downsample=1) + df2 = _score_tile(labels, downsample=2) + assert len(df2) == 1 + assert abs(df1.loc[1, "max_straight_edge_ratio"] - df2.loc[1, "max_straight_edge_ratio"]) < 0.3 + + +# --------------------------------------------------------------------------- +# End-to-end tests with fixture +# --------------------------------------------------------------------------- + + +class TestCalculateTilingQC: + """Tests for calculate_tiling_qc using the tile-boundary fixture.""" + + def test_returns_anndata_with_scores_in_obs(self, sdata_tile_boundary): + sdata, gt = sdata_tile_boundary + adata = calculate_tiling_qc( + sdata, labels_key="labels", tile_size=200, inplace=False, + ) + assert adata.n_obs == len(gt.cut_cell_ids) + len(gt.intact_cell_ids) + assert adata.n_vars == 0 + for col in ["max_straight_edge_ratio", "cardinal_alignment_score", "cut_score"]: + assert col in adata.obs.columns + + def test_inplace_stores_in_default_table_key(self, sdata_tile_boundary): + sdata, _ = sdata_tile_boundary + result = calculate_tiling_qc( + sdata, labels_key="labels", tile_size=200, inplace=True, + ) + assert result is None + assert "labels_qc" in sdata.tables + + def test_spatialdata_attrs(self, sdata_tile_boundary): + sdata, _ = sdata_tile_boundary + adata = calculate_tiling_qc( + sdata, labels_key="labels", tile_size=200, inplace=False, + ) + assert adata.uns["spatialdata_attrs"]["region"] == "labels" + assert "label_id" in adata.obs.columns + + def test_centroids_stored_in_obs(self, sdata_tile_boundary): + sdata, _ = sdata_tile_boundary + adata = calculate_tiling_qc( + sdata, labels_key="labels", tile_size=200, inplace=False, + ) + assert "centroid_y" in adata.obs.columns + assert "centroid_x" in adata.obs.columns + # Centroids should be within image bounds (400x400) + assert (adata.obs["centroid_y"] >= 0).all() + assert (adata.obs["centroid_x"] >= 0).all() + assert (adata.obs["centroid_y"] < 400).all() + assert (adata.obs["centroid_x"] < 400).all() + + def test_params_stored_in_uns(self, sdata_tile_boundary): + sdata, _ = sdata_tile_boundary + adata = calculate_tiling_qc( + sdata, labels_key="labels", tile_size=200, + distance_tol=1.0, min_area=10, downsample=2, inplace=False, + ) + params = adata.uns["tiling_qc"] + assert params["tile_size"] == 200 + assert params["distance_tol"] == 1.0 + assert params["min_area"] == 10 + assert params["downsample"] == 2 + assert params["scale"] is None + + def test_cut_cells_score_higher_than_intact(self, sdata_tile_boundary): + sdata, gt = sdata_tile_boundary + adata = calculate_tiling_qc( + sdata, labels_key="labels", tile_size=200, inplace=False, + ) + obs = adata.obs + cut = obs[obs["label_id"].isin(gt.cut_cell_ids)]["max_straight_edge_ratio"].dropna() + intact = obs[obs["label_id"].isin(gt.intact_cell_ids)]["max_straight_edge_ratio"].dropna() + assert cut.mean() > intact.mean() + + def test_score_ranges(self, sdata_tile_boundary): + sdata, _ = sdata_tile_boundary + adata = calculate_tiling_qc( + sdata, labels_key="labels", tile_size=200, inplace=False, + ) + for col in ["max_straight_edge_ratio", "cardinal_alignment_score", "cut_score"]: + valid = adata.obs[col].dropna() + assert (valid >= 0).all(), f"{col} has negative values" + + def test_cardinal_score_bounded(self, sdata_tile_boundary): + sdata, _ = sdata_tile_boundary + adata = calculate_tiling_qc( + sdata, labels_key="labels", tile_size=200, inplace=False, + ) + cardinal = adata.obs["cardinal_alignment_score"].dropna() + assert (cardinal >= 0).all() + assert (cardinal <= 1.0).all() + + def test_tiled_vs_single_tile(self, sdata_tile_boundary): + sdata, _ = sdata_tile_boundary + adata_tiled = calculate_tiling_qc( + sdata, labels_key="labels", tile_size=200, inplace=False, + ) + adata_single = calculate_tiling_qc( + sdata, labels_key="labels", tile_size=2000, inplace=False, + ) + df1 = adata_tiled.obs.set_index("label_id").sort_index() + df2 = adata_single.obs.set_index("label_id").sort_index() + + assert set(df1.index) == set(df2.index) + for col in ["max_straight_edge_ratio", "cardinal_alignment_score", "cut_score"]: + np.testing.assert_allclose( + df1[col].values, df2[col].values, atol=1e-10, equal_nan=True, + ) + + def test_invalid_labels_key(self, sdata_tile_boundary): + sdata, _ = sdata_tile_boundary + with pytest.raises(ValueError, match="not found"): + calculate_tiling_qc(sdata, labels_key="nonexistent", inplace=False) + + def test_custom_adata_key(self, sdata_tile_boundary): + sdata, _ = sdata_tile_boundary + calculate_tiling_qc( + sdata, labels_key="labels", tile_size=200, + adata_key_added="my_qc", inplace=True, + ) + assert "my_qc" in sdata.tables + + +# --------------------------------------------------------------------------- +# Diagnostic plot +# --------------------------------------------------------------------------- + + +class TestPlotTilingQC: + """Tests for tiling_qc plot (delegates to spatialdata-plot).""" + + def test_plot_renders(self, sdata_tile_boundary): + import matplotlib + matplotlib.use("Agg") + + sdata, _ = sdata_tile_boundary + calculate_tiling_qc( + sdata, labels_key="labels", tile_size=200, inplace=True, + ) + tiling_qc(sdata, labels_key="labels") + import matplotlib.pyplot as plt + plt.close("all") + + def test_plot_custom_score_col(self, sdata_tile_boundary): + import matplotlib + matplotlib.use("Agg") + + sdata, _ = sdata_tile_boundary + calculate_tiling_qc( + sdata, labels_key="labels", tile_size=200, inplace=True, + ) + tiling_qc( + sdata, labels_key="labels", score_col="max_straight_edge_ratio", + ) + import matplotlib.pyplot as plt + plt.close("all") + + def test_plot_missing_qc_table(self, sdata_tile_boundary): + sdata, _ = sdata_tile_boundary + with pytest.raises(ValueError, match="not found"): + tiling_qc(sdata, labels_key="labels", qc_key="nonexistent") + + def test_plot_invalid_score_col(self, sdata_tile_boundary): + sdata, _ = sdata_tile_boundary + calculate_tiling_qc( + sdata, labels_key="labels", tile_size=200, inplace=True, + ) + with pytest.raises(ValueError, match="not found"): + tiling_qc(sdata, labels_key="labels", score_col="invalid") + + +# --------------------------------------------------------------------------- +# Visual regression tests (PlotTester) +# --------------------------------------------------------------------------- + + +@pytest.fixture() +def sdata_with_qc(sdata_tile_boundary): + """SpatialData with tiling QC already computed.""" + sdata, _ = sdata_tile_boundary + calculate_tiling_qc(sdata, labels_key="labels", tile_size=200, inplace=True) + return sdata + + +class TestTilingQCVisual(PlotTester, metaclass=PlotTesterMeta): + def test_plot_tiling_qc_cut_score(self, sdata_with_qc): + """Visual: labels coloured by cut_score.""" + tiling_qc(sdata_with_qc, labels_key="labels", score_col="cut_score") + + def test_plot_tiling_qc_straight_edge_ratio(self, sdata_with_qc): + """Visual: labels coloured by max_straight_edge_ratio.""" + tiling_qc( + sdata_with_qc, labels_key="labels", + score_col="max_straight_edge_ratio", + )