From 5e1f197f9eca43742230f58d4ea650a2e36dbc8d Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 6 Nov 2025 18:56:41 +0100 Subject: [PATCH 1/2] Switch to name conventions --- benchmarks/benchmarks/backed_hdf5.py | 6 +- pyproject.toml | 3 + src/anndata/_core/anndata.py | 14 +- src/anndata/_core/index.py | 13 +- src/anndata/_core/merge.py | 12 +- src/anndata/_core/raw.py | 2 +- src/anndata/_core/sparse_dataset.py | 30 ++-- src/anndata/_io/read.py | 4 +- src/anndata/_io/specs/registry.py | 2 +- src/anndata/compat/__init__.py | 8 +- src/anndata/experimental/merge.py | 6 +- .../multi_files/_anncollection.py | 52 +++--- src/anndata/tests/helpers.py | 72 ++++---- tests/lazy/conftest.py | 26 ++- tests/test_awkward.py | 6 +- tests/test_backed_hdf5.py | 14 +- tests/test_base.py | 40 ++--- tests/test_concatenate.py | 164 +++++++++--------- tests/test_concatenate_disk.py | 8 +- tests/test_dask.py | 72 ++++---- tests/test_dask_view_mem.py | 59 ++++--- tests/test_deprecations.py | 32 ++-- tests/test_helpers.py | 34 ++-- tests/test_inplace_subset.py | 15 +- tests/test_io_dispatched.py | 16 +- tests/test_io_elementwise.py | 72 ++++---- tests/test_io_partial.py | 24 ++- tests/test_raw.py | 2 +- tests/test_readwrite.py | 29 ++-- tests/test_structured_arrays.py | 19 +- tests/test_transpose.py | 2 +- tests/test_views.py | 49 +++--- tests/test_x.py | 35 ++-- 33 files changed, 507 insertions(+), 435 deletions(-) diff --git a/benchmarks/benchmarks/backed_hdf5.py b/benchmarks/benchmarks/backed_hdf5.py index 1bf63e95d..164f21bab 100644 --- a/benchmarks/benchmarks/backed_hdf5.py +++ b/benchmarks/benchmarks/backed_hdf5.py @@ -13,8 +13,8 @@ class BackedHDF5Indexing: param_names = ("arr_type",) params = ("sparse",) - def setup_cache(self): - X_sparse = sparse.random( + def setup_cache(self) -> None: + x_sparse = sparse.random( 10000, 50000, density=0.01, @@ -22,7 +22,7 @@ def setup_cache(self): random_state=np.random.default_rng(42), ) for X, arr_type in [ - (X_sparse, "sparse"), + (x_sparse, "sparse"), ]: n_obs, n_var = X.shape diff --git a/pyproject.toml b/pyproject.toml index 2f33da724..b7a36d124 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -198,6 +198,7 @@ select = [ "I", # isort "ICN", # Follow import conventions "ISC", # Implicit string concatenation + "N", # Naming conventions "PERF", # Performance "PIE", # Syntax simplifications "PTH", # Pathlib instead of os.path @@ -241,6 +242,8 @@ extend-immutable-calls = [ "slice" ] [tool.ruff.lint.flake8-type-checking] exempt-modules = [ ] strict = true +[tool.ruff.lint.pep8-naming] +extend-ignore-names = [ "X" ] [tool.ruff.lint.pylint] max-args = 7 max-positional-args = 5 diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index eb74b990b..926fbf401 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -1267,7 +1267,7 @@ def to_df(self, layer: str | None = None) -> pd.DataFrame: """ if layer is not None: X = self.layers[layer] - elif not self._has_X(): + elif not self._has_x(): msg = "X is None, cannot convert to dataframe." raise ValueError(msg) else: @@ -1276,7 +1276,7 @@ def to_df(self, layer: str | None = None) -> pd.DataFrame: X = X.toarray() return pd.DataFrame(X, index=self.obs_names, columns=self.var_names) - def _get_X(self, *, use_raw: bool = False, layer: str | None = None): + def _get_x(self, *, use_raw: bool = False, layer: str | None = None): """\ Convenience method for getting expression values with common arguments and error handling. @@ -1403,7 +1403,7 @@ def _mutated_copy(self, **kwargs): new[key] = getattr(self, key).copy() if "X" in kwargs: new["X"] = kwargs["X"] - elif self._has_X(): + elif self._has_x(): new["X"] = self.X.copy() if "uns" in kwargs: new["uns"] = kwargs["uns"] @@ -1464,7 +1464,7 @@ def to_memory(self, *, copy: bool = False) -> AnnData: def copy(self, filename: PathLike[str] | str | None = None) -> AnnData: """Full copy, optionally on disk.""" if not self.isbacked: - if self.is_view and self._has_X(): + if self.is_view and self._has_x(): # TODO: How do I unambiguously check if this is a copy? # Subsetting this way means we don’t have to have a view type # defined for the matrix, which is needed for some of the @@ -1992,7 +1992,7 @@ def write_zarr( convert_strings_to_categoricals=convert_strings_to_categoricals, ) - def chunked_X(self, chunk_size: int | None = None): + def chunked_X(self, chunk_size: int | None = None): # noqa: N802 """\ Return an iterator over the rows of the data matrix :attr:`X`. @@ -2014,7 +2014,7 @@ def chunked_X(self, chunk_size: int | None = None): yield (self.X[start:n], start, n) @old_positionals("replace") - def chunk_X( + def chunk_X( # noqa: N802 self, select: int | Sequence[int] | np.ndarray = 1000, *, @@ -2059,7 +2059,7 @@ def chunk_X( selection = selection.toarray() if issparse(selection) else selection return selection if reverse is None else selection[reverse] - def _has_X(self) -> bool: + def _has_x(self) -> bool: """ Check if X is None. diff --git a/src/anndata/_core/index.py b/src/anndata/_core/index.py index 3b92a99ac..b92586226 100644 --- a/src/anndata/_core/index.py +++ b/src/anndata/_core/index.py @@ -14,9 +14,12 @@ from .xarray import Dataset2D if TYPE_CHECKING: + from typing import Literal + from numpy.typing import NDArray from ..compat import Index, Index1D, Index1DNorm + from .anndata import AnnData def _normalize_indices( @@ -340,7 +343,13 @@ def make_slice(idx, dimidx: int, n: int = 2) -> tuple[slice, ...]: return tuple(mut) -def get_vector(adata, k, coldim, idxdim, layer=None): +def get_vector( + adata: AnnData, + k: str, + coldim: Literal["obs", "var"], + idxdim: Literal["obs", "var"], + layer: str | None = None, +): # adata could be self if Raw and AnnData shared a parent dims = ("obs", "var") col = getattr(adata, coldim).columns @@ -360,7 +369,7 @@ def get_vector(adata, k, coldim, idxdim, layer=None): elif in_idx: selected_dim = dims.index(idxdim) idx = adata._normalize_indices(make_slice(k, selected_dim)) - a = adata._get_X(layer=layer)[idx] + a = adata._get_x(layer=layer)[idx] if issparse(a): a = a.toarray() return np.ravel(a) diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py index 5febda780..2b898aac9 100644 --- a/src/anndata/_core/merge.py +++ b/src/anndata/_core/merge.py @@ -1156,7 +1156,7 @@ def axis_indices(adata: AnnData, axis: Literal["obs", 0, "var", 1]) -> pd.Index: # TODO: Resolve https://github.com/scverse/anndata/issues/678 and remove this function -def concat_Xs(adatas, reindexers, axis, fill_value): +def concat_xs(adatas, reindexers, axis, fill_value): """ Shimy until support for some missing X's is implemented. @@ -1165,10 +1165,10 @@ def concat_Xs(adatas, reindexers, axis, fill_value): This is not done inline in `concat` because we don't want to maintain references to the values of a.X. """ - Xs = [a.X for a in adatas] - if all(X is None for X in Xs): + xs = [a.X for a in adatas] + if all(X is None for X in xs): return None - elif any(X is None for X in Xs): + elif any(X is None for X in xs): msg = ( "Some (but not all) of the AnnData's to be concatenated had no .X value. " "Concatenation is currently only implemented for cases where all or none of" @@ -1176,7 +1176,7 @@ def concat_Xs(adatas, reindexers, axis, fill_value): ) raise NotImplementedError(msg) else: - return concat_arrays(Xs, reindexers, axis=axis, fill_value=fill_value) + return concat_arrays(xs, reindexers, axis=axis, fill_value=fill_value) def make_dask_col_from_extension_dtype( @@ -1706,7 +1706,7 @@ def concat( # noqa: PLR0912, PLR0913, PLR0915 ) alt_annot.true_index_dim = "merge_index" - X = concat_Xs(adatas, reindexers, axis=axis, fill_value=fill_value) + X = concat_xs(adatas, reindexers, axis=axis, fill_value=fill_value) if join == "inner": concat_aligned_mapping = inner_concat_aligned_mapping diff --git a/src/anndata/_core/raw.py b/src/anndata/_core/raw.py index 90325948f..165c6f2c2 100644 --- a/src/anndata/_core/raw.py +++ b/src/anndata/_core/raw.py @@ -60,7 +60,7 @@ def __init__( msg = "Cannot specify X if adata is backed" raise ValueError(msg) - def _get_X(self, layer=None): + def _get_x(self, layer=None): if layer is not None: raise ValueError() return self.X diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py index f6beff9c3..c264e7fe1 100644 --- a/src/anndata/_core/sparse_dataset.py +++ b/src/anndata/_core/sparse_dataset.py @@ -148,16 +148,16 @@ def _zero_many(self, i: Sequence[int], j: Sequence[int]): def _offsets( self, i: Iterable[int], j: Iterable[int], n_samples: int ) -> np.ndarray: - i, j, M, N = self._prepare_indices(i, j) + i, j, m, n = self._prepare_indices(i, j) offsets = np.empty(n_samples, dtype=self.indices.dtype) ret = _sparsetools.csr_sample_offsets( - M, N, self.indptr, self.indices, n_samples, i, j, offsets + m, n, self.indptr, self.indices, n_samples, i, j, offsets ) if ret == 1: # rinse and repeat self.sum_duplicates() _sparsetools.csr_sample_offsets( - M, N, self.indptr, self.indices, n_samples, i, j, offsets + m, n, self.indptr, self.indices, n_samples, i, j, offsets ) return offsets @@ -181,13 +181,13 @@ def _get_contiguous_compressed_slice( return new_data, new_indices, new_indptr -class backed_csr_matrix(BackedSparseMatrix, ss.csr_matrix): - def _get_intXslice(self, row: int, col: slice) -> ss.csr_matrix: +class BackedCSRMatrix(BackedSparseMatrix, ss.csr_matrix): + def _get_intXslice(self, row: int, col: slice) -> ss.csr_matrix: # noqa: N802 return ss.csr_matrix( get_compressed_vector(self, row), shape=(1, self.shape[1]) )[:, col] - def _get_sliceXslice(self, row: slice, col: slice) -> ss.csr_matrix: + def _get_sliceXslice(self, row: slice, col: slice) -> ss.csr_matrix: # noqa: N802 row = _fix_slice_bounds(row, self.shape[0]) col = _fix_slice_bounds(col, self.shape[1]) @@ -205,7 +205,7 @@ def _get_sliceXslice(self, row: slice, col: slice) -> ss.csr_matrix: ) return res if out_shape[1] == self.shape[1] else res[:, col] - def _get_arrayXslice(self, row: Sequence[int], col: slice) -> ss.csr_matrix: + def _get_arrayXslice(self, row: Sequence[int], col: slice) -> ss.csr_matrix: # noqa: N802 idxs = np.asarray(row) if len(idxs) == 0: return ss.csr_matrix((0, self.shape[1])) @@ -216,13 +216,13 @@ def _get_arrayXslice(self, row: Sequence[int], col: slice) -> ss.csr_matrix: )[:, col] -class backed_csc_matrix(BackedSparseMatrix, ss.csc_matrix): - def _get_sliceXint(self, row: slice, col: int) -> ss.csc_matrix: +class BackedCSCMatrix(BackedSparseMatrix, ss.csc_matrix): + def _get_sliceXint(self, row: slice, col: int) -> ss.csc_matrix: # noqa: N802 return ss.csc_matrix( get_compressed_vector(self, col), shape=(self.shape[0], 1) )[row, :] - def _get_sliceXslice(self, row: slice, col: slice) -> ss.csc_matrix: + def _get_sliceXslice(self, row: slice, col: slice) -> ss.csc_matrix: # noqa: N802 row = _fix_slice_bounds(row, self.shape[0]) col = _fix_slice_bounds(col, self.shape[1]) @@ -240,7 +240,7 @@ def _get_sliceXslice(self, row: slice, col: slice) -> ss.csc_matrix: ) return res if out_shape[0] == self.shape[0] else res[row, :] - def _get_sliceXarray(self, row: slice, col: Sequence[int]) -> ss.csc_matrix: + def _get_sliceXarray(self, row: slice, col: Sequence[int]) -> ss.csc_matrix: # noqa: N802 idxs = np.asarray(col) if len(idxs) == 0: return ss.csc_matrix((self.shape[0], 0)) @@ -252,10 +252,10 @@ def _get_sliceXarray(self, row: slice, col: Sequence[int]) -> ss.csc_matrix: FORMATS = [ - BackedFormat("csr", backed_csr_matrix, ss.csr_matrix), - BackedFormat("csc", backed_csc_matrix, ss.csc_matrix), - BackedFormat("csr", backed_csr_matrix, ss.csr_array), - BackedFormat("csc", backed_csc_matrix, ss.csc_array), + BackedFormat("csr", BackedCSRMatrix, ss.csr_matrix), + BackedFormat("csc", BackedCSCMatrix, ss.csc_matrix), + BackedFormat("csr", BackedCSRMatrix, ss.csr_array), + BackedFormat("csc", BackedCSCMatrix, ss.csc_array), ] diff --git a/src/anndata/_io/read.py b/src/anndata/_io/read.py index f6bb271d5..86f87ad27 100644 --- a/src/anndata/_io/read.py +++ b/src/anndata/_io/read.py @@ -178,7 +178,7 @@ def read_loom( # noqa: PLR0912, PLR0913 *, sparse: bool = True, cleanup: bool = False, - X_name: str = "spliced", + X_name: str = "spliced", # noqa: N803 obs_names: str = "CellID", obsm_names: Mapping[str, Iterable[str]] | None = None, var_names: str = "Gene", @@ -273,7 +273,7 @@ def read_loom( # noqa: PLR0912, PLR0913 assert lc.layers is not None if X_name not in lc.layers: - X_name = "" + X_name = "" # noqa: N806 X = lc.layers[X_name].sparse().T.tocsr() if sparse else lc.layers[X_name][()].T X = X.astype(dtype, copy=False) diff --git a/src/anndata/_io/specs/registry.py b/src/anndata/_io/specs/registry.py index 12c8abd59..b5ec5846d 100644 --- a/src/anndata/_io/specs/registry.py +++ b/src/anndata/_io/specs/registry.py @@ -410,7 +410,7 @@ def read_elem(elem: StorageType) -> RWAble: def read_elem_lazy( - elem: StorageType, chunks: tuple[int, ...] | None = None, **kwargs + elem: StorageType, chunks: tuple[int | None, ...] | None = None, **kwargs ) -> LazyDataStructures: """ Read an element from a store lazily. diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py index e55526367..0472399d2 100644 --- a/src/anndata/compat/__init__.py +++ b/src/anndata/compat/__init__.py @@ -161,10 +161,10 @@ def is_cupy_importable() -> bool: if is_cupy_importable() or TYPE_CHECKING: - from cupy import ndarray as CupyArray - from cupyx.scipy.sparse import csc_matrix as CupyCSCMatrix - from cupyx.scipy.sparse import csr_matrix as CupyCSRMatrix - from cupyx.scipy.sparse import spmatrix as CupySparseMatrix + from cupy import ndarray as CupyArray # noqa: N812 + from cupyx.scipy.sparse import csc_matrix as CupyCSCMatrix # noqa: N812 + from cupyx.scipy.sparse import csr_matrix as CupyCSRMatrix # noqa: N812 + from cupyx.scipy.sparse import spmatrix as CupySparseMatrix # noqa: N812 try: import dask.array as da diff --git a/src/anndata/experimental/merge.py b/src/anndata/experimental/merge.py index 33cecedc7..7eaffd5ec 100644 --- a/src/anndata/experimental/merge.py +++ b/src/anndata/experimental/merge.py @@ -635,11 +635,11 @@ def concat_on_disk( # noqa: PLR0912, PLR0913, PLR0915 output_group.attrs.update({"encoding-type": "anndata", "encoding-version": "0.1.0"}) # Read the backed objects of Xs - Xs = [read_as_backed(g["X"]) for g in groups] + xs = [read_as_backed(g["X"]) for g in groups] # Label column label_col = pd.Categorical.from_codes( - np.repeat(np.arange(len(groups)), [x.shape[axis] for x in Xs]), + np.repeat(np.arange(len(groups)), [x.shape[axis] for x in xs]), categories=keys, ) @@ -682,7 +682,7 @@ def concat_on_disk( # noqa: PLR0912, PLR0913, PLR0915 # Write X _write_concat_arrays( - arrays=Xs, + arrays=xs, output_group=output_group, output_path="X", axis=axis, diff --git a/src/anndata/experimental/multi_files/_anncollection.py b/src/anndata/experimental/multi_files/_anncollection.py index a6b72e639..092a5b9be 100644 --- a/src/anndata/experimental/multi_files/_anncollection.py +++ b/src/anndata/experimental/multi_files/_anncollection.py @@ -17,6 +17,7 @@ from ..._core.sparse_dataset import BaseCompressedSparseDataset from ..._core.views import _resolve_idx from ...compat import old_positionals +from ...utils import warn if TYPE_CHECKING: from collections.abc import Iterable, Sequence @@ -334,11 +335,11 @@ def _lazy_init_attr(self, attr: str, *, set_vidx: bool = False): ), ) - def _gather_X(self): + def _gather_x(self): if self._X is not None: return self._X - Xs = [] + xs = [] for i, oidx in enumerate(self.adatas_oidx): if oidx is None: continue @@ -354,31 +355,31 @@ def _gather_X(self): # TODO: fix memory inefficient approach of X[oidx][:, vidx] arr = X[oidx, vidx] if isinstance(vidx, slice) else X[oidx][:, vidx] - Xs.append(arr if reverse is None else arr[reverse]) + xs.append(arr if reverse is None else arr[reverse]) elif isinstance(X, BaseCompressedSparseDataset): # very slow indexing with two arrays if isinstance(vidx, slice) or len(vidx) <= 1000: - Xs.append(X[oidx, vidx]) + xs.append(X[oidx, vidx]) else: - Xs.append(X[oidx][:, vidx]) + xs.append(X[oidx][:, vidx]) else: # if vidx is present it is less memory efficient idx = oidx, vidx idx = np.ix_(*idx) if not isinstance(vidx, slice) else idx - Xs.append(X[idx]) + xs.append(X[idx]) - if len(Xs) > 1: - _X = _merge(Xs) + if len(xs) > 1: + _x = _merge(xs) # todo: get rid of reverse for dense arrays - _X = _X if self.reverse is None else _X[self.reverse] + _x = _x if self.reverse is None else _x[self.reverse] else: - _X = Xs[0] + _x = xs[0] if self._dtypes is not None: - _X = _X.astype(self._dtypes["X"], copy=False) + _x = _x.astype(self._dtypes["X"], copy=False) - self._X = _X + self._X = _x - return _X + return _x @property def X(self): @@ -389,13 +390,13 @@ def X(self): Nothing is copied until `.X` is accessed, no real concatenation of the underlying `.X` attributes is done. """ - # inconsistent behavior here, _X can be changed, + # inconsistent behavior here, _x can be changed, # but the other attributes can't be changed. - # maybe do return ... _X.copy() or _X.setflags(write=False) + # maybe do return ... _x.copy() or _x.setflags(write=False) - _X = self._gather_X() + _x = self._gather_x() - return self._convert_X(_X) if self._convert_X is not None else _X + return self._convert_X(_x) if self._convert_X is not None else _x @property def layers(self): @@ -531,7 +532,14 @@ def __repr__(self): return descr @old_positionals("ignore_X", "ignore_layers") - def to_adata(self, *, ignore_X: bool = False, ignore_layers: bool = False): + def to_adata( + self, + *, + ignore_x: bool = False, + ignore_layers: bool = False, + # deprecated + ignore_X: bool | None = None, # noqa: N803 + ) -> AnnData: """Convert this AnnCollectionView object to an AnnData object. Parameters @@ -552,11 +560,15 @@ def to_adata(self, *, ignore_X: bool = False, ignore_layers: bool = False): else pd.DataFrame(self.obs.to_dict(use_convert=False)) ) - if ignore_X: + if ignore_X is not None: + msg = "ignore_X is deprecated, use ignore_x instead" + warn(msg, FutureWarning) + ignore_x = ignore_X + if ignore_x: X = None shape = self.shape else: - X = self._gather_X() + X = self._gather_x() shape = None adata = AnnData(X, obs=obs, obsm=obsm, layers=layers, shape=shape) diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index b1802b034..7784cbcbe 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -196,7 +196,7 @@ def gen_typed_df( def _gen_awkward_inner(shape, rng, dtype): # the maximum length a ragged dimension can take - MAX_RAGGED_DIM_LEN = 20 + max_ragged_dim_len = 20 if not len(shape): # abort condition -> no dimension left, return an actual value instead return dtype(rng.randrange(1000)) @@ -204,7 +204,7 @@ def _gen_awkward_inner(shape, rng, dtype): curr_dim_len = shape[0] if curr_dim_len is None: # ragged dimension, set random length - curr_dim_len = rng.randrange(MAX_RAGGED_DIM_LEN) + curr_dim_len = rng.randrange(max_ragged_dim_len) return [_gen_awkward_inner(shape[1:], rng, dtype) for _ in range(curr_dim_len)] @@ -282,9 +282,9 @@ def maybe_add_sparse_array( # TODO: Use hypothesis for this? def gen_adata( # noqa: PLR0913 shape: tuple[int, int], - X_type: Callable[[np.ndarray], object] = sparse.csr_matrix, + x_type: Callable[[np.ndarray], object] = sparse.csr_matrix, *, - X_dtype: np.dtype = np.float32, + x_dtype: np.dtype = np.float32, obs_dtypes: Collection[ np.dtype | pd.api.extensions.ExtensionDtype ] = DEFAULT_COL_TYPES, @@ -310,10 +310,10 @@ def gen_adata( # noqa: PLR0913 ------ shape What shape you want the anndata to be. - X_type + x_type What kind of container should `X` be? This will be called on a randomly generated 2d array. - X_dtype + x_dtype What should the dtype of the `.X` container be? obsm_types What kinds of containers should be in `.obsm`? @@ -330,11 +330,11 @@ def gen_adata( # noqa: PLR0913 if random_state is None: random_state = np.random.default_rng() - M, N = shape + m, n = shape obs_names = pd.Index(f"cell{i}" for i in range(shape[0])) var_names = pd.Index(f"gene{i}" for i in range(shape[1])) - obs = gen_typed_df(M, obs_names, dtypes=obs_dtypes) - var = gen_typed_df(N, var_names, dtypes=var_dtypes) + obs = gen_typed_df(m, obs_names, dtypes=obs_dtypes) + var = gen_typed_df(n, var_names, dtypes=var_dtypes) # For #147 obs.rename(columns=dict(cat="obs_cat"), inplace=True) var.rename(columns=dict(cat="var_cat"), inplace=True) @@ -345,31 +345,31 @@ def gen_adata( # noqa: PLR0913 if var_xdataset: var = XDataset.from_dataframe(var) - if X_type is None: + if x_type is None: X = None else: - X = X_type(random_state.binomial(100, 0.005, (M, N)).astype(X_dtype)) + X = x_type(random_state.binomial(100, 0.005, (m, n)).astype(x_dtype)) obsm = dict( - array=np.random.random((M, 50)), - sparse=sparse.random(M, 100, format=sparse_fmt, random_state=random_state), - df=gen_typed_df(M, obs_names, dtypes=obs_dtypes), - awk_2d_ragged=gen_awkward((M, None)), - da=da.random.random((M, 50)), + array=np.random.random((m, 50)), + sparse=sparse.random(m, 100, format=sparse_fmt, random_state=random_state), + df=gen_typed_df(m, obs_names, dtypes=obs_dtypes), + awk_2d_ragged=gen_awkward((m, None)), + da=da.random.random((m, 50)), ) varm = dict( - array=np.random.random((N, 50)), - sparse=sparse.random(N, 100, format=sparse_fmt, random_state=random_state), - df=gen_typed_df(N, var_names, dtypes=var_dtypes), - awk_2d_ragged=gen_awkward((N, None)), - da=da.random.random((N, 50)), + array=np.random.random((n, 50)), + sparse=sparse.random(n, 100, format=sparse_fmt, random_state=random_state), + df=gen_typed_df(n, var_names, dtypes=var_dtypes), + awk_2d_ragged=gen_awkward((n, None)), + da=da.random.random((n, 50)), ) if has_xr: obsm["xdataset"] = XDataset.from_dataframe( - gen_typed_df(M, obs_names, dtypes=obs_dtypes) + gen_typed_df(m, obs_names, dtypes=obs_dtypes) ) varm["xdataset"] = XDataset.from_dataframe( - gen_typed_df(N, var_names, dtypes=var_dtypes) + gen_typed_df(n, var_names, dtypes=var_dtypes) ) obsm = {k: v for k, v in obsm.items() if type(v) in obsm_types} obsm = maybe_add_sparse_array( @@ -377,7 +377,7 @@ def gen_adata( # noqa: PLR0913 types=obsm_types, format=sparse_fmt, random_state=random_state, - shape=(M, 100), + shape=(m, 100), ) varm = {k: v for k, v in varm.items() if type(v) in varm_types} varm = maybe_add_sparse_array( @@ -385,37 +385,37 @@ def gen_adata( # noqa: PLR0913 types=varm_types, format=sparse_fmt, random_state=random_state, - shape=(N, 100), + shape=(n, 100), ) layers = dict( - array=np.random.random((M, N)), - sparse=sparse.random(M, N, format=sparse_fmt, random_state=random_state), - da=da.random.random((M, N)), + array=np.random.random((m, n)), + sparse=sparse.random(m, n, format=sparse_fmt, random_state=random_state), + da=da.random.random((m, n)), ) layers = maybe_add_sparse_array( mapping=layers, types=layers_types, format=sparse_fmt, random_state=random_state, - shape=(M, N), + shape=(m, n), ) layers = {k: v for k, v in layers.items() if type(v) in layers_types} obsp = dict( - array=np.random.random((M, M)), - sparse=sparse.random(M, M, format=sparse_fmt, random_state=random_state), + array=np.random.random((m, m)), + sparse=sparse.random(m, m, format=sparse_fmt, random_state=random_state), ) obsp["sparse_array"] = sparse.csr_array( - sparse.random(M, M, format=sparse_fmt, random_state=random_state) + sparse.random(m, m, format=sparse_fmt, random_state=random_state) ) varp = dict( - array=np.random.random((N, N)), - sparse=sparse.random(N, N, format=sparse_fmt, random_state=random_state), + array=np.random.random((n, n)), + sparse=sparse.random(n, n, format=sparse_fmt, random_state=random_state), ) varp["sparse_array"] = sparse.csr_array( - sparse.random(N, N, format=sparse_fmt, random_state=random_state) + sparse.random(n, n, format=sparse_fmt, random_state=random_state) ) uns = dict( - O_recarray=gen_vstr_recarray(N, 5), + O_recarray=gen_vstr_recarray(n, 5), nested=dict( scalar_str="str", scalar_int=42, diff --git a/tests/lazy/conftest.py b/tests/lazy/conftest.py index 61e5419c0..426f68e0f 100644 --- a/tests/lazy/conftest.py +++ b/tests/lazy/conftest.py @@ -122,16 +122,15 @@ def adata_remote_with_store_tall_skinny_path( worker_id: str = "serial", ) -> Path: orig_path = tmp_path_factory.mktemp(f"orig_{worker_id}.zarr") - M = 1000 - N = 5 - obs_names = pd.Index(f"cell{i}" for i in range(M)) - var_names = pd.Index(f"gene{i}" for i in range(N)) - obs = gen_typed_df(M, obs_names) - var = gen_typed_df(N, var_names) + m, n = 1000, 5 + obs_names = pd.Index(f"cell{i}" for i in range(m)) + var_names = pd.Index(f"gene{i}" for i in range(n)) + obs = gen_typed_df(m, obs_names) + var = gen_typed_df(n, var_names) orig = AnnData( obs=obs, var=var, - X=mtx_format(np.random.binomial(100, 0.005, (M, N)).astype(np.float32)), + X=mtx_format(np.random.binomial(100, 0.005, (m, n)).astype(np.float32)), ) orig.raw = orig.copy() orig.write_zarr(orig_path) @@ -153,24 +152,23 @@ def adatas_paths_var_indices_for_concatenation( adatas = [] var_indices = [] paths = [] - M = 1000 - N = 50 + m, n = 1000, 50 n_datasets = 3 for dataset_index in range(n_datasets): orig_path = tmp_path_factory.mktemp(f"orig_{worker_id}_{dataset_index}.zarr") paths.append(orig_path) - obs_names = pd.Index(f"cell_{dataset_index}_{i}" for i in range(M)) + obs_names = pd.Index(f"cell_{dataset_index}_{i}" for i in range(m)) var_names = pd.Index( f"gene_{i}{f'_{dataset_index}_ds' if are_vars_different and (i % 2) else ''}" - for i in range(N) + for i in range(n) ) var_indices.append(var_names) - obs = gen_typed_df(M, obs_names) - var = gen_typed_df(N, var_names) + obs = gen_typed_df(m, obs_names) + var = gen_typed_df(n, var_names) orig = AnnData( obs=obs, var=var, - X=np.random.binomial(100, 0.005, (M, N)).astype(np.float32), + X=np.random.binomial(100, 0.005, (m, n)).astype(np.float32), ) orig.write_zarr(orig_path) adatas.append(orig) diff --git a/tests/test_awkward.py b/tests/test_awkward.py index 20b67bdee..9d93d598f 100644 --- a/tests/test_awkward.py +++ b/tests/test_awkward.py @@ -160,16 +160,16 @@ def test_view_of_awkward_array_with_custom_behavior(): from uuid import uuid4 - BEHAVIOUR_ID = str(uuid4()) + behavior_id = str(uuid4()) class ReversibleArray(ak.Array): def reversed(self): return self[..., ::-1] - ak.behavior[BEHAVIOUR_ID] = ReversibleArray + ak.behavior[behavior_id] = ReversibleArray adata = gen_adata((3, 3), varm_types=(), obsm_types=(), layers_types=()) adata.obsm["awk_string"] = ak.with_parameter( - ak.Array(["AAA", "BBB", "CCC"]), "__list__", BEHAVIOUR_ID + ak.Array(["AAA", "BBB", "CCC"]), "__list__", behavior_id ) adata_view = adata[:2] diff --git a/tests/test_backed_hdf5.py b/tests/test_backed_hdf5.py index 789898431..26f050a36 100644 --- a/tests/test_backed_hdf5.py +++ b/tests/test_backed_hdf5.py @@ -37,12 +37,12 @@ @pytest.fixture def adata() -> ad.AnnData: - X_list = [ + x_list = [ [1, 2, 3], [4, 5, 6], [7, 8, 9], ] # data matrix of shape n_obs x n_vars - X = np.array(X_list) + X = np.array(x_list) obs_dict = dict( # annotation of observations / rows row_names=["name1", "name2", "name3"], # row annotation oanno1=["cat1", "cat2", "cat2"], # categorical annotation @@ -100,7 +100,7 @@ def as_dense(request) -> tuple[str] | tuple: @pytest.mark.filterwarnings("ignore:`product` is deprecated as of NumPy 1.25.0") # TODO: Check to make sure obs, obsm, layers, ... are written and read correctly as well @pytest.mark.filterwarnings("error") -def test_read_write_X( +def test_read_write_x( tmp_path: Path, mtx_format: Callable[ [np.ndarray], DaskArray | np.ndarray | sparse.csr_array | sparse.csr_matrix @@ -236,7 +236,7 @@ def test_backed_raw_subset( ): backed_pth = tmp_path / "backed.h5ad" final_pth = tmp_path / "final.h5ad" - mem_adata = gen_adata((10, 10), X_type=array_type, **GEN_ADATA_NO_XARRAY_ARGS) + mem_adata = gen_adata((10, 10), x_type=array_type, **GEN_ADATA_NO_XARRAY_ARGS) mem_adata.raw = mem_adata obs_idx = subset_func(mem_adata.obs_names) var_idx = subset_func2(mem_adata.var_names) @@ -276,8 +276,8 @@ def test_to_memory_full( array_type: Callable[[np.ndarray], np.ndarray | DaskArray | sparse.csr_matrix], ): backed_pth = tmp_path / "backed.h5ad" - mem_adata = gen_adata((15, 10), X_type=array_type, **GEN_ADATA_DASK_ARGS) - mem_adata.raw = gen_adata((15, 12), X_type=array_type, **GEN_ADATA_DASK_ARGS) + mem_adata = gen_adata((15, 10), x_type=array_type, **GEN_ADATA_DASK_ARGS) + mem_adata.raw = gen_adata((15, 12), x_type=array_type, **GEN_ADATA_DASK_ARGS) mem_adata.write_h5ad(backed_pth, compression="lzf") backed_adata = ad.read_h5ad(backed_pth, backed="r") @@ -408,7 +408,7 @@ def test_backed_duplicate_indices(tmp_path, obs_idx, var_idx): backed_pth = tmp_path / "backed.h5ad" # Create test data - mem_adata = gen_adata((6, 4), X_type=asarray, **GEN_ADATA_NO_XARRAY_ARGS) + mem_adata = gen_adata((6, 4), x_type=asarray, **GEN_ADATA_NO_XARRAY_ARGS) mem_adata.write(backed_pth) # Load backed data diff --git a/tests/test_base.py b/tests/test_base.py index 37cc8ff7e..f24703dbf 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -103,7 +103,7 @@ def test_creation_error(src, src_arg, dim_msg, dim, dim_arg, msg: str | None): AnnData(**{src: src_arg, dim: dim_arg(dim)}) -def test_invalid_X(): +def test_invalid_x() -> None: with pytest.raises( ValueError, match=r"X needs to be of one of .*not \.", @@ -590,14 +590,14 @@ def test_pickle(): assert adata2.obsm.parent is adata2 -def test_to_df_dense(): - X_df = adata_dense.to_df() +def test_to_df_dense() -> None: + x_df = adata_dense.to_df() layer_df = adata_dense.to_df(layer="test") np.testing.assert_array_equal(adata_dense.layers["test"], layer_df.values) - np.testing.assert_array_equal(adata_dense.X, X_df.values) - pd.testing.assert_index_equal(X_df.columns, layer_df.columns) - pd.testing.assert_index_equal(X_df.index, layer_df.index) + np.testing.assert_array_equal(adata_dense.X, x_df.values) + pd.testing.assert_index_equal(x_df.columns, layer_df.columns) + pd.testing.assert_index_equal(x_df.index, layer_df.index) def test_convenience(): @@ -642,27 +642,27 @@ def assert_same_op_result(a1, a2, op): ) -def test_1d_slice_dtypes(): - N, M = 10, 20 +def test_1d_slice_dtypes() -> None: + n, m = 10, 20 obs_df = pd.DataFrame( dict( - cat=pd.Categorical(np.arange(N, dtype=int)), - int=np.arange(N, dtype=int), - float=np.arange(N, dtype=float), - obj=[str(i) for i in np.arange(N, dtype=int)], + cat=pd.Categorical(np.arange(n, dtype=int)), + int=np.arange(n, dtype=int), + float=np.arange(n, dtype=float), + obj=[str(i) for i in np.arange(n, dtype=int)], ), - index=[f"cell{i}" for i in np.arange(N, dtype=int)], + index=[f"cell{i}" for i in np.arange(n, dtype=int)], ) var_df = pd.DataFrame( dict( - cat=pd.Categorical(np.arange(M, dtype=int)), - int=np.arange(M, dtype=int), - float=np.arange(M, dtype=float), - obj=[str(i) for i in np.arange(M, dtype=int)], + cat=pd.Categorical(np.arange(m, dtype=int)), + int=np.arange(m, dtype=int), + float=np.arange(m, dtype=float), + obj=[str(i) for i in np.arange(m, dtype=int)], ), - index=[f"gene{i}" for i in np.arange(M, dtype=int)], + index=[f"gene{i}" for i in np.arange(m, dtype=int)], ) - adata = AnnData(X=np.random.random((N, M)), obs=obs_df, var=var_df) + adata = AnnData(X=np.random.random((n, m)), obs=obs_df, var=var_df) new_obs_df = pd.DataFrame(index=adata.obs_names) for k in obs_df.columns: @@ -682,7 +682,7 @@ def test_to_df_sparse(): assert df.values.tolist() == X.tolist() -def test_to_df_no_X(): +def test_to_df_no_x() -> None: adata = AnnData( obs=pd.DataFrame(index=[f"cell-{i:02}" for i in range(20)]), var=pd.DataFrame(index=[f"gene-{i:02}" for i in range(30)]), diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index 9c26bbe19..a6421aa52 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -248,7 +248,7 @@ def test_concatenate_roundtrip( pytest.skip("unsupported") adata = gen_adata( (100, 10), - X_type=array_type, + x_type=array_type, obs_xdataset=use_xdataset, var_xdataset=use_xdataset, **GEN_ADATA_DASK_ARGS, @@ -284,44 +284,44 @@ def test_concatenate_roundtrip( @mark_legacy_concatenate -def test_concatenate_dense(): +def test_concatenate_dense() -> None: # dense data - X1 = np.array([[1, 2, 3], [4, 5, 6]]) - X2 = np.array([[1, 2, 3], [4, 5, 6]]) - X3 = np.array([[1, 2, 3], [4, 5, 6]]) + x1 = np.array([[1, 2, 3], [4, 5, 6]]) + x2 = np.array([[1, 2, 3], [4, 5, 6]]) + x3 = np.array([[1, 2, 3], [4, 5, 6]]) adata1 = AnnData( - X1, + x1, dict(obs_names=["s1", "s2"], anno1=["c1", "c2"]), dict(var_names=["a", "b", "c"], annoA=[0, 1, 2]), - obsm=dict(X_1=X1, X_2=X2, X_3=X3), - layers=dict(Xs=X1), + obsm=dict(X_1=x1, X_2=x2, X_3=x3), + layers=dict(Xs=x1), ) adata2 = AnnData( - X2, + x2, dict(obs_names=["s3", "s4"], anno1=["c3", "c4"]), dict(var_names=["d", "c", "b"], annoA=[0, 1, 2]), - obsm=dict(X_1=X1, X_2=X2, X_3=X3), - layers={"Xs": X2}, + obsm=dict(X_1=x1, X_2=x2, X_3=x3), + layers={"Xs": x2}, ) adata3 = AnnData( - X3, + x3, dict(obs_names=["s1", "s2"], anno2=["d3", "d4"]), dict(var_names=["d", "c", "b"], annoB=[0, 1, 2]), - obsm=dict(X_1=X1, X_2=X2), - layers=dict(Xs=X3), + obsm=dict(X_1=x1, X_2=x2), + layers=dict(Xs=x3), ) # inner join adata = adata1.concatenate(adata2, adata3) - X_combined = [[2, 3], [5, 6], [3, 2], [6, 5], [3, 2], [6, 5]] - assert adata.X.astype(int).tolist() == X_combined - assert adata.layers["Xs"].astype(int).tolist() == X_combined + x_combined = [[2, 3], [5, 6], [3, 2], [6, 5], [3, 2], [6, 5]] + assert adata.X.astype(int).tolist() == x_combined + assert adata.layers["Xs"].astype(int).tolist() == x_combined assert adata.obs.columns.tolist() == ["anno1", "anno2", "batch"] assert adata.var.columns.tolist() == ["annoA-0", "annoA-1", "annoB-2"] assert adata.var.values.tolist() == [[1, 2, 2], [2, 1, 1]] assert adata.obsm.keys() == {"X_1", "X_2"} - assert adata.obsm["X_1"].tolist() == np.concatenate([X1, X1, X1]).tolist() + assert adata.obsm["X_1"].tolist() == np.concatenate([x1, x1, x1]).tolist() # with batch_key and batch_categories adata = adata1.concatenate(adata2, adata3, batch_key="batch1") @@ -333,7 +333,7 @@ def test_concatenate_dense(): # outer join adata = adata1.concatenate(adata2, adata3, join="outer") - X_ref = np.array([ + x_ref = np.array([ [1.0, 2.0, 3.0, np.nan], [4.0, 5.0, 6.0, np.nan], [np.nan, 3.0, 2.0, 1.0], @@ -341,7 +341,7 @@ def test_concatenate_dense(): [np.nan, 3.0, 2.0, 1.0], [np.nan, 6.0, 5.0, 4.0], ]) - np.testing.assert_equal(adata.X, X_ref) + np.testing.assert_equal(adata.X, x_ref) var_ma = ma.masked_invalid(adata.var.values.tolist()) var_ma_ref = ma.masked_invalid( np.array([ @@ -598,14 +598,14 @@ def get_obs_els(adata): @mark_legacy_concatenate -def test_concatenate_dense_duplicates(): - X1 = np.array([[1, 2, 3], [4, 5, 6]]) - X2 = np.array([[1, 2, 3], [4, 5, 6]]) - X3 = np.array([[1, 2, 3], [4, 5, 6]]) +def test_concatenate_dense_duplicates() -> None: + x1 = np.array([[1, 2, 3], [4, 5, 6]]) + x2 = np.array([[1, 2, 3], [4, 5, 6]]) + x3 = np.array([[1, 2, 3], [4, 5, 6]]) # inner join duplicates adata1 = AnnData( - X1, + x1, dict(obs_names=["s1", "s2"], anno1=["c1", "c2"]), dict( var_names=["a", "b", "c"], @@ -616,7 +616,7 @@ def test_concatenate_dense_duplicates(): ), ) adata2 = AnnData( - X2, + x2, dict(obs_names=["s3", "s4"], anno1=["c3", "c4"]), dict( var_names=["a", "b", "c"], @@ -627,7 +627,7 @@ def test_concatenate_dense_duplicates(): ), ) adata3 = AnnData( - X3, + x3, dict(obs_names=["s1", "s2"], anno2=["d3", "d4"]), dict( var_names=["a", "b", "c"], @@ -650,38 +650,38 @@ def test_concatenate_dense_duplicates(): @mark_legacy_concatenate -def test_concatenate_sparse(): +def test_concatenate_sparse() -> None: # sparse data from scipy.sparse import csr_matrix - X1 = csr_matrix([[0, 2, 3], [0, 5, 6]]) - X2 = csr_matrix([[0, 2, 3], [0, 5, 6]]) - X3 = csr_matrix([[1, 2, 0], [0, 5, 6]]) + x1 = csr_matrix([[0, 2, 3], [0, 5, 6]]) + x2 = csr_matrix([[0, 2, 3], [0, 5, 6]]) + x3 = csr_matrix([[1, 2, 0], [0, 5, 6]]) adata1 = AnnData( - X1, + x1, dict(obs_names=["s1", "s2"], anno1=["c1", "c2"]), dict(var_names=["a", "b", "c"]), - layers=dict(Xs=X1), + layers=dict(Xs=x1), ) adata2 = AnnData( - X2, + x2, dict(obs_names=["s3", "s4"], anno1=["c3", "c4"]), dict(var_names=["d", "c", "b"]), - layers=dict(Xs=X2), + layers=dict(Xs=x2), ) adata3 = AnnData( - X3, + x3, dict(obs_names=["s5", "s6"], anno2=["d3", "d4"]), dict(var_names=["d", "c", "b"]), - layers=dict(Xs=X3), + layers=dict(Xs=x3), ) # inner join adata = adata1.concatenate(adata2, adata3) - X_combined = [[2, 3], [5, 6], [3, 2], [6, 5], [0, 2], [6, 5]] - assert adata.X.toarray().astype(int).tolist() == X_combined - assert adata.layers["Xs"].toarray().astype(int).tolist() == X_combined + x_combined = [[2, 3], [5, 6], [3, 2], [6, 5], [0, 2], [6, 5]] + assert adata.X.toarray().astype(int).tolist() == x_combined + assert adata.layers["Xs"].toarray().astype(int).tolist() == x_combined # outer join adata = adata1.concatenate(adata2, adata3, join="outer") @@ -696,34 +696,34 @@ def test_concatenate_sparse(): @mark_legacy_concatenate -def test_concatenate_mixed(): - X1 = sparse.csr_matrix(np.array([[1, 2, 0], [4, 0, 6], [0, 0, 9]])) - X2 = sparse.csr_matrix(np.array([[0, 2, 3], [4, 0, 0], [7, 0, 9]])) - X3 = sparse.csr_matrix(np.array([[1, 0, 3], [0, 0, 6], [0, 8, 0]])) - X4 = np.array([[0, 2, 3], [4, 0, 0], [7, 0, 9]]) +def test_concatenate_mixed() -> None: + x1 = sparse.csr_matrix(np.array([[1, 2, 0], [4, 0, 6], [0, 0, 9]])) + x2 = sparse.csr_matrix(np.array([[0, 2, 3], [4, 0, 0], [7, 0, 9]])) + x3 = sparse.csr_matrix(np.array([[1, 0, 3], [0, 0, 6], [0, 8, 0]])) + x4 = np.array([[0, 2, 3], [4, 0, 0], [7, 0, 9]]) adata1 = AnnData( - X1, + x1, dict(obs_names=["s1", "s2", "s3"], anno1=["c1", "c2", "c3"]), dict(var_names=["a", "b", "c"], annoA=[0, 1, 2]), - layers=dict(counts=X1), + layers=dict(counts=x1), ) adata2 = AnnData( - X2, + x2, dict(obs_names=["s4", "s5", "s6"], anno1=["c3", "c4", "c5"]), dict(var_names=["d", "c", "b"], annoA=[0, 1, 2]), - layers=dict(counts=X4), # sic + layers=dict(counts=x4), # sic ) adata3 = AnnData( - X3, + x3, dict(obs_names=["s7", "s8", "s9"], anno2=["d3", "d4", "d5"]), dict(var_names=["d", "c", "b"], annoA=[0, 2, 3], annoB=[0, 1, 2]), - layers=dict(counts=X3), + layers=dict(counts=x3), ) adata4 = AnnData( - X4, + x4, dict(obs_names=["s4", "s5", "s6"], anno1=["c3", "c4", "c5"]), dict(var_names=["d", "c", "b"], annoA=[0, 1, 2]), - layers=dict(counts=X2), # sic + layers=dict(counts=x2), # sic ) adata_all = AnnData.concatenate(adata1, adata2, adata3, adata4) @@ -732,38 +732,38 @@ def test_concatenate_mixed(): @mark_legacy_concatenate -def test_concatenate_with_raw(): +def test_concatenate_with_raw() -> None: # dense data - X1 = np.array([[1, 2, 3], [4, 5, 6]]) - X2 = np.array([[1, 2, 3], [4, 5, 6]]) - X3 = np.array([[1, 2, 3], [4, 5, 6]]) + x1 = np.array([[1, 2, 3], [4, 5, 6]]) + x2 = np.array([[1, 2, 3], [4, 5, 6]]) + x3 = np.array([[1, 2, 3], [4, 5, 6]]) - X4 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) + x4 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) adata1 = AnnData( - X1, + x1, dict(obs_names=["s1", "s2"], anno1=["c1", "c2"]), dict(var_names=["a", "b", "c"], annoA=[0, 1, 2]), - layers=dict(Xs=X1), + layers=dict(Xs=x1), ) adata2 = AnnData( - X2, + x2, dict(obs_names=["s3", "s4"], anno1=["c3", "c4"]), dict(var_names=["d", "c", "b"], annoA=[0, 1, 2]), - layers=dict(Xs=X2), + layers=dict(Xs=x2), ) adata3 = AnnData( - X3, + x3, dict(obs_names=["s1", "s2"], anno2=["d3", "d4"]), dict(var_names=["d", "c", "b"], annoB=[0, 1, 2]), - layers=dict(Xs=X3), + layers=dict(Xs=x3), ) adata4 = AnnData( - X4, + x4, dict(obs_names=["s1", "s2"], anno1=["c1", "c2"]), dict(var_names=["a", "b", "c", "z"], annoA=[0, 1, 2, 3]), - layers=dict(Xs=X4), + layers=dict(Xs=x4), ) adata1.raw = adata1.copy() @@ -902,7 +902,7 @@ def test_pairwise_concat(axis_name, array_type): axis_sizes = [[100, 200, 50], [50, 50, 50]] if axis_name == "var": axis_sizes.reverse() - Ms, Ns = axis_sizes + ms, ns = axis_sizes axis_attr = f"{axis_name}p" alt_attr = f"{alt_axis_name}p" @@ -915,7 +915,7 @@ def gen_axis_array(m): obsp={"arr": gen_axis_array(m)}, varp={"arr": gen_axis_array(n)}, ) - for k, m, n in zip("abc", Ms, Ns, strict=True) + for k, m, n in zip("abc", ms, ns, strict=True) } w_pairwise = concat(adatas, axis=axis, label="orig", pairwise=True) @@ -1237,14 +1237,14 @@ def test_transposed_concat( alt_axis = 1 - axis lhs = gen_adata( (10, 10), - X_type=array_type, + x_type=array_type, obs_xdataset=use_xdataset, var_xdataset=use_xdataset, **GEN_ADATA_DASK_ARGS, ) rhs = gen_adata( (10, 12), - X_type=array_type, + x_type=array_type, obs_xdataset=use_xdataset, var_xdataset=use_xdataset, **GEN_ADATA_DASK_ARGS, @@ -1596,27 +1596,27 @@ def test_concatenate_size_0_axis(): assert b.concatenate([a]).shape == (10, 0) -def test_concat_null_X(use_xdataset): +def test_concat_null_x(*, use_xdataset: bool) -> None: adatas_orig = { k: gen_adata((20, 10), obs_xdataset=use_xdataset, var_xdataset=use_xdataset) for k in list("abc") } - adatas_no_X = {} + adatas_no_x = {} for k, v in adatas_orig.items(): v = v.copy() del v.X - adatas_no_X[k] = v + adatas_no_x[k] = v orig = concat(adatas_orig, index_unique="-") - no_X = concat(adatas_no_X, index_unique="-") + no_x = concat(adatas_no_x, index_unique="-") del orig.X - assert_equal(no_X, orig) + assert_equal(no_x, orig) # https://github.com/scverse/ehrapy/issues/151#issuecomment-1016753744 @pytest.mark.parametrize("sparse_indexer_type", [np.int64, np.int32]) -def test_concat_X_dtype(cpu_array_type, sparse_indexer_type): +def test_concat_x_dtype(cpu_array_type, sparse_indexer_type): adatas_orig = { k: AnnData(cpu_array_type(np.ones((20, 10), dtype=np.int8))) for k in list("abc") @@ -1813,16 +1813,16 @@ def test_concat_on_var_outer_join(array_type): def test_concat_dask_sparse_matches_memory(join_type, merge_strategy): import dask.array as da - X = sparse.random(50, 20, density=0.5, format="csr") - X_dask = da.from_array(X, chunks=(5, 20)) + x = sparse.random(50, 20, density=0.5, format="csr") + x_dask = da.from_array(x, chunks=(5, 20)) var_names_1 = [f"gene_{i}" for i in range(20)] var_names_2 = [f"gene_{i}{'_foo' if (i % 2) else ''}" for i in range(20)] - ad1 = AnnData(X=X, var=pd.DataFrame(index=var_names_1)) - ad2 = AnnData(X=X, var=pd.DataFrame(index=var_names_2)) + ad1 = AnnData(X=x, var=pd.DataFrame(index=var_names_1)) + ad2 = AnnData(X=x, var=pd.DataFrame(index=var_names_2)) - ad1_dask = AnnData(X=X_dask, var=pd.DataFrame(index=var_names_1)) - ad2_dask = AnnData(X=X_dask, var=pd.DataFrame(index=var_names_2)) + ad1_dask = AnnData(X=x_dask, var=pd.DataFrame(index=var_names_1)) + ad2_dask = AnnData(X=x_dask, var=pd.DataFrame(index=var_names_2)) res_in_memory = concat([ad1, ad2], join=join_type, merge=merge_strategy) res_dask = concat([ad1_dask, ad2_dask], join=join_type, merge=merge_strategy) diff --git a/tests/test_concatenate_disk.py b/tests/test_concatenate_disk.py index 2b62c7faa..041e3acf6 100644 --- a/tests/test_concatenate_disk.py +++ b/tests/test_concatenate_disk.py @@ -147,12 +147,12 @@ def test_anndatas( ) ) - adatas = [] + adatas: list[AnnData] = [] for i in range(3): - M, N = (np.random.randint(5, 10) if a in random_axes else 50 for a in (0, 1)) + m, n = (np.random.randint(5, 10) if a in random_axes else 50 for a in (0, 1)) a = gen_adata( - (M, N), - X_type=get_array_type(array_type, axis), + (m, n), + x_type=get_array_type(array_type, axis), sparse_fmt=sparse_fmt, obs_dtypes=[pd.CategoricalDtype(ordered=False)], var_dtypes=[pd.CategoricalDtype(ordered=False)], diff --git a/tests/test_dask.py b/tests/test_dask.py index 8a54ad4c1..c8e6656fa 100644 --- a/tests/test_dask.py +++ b/tests/test_dask.py @@ -49,35 +49,35 @@ [(20, 10), (1, 1)], ] ) -def sizes(request): +def sizes(request: pytest.FixtureRequest) -> tuple[tuple[int, int], tuple[int, int]]: return request.param @pytest.fixture -def adata(sizes): +def adata(sizes: tuple[tuple[int, int], tuple[int, int]]) -> AnnData: import dask.array as da import numpy as np - (M, N), chunks = sizes - X = da.random.random((M, N), chunks=chunks) + (m, n), chunks = sizes + X = da.random.random((m, n), chunks=chunks) obs = pd.DataFrame( - {"batch": np.random.choice(["a", "b"], M)}, - index=[f"cell{i:03d}" for i in range(M)], + {"batch": np.random.choice(["a", "b"], m)}, + index=[f"cell{i:03d}" for i in range(m)], ) - var = pd.DataFrame(index=[f"gene{i:03d}" for i in range(N)]) + var = pd.DataFrame(index=[f"gene{i:03d}" for i in range(n)]) return AnnData(X, obs=obs, var=var) -def test_dask_X_view(): +def test_dask_x_view() -> None: import dask.array as da - M, N = 50, 30 + m, n = 50, 30 adata = ad.AnnData( - obs=pd.DataFrame(index=[f"cell{i:02}" for i in range(M)]), - var=pd.DataFrame(index=[f"gene{i:02}" for i in range(N)]), + obs=pd.DataFrame(index=[f"cell{i:02}" for i in range(m)]), + var=pd.DataFrame(index=[f"gene{i:02}" for i in range(n)]), ) - adata.X = da.ones((M, N)) + adata.X = da.ones((m, n)) view = adata[:30] view.copy() @@ -90,10 +90,10 @@ def test_dask_write(adata, tmp_path, diskfmt): write = lambda x, y: getattr(x, f"write_{diskfmt}")(y) read = lambda x: getattr(ad, f"read_{diskfmt}")(x) - M, N = adata.X.shape - adata.obsm["a"] = da.random.random((M, 10)) - adata.obsm["b"] = da.random.random((M, 10)) - adata.varm["a"] = da.random.random((N, 10)) + m, n = adata.X.shape + adata.obsm["a"] = da.random.random((m, 10)) + adata.obsm["b"] = da.random.random((m, 10)) + adata.varm["a"] = da.random.random((n, 10)) orig = adata write(orig, pth) @@ -137,10 +137,10 @@ def test_dask_distributed_write( g = as_group(pth, mode="w") with dd.Client(local_cluster_addr): - M, N = adata.X.shape - adata.obsm["a"] = da.random.random((M, 10)) - adata.obsm["b"] = da.random.random((M, 10)) - adata.varm["a"] = da.random.random((N, 10)) + m, n = adata.X.shape + adata.obsm["a"] = da.random.random((m, 10)) + adata.obsm["b"] = da.random.random((m, 10)) + adata.varm["a"] = da.random.random((n, 10)) orig = adata with ad.settings.override(auto_shard_zarr_v3=auto_shard_zarr_v3): ad.io.write_elem(g, "", orig) @@ -173,10 +173,10 @@ def test_dask_to_memory_check_array_types(adata, tmp_path, diskfmt): write = lambda x, y: getattr(x, f"write_{diskfmt}")(y) read = lambda x: getattr(ad, f"read_{diskfmt}")(x) - M, N = adata.X.shape - adata.obsm["a"] = da.random.random((M, 10)) - adata.obsm["b"] = da.random.random((M, 10)) - adata.varm["a"] = da.random.random((N, 10)) + m, n = adata.X.shape + adata.obsm["a"] = da.random.random((m, 10)) + adata.obsm["b"] = da.random.random((m, 10)) + adata.varm["a"] = da.random.random((n, 10)) orig = adata write(orig, pth) @@ -215,10 +215,10 @@ def test_dask_to_memory_copy_check_array_types(adata, tmp_path, diskfmt): write = lambda x, y: getattr(x, f"write_{diskfmt}")(y) read = lambda x: getattr(ad, f"read_{diskfmt}")(x) - M, N = adata.X.shape - adata.obsm["a"] = da.random.random((M, 10)) - adata.obsm["b"] = da.random.random((M, 10)) - adata.varm["a"] = da.random.random((N, 10)) + m, n = adata.X.shape + adata.obsm["a"] = da.random.random((m, 10)) + adata.obsm["b"] = da.random.random((m, 10)) + adata.varm["a"] = da.random.random((n, 10)) orig = adata write(orig, pth) @@ -245,13 +245,13 @@ def test_dask_to_memory_copy_check_array_types(adata, tmp_path, diskfmt): assert isinstance(orig.varm["a"], DaskArray) -def test_dask_copy_check_array_types(adata): +def test_dask_copy_check_array_types(adata: AnnData) -> None: import dask.array as da - M, N = adata.X.shape - adata.obsm["a"] = da.random.random((M, 10)) - adata.obsm["b"] = da.random.random((M, 10)) - adata.varm["a"] = da.random.random((N, 10)) + m, n = adata.X.shape + adata.obsm["a"] = da.random.random((m, 10)) + adata.obsm["b"] = da.random.random((m, 10)) + adata.varm["a"] = da.random.random((n, 10)) orig = adata curr = adata.copy() @@ -270,7 +270,7 @@ def test_dask_copy_check_array_types(adata): assert isinstance(orig.varm["a"], DaskArray) -def test_assign_X(adata): +def test_assign_x(adata: AnnData) -> None: """Check if assignment works""" import dask.array as da import numpy as np @@ -308,7 +308,7 @@ def test_assign_X(adata): ], ) def test_dask_to_memory_unbacked(array_func, mem_type): - orig = gen_adata((15, 10), X_type=array_func, **GEN_ADATA_DASK_ARGS) + orig = gen_adata((15, 10), x_type=array_func, **GEN_ADATA_DASK_ARGS) orig.uns = {"da": {"da": array_func(np.ones((4, 12)))}} assert isinstance(orig.X, DaskArray) @@ -354,7 +354,7 @@ def test_dask_to_disk_view( def test_dask_to_memory_copy_unbacked(): import numpy as np - orig = gen_adata((15, 10), X_type=as_dense_dask_array, **GEN_ADATA_DASK_ARGS) + orig = gen_adata((15, 10), x_type=as_dense_dask_array, **GEN_ADATA_DASK_ARGS) orig.uns = {"da": {"da": as_dense_dask_array(np.ones(12))}} curr = orig.to_memory(copy=True) diff --git a/tests/test_dask_view_mem.py b/tests/test_dask_view_mem.py index b7fe7c72c..ee1e83886 100644 --- a/tests/test_dask_view_mem.py +++ b/tests/test_dask_view_mem.py @@ -7,6 +7,8 @@ import anndata as ad if TYPE_CHECKING: + from typing import Literal + import pandas as pd pytest.importorskip("pytest_memray") @@ -39,18 +41,18 @@ def give_chunks(request): # Does some stuff so that dask can cache the # subclasscheck before the run. @pytest.fixture -def _alloc_cache(): +def _alloc_cache() -> None: import dask.array as da - N = 2**6 - size = ((N, N), (N, N)) + n = 2**6 + size = ((n, n), (n, n)) adata = ad.AnnData( da.random.random(*size), layers=dict(m=da.random.random(*size)), obsm=dict(m=da.random.random(*size)), - obs=dict(m=da.random.random(N)), - var=dict(m=da.random.random(N)), + obs=dict(m=da.random.random(n)), + var=dict(m=da.random.random(n)), varm=dict(m=da.random.random(*size)), ) subset = adata[:10, :][:, :10] @@ -73,11 +75,13 @@ def _alloc_cache(): # TODO: Why? @pytest.mark.usefixtures("_alloc_cache") @pytest.mark.limit_memory("2.2 MB") -def test_size_of_view(mapping_name, give_chunks): +def test_size_of_view( + *, mapping_name: Literal["layers", "obsm", "varm"], give_chunks: bool +) -> None: import dask.array as da - N = 2**8 - size = ((N, N), (N, N)) if give_chunks else ((N, N), "auto") + n = 2**8 + size = ((n, n), (n, n)) if give_chunks else ((n, n), "auto") adata = ad.AnnData( da.random.random(*size), @@ -94,19 +98,20 @@ def test_size_of_view(mapping_name, give_chunks): # for index this should be ok @pytest.mark.usefixtures("_alloc_cache") @pytest.mark.limit_memory("1.5 MB") -def test_modify_view_mapping_component_memory(mapping_name, give_chunks): +def test_modify_view_mapping_component_memory( + *, mapping_name: Literal["layers", "obsm", "varm"], give_chunks: bool +) -> None: import dask.array as da - N = 2**8 - M = 2**9 + m, n = 2**9, 2**8 - size = ((M, M), (M, M)) if give_chunks else ((M, M), "auto") + size = ((m, m), (m, m)) if give_chunks else ((m, m), "auto") adata = ad.AnnData( da.random.random(*size), **{mapping_name: dict(m=da.random.random(*size))}, ) - subset = adata[:N, :N] + subset = adata[:n, :n] assert subset.is_view m = getattr(subset, mapping_name)["m"] m[0, 0] = 100 @@ -120,26 +125,27 @@ def test_modify_view_mapping_component_memory(mapping_name, give_chunks): # for index this should be ok @pytest.mark.usefixtures("_alloc_cache") @pytest.mark.limit_memory("1.5 MB") -def test_modify_view_X_memory(mapping_name, give_chunks): +def test_modify_view_x_memory( + *, mapping_name: Literal["layers", "obsm", "varm"], give_chunks: bool +) -> None: import dask.array as da - N = 2**8 - M = 2**9 + m, n = 2**9, 2**8 - size = ((M, M), (M, M)) if give_chunks else ((M, M), "auto") + size = ((m, m), (m, m)) if give_chunks else ((m, m), "auto") adata = ad.AnnData( da.random.random(*size), **{mapping_name: dict(m=da.random.random(*size))}, ) - subset = adata[:N, :N] + subset = adata[:n, :n] assert subset.is_view - m = subset.X + x = subset.X with pytest.warns( ad.ImplicitModificationWarning, match=r"Trying to modify attribute `.X` of view, initializing view as actual.", ): - m[0, 0] = 100 + x[0, 0] = 100 # Normally should expect something around 90 kbs @@ -150,19 +156,20 @@ def test_modify_view_X_memory(mapping_name, give_chunks): # for index this should be ok @pytest.mark.usefixtures("_alloc_cache") @pytest.mark.limit_memory("1.5 MB") -def test_modify_view_mapping_obs_var_memory(attr_name, give_chunks): +def test_modify_view_mapping_obs_var_memory( + *, attr_name: Literal["obs", "var"], give_chunks: bool +) -> None: import dask.array as da - N = 2**8 - M = 2**9 + m, n = 2**9, 2**8 - size = ((M, M), (M, M)) if give_chunks else ((M, M), "auto") + size = ((m, m), (m, m)) if give_chunks else ((m, m), "auto") adata = ad.AnnData( da.random.random(*size), - **{attr_name: dict(m=da.random.random(M))}, + **{attr_name: dict(m=da.random.random(m))}, ) - subset = adata[:N, :N] + subset = adata[:n, :n] assert subset.is_view m: pd.Series = getattr(subset, attr_name)["m"] m.iloc[0] = 100 diff --git a/tests/test_deprecations.py b/tests/test_deprecations.py index 7a1b4308a..2b4bac03a 100644 --- a/tests/test_deprecations.py +++ b/tests/test_deprecations.py @@ -7,6 +7,7 @@ from __future__ import annotations import warnings +from typing import TYPE_CHECKING import h5py import numpy as np @@ -17,6 +18,9 @@ from anndata import AnnData from anndata.tests.helpers import assert_equal +if TYPE_CHECKING: + from pathlib import Path + @pytest.fixture def adata(): @@ -59,7 +63,7 @@ def test_get_obsvar_array(adata): ) -def test_obsvar_vector_Xlayer(adata): +def test_obsvar_vector_x_layer(adata: AnnData) -> None: with pytest.warns(FutureWarning): adata.var_vector("s1", layer="X") with pytest.warns(FutureWarning): @@ -83,37 +87,37 @@ def test_dtype_warning(): # This shouldn't warn, shouldn't copy with warnings.catch_warnings(record=True) as record: - b_X = np.ones((3, 3), dtype=np.float64) - b = AnnData(b_X) + b_x = np.ones((3, 3), dtype=np.float64) + b = AnnData(b_x) assert not record - assert b_X is b.X + assert b_x is b.X assert b.X.dtype == np.float64 # Should warn, should copy - c_X = np.ones((3, 3), dtype=np.float32) + c_x = np.ones((3, 3), dtype=np.float32) with pytest.warns(FutureWarning): - c = AnnData(c_X, dtype=np.float64) + c = AnnData(c_x, dtype=np.float64) assert not record - assert c_X is not c.X + assert c_x is not c.X assert c.X.dtype == np.float64 -def test_deprecated_write_attribute(tmp_path): +def test_deprecated_write_attribute(tmp_path: Path) -> None: pth = tmp_path / "file.h5" - A = np.random.randn(20, 10) + arr = np.random.randn(20, 10) from anndata._io.utils import read_attribute, write_attribute from anndata.io import read_elem with h5py.File(pth, "w") as f, pytest.warns(FutureWarning, match=r"write_elem"): - write_attribute(f, "written_attribute", A) + write_attribute(f, "written_attribute", arr) with h5py.File(pth, "r") as f: - elem_A = read_elem(f["written_attribute"]) + elem_a = read_elem(f["written_attribute"]) with pytest.warns(FutureWarning, match=r"read_elem"): - attribute_A = read_attribute(f["written_attribute"]) + attribute_a = read_attribute(f["written_attribute"]) - assert_equal(elem_A, attribute_A) - assert_equal(A, attribute_A) + assert_equal(elem_a, attribute_a) + assert_equal(arr, attribute_a) @pytest.mark.parametrize( diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 08c4d0b30..a193c88e7 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -293,22 +293,22 @@ def test_assert_equal_dask_sparse_arrays(): ), ], ) -def test_as_dask_functions(input_type, as_dask_type, mem_type): - SHAPE = (1000, 100) +def test_as_dask_functions(input_type, as_dask_type, mem_type) -> None: + shape = (1000, 100) rng = np.random.default_rng(42) - X_source = rng.poisson(size=SHAPE).astype(np.float32) - X_input = input_type(X_source) - X_output = as_dask_type(X_input) - X_computed = X_output.compute() + x_source = rng.poisson(size=shape).astype(np.float32) + x_input = input_type(x_source) + x_output = as_dask_type(x_input) + x_computed = x_output.compute() - assert isinstance(X_output, DaskArray) - assert X_output.shape == SHAPE - assert X_output.dtype == X_input.dtype + assert isinstance(x_output, DaskArray) + assert x_output.shape == shape + assert x_output.dtype == x_input.dtype - assert isinstance(X_computed, mem_type) + assert isinstance(x_computed, mem_type) - assert_equal(asarray(X_computed), X_source) + assert_equal(asarray(x_computed), x_source) @pytest.mark.parametrize("dask_matrix_type", DASK_MATRIX_PARAMS) @@ -316,10 +316,10 @@ def test_as_dask_functions(input_type, as_dask_type, mem_type): def test_as_cupy_dask(request: pytest.FixtureRequest, dask_matrix_type) -> None: if dask_matrix_type is as_sparse_dask_array: request.applymarker(pytest.mark.xfail(reason="cupy does not support CSArray")) - SHAPE = (100, 10) + shape = (100, 10) rng = np.random.default_rng(42) - X_cpu = dask_matrix_type(rng.normal(size=SHAPE)) - X_gpu_roundtripped = as_cupy(X_cpu).map_blocks(lambda x: x.get(), meta=X_cpu._meta) - assert isinstance(X_gpu_roundtripped._meta, type(X_cpu._meta)) - assert isinstance(X_gpu_roundtripped.compute(), type(X_cpu.compute())) - assert_equal(X_gpu_roundtripped.compute(), X_cpu.compute()) + x_cpu = dask_matrix_type(rng.normal(size=shape)) + x_gpu_roundtripped = as_cupy(x_cpu).map_blocks(lambda x: x.get(), meta=x_cpu._meta) + assert isinstance(x_gpu_roundtripped._meta, type(x_cpu._meta)) + assert isinstance(x_gpu_roundtripped.compute(), type(x_cpu.compute())) + assert_equal(x_gpu_roundtripped.compute(), x_cpu.compute()) diff --git a/tests/test_inplace_subset.py b/tests/test_inplace_subset.py index 075503276..82bd8cb19 100644 --- a/tests/test_inplace_subset.py +++ b/tests/test_inplace_subset.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np import pytest from scipy import sparse @@ -11,6 +13,11 @@ ) from anndata.utils import asarray +if TYPE_CHECKING: + from typing import Literal + + from anndata.tests.helpers import _SubsetFunc + @pytest.fixture( params=[ @@ -43,7 +50,7 @@ def subset_dim(adata, *, obs=slice(None), var=slice(None)): # TODO: Test values of .uns def test_inplace_subset_var(matrix_type, subset_func): - orig = gen_adata((30, 30), X_type=matrix_type) + orig = gen_adata((30, 30), x_type=matrix_type) subset_idx = subset_func(orig.var_names) modified = orig.copy() @@ -63,7 +70,7 @@ def test_inplace_subset_var(matrix_type, subset_func): def test_inplace_subset_obs(matrix_type, subset_func): - orig = gen_adata((30, 30), X_type=matrix_type) + orig = gen_adata((30, 30), x_type=matrix_type) subset_idx = subset_func(orig.obs_names) modified = orig.copy() @@ -83,7 +90,9 @@ def test_inplace_subset_obs(matrix_type, subset_func): @pytest.mark.parametrize("dim", ["obs", "var"]) -def test_inplace_subset_no_X(subset_func, dim): +def test_inplace_subset_no_x( + subset_func: _SubsetFunc, dim: Literal["obs", "var"] +) -> None: orig = gen_adata((30, 30)) del orig.X diff --git a/tests/test_io_dispatched.py b/tests/test_io_dispatched.py index e3fc7ba17..c66d4883f 100644 --- a/tests/test_io_dispatched.py +++ b/tests/test_io_dispatched.py @@ -129,7 +129,7 @@ def determine_chunks(elem_shape, specified_chunks): ) adata = gen_adata((100, 50), **GEN_ADATA_NO_XARRAY_ARGS) - M, N = 13, 8 + m, n = 13, 8 def write_chunked(func, store, k, elem, dataset_kwargs, iospec): def set_copy(d, **kwargs): @@ -143,15 +143,15 @@ def set_copy(d, **kwargs): elem, CSMatrix | CSArray | ad.AnnData ): if re.match(r"^/((X)|(layers)).*", path): - chunks = (M, N) + chunks = (m, n) elif path.startswith("/obsp"): - chunks = (M, M) + chunks = (m, m) elif path.startswith("/obs"): - chunks = (M,) + chunks = (m,) elif path.startswith("/varp"): - chunks = (N, N) + chunks = (n, n) elif path.startswith("/var"): - chunks = (N,) + chunks = (n,) else: chunks = dataset_kwargs.get("chunks", ()) func( @@ -177,9 +177,9 @@ def check_chunking(k: str, v: ZarrGroup | zarr.Array): ): return if re.match(r"obs[mp]?/\w+", k): - assert v.chunks[0] == M + assert v.chunks[0] == m elif re.match(r"var[mp]?/\w+", k): - assert v.chunks[0] == N + assert v.chunks[0] == n if is_zarr_v2(): z.visititems(check_chunking) diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index d44e9c312..3ade66943 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -93,20 +93,20 @@ def create_sparse_store[G: (H5Group, ZarrGroup)]( """ import dask.array as da - X = sparse.random( + x = sparse.random( shape[0], shape[1], format=sparse_format, density=0.01, random_state=np.random.default_rng(), ) - X_dask = da.from_array( - X, + x_dask = da.from_array( + x, chunks=(100 if format == "csr" else SIZE, SIZE * 2 if format == "csr" else 100), ) - write_elem(store, "X", X) - write_elem(store, "X_dask", X_dask) + write_elem(store, "X", x) + write_elem(store, "X_dask", x_dask) return store @@ -274,37 +274,41 @@ def test_io_spec_cupy(store, value, encoding_type, as_dask): assert get_spec(store[key]) == _REGISTRY.get_spec(value) -def test_dask_write_sparse(sparse_format, store): +def test_dask_write_sparse( + sparse_format: Literal["csr", "csc"], store: H5Group | ZarrGroup +) -> None: x_sparse_store = create_sparse_store(sparse_format, store) - X_from_disk = read_elem(x_sparse_store["X"]) - X_dask_from_disk = read_elem(x_sparse_store["X_dask"]) + x_from_disk = read_elem(x_sparse_store["X"]) + x_dask_from_disk = read_elem(x_sparse_store["X_dask"]) - assert_equal(X_from_disk, X_dask_from_disk) + assert_equal(x_from_disk, x_dask_from_disk) assert_equal(dict(x_sparse_store["X"].attrs), dict(x_sparse_store["X_dask"].attrs)) assert x_sparse_store["X_dask/indptr"].dtype == np.int64 assert x_sparse_store["X_dask/indices"].dtype == np.int64 -def test_read_lazy_2d_dask(sparse_format, store): +def test_read_lazy_2d_dask( + sparse_format: Literal["csr", "csc"], store: H5Group | ZarrGroup +) -> None: arr_store = create_sparse_store(sparse_format, store) - X_dask_from_disk = read_elem_lazy(arr_store["X"]) - X_from_disk = read_elem(arr_store["X"]) + x_dask_from_disk = read_elem_lazy(arr_store["X"]) + x_from_disk = read_elem(arr_store["X"]) - assert_equal(X_from_disk, X_dask_from_disk) + assert_equal(x_from_disk, x_dask_from_disk) random_int_indices = np.random.randint(0, SIZE, (SIZE // 10,)) random_int_indices.sort() index_slice = slice(0, SIZE // 10) for index in [random_int_indices, index_slice]: - assert_equal(X_from_disk[index, :], X_dask_from_disk[index, :]) - assert_equal(X_from_disk[:, index], X_dask_from_disk[:, index]) + assert_equal(x_from_disk[index, :], x_dask_from_disk[index, :]) + assert_equal(x_from_disk[:, index], x_dask_from_disk[:, index]) random_bool_mask = np.random.randn(SIZE) > 0 assert_equal( - X_from_disk[random_bool_mask, :], X_dask_from_disk[random_bool_mask, :] + x_from_disk[random_bool_mask, :], x_dask_from_disk[random_bool_mask, :] ) random_bool_mask = np.random.randn(SIZE * 2) > 0 assert_equal( - X_from_disk[:, random_bool_mask], X_dask_from_disk[:, random_bool_mask] + x_from_disk[:, random_bool_mask], x_dask_from_disk[:, random_bool_mask] ) assert arr_store["X_dask/indptr"].dtype == np.int64 @@ -325,18 +329,20 @@ def test_read_lazy_2d_dask(sparse_format, store): (2, (40, None)), ], ) -def test_read_lazy_subsets_nd_dask(store: H5Group | ZarrGroup, n_dims, chunks) -> None: +def test_read_lazy_subsets_nd_dask( + store: H5Group | ZarrGroup, n_dims: int, chunks: tuple[int | None] | None +) -> None: arr_store = create_dense_store(store, shape=DEFAULT_SHAPE[:n_dims]) - X_dask_from_disk = read_elem_lazy(arr_store["X"], chunks=chunks) - X_from_disk = read_elem(arr_store["X"]) - assert_equal(X_from_disk, X_dask_from_disk) + x_dask_from_disk = read_elem_lazy(arr_store["X"], chunks=chunks) + x_from_disk = read_elem(arr_store["X"]) + assert_equal(x_from_disk, x_dask_from_disk) random_int_indices = np.random.randint(0, SIZE, (SIZE // 10,)) random_int_indices.sort() random_bool_mask = np.random.randn(SIZE) > 0 index_slice = slice(0, SIZE // 10) for index in [random_int_indices, index_slice, random_bool_mask]: - assert_equal(X_from_disk[index], X_dask_from_disk[index]) + assert_equal(x_from_disk[index], x_dask_from_disk[index]) @pytest.mark.xdist_group("dask") @@ -351,18 +357,18 @@ def test_read_lazy_h5_cluster( with h5py.File(tmp_path / "test.h5", "w") as file: store = file["/"] arr_store = create_sparse_store(sparse_format, store) - X_dask_from_disk = read_elem_lazy(arr_store["X"]) - X_from_disk = read_elem(arr_store["X"]) + x_dask_from_disk = read_elem_lazy(arr_store["X"]) + x_from_disk = read_elem(arr_store["X"]) with dd.Client(local_cluster_addr): - assert_equal(X_from_disk, X_dask_from_disk) + assert_equal(x_from_disk, x_dask_from_disk) def test_undersized_shape_to_default(store: H5Group | ZarrGroup) -> None: shape = (1000, 50) arr_store = create_dense_store(store, shape=shape) - X_dask_from_disk = read_elem_lazy(arr_store["X"]) - assert all(c <= s for c, s in zip(X_dask_from_disk.chunksize, shape, strict=True)) - assert X_dask_from_disk.shape == shape + x_dask_from_disk = read_elem_lazy(arr_store["X"]) + assert all(c <= s for c, s in zip(x_dask_from_disk.chunksize, shape, strict=True)) + assert x_dask_from_disk.shape == shape @pytest.mark.parametrize( @@ -391,13 +397,13 @@ def test_read_lazy_2d_chunk_kwargs( ) -> None: if arr_type == "dense": arr_store = create_dense_store(store) - X_dask_from_disk = read_elem_lazy(arr_store["X"], chunks=chunks) + x_dask_from_disk = read_elem_lazy(arr_store["X"], chunks=chunks) else: arr_store = create_sparse_store(arr_type, store) - X_dask_from_disk = read_elem_lazy(arr_store["X"], chunks=chunks) - assert X_dask_from_disk.chunksize == expected_chunksize - X_from_disk = read_elem(arr_store["X"]) - assert_equal(X_from_disk, X_dask_from_disk) + x_dask_from_disk = read_elem_lazy(arr_store["X"], chunks=chunks) + assert x_dask_from_disk.chunksize == expected_chunksize + x_from_disk = read_elem(arr_store["X"]) + assert_equal(x_from_disk, x_dask_from_disk) def test_read_lazy_bad_chunk_kwargs(tmp_path): diff --git a/tests/test_io_partial.py b/tests/test_io_partial.py index 70fb9b995..c971b9e8f 100644 --- a/tests/test_io_partial.py +++ b/tests/test_io_partial.py @@ -2,6 +2,7 @@ from importlib.util import find_spec from pathlib import Path +from typing import TYPE_CHECKING import h5py import numpy as np @@ -13,15 +14,26 @@ from anndata._io.specs.registry import read_elem_partial from anndata.io import read_elem, write_h5ad, write_zarr +if TYPE_CHECKING: + from collections.abc import Callable + from typing import Literal + + from anndata.compat import CSMatrix + + X = np.array([[1.0, 0.0, 3.0], [4.0, 0.0, 6.0], [0.0, 8.0, 0.0]], dtype="float32") -X_check = np.array([[4.0, 0.0], [0.0, 8.0]], dtype="float32") +X_CHECK = np.array([[4.0, 0.0], [0.0, 8.0]], dtype="float32") WRITER = dict(h5ad=write_h5ad, zarr=write_zarr) READER = dict(h5ad=h5py.File, zarr=zarr.open) @pytest.mark.parametrize("typ", [np.asarray, csr_matrix]) -def test_read_partial_X(tmp_path, typ, diskfmt): +def test_read_partial_x( + tmp_path: Path, + typ: Callable[[np.ndarray], np.ndarray | CSMatrix], + diskfmt: Literal["h5ad", "zarr"], +) -> None: adata = AnnData(X=typ(X)) path = Path(tmp_path) / ("test_tp_X." + diskfmt) @@ -30,14 +42,14 @@ def test_read_partial_X(tmp_path, typ, diskfmt): store = READER[diskfmt](path, mode="r") if diskfmt == "zarr": - X_part = read_elem_partial(store["X"], indices=([1, 2], [0, 1])) + x_part = read_elem_partial(store["X"], indices=([1, 2], [0, 1])) else: # h5py doesn't allow fancy indexing across multiple dimensions - X_part = read_elem_partial(store["X"], indices=([1, 2],)) - X_part = X_part[:, [0, 1]] + x_part = read_elem_partial(store["X"], indices=([1, 2],)) + x_part = x_part[:, [0, 1]] store.close() - assert np.all(X_check == X_part) + assert np.all(x_part == X_CHECK) @pytest.mark.skipif(not find_spec("scanpy"), reason="Scanpy is not installed") diff --git a/tests/test_raw.py b/tests/test_raw.py index d0ee86833..3deec568c 100644 --- a/tests/test_raw.py +++ b/tests/test_raw.py @@ -168,7 +168,7 @@ def test_to_adata_populates_obs(): def test_no_copy(): - adata = gen_adata((20, 10), X_type=np.asarray) + adata = gen_adata((20, 10), x_type=np.asarray) adata.raw = adata # no .copy() herer np.log1p(adata.X, out=adata.X) assert adata.X is adata.raw.X diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index ee250a761..efc6abded 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -93,8 +93,8 @@ def dataset_kwargs(request): @pytest.fixture def rw(backing_h5ad): - M, N = 100, 101 - orig = gen_adata((M, N), **GEN_ADATA_NO_XARRAY_ARGS) + m, n = 100, 101 + orig = gen_adata((m, n), **GEN_ADATA_NO_XARRAY_ARGS) orig.write(backing_h5ad) curr = ad.read_h5ad(backing_h5ad) return curr, orig @@ -206,7 +206,7 @@ def test_readwrite_kitchensink(tmp_path, storage, typ, backing_h5ad, dataset_kwa @pytest.mark.parametrize("typ", [np.array, csr_matrix, csr_array, as_dense_dask_array]) -def test_readwrite_maintain_X_dtype(typ, backing_h5ad): +def test_readwrite_maintain_x_dtype(typ, backing_h5ad): X = typ(X_list).astype("int8") adata_src = ad.AnnData(X) adata_src.write(backing_h5ad) @@ -271,8 +271,8 @@ def test_readwrite_equivalent_h5ad_zarr(tmp_path, typ): h5ad_pth = tmp_path / "adata.h5ad" zarr_pth = tmp_path / "adata.zarr" - M, N = 100, 101 - adata = gen_adata((M, N), X_type=typ, **GEN_ADATA_NO_XARRAY_ARGS) + m, n = 100, 101 + adata = gen_adata((m, n), x_type=typ, **GEN_ADATA_NO_XARRAY_ARGS) adata.raw = adata.copy() adata.write_h5ad(h5ad_pth) @@ -604,8 +604,7 @@ def test_dataframe_reserved_columns(tmp_path, diskfmt, colname, attr): def test_write_large_categorical(tmp_path, diskfmt): - M = 30_000 - N = 1000 + m, n = 30_000, 1_000 ls = np.array(list(ascii_letters)) def random_cats(n): @@ -621,10 +620,10 @@ def random_cats(n): adata_pth = tmp_path / f"adata.{diskfmt}" n_cats = len(np.unique(cats)) orig = ad.AnnData( - csr_matrix(([1], ([0], [0])), shape=(M, N)), + csr_matrix(([1], ([0], [0])), shape=(m, n)), obs=dict( - cat1=cats[np.random.choice(n_cats, M)], - cat2=pd.Categorical.from_codes(np.random.choice(n_cats, M), cats), + cat1=cats[np.random.choice(n_cats, m)], + cat2=pd.Categorical.from_codes(np.random.choice(n_cats, m), cats), ), ) getattr(orig, f"write_{diskfmt}")(adata_pth) @@ -663,11 +662,11 @@ def test_hdf5_attribute_conversion(tmp_path, teststring, encoding, length): @pytest.mark.zarr_io -def test_zarr_chunk_X(tmp_path): +def test_zarr_chunk_x(tmp_path: Path) -> None: import zarr zarr_pth = Path(tmp_path) / "test.zarr" - adata = gen_adata((100, 100), X_type=np.array, **GEN_ADATA_NO_XARRAY_ARGS) + adata = gen_adata((100, 100), x_type=np.array, **GEN_ADATA_NO_XARRAY_ARGS) adata.write_zarr(zarr_pth, chunks=(10, 10)) z = zarr.open(zarr_pth) @@ -839,11 +838,11 @@ def test_io_dtype(tmp_path, diskfmt, dtype, roundtrip): assert curr.X.dtype == dtype -def test_h5py_attr_limit(tmp_path): - N = 10_000 +def test_h5py_attr_limit(tmp_path: Path) -> None: + n = 10_000 a = ad.AnnData(np.ones((5, 10))) a.obsm["df"] = pd.DataFrame( - np.ones((5, N)), index=a.obs_names, columns=[str(i) for i in range(N)] + np.ones((5, n)), index=a.obs_names, columns=[str(i) for i in range(n)] ) a.write(tmp_path / "tmp.h5ad") diff --git a/tests/test_structured_arrays.py b/tests/test_structured_arrays.py index ef79716ac..097f214fc 100644 --- a/tests/test_structured_arrays.py +++ b/tests/test_structured_arrays.py @@ -12,19 +12,22 @@ from anndata.tests.helpers import gen_vstr_recarray if TYPE_CHECKING: + from collections.abc import Iterable from typing import Literal -def assert_str_contents_equal(A, B): - lA = [ - [str(el) if not isinstance(el, bytes) else el.decode("utf-8") for el in a] - for a in A +def assert_str_contents_equal( + a: Iterable[Iterable[object]], b: Iterable[Iterable[object]] +) -> None: + l_a = [ + [str(el) if not isinstance(el, bytes) else el.decode("utf-8") for el in row] + for row in a ] - lB = [ - [str(el) if not isinstance(el, bytes) else el.decode("utf-8") for el in b] - for b in B + l_b = [ + [str(el) if not isinstance(el, bytes) else el.decode("utf-8") for el in row] + for row in b ] - assert lA == lB + assert l_a == l_b def test_io( diff --git a/tests/test_transpose.py b/tests/test_transpose.py index b104f4abe..e75f620ae 100644 --- a/tests/test_transpose.py +++ b/tests/test_transpose.py @@ -37,7 +37,7 @@ def _add_raw(adata, *, var_subset=slice(None)): pytest.param(gen_adata((50, 20)), id="csr_X"), pytest.param(gen_adata((50, 20), sparse.csc_matrix), id="csc_X"), pytest.param(_add_raw(gen_adata((50, 20))), id="with_raw"), - pytest.param(gen_adata((20, 10), X_type=None), id="None_X"), + pytest.param(gen_adata((20, 10), x_type=None), id="None_X"), ] ) def adata(request): diff --git a/tests/test_views.py b/tests/test_views.py index dc151e7cd..492a354ef 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -41,6 +41,9 @@ if TYPE_CHECKING: from types import EllipsisType + from anndata.tests.helpers import _SubsetFunc + + IGNORE_SPARSE_EFFICIENCY_WARNING = pytest.mark.filterwarnings( "ignore:Changing the sparsity structure:scipy.sparse.SparseEfficiencyWarning" ) @@ -81,7 +84,7 @@ def adata() -> ad.AnnData: @pytest.fixture(scope="session") def adata_gen_session(matrix_type) -> ad.AnnData: - adata = gen_adata((30, 15), X_type=matrix_type) + adata = gen_adata((30, 15), x_type=matrix_type) adata.raw = adata.copy() return adata @@ -324,10 +327,10 @@ def test_set_varm(adata): # TODO: Determine if this is the intended behavior, # or just the behaviour we’ve had for a while @IGNORE_SPARSE_EFFICIENCY_WARNING -def test_not_set_subset_X(matrix_type_base, subset_func): +def test_not_set_subset_x(matrix_type_base, subset_func: _SubsetFunc) -> None: adata = ad.AnnData(matrix_type_base(asarray(sparse.random(20, 20)))) init_hash = joblib.hash(adata) - orig_X_val = adata.X.copy() + orig_x_val = adata.X.copy() while True: subset_idx = slice_int_subset(adata.obs_names) if len(adata[subset_idx, :]) > 2: @@ -343,7 +346,7 @@ def test_not_set_subset_X(matrix_type_base, subset_func): with pytest.warns(ad.ImplicitModificationWarning, match=r".*X.*"): subset.X[:, internal_idx] = 1 assert not subset.is_view - assert not np.any(asarray(orig_X_val != adata.X)) + assert not np.any(asarray(orig_x_val != adata.X)) assert init_hash == joblib.hash(adata) assert isinstance(subset.X, type(adata.X)) @@ -352,10 +355,10 @@ def test_not_set_subset_X(matrix_type_base, subset_func): # TODO: Determine if this is the intended behavior, # or just the behaviour we’ve had for a while @IGNORE_SPARSE_EFFICIENCY_WARNING -def test_not_set_subset_X_dask(matrix_type_no_gpu, subset_func): +def test_not_set_subset_x_dask(matrix_type_no_gpu, subset_func: _SubsetFunc) -> None: adata = ad.AnnData(matrix_type_no_gpu(asarray(sparse.random(20, 20)))) init_hash = tokenize(adata) - orig_X_val = adata.X.copy() + orig_x_val = adata.X.copy() while True: subset_idx = slice_int_subset(adata.obs_names) if len(adata[subset_idx, :]) > 2: @@ -371,16 +374,16 @@ def test_not_set_subset_X_dask(matrix_type_no_gpu, subset_func): with pytest.warns(ad.ImplicitModificationWarning, match=r".*X.*"): subset.X[:, internal_idx] = 1 assert not subset.is_view - assert not np.any(asarray(orig_X_val != adata.X)) + assert not np.any(asarray(orig_x_val != adata.X)) assert init_hash == tokenize(adata) assert isinstance(subset.X, type(adata.X)) @IGNORE_SPARSE_EFFICIENCY_WARNING -def test_set_scalar_subset_X(matrix_type, subset_func): +def test_set_scalar_subset_x(matrix_type, subset_func: _SubsetFunc) -> None: adata = ad.AnnData(matrix_type(np.zeros((10, 10)))) - orig_X_val = adata.X.copy() + orig_x_val = adata.X.copy() subset_idx = subset_func(adata.obs_names) adata_subset = adata[subset_idx, :] @@ -392,11 +395,11 @@ def test_set_scalar_subset_X(matrix_type, subset_func): if isinstance(adata.X, CupyCSCMatrix): # Comparison broken for CSC matrices # https://github.com/cupy/cupy/issues/7757 - assert asarray(orig_X_val.tocsr() != adata.X.tocsr()).sum() == mul( + assert asarray(orig_x_val.tocsr() != adata.X.tocsr()).sum() == mul( *adata_subset.shape ) else: - assert asarray(orig_X_val != adata.X).sum() == mul(*adata_subset.shape) + assert asarray(orig_x_val != adata.X).sum() == mul(*adata_subset.shape) # TODO: Use different kind of subsetting for adata and view @@ -513,11 +516,11 @@ def test_view_setattr_machinery(attr, subset_func, subset_func2): assert_equal(actual, view, exact=True) -def test_layers_view(): +def test_layers_view() -> None: X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - L = np.array([[10, 11, 12], [13, 14, 15], [16, 17, 18]]) + layer = np.array([[10, 11, 12], [13, 14, 15], [16, 17, 18]]) real_adata = ad.AnnData(X) - real_adata.layers["L"] = L + real_adata.layers["L"] = layer view_adata = real_adata[1:, 1:] real_hash = joblib.hash(real_adata) view_hash = joblib.hash(view_adata) @@ -525,14 +528,14 @@ def test_layers_view(): assert view_adata.is_view with pytest.raises(ValueError, match=r"incorrect shape"): - view_adata.layers["L2"] = L + 2 + view_adata.layers["L2"] = layer + 2 assert view_adata.is_view # Failing to set layer item makes adata not view assert real_hash == joblib.hash(real_adata) assert view_hash == joblib.hash(view_adata) with pytest.warns(ad.ImplicitModificationWarning, match=r".*layers.*"): - view_adata.layers["L2"] = L[1:, 1:] + 2 + view_adata.layers["L2"] = layer[1:, 1:] + 2 assert not view_adata.is_view assert real_hash == joblib.hash(real_adata) @@ -598,7 +601,7 @@ def test_double_index(subset_func, subset_func2): def test_view_different_type_indices(matrix_type): - orig = gen_adata((30, 30), X_type=matrix_type) + orig = gen_adata((30, 30), x_type=matrix_type) boolean_array_mask = np.random.randint(0, 2, 30).astype("bool") boolean_list_mask = boolean_array_mask.tolist() integer_array_mask = np.where(boolean_array_mask)[0] @@ -729,13 +732,13 @@ def test_deepcopy_subset(adata, spmat: type): @pytest.mark.parametrize("array_type", array_type) @pytest.mark.parametrize("attr", ["X", "layers", "obsm", "varm", "obsp", "varp"]) def test_view_mixin_copies_data(adata, array_type: type, attr): - N = 100 + n = 100 adata = ad.AnnData( - obs=pd.DataFrame(index=np.arange(N).astype(str)), - var=pd.DataFrame(index=np.arange(N).astype(str)), + obs=pd.DataFrame(index=np.arange(n).astype(str)), + var=pd.DataFrame(index=np.arange(n).astype(str)), ) - X = array_type(sparse.eye(N, N).multiply(np.arange(1, N + 1))) + X = array_type(sparse.eye(n, n).multiply(np.arange(1, n + 1))) if attr == "X": adata.X = X else: @@ -759,7 +762,7 @@ def test_view_mixin_copies_data(adata, array_type: type, attr): assert not np.array_equal(arr_view_copy, arr_view) -def test_copy_X_dtype(): +def test_copy_x_dtype() -> None: adata = ad.AnnData(sparse.eye(50, dtype=np.float64, format="csr")) adata_c = adata[::2].copy() assert adata_c.X.dtype == adata.X.dtype @@ -806,7 +809,7 @@ def test_ellipsis_index( equivalent_ellipsis_index: tuple[slice, slice], matrix_type, ): - adata = gen_adata((10, 10), X_type=matrix_type, **GEN_ADATA_DASK_ARGS) + adata = gen_adata((10, 10), x_type=matrix_type, **GEN_ADATA_DASK_ARGS) subset_ellipsis = adata[ellipsis_index] subset = adata[equivalent_ellipsis_index] assert_equal(subset_ellipsis, subset) diff --git a/tests/test_x.py b/tests/test_x.py index d7da59a0c..96e0bb779 100644 --- a/tests/test_x.py +++ b/tests/test_x.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np import pandas as pd import pytest @@ -13,6 +15,11 @@ from anndata.tests.helpers import GEN_ADATA_NO_XARRAY_ARGS, assert_equal, gen_adata from anndata.utils import asarray +if TYPE_CHECKING: + from pathlib import Path + from typing import Literal + + UNLABELLED_ARRAY_TYPES = [ pytest.param(sparse.csr_matrix, id="csr"), pytest.param(sparse.csc_matrix, id="csc"), @@ -30,7 +37,7 @@ @pytest.mark.parametrize("new_array_type", UNLABELLED_ARRAY_TYPES) def test_setter_singular_dim(shape, orig_array_type, new_array_type): # https://github.com/scverse/anndata/issues/500 - adata = gen_adata(shape, X_type=orig_array_type) + adata = gen_adata(shape, x_type=orig_array_type) to_assign = new_array_type(np.ones(shape)) adata.X = to_assign np.testing.assert_equal(asarray(adata.X), 1) @@ -38,7 +45,7 @@ def test_setter_singular_dim(shape, orig_array_type, new_array_type): def test_repeat_indices_view(): - adata = gen_adata((10, 10), X_type=np.asarray) + adata = gen_adata((10, 10), x_type=np.asarray) subset = adata[[0, 0, 1, 1], :] mat = np.array([np.ones(adata.shape[1]) * i for i in range(4)]) with pytest.warns( @@ -51,16 +58,16 @@ def test_repeat_indices_view(): @pytest.mark.parametrize("orig_array_type", UNLABELLED_ARRAY_TYPES) @pytest.mark.parametrize("new_array_type", UNLABELLED_ARRAY_TYPES) def test_setter_view(orig_array_type, new_array_type): - adata = gen_adata((10, 10), X_type=orig_array_type) - orig_X = adata.X + adata = gen_adata((10, 10), x_type=orig_array_type) + orig_x = adata.X to_assign = new_array_type(np.ones((9, 9))) - if isinstance(orig_X, np.ndarray) and sparse.issparse(to_assign): + if isinstance(orig_x, np.ndarray) and sparse.issparse(to_assign): # https://github.com/scverse/anndata/issues/500 pytest.xfail("Cannot set a dense array with a sparse array") view = adata[:9, :9] view.X = to_assign np.testing.assert_equal(asarray(view.X), np.ones((9, 9))) - assert isinstance(view.X, type(orig_X)) + assert isinstance(view.X, type(orig_x)) ############################### @@ -75,7 +82,7 @@ def test_set_x_is_none(): assert adata.X is None -def test_del_set_equiv_X(): +def test_del_set_equiv_x() -> None: """Tests that `del adata.X` is equivalent to `adata.X = None`""" # test setter and deleter orig = gen_adata((10, 10)) @@ -119,12 +126,12 @@ def test_init_x_as_none_explicit_shape(): @pytest.mark.parametrize("shape", [*SINGULAR_SHAPES, pytest.param((5, 3), id="(5, 3)")]) -def test_transpose_with_X_as_none(shape): - adata = gen_adata(shape, X_type=lambda x: None) - adataT = adata.transpose() - assert_equal(adataT.shape, shape[::-1]) - assert_equal(adataT.obsp.keys(), adata.varp.keys()) - assert_equal(adataT.T, adata) +def test_transpose_with_x_as_none(shape: tuple[int, int]) -> None: + adata = gen_adata(shape, x_type=lambda x: None) + adata_t = adata.transpose() + assert_equal(adata_t.shape, shape[::-1]) + assert_equal(adata_t.obsp.keys(), adata.varp.keys()) + assert_equal(adata_t.T, adata) def test_copy(): @@ -151,7 +158,7 @@ def test_copy_view(): ############ -def test_io_missing_X(tmp_path, diskfmt): +def test_io_missing_x(tmp_path: Path, diskfmt: Literal["h5ad", "zarr"]) -> None: file_pth = tmp_path / f"x_none_adata.{diskfmt}" write = lambda obj, pth: getattr(obj, f"write_{diskfmt}")(pth) read = lambda pth: getattr(ad, f"read_{diskfmt}")(pth) From c1f28660a1944ca8a2aaf53ecda92d42b47e2a7a Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 6 Nov 2025 19:11:52 +0100 Subject: [PATCH 2/2] x --- benchmarks/benchmarks/backed_hdf5.py | 6 +- benchmarks/benchmarks/sparse_dataset.py | 12 +- benchmarks/benchmarks/utils.py | 10 +- pyproject.toml | 2 - src/anndata/_core/anndata.py | 153 +++++++++--------- src/anndata/_core/merge.py | 4 +- src/anndata/_core/raw.py | 20 +-- src/anndata/_io/read.py | 24 +-- src/anndata/_io/specs/methods.py | 2 +- .../multi_files/_anncollection.py | 22 +-- src/anndata/tests/helpers.py | 6 +- tests/lazy/test_write.py | 4 +- tests/test_backed_hdf5.py | 10 +- tests/test_base.py | 26 +-- tests/test_dask.py | 4 +- tests/test_io_dispatched.py | 6 +- tests/test_io_elementwise.py | 23 +-- tests/test_layers.py | 43 ++--- tests/test_obsmvarm.py | 6 +- tests/test_obspvarp.py | 6 +- tests/test_readwrite.py | 52 +++--- tests/test_views.py | 18 +-- tests/test_x.py | 4 +- 23 files changed, 238 insertions(+), 225 deletions(-) diff --git a/benchmarks/benchmarks/backed_hdf5.py b/benchmarks/benchmarks/backed_hdf5.py index 164f21bab..f9990d4bc 100644 --- a/benchmarks/benchmarks/backed_hdf5.py +++ b/benchmarks/benchmarks/backed_hdf5.py @@ -21,10 +21,10 @@ def setup_cache(self) -> None: format="csr", random_state=np.random.default_rng(42), ) - for X, arr_type in [ + for x, arr_type in [ (x_sparse, "sparse"), ]: - n_obs, n_var = X.shape + n_obs, n_var = x.shape # Create obs and var dataframes obs = pd.DataFrame( @@ -45,7 +45,7 @@ def setup_cache(self) -> None: ) # Create AnnData object and save to HDF5 - adata = ad.AnnData(X=X, obs=obs, var=var) + adata = ad.AnnData(X=x, obs=obs, var=var) # Create temporary file adata.write_h5ad(file_paths[arr_type]) diff --git a/benchmarks/benchmarks/sparse_dataset.py b/benchmarks/benchmarks/sparse_dataset.py index 66f5b221a..d0f2ddafa 100644 --- a/benchmarks/benchmarks/sparse_dataset.py +++ b/benchmarks/benchmarks/sparse_dataset.py @@ -41,8 +41,8 @@ class SparseCSRContiguousSlice: "use_dask", ) - def setup_cache(self): - X = sparse.random( + def setup_cache(self) -> None: + x = sparse.random( 10_000, 10_000, density=0.01, @@ -50,7 +50,7 @@ def setup_cache(self): random_state=np.random.default_rng(42), ) g = zarr.group(self.filepath) - write_elem(g, "X", X) + write_elem(g, "X", x) def setup(self, index: str, use_dask: bool): # noqa: FBT001 g = zarr.open(self.filepath) @@ -82,8 +82,8 @@ def peakmem_getitem_adata(self, *_): class SparseCSRDask: filepath = "data.zarr" - def setup_cache(self): - X = sparse.random( + def setup_cache(self) -> None: + x = sparse.random( 10_000, 10_000, density=0.01, @@ -91,7 +91,7 @@ def setup_cache(self): random_state=np.random.default_rng(42), ) g = zarr.group(self.filepath) - write_elem(g, "X", X) + write_elem(g, "X", x) def setup(self): self.group = zarr.group(self.filepath) diff --git a/benchmarks/benchmarks/utils.py b/benchmarks/benchmarks/utils.py index 2c4da9cd6..35bfa3471 100644 --- a/benchmarks/benchmarks/utils.py +++ b/benchmarks/benchmarks/utils.py @@ -95,7 +95,7 @@ def gen_indexer(adata, dim, index_kind, ratio): def gen_adata(n_obs, n_var, attr_set): if "X-csr" in attr_set: - X = sparse.random( + x = sparse.random( n_obs, n_var, density=0.1, @@ -103,24 +103,24 @@ def gen_adata(n_obs, n_var, attr_set): random_state=np.random.default_rng(42), ) elif "X-dense" in attr_set: - X = sparse.random( + x = sparse.random( n_obs, n_var, density=0.1, format="csr", random_state=np.random.default_rng(42), ) - X = X.toarray() + x = x.toarray() else: # TODO: There's probably a better way to do this - X = sparse.random( + x = sparse.random( n_obs, n_var, density=0, format="csr", random_state=np.random.default_rng(42), ) - adata = AnnData(X) + adata = AnnData(x) if "obs,var" in attr_set: adata.obs = pd.DataFrame( {k: np.random.randint(0, 100, n_obs) for k in ascii_lowercase}, diff --git a/pyproject.toml b/pyproject.toml index b7a36d124..88a0b7236 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -242,8 +242,6 @@ extend-immutable-calls = [ "slice" ] [tool.ruff.lint.flake8-type-checking] exempt-modules = [ ] strict = true -[tool.ruff.lint.pep8-naming] -extend-ignore-names = [ "X" ] [tool.ruff.lint.pylint] max-args = 7 max-positional-args = 5 diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index 926fbf401..f8f00a95f 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -210,7 +210,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641 ) def __init__( # noqa: PLR0913 self, - X: XDataType | pd.DataFrame | None = None, + X: XDataType | pd.DataFrame | None = None, # noqa: N803 obs: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None, var: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None, uns: Mapping[str, Any] | None = None, @@ -242,7 +242,7 @@ def __init__( # noqa: PLR0913 self._init_as_view(X, oidx, vidx) else: self._init_as_actual( - X=X, + X, obs=obs, var=var, uns=uns, @@ -323,7 +323,8 @@ def _init_as_view( def _init_as_actual( # noqa: PLR0912, PLR0913, PLR0915 self, - X=None, + x=None, + /, *, obs=None, var=None, @@ -359,65 +360,65 @@ def _init_as_actual( # noqa: PLR0912, PLR0913, PLR0915 self.file = AnnDataFileManager(self, None) # init from AnnData - if isinstance(X, AnnData): + if isinstance(x, AnnData): if any((obs, var, uns, obsm, varm, obsp, varp)): msg = "If `X` is a dict no further arguments must be provided." raise ValueError(msg) - X, obs, var, uns, obsm, varm, obsp, varp, layers, raw = ( - X._X, - X.obs, - X.var, - X.uns, - X.obsm, - X.varm, - X.obsp, - X.varp, - X.layers, - X.raw, + x, obs, var, uns, obsm, varm, obsp, varp, layers, raw = ( + x._X, + x.obs, + x.var, + x.uns, + x.obsm, + x.varm, + x.obsp, + x.varp, + x.layers, + x.raw, ) # init from DataFrame - elif isinstance(X, pd.DataFrame): + elif isinstance(x, pd.DataFrame): # to verify index matching, we wait until obs and var are DataFrames if obs is None: - obs = pd.DataFrame(index=X.index) - elif not isinstance(X.index, pd.RangeIndex): - x_indices.append(("obs", "index", X.index.astype(str))) + obs = pd.DataFrame(index=x.index) + elif not isinstance(x.index, pd.RangeIndex): + x_indices.append(("obs", "index", x.index.astype(str))) if var is None: - var = pd.DataFrame(index=X.columns) - elif not isinstance(X.columns, pd.RangeIndex): - x_indices.append(("var", "columns", X.columns.astype(str))) - X = ensure_df_homogeneous(X, "X") + var = pd.DataFrame(index=x.columns) + elif not isinstance(x.columns, pd.RangeIndex): + x_indices.append(("var", "columns", x.columns.astype(str))) + x = ensure_df_homogeneous(x, "X") # ---------------------------------------------------------------------- # actually process the data # ---------------------------------------------------------------------- # check data type of X - if X is not None: - X = coerce_array(X, name="X") + if x is not None: + x = coerce_array(x, name="X") if shape is not None: msg = "`shape` needs to be `None` if `X` is not `None`." raise ValueError(msg) - _check_2d_shape(X) + _check_2d_shape(x) # if type doesn’t match, a copy is made, otherwise, use a view if dtype is not None: msg = ( "The dtype argument is deprecated and will be removed in late 2024." ) warnings.warn(msg, FutureWarning, stacklevel=3) - if issparse(X) or isinstance(X, ma.MaskedArray): + if issparse(x) or isinstance(x, ma.MaskedArray): # TODO: maybe use view on data attribute of sparse matrix # as in readwrite.read_10x_h5 - if X.dtype != np.dtype(dtype): - X = X.astype(dtype) - elif isinstance(X, ZarrArray | DaskArray): - X = X.astype(dtype) + if x.dtype != np.dtype(dtype): + x = x.astype(dtype) + elif isinstance(x, ZarrArray | DaskArray): + x = x.astype(dtype) else: # is np.ndarray or a subclass, convert to true np.ndarray - X = np.asarray(X, dtype) + x = np.asarray(x, dtype) # data matrix and shape - self._X = X - n_obs, n_vars = X.shape + self._X = x + n_obs, n_vars = x.shape source = "X" else: self._X = None @@ -485,18 +486,18 @@ def _init_as_actual( # noqa: PLR0912, PLR0913, PLR0915 def __sizeof__( self, *, show_stratified: bool = False, with_disk: bool = False ) -> int: - def get_size(X) -> int: - def cs_to_bytes(X) -> int: - return int(X.data.nbytes + X.indptr.nbytes + X.indices.nbytes) - - if isinstance(X, h5py.Dataset) and with_disk: - return int(np.array(X.shape).prod() * X.dtype.itemsize) - elif isinstance(X, BaseCompressedSparseDataset) and with_disk: - return cs_to_bytes(X._to_backed()) - elif issparse(X): - return cs_to_bytes(X) + def get_size(x) -> int: + def cs_to_bytes(x) -> int: + return int(x.data.nbytes + x.indptr.nbytes + x.indices.nbytes) + + if isinstance(x, h5py.Dataset) and with_disk: + return int(np.array(x.shape).prod() * x.dtype.itemsize) + elif isinstance(x, BaseCompressedSparseDataset) and with_disk: + return cs_to_bytes(x._to_backed()) + elif issparse(x): + return cs_to_bytes(x) else: - return X.__sizeof__() + return x.__sizeof__() sizes = {} attrs = ["X", "_obs", "_var"] @@ -554,28 +555,28 @@ def shape(self) -> tuple[int, int]: return self.n_obs, self.n_vars @property - def X(self) -> XDataType | None: + def X(self) -> XDataType | None: # noqa: N802 """Data matrix of shape :attr:`n_obs` × :attr:`n_vars`.""" if self.isbacked: if not self.file.is_open: self.file.open() - X = self.file["X"] - if isinstance(X, h5py.Group): - X = sparse_dataset(X) + x = self.file["X"] + if isinstance(x, h5py.Group): + x = sparse_dataset(x) # This is so that we can index into a backed dense dataset with # indices that aren’t strictly increasing if self.is_view: - X = _subset(X, (self._oidx, self._vidx)) + x = _subset(x, (self._oidx, self._vidx)) elif self.is_view and self._adata_ref.X is None: - X = None + x = None elif self.is_view: - X = as_view( + x = as_view( _subset(self._adata_ref.X, (self._oidx, self._vidx)), ElementRef(self, "X"), ) else: - X = self._X - return X + x = self._X + return x # if self.n_obs == 1 and self.n_vars == 1: # return X[0, 0] # elif self.n_obs == 1 or self.n_vars == 1: @@ -585,7 +586,7 @@ def X(self) -> XDataType | None: # return X @X.setter - def X(self, value: XDataType | None): # noqa: PLR0912 + def X(self, value: XDataType | None) -> None: # noqa: N802, PLR0912 if value is None: if self.isbacked: msg = "Cannot currently remove data matrix from backed object." @@ -632,10 +633,10 @@ def X(self, value: XDataType | None): # noqa: PLR0912 value = value.reshape(self.shape) if self.isbacked: if self.is_view: - X = self.file["X"] - if isinstance(X, h5py.Group): - X = sparse_dataset(X) - X[oidx, vidx] = value + x = self.file["X"] + if isinstance(x, h5py.Group): + x = sparse_dataset(x) + x[oidx, vidx] = value else: self._set_backed("X", value) elif self.is_view: @@ -670,7 +671,7 @@ def X(self, value: XDataType | None): # noqa: PLR0912 raise ValueError(msg) @X.deleter - def X(self): + def X(self) -> None: # noqa: N802 self.X = None layers: AlignedMappingProperty[Layers | LayersView] = AlignedMappingProperty( @@ -1031,9 +1032,9 @@ def __delitem__(self, index: Index): if not self.isbacked: del self._X[obs, var] else: - X = self.file["X"] - del X[obs, var] - self._set_backed("X", X) + x = self.file["X"] + del x[obs, var] + self._set_backed("X", x) if var == slice(None): del self._obs.iloc[obs, :] if obs == slice(None): @@ -1206,9 +1207,9 @@ def __setitem__(self, index: Index, val: float | XDataType): if not self.isbacked: self._X[obs, var] = val else: - X = self.file["X"] - X[obs, var] = val - self._set_backed("X", X) + x = self.file["X"] + x[obs, var] = val + self._set_backed("X", x) def __len__(self) -> int: return self.shape[0] @@ -1222,7 +1223,7 @@ def transpose(self) -> AnnData: """ from anndata.compat import _safe_transpose - X = self.X if not self.isbacked else self.file["X"] + x = self.X if not self.isbacked else self.file["X"] if self.is_view: msg = ( "You’re trying to transpose a view of an `AnnData`, " @@ -1231,7 +1232,7 @@ def transpose(self) -> AnnData: raise ValueError(msg) return AnnData( - X=_safe_transpose(X) if X is not None else None, + X=_safe_transpose(x) if x is not None else None, layers={k: _safe_transpose(v) for k, v in self.layers.items()}, obs=self.var, var=self.obs, @@ -1266,15 +1267,15 @@ def to_df(self, layer: str | None = None) -> pd.DataFrame: Pandas DataFrame of specified data matrix. """ if layer is not None: - X = self.layers[layer] + x = self.layers[layer] elif not self._has_x(): msg = "X is None, cannot convert to dataframe." raise ValueError(msg) else: - X = self.X - if issparse(X): - X = X.toarray() - return pd.DataFrame(X, index=self.obs_names, columns=self.var_names) + x = self.X + if issparse(x): + x = x.toarray() + return pd.DataFrame(x, index=self.obs_names, columns=self.var_names) def _get_x(self, *, use_raw: bool = False, layer: str | None = None): """\ @@ -2113,14 +2114,14 @@ def _remove_unused_categories_xr( pass # this is handled automatically by the categorical arrays themselves i.e., they dedup upon access. -def _check_2d_shape(X): +def _check_2d_shape(x) -> None: """\ Check shape of array or sparse matrix. Assure that X is always 2D: Unlike numpy we always deal with 2D arrays. """ - if X.dtype.names is None and len(X.shape) != 2: - msg = f"X needs to be 2-dimensional, not {len(X.shape)}-dimensional." + if x.dtype.names is None and len(x.shape) != 2: + msg = f"X needs to be 2-dimensional, not {len(x.shape)}-dimensional." raise ValueError(msg) diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py index 2b898aac9..8045c2881 100644 --- a/src/anndata/_core/merge.py +++ b/src/anndata/_core/merge.py @@ -1706,7 +1706,7 @@ def concat( # noqa: PLR0912, PLR0913, PLR0915 ) alt_annot.true_index_dim = "merge_index" - X = concat_xs(adatas, reindexers, axis=axis, fill_value=fill_value) + x = concat_xs(adatas, reindexers, axis=axis, fill_value=fill_value) if join == "inner": concat_aligned_mapping = inner_concat_aligned_mapping @@ -1779,7 +1779,7 @@ def concat( # noqa: PLR0912, PLR0913, PLR0915 ) warn(msg, UserWarning, stacklevel=2) return AnnData(**{ - "X": X, + "X": x, "layers": layers, axis_name: concat_annot, alt_axis_name: alt_annot, diff --git a/src/anndata/_core/raw.py b/src/anndata/_core/raw.py index 165c6f2c2..93943b6a9 100644 --- a/src/anndata/_core/raw.py +++ b/src/anndata/_core/raw.py @@ -30,7 +30,7 @@ class Raw: def __init__( self, adata: AnnData, - X: np.ndarray | CSMatrix | None = None, + X: np.ndarray | CSMatrix | None = None, # noqa: N803 var: pd.DataFrame | Mapping[str, Sequence] | None = None, varm: AxisArrays | Mapping[str, np.ndarray] | None = None, ): @@ -66,7 +66,7 @@ def _get_x(self, layer=None): return self.X @property - def X(self) -> BaseCompressedSparseDataset | np.ndarray | CSMatrix: + def X(self) -> BaseCompressedSparseDataset | np.ndarray | CSMatrix: # noqa: N802 # TODO: Handle unsorted array of integer indices for h5py.Datasets if not self._adata.isbacked: return self._X @@ -74,24 +74,24 @@ def X(self) -> BaseCompressedSparseDataset | np.ndarray | CSMatrix: self._adata.file.open() # Handle legacy file formats: if "raw/X" in self._adata.file: - X = self._adata.file["raw/X"] + x = self._adata.file["raw/X"] elif "raw.X" in self._adata.file: - X = self._adata.file["raw.X"] # Backwards compat + x = self._adata.file["raw.X"] # Backwards compat else: msg = ( f"Could not find dataset for raw X in file: " f"{self._adata.file.filename}." ) raise AttributeError(msg) - if isinstance(X, h5py.Group): - X = sparse_dataset(X) + if isinstance(x, h5py.Group): + x = sparse_dataset(x) # Check if we need to subset if self._adata.is_view: # TODO: As noted above, implement views of raw # so we can know if we need to subset by var - return _subset(X, (self._adata._oidx, slice(None))) + return _subset(x, (self._adata._oidx, slice(None))) else: - return X + return x @property def shape(self) -> tuple[int, int]: @@ -130,10 +130,10 @@ def __getitem__(self, index: Index) -> Raw: if isinstance(oidx, int | np.integer): oidx = slice(oidx, oidx + 1, 1) - X = _subset(self.X, (oidx, vidx)) if not self._adata.isbacked else None + x = _subset(self.X, (oidx, vidx)) if not self._adata.isbacked else None var = self._var.iloc[vidx] - new = Raw(self._adata, X=X, var=var) + new = Raw(self._adata, X=x, var=var) if self.varm is not None: # Since there is no view of raws new.varm = self.varm._view(_RawViewHack(self, vidx), (vidx,)).copy() diff --git a/src/anndata/_io/read.py b/src/anndata/_io/read.py index 86f87ad27..555571e90 100644 --- a/src/anndata/_io/read.py +++ b/src/anndata/_io/read.py @@ -74,10 +74,10 @@ def read_excel( from pandas import read_excel df = read_excel(fspath(filename), sheet) - X = df.values[:, 1:] + x = df.values[:, 1:] row = dict(row_names=df.iloc[:, 0].values.astype(str)) col = dict(col_names=np.array(df.columns[1:], dtype=str)) - return AnnData(X, row, col) + return AnnData(x, row, col) def read_umi_tools(filename: PathLike[str] | str, dtype=None) -> AnnData: @@ -93,14 +93,14 @@ def read_umi_tools(filename: PathLike[str] | str, dtype=None) -> AnnData: # import gzip to read a gzipped file :-) table = pd.read_table(filename, dtype={"gene": "category", "cell": "category"}) - X = sparse.csr_matrix( + x = sparse.csr_matrix( (table["count"], (table["cell"].cat.codes, table["gene"].cat.codes)), dtype=dtype, ) obs = pd.DataFrame(index=pd.Index(table["cell"].cat.categories, name="cell")) var = pd.DataFrame(index=pd.Index(table["gene"].cat.categories, name="gene")) - return AnnData(X=X, obs=obs, var=var) + return AnnData(X=x, obs=obs, var=var) def read_hdf(filename: PathLike[str] | str, key: str) -> AnnData: @@ -127,13 +127,13 @@ def read_hdf(filename: PathLike[str] | str, key: str) -> AnnData: ) raise ValueError(msg) # read array - X = f[key][()] + x = f[key][()] # try to find row and column names rows_cols = [{}, {}] for iname, name in enumerate(["row_names", "col_names"]): if name in keys: rows_cols[iname][name] = f[name][()] - adata = AnnData(X, rows_cols[0], rows_cols[1]) + adata = AnnData(x, rows_cols[0], rows_cols[1]) return adata @@ -274,8 +274,8 @@ def read_loom( # noqa: PLR0912, PLR0913 if X_name not in lc.layers: X_name = "" # noqa: N806 - X = lc.layers[X_name].sparse().T.tocsr() if sparse else lc.layers[X_name][()].T - X = X.astype(dtype, copy=False) + x = lc.layers[X_name].sparse().T.tocsr() if sparse else lc.layers[X_name][()].T + x = x.astype(dtype, copy=False) layers = OrderedDict() if X_name != "": @@ -308,7 +308,7 @@ def read_loom( # noqa: PLR0912, PLR0913 uns["loom-var"] = uns_var adata = AnnData( - X, + x, obs=obs, var=var, layers=layers, @@ -333,11 +333,11 @@ def read_mtx(filename: PathLike[str] | str, dtype: str = "float32") -> AnnData: from scipy.io import mmread # could be rewritten accounting for dtype to be more performant - X = mmread(fspath(filename)).astype(dtype) + x = mmread(fspath(filename)).astype(dtype) from scipy.sparse import csr_matrix - X = csr_matrix(X) - return AnnData(X) + x = csr_matrix(x) + return AnnData(x) @old_positionals("first_column_names", "dtype") diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py index 931b63f22..9aeb9f2ca 100644 --- a/src/anndata/_io/specs/methods.py +++ b/src/anndata/_io/specs/methods.py @@ -211,7 +211,7 @@ def read_partial( # noqa: PLR0913 *, obs_idx=slice(None), var_idx=slice(None), - X=True, + X=True, # noqa: N803 obs=None, var=None, obsm=None, diff --git a/src/anndata/experimental/multi_files/_anncollection.py b/src/anndata/experimental/multi_files/_anncollection.py index 092a5b9be..01d7fb0d5 100644 --- a/src/anndata/experimental/multi_files/_anncollection.py +++ b/src/anndata/experimental/multi_files/_anncollection.py @@ -345,28 +345,28 @@ def _gather_x(self): continue adata = self.adatas[i] - X = adata.X + x = adata.X vidx = self.adatas_vidx[i] - if isinstance(X, Dataset): + if isinstance(x, Dataset): reverse = None if oidx.size > 1 and np.any(oidx[:-1] >= oidx[1:]): oidx, reverse = np.unique(oidx, return_inverse=True) # TODO: fix memory inefficient approach of X[oidx][:, vidx] - arr = X[oidx, vidx] if isinstance(vidx, slice) else X[oidx][:, vidx] + arr = x[oidx, vidx] if isinstance(vidx, slice) else x[oidx][:, vidx] xs.append(arr if reverse is None else arr[reverse]) - elif isinstance(X, BaseCompressedSparseDataset): + elif isinstance(x, BaseCompressedSparseDataset): # very slow indexing with two arrays if isinstance(vidx, slice) or len(vidx) <= 1000: - xs.append(X[oidx, vidx]) + xs.append(x[oidx, vidx]) else: - xs.append(X[oidx][:, vidx]) + xs.append(x[oidx][:, vidx]) else: # if vidx is present it is less memory efficient idx = oidx, vidx idx = np.ix_(*idx) if not isinstance(vidx, slice) else idx - xs.append(X[idx]) + xs.append(x[idx]) if len(xs) > 1: _x = _merge(xs) @@ -382,7 +382,7 @@ def _gather_x(self): return _x @property - def X(self): + def X(self): # noqa: N802 """Lazy subset of data matrix. The data matrix formed from the `.X` attributes of the underlying `adatas`, @@ -565,13 +565,13 @@ def to_adata( warn(msg, FutureWarning) ignore_x = ignore_X if ignore_x: - X = None + x = None shape = self.shape else: - X = self._gather_x() + x = self._gather_x() shape = None - adata = AnnData(X, obs=obs, obsm=obsm, layers=layers, shape=shape) + adata = AnnData(x, obs=obs, obsm=obsm, layers=layers, shape=shape) adata.obs_names = self.obs_names adata.var_names = self.var_names return adata diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index 7784cbcbe..79bde4b36 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -346,9 +346,9 @@ def gen_adata( # noqa: PLR0913 var = XDataset.from_dataframe(var) if x_type is None: - X = None + x = None else: - X = x_type(random_state.binomial(100, 0.005, (m, n)).astype(x_dtype)) + x = x_type(random_state.binomial(100, 0.005, (m, n)).astype(x_dtype)) obsm = dict( array=np.random.random((m, 50)), @@ -429,7 +429,7 @@ def gen_adata( # noqa: PLR0913 with warnings.catch_warnings(): warnings.simplefilter("ignore", ExperimentalFeatureWarning) adata = AnnData( - X=X, + X=x, obs=obs, var=var, obsm=obsm, diff --git a/tests/lazy/test_write.py b/tests/lazy/test_write.py index 345ce2312..7c575a180 100644 --- a/tests/lazy/test_write.py +++ b/tests/lazy/test_write.py @@ -25,8 +25,8 @@ def test_write_error( key: Literal["obs", "var", "obsm", "varm"], ): path = tmp_path / "adata.h5ad" - X = np.random.random((4, 4)) - adata = AnnData(X=X) + x = np.random.random((4, 4)) + adata = AnnData(X=x) if key.endswith("m"): elem = {"df": getattr(adata, key[:-1])} setattr(adata, key, elem) diff --git a/tests/test_backed_hdf5.py b/tests/test_backed_hdf5.py index 26f050a36..90ac42acb 100644 --- a/tests/test_backed_hdf5.py +++ b/tests/test_backed_hdf5.py @@ -42,7 +42,7 @@ def adata() -> ad.AnnData: [4, 5, 6], [7, 8, 9], ] # data matrix of shape n_obs x n_vars - X = np.array(x_list) + x = np.array(x_list) obs_dict = dict( # annotation of observations / rows row_names=["name1", "name2", "name3"], # row annotation oanno1=["cat1", "cat2", "cat2"], # categorical annotation @@ -54,13 +54,13 @@ def adata() -> ad.AnnData: oanno1_colors=["#000000", "#FFFFFF"], uns2=["some annotation"] ) return ad.AnnData( - X, + x, obs=obs_dict, var=var_dict, uns=uns_dict, - obsm=dict(o1=np.zeros((X.shape[0], 10))), - varm=dict(v1=np.ones((X.shape[1], 20))), - layers=dict(float=X.astype(float), sparse=sparse.csr_matrix(X)), + obsm=dict(o1=np.zeros((x.shape[0], 10))), + varm=dict(v1=np.ones((x.shape[1], 20))), + layers=dict(float=x.astype(float), sparse=sparse.csr_matrix(x)), ) diff --git a/tests/test_base.py b/tests/test_base.py index f24703dbf..d4585e9a0 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -44,16 +44,16 @@ def test_creation(): AnnData(ma.array([[1, 2], [3, 4]]), uns=dict(mask=[0, 1, 1, 0])) AnnData(sp.eye(2, format="csr")) AnnData(sp.csr_array([[1, 0], [0, 1]])) - X = np.array([[1, 2, 3], [4, 5, 6]]) + x = np.array([[1, 2, 3], [4, 5, 6]]) adata = AnnData( - X=X, + X=x, obs=dict(Obs=["A", "B"]), var=dict(Feat=["a", "b", "c"]), obsm=dict(X_pca=np.array([[1, 2], [3, 4]])), - raw=dict(X=X, var=dict(var_names=["a", "b", "c"])), + raw=dict(X=x, var=dict(var_names=["a", "b", "c"])), ) - assert adata.raw.X.tolist() == X.tolist() + assert adata.raw.X.tolist() == x.tolist() assert adata.raw.var_names.tolist() == ["a", "b", "c"] # init with empty data matrix @@ -111,11 +111,11 @@ def test_invalid_x() -> None: AnnData("string is not a valid X") -def test_create_with_dfs(): - X = np.ones((6, 3)) +def test_create_with_dfs() -> None: + x = np.ones((6, 3)) obs = pd.DataFrame(dict(cat_anno=pd.Categorical(["a", "a", "a", "a", "b", "a"]))) obs_copy = obs.copy() - adata = AnnData(X=X, obs=obs) + adata = AnnData(X=x, obs=obs) assert obs.index.equals(obs_copy.index) assert obs.index.astype(str).equals(adata.obs.index) @@ -562,10 +562,10 @@ def test_equality_comparisons(): adata1 != 1 # noqa: B015 -def test_rename_categories(): - X = np.ones((6, 3)) +def test_rename_categories() -> None: + x = np.ones((6, 3)) obs = pd.DataFrame(dict(cat_anno=pd.Categorical(["a", "a", "a", "a", "b", "a"]))) - adata = AnnData(X=X, obs=obs) + adata = AnnData(X=x, obs=obs) adata.uns["tool"] = {} adata.uns["tool"]["cat_array"] = np.rec.fromarrays( [np.ones(2) for cat in adata.obs["cat_anno"].cat.categories], @@ -676,10 +676,10 @@ def test_1d_slice_dtypes() -> None: assert np.all(new_var_df == var_df) -def test_to_df_sparse(): - X = adata_sparse.X.toarray() +def test_to_df_sparse() -> None: + x = adata_sparse.X.toarray() df = adata_sparse.to_df() - assert df.values.tolist() == X.tolist() + assert df.values.tolist() == x.tolist() def test_to_df_no_x() -> None: diff --git a/tests/test_dask.py b/tests/test_dask.py index c8e6656fa..698f67c60 100644 --- a/tests/test_dask.py +++ b/tests/test_dask.py @@ -59,14 +59,14 @@ def adata(sizes: tuple[tuple[int, int], tuple[int, int]]) -> AnnData: import numpy as np (m, n), chunks = sizes - X = da.random.random((m, n), chunks=chunks) + x = da.random.random((m, n), chunks=chunks) obs = pd.DataFrame( {"batch": np.random.choice(["a", "b"], m)}, index=[f"cell{i:03d}" for i in range(m)], ) var = pd.DataFrame(index=[f"gene{i:03d}" for i in range(n)]) - return AnnData(X, obs=obs, var=var) + return AnnData(x, obs=obs, var=var) def test_dask_x_view() -> None: diff --git a/tests/test_io_dispatched.py b/tests/test_io_dispatched.py index c66d4883f..f93c3c0aa 100644 --- a/tests/test_io_dispatched.py +++ b/tests/test_io_dispatched.py @@ -103,18 +103,18 @@ def test_read_dispatched_null_case(tmp_path: Path): @pytest.mark.parametrize("sparse_format", ["csr", "csc"]) def test_write_dispatched_csr_dataset( tmp_path: Path, sparse_format: Literal["csr", "csc"] -): +) -> None: ad.io.write_elem( open_write_group(tmp_path / "arr.zarr"), "/", sp.random(10, 10, format=sparse_format), ) - X = ad.io.sparse_dataset(zarr.open(tmp_path / "arr.zarr")) + x = ad.io.sparse_dataset(zarr.open(tmp_path / "arr.zarr")) def zarr_writer(func, store, elem_name: str, elem, iospec, dataset_kwargs): assert iospec.encoding_type == f"{sparse_format}_matrix" - write_dispatched(zarr.open(tmp_path / "check.zarr", mode="w"), "/X", X, zarr_writer) + write_dispatched(zarr.open(tmp_path / "check.zarr", mode="w"), "/X", x, zarr_writer) @pytest.mark.zarr_io diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index 3ade66943..d38027d9a 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -71,9 +71,8 @@ def sparse_format(request: pytest.FixtureRequest) -> Literal["csr", "csc"]: def create_dense_store( store: H5Group | ZarrGroup, *, shape: tuple[int, ...] = DEFAULT_SHAPE ) -> H5Group | ZarrGroup: - X = np.random.randn(*shape) - - write_elem(store, "X", X) + x = np.random.randn(*shape) + write_elem(store, "X", x) return store @@ -420,8 +419,10 @@ def test_read_lazy_bad_chunk_kwargs(tmp_path): @pytest.mark.parametrize("sparse_format", ["csr", "csc"]) -def test_write_indptr_dtype_override(store, sparse_format): - X = sparse.random( +def test_write_indptr_dtype_override( + store: H5Group | ZarrGroup, sparse_format: Literal["csr", "csc"] +) -> None: + x = sparse.random( 100, 100, format=sparse_format, @@ -429,11 +430,11 @@ def test_write_indptr_dtype_override(store, sparse_format): random_state=np.random.default_rng(), ) - write_elem(store, "X", X, dataset_kwargs=dict(indptr_dtype="int64")) + write_elem(store, "X", x, dataset_kwargs=dict(indptr_dtype="int64")) assert store["X/indptr"].dtype == np.int64 - assert X.indptr.dtype == np.int32 - np.testing.assert_array_equal(store["X/indptr"][...], X.indptr) + assert x.indptr.dtype == np.int32 + np.testing.assert_array_equal(store["X/indptr"][...], x.indptr) def test_io_spec_raw(store): @@ -763,18 +764,18 @@ def test_write_auto_cannot_set_v2_format_after_sharding(): @pytest.mark.skipif(is_zarr_v2(), reason="auto sharding is allowed only for zarr v3.") def test_write_auto_sharded_does_not_override(tmp_path: Path): z = open_write_group(tmp_path / "arr.zarr", zarr_format=3) - X = sparse.random( + x = sparse.random( 100, 100, density=0.1, format="csr", rng=np.random.default_rng(42) ) with ad.settings.override(auto_shard_zarr_v3=True, zarr_write_format=3): - ad.io.write_elem(z, "X_default", X) + ad.io.write_elem(z, "X_default", x) shards_default = z["X_default"]["indices"].shards new_shards = shards_default[0] // 2 new_shards = int(new_shards - new_shards % 2) ad.io.write_elem( z, "X_manually_set", - X, + x, dataset_kwargs={ "shards": (new_shards,), "chunks": (int(new_shards / 2),), diff --git a/tests/test_layers.py b/tests/test_layers.py index 391d1fe98..14ea31ede 100644 --- a/tests/test_layers.py +++ b/tests/test_layers.py @@ -1,6 +1,7 @@ from __future__ import annotations import warnings +from typing import TYPE_CHECKING import numpy as np import pandas as pd @@ -9,17 +10,21 @@ from anndata import AnnData, ImplicitModificationWarning, read_h5ad from anndata.tests.helpers import gen_typed_df_t2_size -X_ = np.arange(12).reshape((3, 4)) +if TYPE_CHECKING: + from collections.abc import Callable + from pathlib import Path + +X = np.arange(12).reshape((3, 4)) L = np.arange(12).reshape((3, 4)) + 12 -@pytest.fixture(params=[X_, None]) -def X(request): +@pytest.fixture(params=[X, None]) +def x(request): return request.param -def test_creation(X: np.ndarray | None): - adata = AnnData(X=X, layers=dict(L=L.copy())) +def test_creation(x: np.ndarray | None) -> None: + adata = AnnData(x, layers=dict(L=L.copy())) assert list(adata.layers.keys()) == ["L"] assert "L" in adata.layers @@ -29,8 +34,8 @@ def test_creation(X: np.ndarray | None): assert adata.shape == L.shape -def test_views(): - adata = AnnData(X=X_, layers=dict(L=L.copy())) +def test_views() -> None: + adata = AnnData(X, layers=dict(L=L.copy())) adata_view = adata[1:, 1:] assert adata_view.layers.is_view @@ -39,13 +44,13 @@ def test_views(): assert adata_view.layers.keys() == adata.layers.keys() assert (adata_view.layers["L"] == adata.layers["L"][1:, 1:]).all() - adata.layers["S"] = X_ + adata.layers["S"] = X assert adata_view.layers.keys() == adata.layers.keys() assert (adata_view.layers["S"] == adata.layers["S"][1:, 1:]).all() with pytest.warns(ImplicitModificationWarning): - adata_view.layers["T"] = X_[1:, 1:] + adata_view.layers["T"] = X[1:, 1:] assert not adata_view.layers.is_view assert not adata_view.is_view @@ -54,12 +59,14 @@ def test_views(): @pytest.mark.parametrize( ("df", "homogenous", "dtype"), [ - (lambda: gen_typed_df_t2_size(*X_.shape), True, np.object_), - (lambda: pd.DataFrame(X_**2), False, np.int_), + (lambda: gen_typed_df_t2_size(*X.shape), True, np.object_), + (lambda: pd.DataFrame(X**2), False, np.int_), ], ) -def test_set_dataframe(homogenous, df, dtype): - adata = AnnData(X_) +def test_set_dataframe( + *, homogenous: bool, df: Callable[[], pd.DataFrame], dtype: type[np.generic] +) -> None: + adata = AnnData(X) if homogenous: with pytest.warns(UserWarning, match=r"Layer 'df'.*dtype object"): adata.layers["df"] = df() @@ -71,8 +78,8 @@ def test_set_dataframe(homogenous, df, dtype): assert np.issubdtype(adata.layers["df"].dtype, dtype) -def test_readwrite(X: np.ndarray | None, backing_h5ad): - adata = AnnData(X=X, layers=dict(L=L.copy())) +def test_readwrite(x: np.ndarray | None, backing_h5ad: Path) -> None: + adata = AnnData(x, layers=dict(L=L.copy())) adata.write(backing_h5ad) adata_read = read_h5ad(backing_h5ad) @@ -86,7 +93,7 @@ def test_backed(): def test_copy(): - adata = AnnData(X=X_, layers=dict(L=L.copy())) + adata = AnnData(X=X, layers=dict(L=L.copy())) bdata = adata.copy() # check that we don’t create too many references assert bdata._layers is bdata.layers._data @@ -96,7 +103,7 @@ def test_copy(): def test_shape_error(): - adata = AnnData(X=X_) + adata = AnnData(X=X) with pytest.raises( ValueError, match=( @@ -105,4 +112,4 @@ def test_shape_error(): r"Value had shape \(4, 4\) while it should have had \(3, 4\)\." ), ): - adata.layers["L"] = np.zeros((X_.shape[0] + 1, X_.shape[1])) + adata.layers["L"] = np.zeros((X.shape[0] + 1, X.shape[1])) diff --git a/tests/test_obsmvarm.py b/tests/test_obsmvarm.py index e2513e3a8..1e9104594 100644 --- a/tests/test_obsmvarm.py +++ b/tests/test_obsmvarm.py @@ -29,14 +29,14 @@ def array_type(request): @pytest.fixture -def adata(): - X = np.zeros((M, N)) +def adata() -> AnnData: + x = np.zeros((M, N)) obs = pd.DataFrame( dict(batch=np.array(["a", "b"])[np.random.randint(0, 2, M)]), index=[f"cell{i:03d}" for i in range(N)], ) var = pd.DataFrame(index=[f"gene{i:03d}" for i in range(N)]) - return AnnData(X, obs=obs, var=var) + return AnnData(x, obs=obs, var=var) def test_assignment_dict(adata: AnnData): diff --git a/tests/test_obspvarp.py b/tests/test_obspvarp.py index 42fc47172..ab61c1db2 100644 --- a/tests/test_obspvarp.py +++ b/tests/test_obspvarp.py @@ -17,14 +17,14 @@ @pytest.fixture -def adata(): - X = np.zeros((M, N)) +def adata() -> AnnData: + x = np.zeros((M, N)) obs = pd.DataFrame( dict(batch=np.array(["a", "b"])[np.random.randint(0, 2, M)]), index=[f"cell{i:03d}" for i in range(M)], ) var = pd.DataFrame(index=[f"gene{i:03d}" for i in range(N)]) - return AnnData(X, obs=obs, var=var) + return AnnData(x, obs=obs, var=var) def test_assigmnent_dict(adata: AnnData): diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index efc6abded..3d3624635 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -48,9 +48,9 @@ # ------------------------------------------------------------------------------ -X_sp = csr_matrix([[1, 0, 0], [3, 0, 0], [5, 6, 0], [0, 0, 0], [0, 0, 0]]) +X_SP = csr_matrix([[1, 0, 0], [3, 0, 0], [5, 6, 0], [0, 0, 0], [0, 0, 0]]) -X_list = [[1, 0], [3, 0], [5, 6]] # data matrix of shape n_obs x n_vars +X_LIST = [[1, 0], [3, 0], [5, 6]] # data matrix of shape n_obs x n_vars obs_dict = dict( # annotation of observations / rows row_names=["name1", "name2", "name3"], # row annotation @@ -128,7 +128,7 @@ def test_readwrite_roundtrip(typ, tmp_path, diskfmt, diskfmt2): write2 = lambda x: getattr(x, f"write_{diskfmt2}")(pth2) read2 = lambda: getattr(ad, f"read_{diskfmt2}")(pth2) - adata1 = ad.AnnData(typ(X_list), obs=obs_dict, var=var_dict, uns=uns_dict) + adata1 = ad.AnnData(typ(X_LIST), obs=obs_dict, var=var_dict, uns=uns_dict) write1(adata1) adata2 = read1() write2(adata2) @@ -147,7 +147,7 @@ async def _do_test(): zarr_path = tmp_path / "first.zarr" adata1 = ad.AnnData( - csr_matrix(X_list), obs=obs_dict, var=var_dict, uns=uns_dict + csr_matrix(X_LIST), obs=obs_dict, var=var_dict, uns=uns_dict ) adata1.write_zarr(zarr_path) adata2 = ad.read_zarr(zarr_path) @@ -160,9 +160,15 @@ async def _do_test(): @pytest.mark.parametrize("storage", ["h5ad", "zarr"]) @pytest.mark.parametrize("typ", [np.array, csr_matrix, csr_array, as_dense_dask_array]) -def test_readwrite_kitchensink(tmp_path, storage, typ, backing_h5ad, dataset_kwargs): - X = typ(X_list) - adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict) +def test_readwrite_kitchensink( + tmp_path: Path, + storage: Literal["h5ad", "zarr"], + typ, + backing_h5ad: Path, + dataset_kwargs, +) -> None: + x = typ(X_LIST) + adata_src = ad.AnnData(x, obs=obs_dict, var=var_dict, uns=uns_dict) assert not isinstance(adata_src.obs["oanno1"].dtype, pd.CategoricalDtype) adata_src.raw = adata_src.copy() @@ -206,9 +212,9 @@ def test_readwrite_kitchensink(tmp_path, storage, typ, backing_h5ad, dataset_kwa @pytest.mark.parametrize("typ", [np.array, csr_matrix, csr_array, as_dense_dask_array]) -def test_readwrite_maintain_x_dtype(typ, backing_h5ad): - X = typ(X_list).astype("int8") - adata_src = ad.AnnData(X) +def test_readwrite_maintain_x_dtype(typ, backing_h5ad: Path) -> None: + x = typ(X_LIST).astype("int8") + adata_src = ad.AnnData(x) adata_src.write(backing_h5ad) adata = ad.read_h5ad(backing_h5ad) @@ -239,9 +245,9 @@ def test_maintain_layers(rw): @pytest.mark.parametrize("typ", [np.array, csr_matrix, csr_array, as_dense_dask_array]) -def test_readwrite_h5ad_one_dimension(typ, backing_h5ad): - X = typ(X_list) - adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict) +def test_readwrite_h5ad_one_dimension(typ, backing_h5ad: Path) -> None: + x = typ(X_LIST) + adata_src = ad.AnnData(x, obs=obs_dict, var=var_dict, uns=uns_dict) adata_one = adata_src[:, 0].copy() adata_one.write(backing_h5ad) adata = ad.read_h5ad(backing_h5ad) @@ -251,8 +257,8 @@ def test_readwrite_h5ad_one_dimension(typ, backing_h5ad): @pytest.mark.parametrize("typ", [np.array, csr_matrix, csr_array, as_dense_dask_array]) def test_readwrite_backed(typ, backing_h5ad): - X = typ(X_list) - adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict) + x = typ(X_LIST) + adata_src = ad.AnnData(x, obs=obs_dict, var=var_dict, uns=uns_dict) adata_src.filename = backing_h5ad # change to backed mode adata_src.write() @@ -455,14 +461,14 @@ def test_read_csv(): adata = ad.io.read_csv(HERE / "data" / "adata.csv") assert adata.obs_names.tolist() == ["r1", "r2", "r3"] assert adata.var_names.tolist() == ["c1", "c2"] - assert adata.X.tolist() == X_list + assert adata.X.tolist() == X_LIST def test_read_tsv_strpath(): adata = ad.io.read_text(str(HERE / "data" / "adata-comments.tsv"), "\t") assert adata.obs_names.tolist() == ["r1", "r2", "r3"] assert adata.var_names.tolist() == ["c1", "c2"] - assert adata.X.tolist() == X_list + assert adata.X.tolist() == X_LIST def test_read_tsv_iter(): @@ -470,13 +476,13 @@ def test_read_tsv_iter(): adata = ad.io.read_text(f, "\t") assert adata.obs_names.tolist() == ["r1", "r2", "r3"] assert adata.var_names.tolist() == ["c1", "c2"] - assert adata.X.tolist() == X_list + assert adata.X.tolist() == X_LIST @pytest.mark.parametrize("typ", [np.array, csr_matrix]) -def test_write_csv(typ, tmp_path): - X = typ(X_list) - adata = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict) +def test_write_csv(typ, tmp_path: Path) -> None: + x = typ(X_LIST) + adata = ad.AnnData(x, obs=obs_dict, var=var_dict, uns=uns_dict) adata.write_csvs(tmp_path / "test_csv_dir", skip_data=False) @@ -501,7 +507,7 @@ def hash_dir_contents(dir: Path) -> dict[str, bytes]: str(k)[len(root_pth) :]: md5_path(k) for k in dir.rglob("*") if k.is_file() } - adata = ad.AnnData(typ(X_list), obs=obs_dict, var=var_dict, uns=uns_dict) + adata = ad.AnnData(typ(X_LIST), obs=obs_dict, var=var_dict, uns=uns_dict) # Test writing a view view_pth = tmp_path / "test_view_csv_dir" @@ -534,7 +540,7 @@ def test_read_excel(): category=DeprecationWarning, ) adata = ad.io.read_excel(HERE / "data/excel.xlsx", "Sheet1", dtype=int) - assert adata.X.tolist() == X_list + assert adata.X.tolist() == X_LIST def test_read_umi_tools(): diff --git a/tests/test_views.py b/tests/test_views.py index 492a354ef..53930fe84 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -122,9 +122,9 @@ def mapping_name(request): # ------------------------------------------------------------------------------ -def test_views(): - X = np.array(X_list, dtype="int32") - adata = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict) +def test_views() -> None: + x = np.array(X_list, dtype="int32") + adata = ad.AnnData(x, obs=obs_dict, var=var_dict, uns=uns_dict) assert adata[:, 0].is_view assert adata[:, 0].X.tolist() == np.reshape([1, 4, 7], (3, 1)).tolist() @@ -517,9 +517,9 @@ def test_view_setattr_machinery(attr, subset_func, subset_func2): def test_layers_view() -> None: - X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) layer = np.array([[10, 11, 12], [13, 14, 15], [16, 17, 18]]) - real_adata = ad.AnnData(X) + real_adata = ad.AnnData(x) real_adata.layers["L"] = layer view_adata = real_adata[1:, 1:] real_hash = joblib.hash(real_adata) @@ -738,17 +738,17 @@ def test_view_mixin_copies_data(adata, array_type: type, attr): var=pd.DataFrame(index=np.arange(n).astype(str)), ) - X = array_type(sparse.eye(n, n).multiply(np.arange(1, n + 1))) + x = array_type(sparse.eye(n, n).multiply(np.arange(1, n + 1))) if attr == "X": - adata.X = X + adata.X = x else: - getattr(adata, attr)["arr"] = X + getattr(adata, attr)["arr"] = x view = adata[:50] arr_view = view.X if attr == "X" else getattr(view, attr)["arr"] arr_view_copy = arr_view.copy() - if sparse.issparse(X): + if sparse.issparse(x): assert not np.shares_memory(arr_view.indices, arr_view_copy.indices) assert not np.shares_memory(arr_view.indptr, arr_view_copy.indptr) assert not np.shares_memory(arr_view.data, arr_view_copy.data) diff --git a/tests/test_x.py b/tests/test_x.py index 96e0bb779..100456a6a 100644 --- a/tests/test_x.py +++ b/tests/test_x.py @@ -193,9 +193,9 @@ def test_set_dense_x_view_from_sparse(): def test_fail_on_non_csr_csc_matrix(): - X = sparse.eye(100, format="coo") + x = sparse.eye(100, format="coo") with pytest.raises( ValueError, match=r"Only CSR and CSC.*", ): - ad.AnnData(X=X) + ad.AnnData(X=x)