diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index d40dad02c..e90cfb204 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -46,10 +46,11 @@ jobs: environment-name: asv cache-environment: true # Deps documented in https://asv.readthedocs.io/en/latest/installing.html + # https://github.com/airspeed-velocity/asv/issues/1577 create-args: >- python=${{ matrix.python }} - asv - py_rattler + asv>=0.6.4 + py_rattler<0.22 conda-build - name: Cache datasets diff --git a/docs/release-notes/2338.breaking.md b/docs/release-notes/2338.breaking.md new file mode 100644 index 000000000..1023555d8 --- /dev/null +++ b/docs/release-notes/2338.breaking.md @@ -0,0 +1 @@ +{attr}`anndata.AnnData.X` is now copy-on-write like the other elements like {attr}`~anndata.AnnData.layers`. Operations like `my_subset.X = 0` will *not* propagate back to original object. {user}`ilan-gold` diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index fa6fd299e..45e93e9cd 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -49,7 +49,7 @@ from .aligned_df import _gen_dataframe from .aligned_mapping import AlignedMappingProperty, AxisArrays, Layers, PairwiseArrays from .file_backing import AnnDataFileManager, to_memory -from .index import _normalize_indices, _subset, array_api_ix, get_vector +from .index import _normalize_indices, _subset, get_vector from .raw import Raw from .sparse_dataset import BaseCompressedSparseDataset, sparse_dataset from .storage import coerce_array @@ -627,12 +627,20 @@ def _handle_view_X_cow(self, value: _XDataType | None): return False @X.setter - def X(self, value: _XDataType | None): # noqa: PLR0912 + def X(self, value: _XDataType | None): value = ( coerce_array(value, name="X", allow_array_like=True) if value is not None else None ) + if np.isscalar(value) and value is not None: + msg = "The ability to set X with a scalar value will be removed in the future. Initializing as an `np.array` with the shape of the current view." + warn(msg, FutureWarning) + value = np.full(self.shape, fill_value=value) + if hasattr(value, "shape") and value.shape != self.shape: + msg = "Automatic reshaping when setting X will be removed in the future." + warn(msg, FutureWarning) + value = value.reshape(self.shape) can_set_direct_if_not_none = value is None or ( np.isscalar(value) or (hasattr(value, "shape") and (self.shape == value.shape)) @@ -642,83 +650,13 @@ def X(self, value: _XDataType | None): # noqa: PLR0912 if not can_set_direct_if_not_none: msg = f"Data matrix has wrong shape {value.shape}, need to be {self.shape}." raise ValueError(msg) - if self._handle_view_X_cow(value): - return None - if value is None: - if self.isbacked: - msg = "Cannot currently remove data matrix from backed object." - raise NotImplementedError(msg) - if self.is_view: - self._init_as_actual(self.copy()) - self._X = None - return - - # If indices are both arrays, we need to modify them - # so we don’t set values like coordinates - # This can occur if there are successive views - # Handle IndexManager by extracting array_api compat if possible - # Otherwise fall back to numpy - oidx, vidx = ( - ( - idx.get_for_array(self._adata_ref._X) - if has_xp(self._adata_ref._X) - else np.array(idx) - ) - if isinstance(idx, IndexManager) - else idx - for idx in (self._oidx, self._vidx) - ) - if ( - self.is_view - and isinstance(self._oidx, IndexManager) - and isinstance(self._vidx, IndexManager) - ): - oidx, vidx = array_api_ix(oidx, vidx) - if not np.isscalar(value): - if self.is_view and any( - isinstance(idx, np.ndarray) and len(np.unique(idx)) != len(idx.ravel()) - for idx in [oidx, vidx] - ): - msg = ( - "You are attempting to set `X` to a matrix on a view which has non-unique indices. " - "The resulting `adata.X` will likely not equal the value to which you set it. " - "To avoid this potential issue, please make a copy of the data first. " - "In the future, this operation will throw an error." - ) - warn(msg, FutureWarning) - if self.shape != value.shape: - # For assigning vector of values to 2d array or matrix - # Not necessary for row of 2d array - value = value.reshape(self.shape) - if self.isbacked: - if self.is_view: - X = self.file["X"] - if isinstance(X, h5py.Group): - msg = "Cannot write to views of sparse backed files" - raise NotImplementedError(msg) - X[oidx, vidx] = value - else: - self._set_backed("X", value) - elif self.is_view: - if sparse.issparse(self._adata_ref._X) and isinstance(value, np.ndarray): - if isinstance(self._adata_ref.X, CSArray): - memory_class = sparse.coo_array - else: - memory_class = sparse.coo_matrix - value = memory_class(value) - elif sparse.issparse(value) and isinstance(self._adata_ref._X, np.ndarray): - msg = ( - "Trying to set a dense array with a sparse array on a view. " - "Densifying the sparse array. " - "This may incur excessive memory usage" - ) - warn(msg, UserWarning) - value = value.toarray() - msg = "Modifying `X` on a view results in data being overridden" + if self.is_view: + msg = "Setting element `.X` of view, initializing view as actual." warn(msg, ImplicitModificationWarning) - self._adata_ref._X[oidx, vidx] = value - else: - self._X = value + new = self._mutated_copy(X=value) + self._init_as_actual(new) + return + self._X = value @X.deleter def X(self): diff --git a/tests/test_backed_dense.py b/tests/test_backed_dense.py index 72fcc05e4..01165b921 100644 --- a/tests/test_backed_dense.py +++ b/tests/test_backed_dense.py @@ -85,4 +85,5 @@ def test_assign_x_subset(file: h5py.File | zarr.Group): expected = x.copy() expected[3:7, 6:8] = np.zeros((4, 2)) - assert_equal(adata.X, expected) + assert_equal(adata.X, x) + assert_equal(view.X, np.zeros((4, 2))) diff --git a/tests/test_backed_hdf5.py b/tests/test_backed_hdf5.py index ab143ca65..2d584a5be 100644 --- a/tests/test_backed_hdf5.py +++ b/tests/test_backed_hdf5.py @@ -153,8 +153,8 @@ def test_backing(adata: ad.AnnData, tmp_path: Path, backing_h5ad: Path) -> None: # know that the file is open again.... assert adata.file.is_open - adata[:2, 0].X = [0, 0] - assert adata[:, 0].X.tolist() == np.reshape([0, 0, 7], (3, 1)).tolist() + adata[:2, 0].X = np.array([[0], [0]]) + assert adata[:, 0].X.tolist() == np.reshape([1, 4, 7], (3, 1)).tolist() adata_subset = adata[:2, [0, 1]] assert adata_subset.is_view @@ -346,6 +346,7 @@ def test_backed_modification_sparse( ): adata.X[:, 1] = 0 # Make it a little sparse adata.X = sparse_format(adata.X) + orig = adata.X.copy() assert not adata.isbacked adata.write(backing_h5ad) @@ -354,9 +355,9 @@ def test_backed_modification_sparse( assert adata.filename == backing_h5ad assert adata.isbacked - pat = r"Cannot write to views of sparse backed files" - with pytest.raises(NotImplementedError, match=pat): - adata[0, [0, 2]].X = 10 + # Does not modify backed store + adata[0, [0, 2]].X = np.array([[10, 10]]) + np.testing.assert_equal(orig.toarray(), adata.X[...].toarray()) # TODO: Work around h5py not supporting this diff --git a/tests/test_views.py b/tests/test_views.py index db3e44592..e6e010b3a 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -5,7 +5,6 @@ from functools import partial from importlib.metadata import version from importlib.util import find_spec -from operator import mul from typing import TYPE_CHECKING import joblib @@ -35,7 +34,8 @@ SparseCSRArrayView, SparseCSRMatrixView, ) -from anndata.compat import CupyCSCMatrix, DaskArray, XDataArray +from anndata._warnings import ImplicitModificationWarning +from anndata.compat import DaskArray, XDataArray from anndata.tests.helpers import ( BASE_MATRIX_PARAMS, CUPY_MATRIX_PARAMS, @@ -145,10 +145,13 @@ def test_views(): assert adata[:, 0].is_view assert adata[:, 0].X.tolist() == np.reshape([1, 4, 7], (3, 1)).tolist() + with ( + pytest.warns(ImplicitModificationWarning, match=r"initializing view as actual"), + pytest.warns(FutureWarning, match=r"Automatic reshaping"), + ): + adata[:2, 0].X = [0, 0] - adata[:2, 0].X = [0, 0] - - assert adata[:, 0].X.tolist() == np.reshape([0, 0, 7], (3, 1)).tolist() + assert adata[:, 0].X.tolist() == np.reshape([1, 4, 7], (3, 1)).tolist() adata_subset = adata[:2, [0, 1]] @@ -439,28 +442,19 @@ def test_not_set_subset_X_dask(matrix_type_no_gpu, subset_func): @IGNORE_SPARSE_EFFICIENCY_WARNING def test_set_scalar_subset_X(matrix_type, subset_func): adata = ad.AnnData(matrix_type(np.zeros((10, 10)))) - orig_X_val = adata.X.copy() subset_idx = subset_func(adata.obs_names) adata_subset = adata[subset_idx, :] + with ( + pytest.warns(ImplicitModificationWarning, match=r"initializing view as actual"), + pytest.warns(FutureWarning, match=r"The ability to set X with a scalar value"), + ): + adata_subset.X = 1 - if jnp is not None and isinstance(adata.X, jnp.ndarray): - with pytest.raises(TypeError, match=r"immutable"): - adata_subset.X = 1 - return - - adata_subset.X = 1 - - assert adata_subset.is_view - assert np.all(asarray(adata[subset_idx, :].X) == 1) - if isinstance(adata.X, CupyCSCMatrix): - # Comparison broken for CSC matrices - # https://github.com/cupy/cupy/issues/7757 - assert asarray(orig_X_val.tocsr() != adata.X.tocsr()).sum() == mul( - *adata_subset.shape - ) - else: - assert asarray(orig_X_val != adata.X).sum() == mul(*adata_subset.shape) + # Subset materializes on write + assert not adata_subset.is_view + assert np.all(asarray(adata_subset.X) == 1) + assert np.all(asarray(adata.X) == 0) # TODO: Use different kind of subsetting for adata and view @@ -635,18 +629,17 @@ def test_view_of_view(adata_gen: ad.AnnData, subset_func, subset_func2) -> None: assert isinstance(view_of_view_copy.X, type(adata.X)) -def test_view_of_view_modification(): - adata = ad.AnnData(np.zeros((10, 10))) - adata[0, :][:, 5:].X = np.ones(5) - assert np.all(adata.X[0, 5:] == np.ones(5)) - adata[[1, 2], :][:, [1, 2]].X = np.ones((2, 2)) - assert np.all(adata.X[1:3, 1:3] == np.ones((2, 2))) - - adata.X = sparse.csr_matrix(adata.X) - adata[0, :][:, 5:].X = np.ones(5) * 2 - assert np.all(asarray(adata.X)[0, 5:] == np.ones(5) * 2) - adata[[1, 2], :][:, [1, 2]].X = np.ones((2, 2)) * 2 - assert np.all(asarray(adata.X)[1:3, 1:3] == np.ones((2, 2)) * 2) +@pytest.mark.parametrize("matrix_type", [sparse.csr_matrix, np.array]) +def test_view_of_view_modification( + matrix_type: Callable[[np.ndarray], sparse.csr_matrix | np.ndarray], +): + adata = ad.AnnData(matrix_type(np.zeros((10, 10)))) + subset1 = adata[0, :] + subset = subset1[:, 5:] + subset.X = np.ones((1, 5)) + assert np.all(np.ones(5) == subset.X) + assert np.all(asarray(subset1.X) == 0) + assert np.all(asarray(adata.X) == 0) def test_double_index(subset_func, subset_func2): diff --git a/tests/test_x.py b/tests/test_x.py index 8c7b42a46..69508a94e 100644 --- a/tests/test_x.py +++ b/tests/test_x.py @@ -2,8 +2,6 @@ from __future__ import annotations -from contextlib import nullcontext - import numpy as np import pandas as pd import pytest @@ -16,7 +14,6 @@ GEN_ADATA_NO_XARRAY_ARGS, assert_equal, gen_adata, - jnp, jnp_array_or_idempotent, ) from anndata.utils import asarray @@ -46,54 +43,24 @@ def test_setter_singular_dim(shape, orig_array_type, new_array_type): assert isinstance(adata.X, type(to_assign)) -def test_repeat_indices_view(): - adata = gen_adata((10, 10), X_type=np.asarray) - subset = adata[[0, 0, 1, 1], :] - mat = np.array([np.ones(adata.shape[1]) * i for i in range(4)]) - - with pytest.warns( - FutureWarning, - match=r"You are attempting to set `X` to a matrix on a view which has non-unique indices", - ): - subset.X = mat - - @pytest.mark.parametrize("orig_array_type", UNLABELLED_ARRAY_TYPES) @pytest.mark.parametrize("new_array_type", UNLABELLED_ARRAY_TYPES) -@pytest.mark.parametrize("copy_on_write_X", [True, False], ids=["CoW", "update"]) -def test_setter_view(orig_array_type, new_array_type, *, copy_on_write_X: bool): - ad.settings.copy_on_write_X = copy_on_write_X +def test_setter_view(orig_array_type, new_array_type): adata = gen_adata((10, 10), X_type=orig_array_type) orig_X = adata.X expected_X = asarray(orig_X.copy()) to_assign = new_array_type(np.ones((9, 9))) - if not copy_on_write_X and jnp is not None and isinstance(orig_X, jnp.ndarray): - view = adata[:9, :9] - with pytest.raises(TypeError, match=r"immutable|in-place"): - view.X = to_assign - return - if not copy_on_write_X: - expected_X[:9, :9] = asarray(to_assign) view = adata[:9, :9] with ( - pytest.warns( - ImplicitModificationWarning if copy_on_write_X else FutureWarning, - match=r"initializing view as actual" - if copy_on_write_X - else r"will obey copy-on-write semantics", - ), - pytest.warns(UserWarning, match=r"Trying to set a dense array") - if sparse.issparse(to_assign) - and isinstance(orig_X, np.ndarray) - and not copy_on_write_X - else nullcontext(), + pytest.warns(ImplicitModificationWarning, match=r"initializing view as actual"), ): view.X = to_assign - assert_equal(view.X, to_assign) - assert isinstance(view.X, type(to_assign) if copy_on_write_X else type(orig_X)) + # view has been initialized + new_adata = view + assert not new_adata.is_view + assert_equal(new_adata.X, to_assign) + assert isinstance(new_adata.X, type(to_assign)) assert_equal(adata.X, expected_X) - # If cow, then not a view and if not cow, it is a view - assert view.is_view != copy_on_write_X ############################### @@ -198,26 +165,6 @@ def test_io_missing_X(tmp_path, diskfmt): assert_equal(from_disk, adata) -def test_set_dense_x_view_from_sparse(): - x = np.zeros((100, 30)) - x1 = np.ones((100, 30)) - orig = ad.AnnData(x) - view = orig[:30] - with ( - pytest.warns( - UserWarning, - match=r"Trying to set a dense array with a sparse array on a view", - ), - pytest.warns( - ImplicitModificationWarning, match=r"Modifying `X` on a view results" - ), - ): - view.X = sparse.csr_matrix(x1[:30]) - assert_equal(view.X, x1[:30]) - assert_equal(orig.X[:30], x1[:30]) # change propagates through - assert_equal(orig.X[30:], x[30:]) # change propagates through - - def test_fail_on_non_csr_csc_matrix(): X = sparse.eye(100, format="coo") with pytest.raises(