diff --git a/docs/release-notes/2327.chore.md b/docs/release-notes/2327.chore.md new file mode 100644 index 000000000..dc68905c0 --- /dev/null +++ b/docs/release-notes/2327.chore.md @@ -0,0 +1 @@ +Add {attr}`anndata.settings.copy_on_write_X` to allow for forward-compatibility with future copy-on-write behavior (release `0.13`). Currently, setting {attr}`~anndata.AnnData.X` on a view implicitly updates the object from which the view was created. {user}`ilan-gold` diff --git a/pyproject.toml b/pyproject.toml index 74e0a55cb..7b8fa06c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -166,6 +166,8 @@ filterwarnings_when_strict = [ "default:.*FixedLengthUTF32:zarr.core.dtype.common.UnstableSpecificationWarning", "default:Automatic shard shape inference is experimental", "default:Writing zarr v2:UserWarning", + # TODO: Remove in conjunction with or before https://github.com/scverse/anndata/pull/1707 + "default:.*will obey copy-on-write semantics:FutureWarning", ] python_files = [ "test_*.py" ] testpaths = [ diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index f582c5dfe..9b7b6fcaf 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -601,8 +601,38 @@ def X(self) -> _XDataType | None: # else: # return X + def _handle_view_X_cow(self, value: _XDataType | None): + if self._is_view: + if settings.copy_on_write_X: + msg = "Setting element `.X` of view, initializing view as actual." + warn(msg, ImplicitModificationWarning) + new = self._mutated_copy(X=value) + self._init_as_actual(new) + return True + msg = "Setting element `.X` of view of `AnnData` object will obey copy-on-write semantics in the next minor release. " + "In other words, this subset of your original `AnnData` will be copied-in-place and initialized with the value passed into this setter. " + "Set `anndata.settings.copy_on_write_X = True` to begin opting in to this behavior." + warn(msg, FutureWarning) + return False + @X.setter def X(self, value: _XDataType | None): # noqa: PLR0912 + value = ( + coerce_array(value, name="X", allow_array_like=True) + if value is not None + else None + ) + can_set_direct_if_not_none = value is None or ( + np.isscalar(value) + or (hasattr(value, "shape") and (self.shape == value.shape)) + or (self.n_vars == 1 and self.n_obs == len(value)) + or (self.n_obs == 1 and self.n_vars == len(value)) + ) + if not can_set_direct_if_not_none: + msg = f"Data matrix has wrong shape {value.shape}, need to be {self.shape}." + raise ValueError(msg) + if self._handle_view_X_cow(value): + return None if value is None: if self.isbacked: msg = "Cannot currently remove data matrix from backed object." @@ -611,7 +641,6 @@ def X(self, value: _XDataType | None): # noqa: PLR0912 self._init_as_actual(self.copy()) self._X = None return - value = coerce_array(value, name="X", allow_array_like=True) # If indices are both arrays, we need to modify them # so we don’t set values like coordinates @@ -624,65 +653,51 @@ def X(self, value: _XDataType | None): # noqa: PLR0912 oidx, vidx = np.ix_(self._oidx, self._vidx) else: oidx, vidx = self._oidx, self._vidx - if ( - np.isscalar(value) - or (hasattr(value, "shape") and (self.shape == value.shape)) - or (self.n_vars == 1 and self.n_obs == len(value)) - or (self.n_obs == 1 and self.n_vars == len(value)) - ): - if not np.isscalar(value): - if self.is_view and any( - isinstance(idx, np.ndarray) - and len(np.unique(idx)) != len(idx.ravel()) - for idx in [oidx, vidx] - ): - msg = ( - "You are attempting to set `X` to a matrix on a view which has non-unique indices. " - "The resulting `adata.X` will likely not equal the value to which you set it. " - "To avoid this potential issue, please make a copy of the data first. " - "In the future, this operation will throw an error." - ) - warn(msg, FutureWarning) - if self.shape != value.shape: - # For assigning vector of values to 2d array or matrix - # Not necessary for row of 2d array - value = value.reshape(self.shape) - if self.isbacked: - if self.is_view: - X = self.file["X"] - if isinstance(X, h5py.Group): - msg = "Cannot write to views of sparse backed files" - raise NotImplementedError(msg) - X[oidx, vidx] = value - else: - self._set_backed("X", value) - elif self.is_view: - if sparse.issparse(self._adata_ref._X) and isinstance( - value, np.ndarray - ): - if isinstance(self._adata_ref.X, CSArray): - memory_class = sparse.coo_array - else: - memory_class = sparse.coo_matrix - value = memory_class(value) - elif sparse.issparse(value) and isinstance( - self._adata_ref._X, np.ndarray - ): - msg = ( - "Trying to set a dense array with a sparse array on a view. " - "Densifying the sparse array. " - "This may incur excessive memory usage" - ) - warn(msg, UserWarning) - value = value.toarray() - msg = "Modifying `X` on a view results in data being overridden" - warn(msg, ImplicitModificationWarning) - self._adata_ref._X[oidx, vidx] = value + if not np.isscalar(value): + if self.is_view and any( + isinstance(idx, np.ndarray) and len(np.unique(idx)) != len(idx.ravel()) + for idx in [oidx, vidx] + ): + msg = ( + "You are attempting to set `X` to a matrix on a view which has non-unique indices. " + "The resulting `adata.X` will likely not equal the value to which you set it. " + "To avoid this potential issue, please make a copy of the data first. " + "In the future, this operation will throw an error." + ) + warn(msg, FutureWarning) + if self.shape != value.shape: + # For assigning vector of values to 2d array or matrix + # Not necessary for row of 2d array + value = value.reshape(self.shape) + if self.isbacked: + if self.is_view: + X = self.file["X"] + if isinstance(X, h5py.Group): + msg = "Cannot write to views of sparse backed files" + raise NotImplementedError(msg) + X[oidx, vidx] = value else: - self._X = value + self._set_backed("X", value) + elif self.is_view: + if sparse.issparse(self._adata_ref._X) and isinstance(value, np.ndarray): + if isinstance(self._adata_ref.X, CSArray): + memory_class = sparse.coo_array + else: + memory_class = sparse.coo_matrix + value = memory_class(value) + elif sparse.issparse(value) and isinstance(self._adata_ref._X, np.ndarray): + msg = ( + "Trying to set a dense array with a sparse array on a view. " + "Densifying the sparse array. " + "This may incur excessive memory usage" + ) + warn(msg, UserWarning) + value = value.toarray() + msg = "Modifying `X` on a view results in data being overridden" + warn(msg, ImplicitModificationWarning) + self._adata_ref._X[oidx, vidx] = value else: - msg = f"Data matrix has wrong shape {value.shape}, need to be {self.shape}." - raise ValueError(msg) + self._X = value @X.deleter def X(self): @@ -1518,8 +1533,7 @@ def copy(self, filename: PathLike[str] | str | None = None) -> AnnData: return self._mutated_copy( X=_subset(self._adata_ref.X, (self._oidx, self._vidx)).copy() ) - else: - return self._mutated_copy() + return self._mutated_copy() else: from ..io import read_h5ad, write_h5ad diff --git a/src/anndata/_settings.py b/src/anndata/_settings.py index 1b8268898..a5651ba28 100644 --- a/src/anndata/_settings.py +++ b/src/anndata/_settings.py @@ -526,5 +526,18 @@ def validate_sparse_settings(val: Any, settings: SettingsManager) -> None: ) +settings.register( + "copy_on_write_X", + default_value=False, + description=( + "Whether to copy-on-write X. " + "Currently `my_adata_view[subset].X = value` will write back to the original AnnData object at the `subset` location. " + "`X` is the only element where this behavior is implemented though." + ), + validate=validate_bool, + get_from_env=check_and_get_bool, +) + + ################################################################################## ################################################################################## diff --git a/src/anndata/_settings.pyi b/src/anndata/_settings.pyi index 2f2c71b0a..d55afdeda 100644 --- a/src/anndata/_settings.pyi +++ b/src/anndata/_settings.pyi @@ -39,6 +39,7 @@ class SettingsManager[T]: class _AnnDataSettingsManager(SettingsManager): remove_unused_categories: bool = True check_uniqueness: bool = True + copy_on_write_X: bool = False allow_write_nullable_strings: bool | None = None zarr_write_format: Literal[2, 3] = 2 use_sparse_array_on_read: bool = False diff --git a/tests/test_x.py b/tests/test_x.py index d7da59a0c..f15876049 100644 --- a/tests/test_x.py +++ b/tests/test_x.py @@ -2,6 +2,8 @@ from __future__ import annotations +from contextlib import nullcontext + import numpy as np import pandas as pd import pytest @@ -50,17 +52,35 @@ def test_repeat_indices_view(): @pytest.mark.parametrize("orig_array_type", UNLABELLED_ARRAY_TYPES) @pytest.mark.parametrize("new_array_type", UNLABELLED_ARRAY_TYPES) -def test_setter_view(orig_array_type, new_array_type): +@pytest.mark.parametrize("copy_on_write_X", [True, False], ids=["CoW", "update"]) +def test_setter_view(orig_array_type, new_array_type, *, copy_on_write_X: bool): + ad.settings.copy_on_write_X = copy_on_write_X adata = gen_adata((10, 10), X_type=orig_array_type) orig_X = adata.X + expected_X = asarray(orig_X.copy()) to_assign = new_array_type(np.ones((9, 9))) - if isinstance(orig_X, np.ndarray) and sparse.issparse(to_assign): - # https://github.com/scverse/anndata/issues/500 - pytest.xfail("Cannot set a dense array with a sparse array") + if not copy_on_write_X: + expected_X[:9, :9] = asarray(to_assign) view = adata[:9, :9] - view.X = to_assign - np.testing.assert_equal(asarray(view.X), np.ones((9, 9))) - assert isinstance(view.X, type(orig_X)) + with ( + pytest.warns( + ImplicitModificationWarning if copy_on_write_X else FutureWarning, + match=r"initializing view as actual" + if copy_on_write_X + else r"will obey copy-on-write semantics", + ), + pytest.warns(UserWarning, match=r"Trying to set a dense array") + if sparse.issparse(to_assign) + and isinstance(orig_X, np.ndarray) + and not copy_on_write_X + else nullcontext(), + ): + view.X = to_assign + assert_equal(view.X, to_assign) + assert isinstance(view.X, type(to_assign) if copy_on_write_X else type(orig_X)) + assert_equal(adata.X, expected_X) + # If cow, then not a view and if not cow, it is a view + assert view.is_view != copy_on_write_X ###############################