Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release-notes/2327.chore.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add {attr}`anndata.settings.copy_on_write_X` to allow for forward-compatibility with future copy-on-write behavior (release `0.13`). Currently, setting {attr}`~anndata.AnnData.X` on a view implicitly updates the object from which the view was created. {user}`ilan-gold`
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ filterwarnings_when_strict = [
"default:.*FixedLengthUTF32:zarr.core.dtype.common.UnstableSpecificationWarning",
"default:Automatic shard shape inference is experimental",
"default:Writing zarr v2:UserWarning",
# TODO: Remove in conjunction with or before https://github.com/scverse/anndata/pull/1707
"default:.*will obey copy-on-write semantics:FutureWarning",
]
python_files = [ "test_*.py" ]
testpaths = [
Expand Down
134 changes: 74 additions & 60 deletions src/anndata/_core/anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,8 +601,38 @@ def X(self) -> _XDataType | None:
# else:
# return X

def _handle_view_X_cow(self, value: _XDataType | None):
if self._is_view:
if settings.copy_on_write_X:
msg = "Setting element `.X` of view, initializing view as actual."
warn(msg, ImplicitModificationWarning)
new = self._mutated_copy(X=value)
self._init_as_actual(new)
return True
msg = "Setting element `.X` of view of `AnnData` object will obey copy-on-write semantics in the next minor release. "
"In other words, this subset of your original `AnnData` will be copied-in-place and initialized with the value passed into this setter. "
"Set `anndata.settings.copy_on_write_X = True` to begin opting in to this behavior."
warn(msg, FutureWarning)
return False

@X.setter
def X(self, value: _XDataType | None): # noqa: PLR0912
value = (
coerce_array(value, name="X", allow_array_like=True)
if value is not None
else None
)
can_set_direct_if_not_none = value is None or (
np.isscalar(value)
or (hasattr(value, "shape") and (self.shape == value.shape))
or (self.n_vars == 1 and self.n_obs == len(value))
or (self.n_obs == 1 and self.n_vars == len(value))
)
if not can_set_direct_if_not_none:
msg = f"Data matrix has wrong shape {value.shape}, need to be {self.shape}."
raise ValueError(msg)
if self._handle_view_X_cow(value):
return None
if value is None:
if self.isbacked:
msg = "Cannot currently remove data matrix from backed object."
Expand All @@ -611,7 +641,6 @@ def X(self, value: _XDataType | None): # noqa: PLR0912
self._init_as_actual(self.copy())
self._X = None
return
value = coerce_array(value, name="X", allow_array_like=True)

# If indices are both arrays, we need to modify them
# so we don’t set values like coordinates
Expand All @@ -624,65 +653,51 @@ def X(self, value: _XDataType | None): # noqa: PLR0912
oidx, vidx = np.ix_(self._oidx, self._vidx)
else:
oidx, vidx = self._oidx, self._vidx
if (
np.isscalar(value)
or (hasattr(value, "shape") and (self.shape == value.shape))
or (self.n_vars == 1 and self.n_obs == len(value))
or (self.n_obs == 1 and self.n_vars == len(value))
):
Comment thread
flying-sheep marked this conversation as resolved.
if not np.isscalar(value):
if self.is_view and any(
isinstance(idx, np.ndarray)
and len(np.unique(idx)) != len(idx.ravel())
for idx in [oidx, vidx]
):
msg = (
"You are attempting to set `X` to a matrix on a view which has non-unique indices. "
"The resulting `adata.X` will likely not equal the value to which you set it. "
"To avoid this potential issue, please make a copy of the data first. "
"In the future, this operation will throw an error."
)
warn(msg, FutureWarning)
if self.shape != value.shape:
# For assigning vector of values to 2d array or matrix
# Not necessary for row of 2d array
value = value.reshape(self.shape)
if self.isbacked:
if self.is_view:
X = self.file["X"]
if isinstance(X, h5py.Group):
msg = "Cannot write to views of sparse backed files"
raise NotImplementedError(msg)
X[oidx, vidx] = value
else:
self._set_backed("X", value)
elif self.is_view:
if sparse.issparse(self._adata_ref._X) and isinstance(
value, np.ndarray
):
if isinstance(self._adata_ref.X, CSArray):
memory_class = sparse.coo_array
else:
memory_class = sparse.coo_matrix
value = memory_class(value)
elif sparse.issparse(value) and isinstance(
self._adata_ref._X, np.ndarray
):
msg = (
"Trying to set a dense array with a sparse array on a view. "
"Densifying the sparse array. "
"This may incur excessive memory usage"
)
warn(msg, UserWarning)
value = value.toarray()
msg = "Modifying `X` on a view results in data being overridden"
warn(msg, ImplicitModificationWarning)
self._adata_ref._X[oidx, vidx] = value
if not np.isscalar(value):
if self.is_view and any(
isinstance(idx, np.ndarray) and len(np.unique(idx)) != len(idx.ravel())
for idx in [oidx, vidx]
):
msg = (
"You are attempting to set `X` to a matrix on a view which has non-unique indices. "
"The resulting `adata.X` will likely not equal the value to which you set it. "
"To avoid this potential issue, please make a copy of the data first. "
"In the future, this operation will throw an error."
)
warn(msg, FutureWarning)
if self.shape != value.shape:
# For assigning vector of values to 2d array or matrix
# Not necessary for row of 2d array
value = value.reshape(self.shape)
if self.isbacked:
if self.is_view:
X = self.file["X"]
if isinstance(X, h5py.Group):
msg = "Cannot write to views of sparse backed files"
raise NotImplementedError(msg)
X[oidx, vidx] = value
else:
self._X = value
self._set_backed("X", value)
elif self.is_view:
if sparse.issparse(self._adata_ref._X) and isinstance(value, np.ndarray):
if isinstance(self._adata_ref.X, CSArray):
memory_class = sparse.coo_array
else:
memory_class = sparse.coo_matrix
value = memory_class(value)
elif sparse.issparse(value) and isinstance(self._adata_ref._X, np.ndarray):
msg = (
"Trying to set a dense array with a sparse array on a view. "
"Densifying the sparse array. "
"This may incur excessive memory usage"
)
warn(msg, UserWarning)
value = value.toarray()
msg = "Modifying `X` on a view results in data being overridden"
warn(msg, ImplicitModificationWarning)
self._adata_ref._X[oidx, vidx] = value
else:
msg = f"Data matrix has wrong shape {value.shape}, need to be {self.shape}."
raise ValueError(msg)
self._X = value

@X.deleter
def X(self):
Expand Down Expand Up @@ -1518,8 +1533,7 @@ def copy(self, filename: PathLike[str] | str | None = None) -> AnnData:
return self._mutated_copy(
X=_subset(self._adata_ref.X, (self._oidx, self._vidx)).copy()
)
else:
return self._mutated_copy()
return self._mutated_copy()
else:
from ..io import read_h5ad, write_h5ad

Expand Down
13 changes: 13 additions & 0 deletions src/anndata/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,5 +526,18 @@ def validate_sparse_settings(val: Any, settings: SettingsManager) -> None:
)


settings.register(
"copy_on_write_X",
default_value=False,
description=(
"Whether to copy-on-write X. "
"Currently `my_adata_view[subset].X = value` will write back to the original AnnData object at the `subset` location. "
"`X` is the only element where this behavior is implemented though."
),
validate=validate_bool,
get_from_env=check_and_get_bool,
)


##################################################################################
##################################################################################
1 change: 1 addition & 0 deletions src/anndata/_settings.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class SettingsManager[T]:
class _AnnDataSettingsManager(SettingsManager):
remove_unused_categories: bool = True
check_uniqueness: bool = True
copy_on_write_X: bool = False
allow_write_nullable_strings: bool | None = None
zarr_write_format: Literal[2, 3] = 2
use_sparse_array_on_read: bool = False
Expand Down
34 changes: 27 additions & 7 deletions tests/test_x.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from __future__ import annotations

from contextlib import nullcontext

import numpy as np
import pandas as pd
import pytest
Expand Down Expand Up @@ -50,17 +52,35 @@ def test_repeat_indices_view():

@pytest.mark.parametrize("orig_array_type", UNLABELLED_ARRAY_TYPES)
@pytest.mark.parametrize("new_array_type", UNLABELLED_ARRAY_TYPES)
def test_setter_view(orig_array_type, new_array_type):
@pytest.mark.parametrize("copy_on_write_X", [True, False], ids=["CoW", "update"])
def test_setter_view(orig_array_type, new_array_type, *, copy_on_write_X: bool):
ad.settings.copy_on_write_X = copy_on_write_X
adata = gen_adata((10, 10), X_type=orig_array_type)
orig_X = adata.X
expected_X = asarray(orig_X.copy())
to_assign = new_array_type(np.ones((9, 9)))
if isinstance(orig_X, np.ndarray) and sparse.issparse(to_assign):
# https://github.com/scverse/anndata/issues/500
pytest.xfail("Cannot set a dense array with a sparse array")
if not copy_on_write_X:
expected_X[:9, :9] = asarray(to_assign)
view = adata[:9, :9]
view.X = to_assign
np.testing.assert_equal(asarray(view.X), np.ones((9, 9)))
assert isinstance(view.X, type(orig_X))
with (
pytest.warns(
ImplicitModificationWarning if copy_on_write_X else FutureWarning,
match=r"initializing view as actual"
if copy_on_write_X
else r"will obey copy-on-write semantics",
),
pytest.warns(UserWarning, match=r"Trying to set a dense array")
if sparse.issparse(to_assign)
and isinstance(orig_X, np.ndarray)
and not copy_on_write_X
else nullcontext(),
):
view.X = to_assign
assert_equal(view.X, to_assign)
assert isinstance(view.X, type(to_assign) if copy_on_write_X else type(orig_X))
assert_equal(adata.X, expected_X)
# If cow, then not a view and if not cow, it is a view
assert view.is_view != copy_on_write_X


###############################
Expand Down