Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,11 @@ jobs:
environment-name: asv
cache-environment: true
# Deps documented in https://asv.readthedocs.io/en/latest/installing.html
# https://github.com/airspeed-velocity/asv/issues/1577
create-args: >-
python=${{ matrix.python }}
asv
py_rattler
asv>=0.6.4
py_rattler<0.22
conda-build

- name: Cache datasets
Expand Down
1 change: 1 addition & 0 deletions docs/release-notes/2338.breaking.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{attr}`anndata.AnnData.X` is now copy-on-write like the other elements like {attr}`~anndata.AnnData.layers`. Operations like `my_subset.X = 0` will *not* propagate back to original object. {user}`ilan-gold`
94 changes: 16 additions & 78 deletions src/anndata/_core/anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
from .aligned_df import _gen_dataframe
from .aligned_mapping import AlignedMappingProperty, AxisArrays, Layers, PairwiseArrays
from .file_backing import AnnDataFileManager, to_memory
from .index import _normalize_indices, _subset, array_api_ix, get_vector
from .index import _normalize_indices, _subset, get_vector
from .raw import Raw
from .sparse_dataset import BaseCompressedSparseDataset, sparse_dataset
from .storage import coerce_array
Expand Down Expand Up @@ -627,12 +627,20 @@ def _handle_view_X_cow(self, value: _XDataType | None):
return False

@X.setter
def X(self, value: _XDataType | None): # noqa: PLR0912
def X(self, value: _XDataType | None):
value = (
coerce_array(value, name="X", allow_array_like=True)
if value is not None
else None
)
if np.isscalar(value) and value is not None:
msg = "The ability to set X with a scalar value will be removed in the future. Initializing as an `np.array` with the shape of the current view."
warn(msg, FutureWarning)
value = np.full(self.shape, fill_value=value)
if hasattr(value, "shape") and value.shape != self.shape:
msg = "Automatic reshaping when setting X will be removed in the future."
warn(msg, FutureWarning)
value = value.reshape(self.shape)
can_set_direct_if_not_none = value is None or (
np.isscalar(value)
or (hasattr(value, "shape") and (self.shape == value.shape))
Expand All @@ -642,83 +650,13 @@ def X(self, value: _XDataType | None): # noqa: PLR0912
if not can_set_direct_if_not_none:
msg = f"Data matrix has wrong shape {value.shape}, need to be {self.shape}."
raise ValueError(msg)
if self._handle_view_X_cow(value):
return None
if value is None:
if self.isbacked:
msg = "Cannot currently remove data matrix from backed object."
raise NotImplementedError(msg)
if self.is_view:
self._init_as_actual(self.copy())
self._X = None
return

# If indices are both arrays, we need to modify them
# so we don’t set values like coordinates
# This can occur if there are successive views
# Handle IndexManager by extracting array_api compat if possible
# Otherwise fall back to numpy
oidx, vidx = (
(
idx.get_for_array(self._adata_ref._X)
if has_xp(self._adata_ref._X)
else np.array(idx)
)
if isinstance(idx, IndexManager)
else idx
for idx in (self._oidx, self._vidx)
)
if (
self.is_view
and isinstance(self._oidx, IndexManager)
and isinstance(self._vidx, IndexManager)
):
oidx, vidx = array_api_ix(oidx, vidx)
if not np.isscalar(value):
if self.is_view and any(
isinstance(idx, np.ndarray) and len(np.unique(idx)) != len(idx.ravel())
for idx in [oidx, vidx]
):
msg = (
"You are attempting to set `X` to a matrix on a view which has non-unique indices. "
"The resulting `adata.X` will likely not equal the value to which you set it. "
"To avoid this potential issue, please make a copy of the data first. "
"In the future, this operation will throw an error."
)
warn(msg, FutureWarning)
if self.shape != value.shape:
# For assigning vector of values to 2d array or matrix
# Not necessary for row of 2d array
value = value.reshape(self.shape)
if self.isbacked:
if self.is_view:
X = self.file["X"]
if isinstance(X, h5py.Group):
msg = "Cannot write to views of sparse backed files"
raise NotImplementedError(msg)
X[oidx, vidx] = value
else:
self._set_backed("X", value)
elif self.is_view:
if sparse.issparse(self._adata_ref._X) and isinstance(value, np.ndarray):
if isinstance(self._adata_ref.X, CSArray):
memory_class = sparse.coo_array
else:
memory_class = sparse.coo_matrix
value = memory_class(value)
elif sparse.issparse(value) and isinstance(self._adata_ref._X, np.ndarray):
msg = (
"Trying to set a dense array with a sparse array on a view. "
"Densifying the sparse array. "
"This may incur excessive memory usage"
)
warn(msg, UserWarning)
value = value.toarray()
msg = "Modifying `X` on a view results in data being overridden"
if self.is_view:
msg = "Setting element `.X` of view, initializing view as actual."
warn(msg, ImplicitModificationWarning)
self._adata_ref._X[oidx, vidx] = value
else:
self._X = value
new = self._mutated_copy(X=value)
self._init_as_actual(new)
return
self._X = value

@X.deleter
def X(self):
Expand Down
3 changes: 2 additions & 1 deletion tests/test_backed_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,5 @@ def test_assign_x_subset(file: h5py.File | zarr.Group):

expected = x.copy()
expected[3:7, 6:8] = np.zeros((4, 2))
assert_equal(adata.X, expected)
assert_equal(adata.X, x)
assert_equal(view.X, np.zeros((4, 2)))
11 changes: 6 additions & 5 deletions tests/test_backed_hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ def test_backing(adata: ad.AnnData, tmp_path: Path, backing_h5ad: Path) -> None:
# know that the file is open again....
assert adata.file.is_open

adata[:2, 0].X = [0, 0]
assert adata[:, 0].X.tolist() == np.reshape([0, 0, 7], (3, 1)).tolist()
adata[:2, 0].X = np.array([[0], [0]])
assert adata[:, 0].X.tolist() == np.reshape([1, 4, 7], (3, 1)).tolist()

adata_subset = adata[:2, [0, 1]]
assert adata_subset.is_view
Expand Down Expand Up @@ -346,6 +346,7 @@ def test_backed_modification_sparse(
):
adata.X[:, 1] = 0 # Make it a little sparse
adata.X = sparse_format(adata.X)
orig = adata.X.copy()
assert not adata.isbacked

adata.write(backing_h5ad)
Expand All @@ -354,9 +355,9 @@ def test_backed_modification_sparse(
assert adata.filename == backing_h5ad
assert adata.isbacked

pat = r"Cannot write to views of sparse backed files"
with pytest.raises(NotImplementedError, match=pat):
adata[0, [0, 2]].X = 10
# Does not modify backed store
adata[0, [0, 2]].X = np.array([[10, 10]])
np.testing.assert_equal(orig.toarray(), adata.X[...].toarray())


# TODO: Work around h5py not supporting this
Expand Down
63 changes: 28 additions & 35 deletions tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from functools import partial
from importlib.metadata import version
from importlib.util import find_spec
from operator import mul
from typing import TYPE_CHECKING

import joblib
Expand Down Expand Up @@ -35,7 +34,8 @@
SparseCSRArrayView,
SparseCSRMatrixView,
)
from anndata.compat import CupyCSCMatrix, DaskArray, XDataArray
from anndata._warnings import ImplicitModificationWarning
from anndata.compat import DaskArray, XDataArray
from anndata.tests.helpers import (
BASE_MATRIX_PARAMS,
CUPY_MATRIX_PARAMS,
Expand Down Expand Up @@ -145,10 +145,13 @@ def test_views():

assert adata[:, 0].is_view
assert adata[:, 0].X.tolist() == np.reshape([1, 4, 7], (3, 1)).tolist()
with (
pytest.warns(ImplicitModificationWarning, match=r"initializing view as actual"),
pytest.warns(FutureWarning, match=r"Automatic reshaping"),
):
adata[:2, 0].X = [0, 0]

adata[:2, 0].X = [0, 0]

assert adata[:, 0].X.tolist() == np.reshape([0, 0, 7], (3, 1)).tolist()
assert adata[:, 0].X.tolist() == np.reshape([1, 4, 7], (3, 1)).tolist()

adata_subset = adata[:2, [0, 1]]

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

check here that the view has the changed values?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But it's checked below? The oprevious lines I changed i.e., assert adata[:, 0].X.tolist() == np.reshape([1, 4, 7], (3, 1)).tolist() are specifically checking that the parent didn't change despite the update

@flying-sheep flying-sheep Feb 11, 2026

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean that adata[:2, 0].X = [0, 0] creates a view, then actualizes it and sets the new non-view AnnData’s values. But I guess that’s tested elsewhere already.

maybe at least check that that line specifically raises the “view as actual” warning


Expand Down Expand Up @@ -439,28 +442,19 @@ def test_not_set_subset_X_dask(matrix_type_no_gpu, subset_func):
@IGNORE_SPARSE_EFFICIENCY_WARNING
def test_set_scalar_subset_X(matrix_type, subset_func):
adata = ad.AnnData(matrix_type(np.zeros((10, 10))))
orig_X_val = adata.X.copy()
subset_idx = subset_func(adata.obs_names)

adata_subset = adata[subset_idx, :]
with (
pytest.warns(ImplicitModificationWarning, match=r"initializing view as actual"),
pytest.warns(FutureWarning, match=r"The ability to set X with a scalar value"),
):
adata_subset.X = 1

if jnp is not None and isinstance(adata.X, jnp.ndarray):
with pytest.raises(TypeError, match=r"immutable"):
adata_subset.X = 1
return

adata_subset.X = 1

assert adata_subset.is_view
assert np.all(asarray(adata[subset_idx, :].X) == 1)
if isinstance(adata.X, CupyCSCMatrix):
# Comparison broken for CSC matrices
# https://github.com/cupy/cupy/issues/7757
assert asarray(orig_X_val.tocsr() != adata.X.tocsr()).sum() == mul(
*adata_subset.shape
)
else:
assert asarray(orig_X_val != adata.X).sum() == mul(*adata_subset.shape)
# Subset materializes on write
assert not adata_subset.is_view
assert np.all(asarray(adata_subset.X) == 1)
assert np.all(asarray(adata.X) == 0)


# TODO: Use different kind of subsetting for adata and view
Expand Down Expand Up @@ -635,18 +629,17 @@ def test_view_of_view(adata_gen: ad.AnnData, subset_func, subset_func2) -> None:
assert isinstance(view_of_view_copy.X, type(adata.X))


def test_view_of_view_modification():
adata = ad.AnnData(np.zeros((10, 10)))
adata[0, :][:, 5:].X = np.ones(5)
assert np.all(adata.X[0, 5:] == np.ones(5))
adata[[1, 2], :][:, [1, 2]].X = np.ones((2, 2))
assert np.all(adata.X[1:3, 1:3] == np.ones((2, 2)))

adata.X = sparse.csr_matrix(adata.X)
adata[0, :][:, 5:].X = np.ones(5) * 2
assert np.all(asarray(adata.X)[0, 5:] == np.ones(5) * 2)
adata[[1, 2], :][:, [1, 2]].X = np.ones((2, 2)) * 2
assert np.all(asarray(adata.X)[1:3, 1:3] == np.ones((2, 2)) * 2)
@pytest.mark.parametrize("matrix_type", [sparse.csr_matrix, np.array])
def test_view_of_view_modification(
matrix_type: Callable[[np.ndarray], sparse.csr_matrix | np.ndarray],
):
adata = ad.AnnData(matrix_type(np.zeros((10, 10))))
subset1 = adata[0, :]
subset = subset1[:, 5:]
subset.X = np.ones((1, 5))
assert np.all(np.ones(5) == subset.X)
assert np.all(asarray(subset1.X) == 0)
assert np.all(asarray(adata.X) == 0)


def test_double_index(subset_func, subset_func2):
Expand Down
67 changes: 7 additions & 60 deletions tests/test_x.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

from __future__ import annotations

from contextlib import nullcontext

import numpy as np
import pandas as pd
import pytest
Expand All @@ -16,7 +14,6 @@
GEN_ADATA_NO_XARRAY_ARGS,
assert_equal,
gen_adata,
jnp,
jnp_array_or_idempotent,
)
from anndata.utils import asarray
Expand Down Expand Up @@ -46,54 +43,24 @@ def test_setter_singular_dim(shape, orig_array_type, new_array_type):
assert isinstance(adata.X, type(to_assign))


def test_repeat_indices_view():
adata = gen_adata((10, 10), X_type=np.asarray)
subset = adata[[0, 0, 1, 1], :]
mat = np.array([np.ones(adata.shape[1]) * i for i in range(4)])

with pytest.warns(
FutureWarning,
match=r"You are attempting to set `X` to a matrix on a view which has non-unique indices",
):
subset.X = mat


@pytest.mark.parametrize("orig_array_type", UNLABELLED_ARRAY_TYPES)
@pytest.mark.parametrize("new_array_type", UNLABELLED_ARRAY_TYPES)
@pytest.mark.parametrize("copy_on_write_X", [True, False], ids=["CoW", "update"])
def test_setter_view(orig_array_type, new_array_type, *, copy_on_write_X: bool):
ad.settings.copy_on_write_X = copy_on_write_X
def test_setter_view(orig_array_type, new_array_type):
adata = gen_adata((10, 10), X_type=orig_array_type)
orig_X = adata.X
expected_X = asarray(orig_X.copy())
to_assign = new_array_type(np.ones((9, 9)))
if not copy_on_write_X and jnp is not None and isinstance(orig_X, jnp.ndarray):
view = adata[:9, :9]
with pytest.raises(TypeError, match=r"immutable|in-place"):
view.X = to_assign
return
if not copy_on_write_X:
expected_X[:9, :9] = asarray(to_assign)
view = adata[:9, :9]
with (
pytest.warns(
ImplicitModificationWarning if copy_on_write_X else FutureWarning,
match=r"initializing view as actual"
if copy_on_write_X
else r"will obey copy-on-write semantics",
),
pytest.warns(UserWarning, match=r"Trying to set a dense array")
if sparse.issparse(to_assign)
and isinstance(orig_X, np.ndarray)
and not copy_on_write_X
else nullcontext(),
pytest.warns(ImplicitModificationWarning, match=r"initializing view as actual"),
):
view.X = to_assign
assert_equal(view.X, to_assign)
assert isinstance(view.X, type(to_assign) if copy_on_write_X else type(orig_X))
# view has been initialized
new_adata = view
assert not new_adata.is_view
assert_equal(new_adata.X, to_assign)
assert isinstance(new_adata.X, type(to_assign))
assert_equal(adata.X, expected_X)
# If cow, then not a view and if not cow, it is a view
assert view.is_view != copy_on_write_X


###############################
Expand Down Expand Up @@ -198,26 +165,6 @@ def test_io_missing_X(tmp_path, diskfmt):
assert_equal(from_disk, adata)


def test_set_dense_x_view_from_sparse():
x = np.zeros((100, 30))
x1 = np.ones((100, 30))
orig = ad.AnnData(x)
view = orig[:30]
with (
pytest.warns(
UserWarning,
match=r"Trying to set a dense array with a sparse array on a view",
),
pytest.warns(
ImplicitModificationWarning, match=r"Modifying `X` on a view results"
),
):
view.X = sparse.csr_matrix(x1[:30])
assert_equal(view.X, x1[:30])
assert_equal(orig.X[:30], x1[:30]) # change propagates through
assert_equal(orig.X[30:], x[30:]) # change propagates through


def test_fail_on_non_csr_csc_matrix():
X = sparse.eye(100, format="coo")
with pytest.raises(
Expand Down
Loading