From ed4a1d7c65d7638889226a716b9fc0787808d7a5 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Tue, 9 Dec 2025 11:05:49 +0100
Subject: [PATCH 01/11] feat: api for dataframes

---
 src/anndata/_core/aligned_df.py      |   9 +-
 src/anndata/_core/aligned_mapping.py |   5 +-
 src/anndata/_core/anndata.py         |  11 +-
 src/anndata/_core/index.py           |  20 +-
 src/anndata/_core/merge.py           |   5 +-
 src/anndata/_core/storage.py         |  10 +
 src/anndata/_types.py                | 108 ++++++-
 tests/test_dataframe_protocol.py     | 407 +++++++++++++++++++++++++++
 8 files changed, 548 insertions(+), 27 deletions(-)
 create mode 100644 tests/test_dataframe_protocol.py

diff --git a/src/anndata/_core/aligned_df.py b/src/anndata/_core/aligned_df.py
index 722e881b7..880c08d38 100644
--- a/src/anndata/_core/aligned_df.py
+++ b/src/anndata/_core/aligned_df.py
@@ -7,6 +7,7 @@
 import pandas as pd
 from pandas.api.types import is_string_dtype
 
+from .._types import DataFrameLike
 from .._warnings import ImplicitModificationWarning
 from ..compat import XDataset
 from ..utils import warn
@@ -25,7 +26,13 @@ def _gen_dataframe(
     source: Literal["X", "shape"],
     attr: Literal["obs", "var"],
     length: int | None = None,
-) -> pd.DataFrame:  # pragma: no cover
+) -> DataFrameLike:  # pragma: no cover
+    # Check if anno satisfies the DataFrameLike protocol
+    # This allows any DataFrameLike-compliant object to be used as obs/var
+    if isinstance(anno, DataFrameLike):
+        if length is not None and anno.shape[0] != length:
+            raise _mk_df_error(source, attr, length, anno.shape[0])
+        return anno
     msg = f"Cannot convert {type(anno)} to {attr} DataFrame"
     raise ValueError(msg)
 
diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py
index 3ac1c33d7..32827cb94 100644
--- a/src/anndata/_core/aligned_mapping.py
+++ b/src/anndata/_core/aligned_mapping.py
@@ -9,6 +9,7 @@
 import numpy as np
 import pandas as pd
 
+from .._types import DataFrameLike
 from .._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning
 from ..compat import AwkArray, CSArray, CSMatrix, CupyArray, XDataset
 from ..utils import (
@@ -36,8 +37,8 @@
 
 OneDIdx = Sequence[int] | Sequence[bool] | slice
 TwoDIdx = tuple[OneDIdx, OneDIdx]
-# TODO: pd.DataFrame only allowed in AxisArrays?
-Value = pd.DataFrame | CSMatrix | CSArray | np.ndarray
+# DataFrameLike encompasses pd.DataFrame and Dataset2D
+Value = DataFrameLike | CSMatrix | CSArray | np.ndarray
 
 
 class AlignedMappingBase[I: OneDIdx](MutableMapping[str, Value], ABC):
diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py
index 35a679d20..c2c27fd8e 100644
--- a/src/anndata/_core/anndata.py
+++ b/src/anndata/_core/anndata.py
@@ -53,6 +53,7 @@
 
     from zarr.storage import StoreLike
 
+    from .._types import DataFrameLike
     from ..compat import Index1D, Index1DNorm, XDataset
     from ..typing import XDataType
     from .aligned_mapping import AxisArraysView, LayersView, PairwiseArraysView
@@ -757,7 +758,7 @@ def n_vars(self) -> int:
         """Number of variables/features."""
         return len(self.var_names)
 
-    def _set_dim_df(self, value: pd.DataFrame | XDataset, attr: Literal["obs", "var"]):
+    def _set_dim_df(self, value: DataFrameLike | XDataset, attr: Literal["obs", "var"]):
         value = _gen_dataframe(
             value,
             [f"{attr}_names", f"{'row' if attr == 'obs' else 'col'}_names"],
@@ -819,12 +820,12 @@ def _set_dim_index(self, value: pd.Index, attr: str):
                 v.index = value
 
     @property
-    def obs(self) -> pd.DataFrame | Dataset2D:
+    def obs(self) -> DataFrameLike:
         """One-dimensional annotation of observations (`pd.DataFrame`)."""
         return self._obs
 
     @obs.setter
-    def obs(self, value: pd.DataFrame | XDataset):
+    def obs(self, value: DataFrameLike | XDataset):
         self._set_dim_df(value, "obs")
 
     @obs.deleter
@@ -842,12 +843,12 @@ def obs_names(self, names: Sequence[str]):
         self._set_dim_index(names, "obs")
 
     @property
-    def var(self) -> pd.DataFrame | Dataset2D:
+    def var(self) -> DataFrameLike:
         """One-dimensional annotation of variables/ features (`pd.DataFrame`)."""
         return self._var
 
     @var.setter
-    def var(self, value: pd.DataFrame | XDataset):
+    def var(self, value: DataFrameLike | XDataset):
         self._set_dim_df(value, "var")
 
     @var.deleter
diff --git a/src/anndata/_core/index.py b/src/anndata/_core/index.py
index 3b92a99ac..5e3fe2c72 100644
--- a/src/anndata/_core/index.py
+++ b/src/anndata/_core/index.py
@@ -10,8 +10,8 @@
 import pandas as pd
 from scipy.sparse import issparse
 
+from .._types import DataFrameLike
 from ..compat import AwkArray, CSArray, CSMatrix, DaskArray, XDataArray
-from .xarray import Dataset2D
 
 if TYPE_CHECKING:
     from numpy.typing import NDArray
@@ -42,7 +42,7 @@ def _normalize_index(  # noqa: PLR0911, PLR0912
 ) -> Index1DNorm | int | np.integer:
     # TODO: why is this here? All tests pass without it and it seems at the minimum not strict enough.
     if not isinstance(index, pd.RangeIndex) and index.dtype in (np.float64, np.int64):
-        msg = f"Don’t call _normalize_index with non-categorical/string names and non-range index {index}"
+        msg = f"Don't call _normalize_index with non-categorical/string names and non-range index {index}"
         raise TypeError(msg)
 
     # the following is insanely slow for sequences,
@@ -90,7 +90,7 @@ def name_idx(i):
         elif issubclass(indexer.dtype.type, np.bool_):
             if indexer.shape != index.shape:
                 msg = (
-                    f"Boolean index does not match AnnData’s shape along this "
+                    f"Boolean index does not match AnnData's shape along this "
                     f"dimension. Boolean index has shape {indexer.shape} while "
                     f"AnnData index has shape {index.shape}."
                 )
@@ -164,9 +164,12 @@ def unpack_index(index: Index) -> tuple[Index1D, Index1D]:
 
 @singledispatch
 def _subset(
-    a: np.ndarray | pd.DataFrame,
+    a: np.ndarray | DataFrameLike,
     subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
 ):
+    # Check for DataFrameLike objects (pd.DataFrame, Dataset2D, etc.)
+    if isinstance(a, DataFrameLike):
+        return a.iloc[subset_idx]
     # Select as combination of indexes, not coordinates
     # Correcting for indexing behaviour of np.ndarray
     if all(isinstance(x, Iterable) for x in subset_idx):
@@ -200,15 +203,6 @@ def _subset_sparse(
     return a[subset_idx]
 
 
-@_subset.register(pd.DataFrame)
-@_subset.register(Dataset2D)
-def _subset_df(
-    df: pd.DataFrame | Dataset2D,
-    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
-):
-    return df.iloc[subset_idx]
-
-
 @_subset.register(AwkArray)
 def _subset_awkarray(
     a: AwkArray, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py
index a4bec22c3..9922c85ce 100644
--- a/src/anndata/_core/merge.py
+++ b/src/anndata/_core/merge.py
@@ -20,6 +20,7 @@
 from anndata._core.file_backing import to_memory
 from anndata._warnings import ExperimentalFeatureWarning
 
+from .._types import DataFrameLike
 from ..compat import (
     AwkArray,
     CSArray,
@@ -574,7 +575,7 @@ def apply(self, el, *, axis, fill_value=None):  # noqa: PLR0911
         """
         if self.no_change and (axis_len(el, axis) == len(self.old_idx)):
             return el
-        if isinstance(el, pd.DataFrame | Dataset2D):
+        if isinstance(el, DataFrameLike):
             return self._apply_to_df_like(el, axis=axis, fill_value=fill_value)
         elif isinstance(el, CSMatrix | CSArray | CupySparseMatrix):
             return self._apply_to_sparse(el, axis=axis, fill_value=fill_value)
@@ -587,7 +588,7 @@ def apply(self, el, *, axis, fill_value=None):  # noqa: PLR0911
         else:
             return self._apply_to_array(el, axis=axis, fill_value=fill_value)
 
-    def _apply_to_df_like(self, el: pd.DataFrame | Dataset2D, *, axis, fill_value=None):
+    def _apply_to_df_like(self, el: DataFrameLike, *, axis, fill_value=None):
         if fill_value is None:
             fill_value = np.nan
         return el.reindex(self.new_idx, axis=axis, fill_value=fill_value)
diff --git a/src/anndata/_core/storage.py b/src/anndata/_core/storage.py
index b7a63d785..95de80614 100644
--- a/src/anndata/_core/storage.py
+++ b/src/anndata/_core/storage.py
@@ -8,6 +8,7 @@
 
 from anndata.compat import CSArray, CSMatrix
 
+from .._types import DataFrameLike
 from .._warnings import ImplicitModificationWarning
 from ..compat import XDataset
 from ..utils import (
@@ -59,6 +60,15 @@ def coerce_array(
         if allow_df:
             raise_value_error_if_multiindex_columns(value, name)
         return value if allow_df else ensure_df_homogeneous(value, name)
+    # Handle other DataFrameLike objects (not pd.DataFrame)
+    if isinstance(value, DataFrameLike):
+        if allow_df:
+            return value
+        # For non-DataFrames, we can't use ensure_df_homogeneous
+        # so we convert to array via iloc
+        msg = f"DataFrameLike object used for {name} will be converted to array."
+        warn(msg, ImplicitModificationWarning)
+        return np.array(value.iloc[:, :])
     # if value is an array-like object, try to convert it
     e = None
     if allow_array_like:
diff --git a/src/anndata/_types.py b/src/anndata/_types.py
index cbec38aa1..da23bdb5b 100644
--- a/src/anndata/_types.py
+++ b/src/anndata/_types.py
@@ -4,17 +4,19 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Literal, Protocol
+from typing import TYPE_CHECKING, Literal, Protocol, runtime_checkable
 
-from . import typing
 from .compat import H5Array, H5Group, ZarrArray, ZarrGroup
 
 if TYPE_CHECKING:
     from collections.abc import Mapping
-    from typing import Any
+    from typing import Any, Self
+
+    import pandas as pd
 
     from anndata._core.xarray import Dataset2D
 
+    from . import typing
     from ._io.specs.registry import (
         IOSpec,
         LazyDataStructures,
@@ -26,6 +28,8 @@
 
 __all__ = [
     "ArrayStorageType",
+    "DataFrameLike",
+    "DataFrameLikeIlocIndexer",
     "GroupStorageType",
     "StorageType",
     "_ReadInternal",
@@ -39,7 +43,103 @@
 
 # circumvent https://github.com/tox-dev/sphinx-autodoc-typehints/issues/580
 type S = StorageType
-type RWAble = typing.RWAble
+type RWAble = "typing.RWAble"
+
+
+@runtime_checkable
+class DataFrameLikeIlocIndexer(Protocol):
+    """Protocol for iloc-style indexers on DataFrame-like objects.
+
+    This protocol defines the minimal interface for positional-based indexing
+    that AnnData requires. Both :class:`pandas.DataFrame` and
+    :class:`~anndata.experimental.backed.Dataset2D` provide compatible
+    ``iloc`` accessors.
+
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> from anndata._types import DataFrameLikeIlocIndexer
+    >>> df = pd.DataFrame({"a": [1, 2, 3]})
+    >>> isinstance(df.iloc, DataFrameLikeIlocIndexer)
+    True
+    """
+
+    def __getitem__(self, idx: Any) -> Self: ...
+
+
+@runtime_checkable
+class DataFrameLike(Protocol):
+    """Protocol for DataFrame-like objects usable in AnnData.
+
+    This runtime-checkable protocol defines the minimal DataFrame API that
+    AnnData uses internally for ``obs``, ``var``, and similar dataframe-like
+    data containers. Any class implementing this protocol can be used as a
+    drop-in replacement for :class:`pandas.DataFrame` in these contexts.
+
+    The required interface includes:
+
+    - :attr:`index`: Row labels as a :class:`pandas.Index`
+    - :attr:`columns`: Column labels as a :class:`pandas.Index`
+    - :attr:`shape`: Tuple of (n_rows, n_columns)
+    - :attr:`iloc`: Positional indexer returning a :class:`DataFrameLikeIlocIndexer`
+    - :meth:`reindex`: Method to reindex rows
+
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> from anndata._types import DataFrameLike
+    >>> df = pd.DataFrame({"a": [1, 2, 3]})
+    >>> isinstance(df, DataFrameLike)
+    True
+
+    See Also
+    --------
+    :class:`~anndata.experimental.backed.Dataset2D`
+        An xarray-based implementation of this protocol.
+    """
+
+    @property
+    def index(self) -> pd.Index:
+        """Row labels of the DataFrame-like object."""
+        ...
+
+    @property
+    def columns(self) -> pd.Index:
+        """Column labels of the DataFrame-like object."""
+        ...
+
+    @property
+    def shape(self) -> tuple[int, int]:
+        """Shape of the DataFrame-like object as (n_rows, n_columns)."""
+        ...
+
+    @property
+    def iloc(self) -> DataFrameLikeIlocIndexer:
+        """Positional indexer for the DataFrame-like object."""
+        ...
+
+    def reindex(
+        self,
+        index: pd.Index | None = None,
+        axis: Literal[0] = 0,
+        fill_value: Any = ...,
+    ) -> Self:
+        """Reindex the DataFrame-like object to match a new index.
+
+        Parameters
+        ----------
+        index
+            New index to conform to.
+        axis
+            Axis to reindex along (only 0 is supported).
+        fill_value
+            Value to use for missing values.
+
+        Returns
+        -------
+        Reindexed DataFrame-like object.
+        """
+        ...
 
 
 class Dataset2DIlocIndexer(Protocol):
diff --git a/tests/test_dataframe_protocol.py b/tests/test_dataframe_protocol.py
new file mode 100644
index 000000000..62b9371b5
--- /dev/null
+++ b/tests/test_dataframe_protocol.py
@@ -0,0 +1,407 @@
+"""Tests for the DataFrameLike protocol."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from anndata._types import DataFrameLike, DataFrameLikeIlocIndexer
+
+if TYPE_CHECKING:
+    from typing import Any, Literal, Self
+
+
+class MockIlocIndexer:
+    """Mock iloc indexer that delegates to a wrapped pandas DataFrame."""
+
+    def __init__(self, data: pd.DataFrame):
+        self._data = data
+
+    def __getitem__(self, idx: Any) -> MockDataFrame:
+        result = self._data.iloc[idx]
+        if isinstance(result, pd.DataFrame):
+            return MockDataFrame(result)
+        # Non-DataFrame results (e.g. one row as a Series) become a one-row frame
+        return MockDataFrame(pd.DataFrame([result]))
+
+
+class MockDataFrame:
+    """Minimal DataFrame-like test double wrapping a pandas DataFrame in ``_data``."""
+
+    def __init__(self, data: pd.DataFrame):
+        self._data = data
+
+    @property
+    def index(self) -> pd.Index:
+        return self._data.index
+
+    @property
+    def columns(self) -> pd.Index:
+        return self._data.columns
+
+    @property
+    def shape(self) -> tuple[int, int]:
+        return self._data.shape
+
+    @property
+    def iloc(self) -> MockIlocIndexer:
+        return MockIlocIndexer(self._data)
+
+    def reindex(
+        self,
+        index: pd.Index | None = None,
+        axis: Literal[0, 1] = 0,
+        fill_value: Any = np.nan,
+    ) -> Self:
+        # Pass the labels positionally: pandas rejects ``index=`` combined with
+        # ``axis=``, while ``labels`` + ``axis`` lets the requested axis be honoured.
+        return type(self)(self._data.reindex(index, axis=axis, fill_value=fill_value))
+
+
+class TestDataFrameLikeProtocol:
+    """Test the DataFrameLike protocol with different implementations."""
+
+    @pytest.fixture
+    def sample_df(self) -> pd.DataFrame:
+        """Create a sample pandas DataFrame for testing."""
+        return pd.DataFrame(
+            {"a": [1, 2, 3], "b": [4.0, 5.0, 6.0], "c": ["x", "y", "z"]},
+            index=["row1", "row2", "row3"],
+        )
+
+    def test_pandas_dataframe_is_dataframe_like(self, sample_df: pd.DataFrame):
+        """pd.DataFrame should satisfy the DataFrameLike protocol."""
+        assert isinstance(sample_df, DataFrameLike)
+
+    def test_pandas_iloc_is_iloc_indexer(self, sample_df: pd.DataFrame):
+        """pd.DataFrame.iloc should satisfy the DataFrameLikeIlocIndexer protocol."""
+        assert isinstance(sample_df.iloc, DataFrameLikeIlocIndexer)
+
+    def test_mock_dataframe_is_dataframe_like(self, sample_df: pd.DataFrame):
+        """MockDataFrame should satisfy the DataFrameLike protocol."""
+        mock_df = MockDataFrame(sample_df)
+        assert isinstance(mock_df, DataFrameLike)
+
+    def test_mock_iloc_is_iloc_indexer(self, sample_df: pd.DataFrame):
+        """MockDataFrame.iloc should satisfy the DataFrameLikeIlocIndexer protocol."""
+        mock_df = MockDataFrame(sample_df)
+        assert isinstance(mock_df.iloc, DataFrameLikeIlocIndexer)
+
+    def test_dataframe_like_has_required_properties(self, sample_df: pd.DataFrame):
+        """Verify DataFrameLike objects have the required properties."""
+        for df in [sample_df, MockDataFrame(sample_df)]:
+            assert hasattr(df, "index")
+            assert hasattr(df, "columns")
+            assert hasattr(df, "shape")
+            assert hasattr(df, "iloc")
+            assert hasattr(df, "reindex")
+
+    def test_dataframe_like_index(self, sample_df: pd.DataFrame):
+        """Verify index property returns a pd.Index."""
+        mock_df = MockDataFrame(sample_df)
+        assert isinstance(mock_df.index, pd.Index)
+        pd.testing.assert_index_equal(mock_df.index, sample_df.index)
+
+    def test_dataframe_like_columns(self, sample_df: pd.DataFrame):
+        """Verify columns property returns a pd.Index."""
+        mock_df = MockDataFrame(sample_df)
+        assert isinstance(mock_df.columns, pd.Index)
+        pd.testing.assert_index_equal(mock_df.columns, sample_df.columns)
+
+    def test_dataframe_like_shape(self, sample_df: pd.DataFrame):
+        """Verify shape property returns correct tuple."""
+        mock_df = MockDataFrame(sample_df)
+        assert mock_df.shape == sample_df.shape
+        assert mock_df.shape == (3, 3)
+
+    def test_dataframe_like_iloc(self, sample_df: pd.DataFrame):
+        """Verify iloc indexer works correctly."""
+        mock_df = MockDataFrame(sample_df)
+
+        # Integer index: pandas yields a Series, which the mock re-wraps as a frame
+        result = mock_df.iloc[0]
+        assert isinstance(result, DataFrameLike)
+
+        # Test slice selection
+        result = mock_df.iloc[0:2]
+        assert isinstance(result, DataFrameLike)
+        assert result.shape[0] == 2
+
+    def test_dataframe_like_reindex(self, sample_df: pd.DataFrame):
+        """Verify reindex method works correctly."""
+        mock_df = MockDataFrame(sample_df)
+        new_index = pd.Index(["row1", "row2", "row4"])
+
+        result = mock_df.reindex(index=new_index, fill_value=-1)
+        assert isinstance(result, DataFrameLike)
+        pd.testing.assert_index_equal(result.index, new_index)
+
+    def test_non_dataframe_is_not_dataframe_like(self):
+        """Objects that don't implement the protocol should not match."""
+        assert not isinstance([], DataFrameLike)
+        assert not isinstance({}, DataFrameLike)
+        assert not isinstance("string", DataFrameLike)
+        assert not isinstance(42, DataFrameLike)
+        assert not isinstance(np.array([1, 2, 3]), DataFrameLike)
+
+
+@pytest.mark.usefixtures("xr_available")
+class TestDataset2DIsDataFrameLike:
+    """Test that Dataset2D satisfies the DataFrameLike protocol."""
+
+    @pytest.fixture
+    def xr_available(self):
+        """Skip every test in this class when xarray cannot be imported."""
+        pytest.importorskip("xarray")
+
+    @pytest.fixture
+    def sample_dataset2d(self):
+        """Create a two-column, three-row Dataset2D indexed by ``idx``."""
+        from anndata._core.xarray import Dataset2D
+        from anndata.compat import XDataset
+
+        ds = XDataset(
+            {
+                "a": (["idx"], [1, 2, 3]),
+                "b": (["idx"], [4.0, 5.0, 6.0]),
+            },
+            coords={"idx": ["row1", "row2", "row3"]},
+        )
+        return Dataset2D(ds)
+
+    def test_dataset2d_is_dataframe_like(self, sample_dataset2d):
+        """Dataset2D should satisfy the DataFrameLike protocol."""
+        assert isinstance(sample_dataset2d, DataFrameLike)
+
+    def test_dataset2d_iloc_is_iloc_indexer(self, sample_dataset2d):
+        """Dataset2D.iloc should satisfy the DataFrameLikeIlocIndexer protocol."""
+        assert isinstance(sample_dataset2d.iloc, DataFrameLikeIlocIndexer)
+
+    def test_dataset2d_has_required_properties(self, sample_dataset2d):
+        """Verify Dataset2D has the required properties."""
+        assert hasattr(sample_dataset2d, "index")
+        assert hasattr(sample_dataset2d, "columns")
+        assert hasattr(sample_dataset2d, "shape")
+        assert hasattr(sample_dataset2d, "iloc")
+        assert hasattr(sample_dataset2d, "reindex")
+
+    def test_dataset2d_properties_return_correct_types(self, sample_dataset2d):
+        """Verify Dataset2D properties return correct types."""
+        assert isinstance(sample_dataset2d.index, pd.Index)
+        assert isinstance(sample_dataset2d.columns, pd.Index)
+        assert isinstance(sample_dataset2d.shape, tuple)
+        assert len(sample_dataset2d.shape) == 2
+
+
+class TestDataFrameLikeWithAnnData:
+    """Check DataFrameLike behaviour of obs/var on a pandas-backed AnnData."""
+
+    @pytest.fixture
+    def simple_adata(self):
+        """Create a 3x3 AnnData whose obs and var are pandas DataFrames."""
+        import anndata as ad
+
+        return ad.AnnData(
+            X=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
+            obs=pd.DataFrame(
+                {"cell_type": ["A", "B", "C"]},
+                index=["cell1", "cell2", "cell3"],
+            ),
+            var=pd.DataFrame(
+                {"gene_name": ["g1", "g2", "g3"]},
+                index=["gene1", "gene2", "gene3"],
+            ),
+        )
+
+    def test_adata_obs_is_dataframe_like(self, simple_adata):
+        """AnnData.obs should satisfy the DataFrameLike protocol."""
+        assert isinstance(simple_adata.obs, DataFrameLike)
+
+    def test_adata_var_is_dataframe_like(self, simple_adata):
+        """AnnData.var should satisfy the DataFrameLike protocol."""
+        assert isinstance(simple_adata.var, DataFrameLike)
+
+    def test_adata_obs_has_required_properties(self, simple_adata):
+        """Verify AnnData.obs has all required DataFrameLike properties."""
+        obs = simple_adata.obs
+        assert hasattr(obs, "index")
+        assert hasattr(obs, "columns")
+        assert hasattr(obs, "shape")
+        assert hasattr(obs, "iloc")
+        assert hasattr(obs, "reindex")
+
+    def test_adata_obs_iloc_subsetting(self, simple_adata):
+        """Verify iloc subsetting works on AnnData.obs."""
+        obs = simple_adata.obs
+        subset = obs.iloc[0:2]
+        assert isinstance(subset, DataFrameLike)
+        assert subset.shape[0] == 2
+
+    def test_adata_subset_preserves_dataframe_like(self, simple_adata):
+        """Verify subsetting AnnData preserves DataFrameLike for obs/var."""
+        adata_subset = simple_adata[0:2, 0:2]
+        assert isinstance(adata_subset.obs, DataFrameLike)
+        assert isinstance(adata_subset.var, DataFrameLike)
+        assert adata_subset.obs.shape[0] == 2
+        assert adata_subset.var.shape[0] == 2
+
+    def test_adata_copy_preserves_dataframe_like(self, simple_adata):
+        """Verify copying AnnData preserves DataFrameLike for obs/var."""
+        adata_copy = simple_adata.copy()
+        assert isinstance(adata_copy.obs, DataFrameLike)
+        assert isinstance(adata_copy.var, DataFrameLike)
+
+    def test_set_obs_with_dataframe(self, simple_adata):
+        """Verify setting obs with pd.DataFrame works."""
+        new_obs = pd.DataFrame(
+            {"new_col": [1, 2, 3]},
+            index=["cell1", "cell2", "cell3"],
+        )
+        simple_adata.obs = new_obs
+        assert isinstance(simple_adata.obs, DataFrameLike)
+        assert "new_col" in simple_adata.obs.columns
+
+    def test_set_var_with_dataframe(self, simple_adata):
+        """Verify setting var with pd.DataFrame works."""
+        new_var = pd.DataFrame(
+            {"new_col": [1, 2, 3]},
+            index=["gene1", "gene2", "gene3"],
+        )
+        simple_adata.var = new_var
+        assert isinstance(simple_adata.var, DataFrameLike)
+        assert "new_col" in simple_adata.var.columns
+
+
+class TestCustomDataFrameLikeWithAnnData:
+    """Test that custom DataFrameLike implementations work with AnnData."""
+
+    def test_init_adata_with_custom_dataframe_like_obs(self):
+        """Verify AnnData can be initialized with a custom DataFrameLike obs."""
+        import anndata as ad
+
+        # Create a custom DataFrameLike object
+        obs_df = pd.DataFrame(
+            {"cell_type": ["A", "B", "C"]},
+            index=["cell1", "cell2", "cell3"],
+        )
+        mock_obs = MockDataFrame(obs_df)
+
+        # Verify MockDataFrame satisfies the protocol
+        assert isinstance(mock_obs, DataFrameLike)
+
+        # Create AnnData with custom DataFrameLike
+        adata = ad.AnnData(
+            X=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
+            obs=mock_obs,
+            var=pd.DataFrame(
+                {"gene_name": ["g1", "g2", "g3"]},
+                index=["gene1", "gene2", "gene3"],
+            ),
+        )
+
+        # obs still satisfies the protocol and keeps its length (identity unchecked)
+        assert isinstance(adata.obs, DataFrameLike)
+        assert adata.obs.shape[0] == 3
+
+    def test_init_adata_with_custom_dataframe_like_var(self):
+        """Verify AnnData can be initialized with a custom DataFrameLike var."""
+        import anndata as ad
+
+        # Create a custom DataFrameLike object
+        var_df = pd.DataFrame(
+            {"gene_name": ["g1", "g2", "g3"]},
+            index=["gene1", "gene2", "gene3"],
+        )
+        mock_var = MockDataFrame(var_df)
+
+        # Verify MockDataFrame satisfies the protocol
+        assert isinstance(mock_var, DataFrameLike)
+
+        # Create AnnData with custom DataFrameLike var
+        adata = ad.AnnData(
+            X=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
+            obs=pd.DataFrame(
+                {"cell_type": ["A", "B", "C"]},
+                index=["cell1", "cell2", "cell3"],
+            ),
+            var=mock_var,
+        )
+
+        # var still satisfies the protocol and keeps its length (identity unchecked)
+        assert isinstance(adata.var, DataFrameLike)
+        assert adata.var.shape[0] == 3
+
+    def test_custom_dataframe_like_length_validation(self):
+        """Verify length validation works for custom DataFrameLike."""
+        import anndata as ad
+
+        # Create a custom DataFrameLike with wrong length
+        obs_df = pd.DataFrame(
+            {"cell_type": ["A", "B"]},  # Only 2 rows, but X has 3
+            index=["cell1", "cell2"],
+        )
+        mock_obs = MockDataFrame(obs_df)
+
+        # Should raise ValueError due to length mismatch
+        with pytest.raises(ValueError, match="must have as many rows"):
+            ad.AnnData(
+                X=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
+                obs=mock_obs,
+                var=pd.DataFrame(
+                    {"gene_name": ["g1", "g2", "g3"]},
+                    index=["gene1", "gene2", "gene3"],
+                ),
+            )
+
+
+@pytest.mark.usefixtures("xr_available")
+class TestDataset2DWithAnnData:
+    """Test that Dataset2D works as obs in AnnData (var stays a pandas DataFrame)."""
+
+    @pytest.fixture
+    def xr_available(self):
+        """Skip every test in this class when xarray cannot be imported."""
+        pytest.importorskip("xarray")
+
+    @pytest.fixture
+    def adata_with_dataset2d_obs(self):
+        """Create a 3x3 AnnData with Dataset2D obs and pandas var."""
+        import anndata as ad
+        from anndata._core.xarray import Dataset2D
+        from anndata.compat import XDataset
+
+        X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+
+        # Create Dataset2D for obs
+        obs_ds = XDataset(
+            {"cell_type": (["idx"], ["A", "B", "C"])},
+            coords={"idx": ["cell1", "cell2", "cell3"]},
+        )
+        obs = Dataset2D(obs_ds)
+
+        var = pd.DataFrame(
+            {"gene_name": ["g1", "g2", "g3"]},
+            index=["gene1", "gene2", "gene3"],
+        )
+
+        return ad.AnnData(X=X, obs=obs, var=var)
+
+    def test_adata_with_dataset2d_obs_is_dataframe_like(self, adata_with_dataset2d_obs):
+        """AnnData with Dataset2D obs should satisfy DataFrameLike."""
+        assert isinstance(adata_with_dataset2d_obs.obs, DataFrameLike)
+
+    def test_adata_with_dataset2d_subset(self, adata_with_dataset2d_obs):
+        """Subsetting AnnData with Dataset2D obs should work."""
+        adata_subset = adata_with_dataset2d_obs[0:2]
+        assert isinstance(adata_subset.obs, DataFrameLike)
+        assert adata_subset.obs.shape[0] == 2
+
+    def test_adata_with_dataset2d_obs_index(self, adata_with_dataset2d_obs):
+        """Dataset2D obs should have correct index."""
+        obs = adata_with_dataset2d_obs.obs
+        pd.testing.assert_index_equal(
+            obs.index, pd.Index(["cell1", "cell2", "cell3"]), check_names=False
+        )

From 232d1baf4cf3f8aab9e2ccd37e223da228969697 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Wed, 4 Feb 2026 16:48:11 +0100
Subject: [PATCH 02/11] fix: iloc

---
 src/anndata/_types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/anndata/_types.py b/src/anndata/_types.py
index e57b842b0..900276d53 100644
--- a/src/anndata/_types.py
+++ b/src/anndata/_types.py
@@ -65,7 +65,7 @@ class DataFrameLikeIlocIndexer(Protocol):
     True
     """
 
-    def __getitem__(self, idx: Any) -> Self: ...
+    def __getitem__(self, idx: Any) -> Any: ...
 
 
 @runtime_checkable

From 3a7882ea878e84e43f3f95a0fc168983653cdb54 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Wed, 4 Feb 2026 17:03:43 +0100
Subject: [PATCH 03/11] fix: maybe make `reindex` weaker?

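---
NOTE(review): `index` becomes keyword-only in this protocol signature, but
`_apply_to_df_like` in src/anndata/_core/merge.py (PATCH 01) still calls
`el.reindex(self.new_idx, axis=axis, fill_value=fill_value)` with a positional
first argument, so that call no longer matches the protocol for static type
checkers.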
 src/anndata/_types.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/anndata/_types.py b/src/anndata/_types.py
index 900276d53..1a3c2fe76 100644
--- a/src/anndata/_types.py
+++ b/src/anndata/_types.py
@@ -121,9 +121,11 @@ def iloc(self) -> DataFrameLikeIlocIndexer:
 
     def reindex(
         self,
+        *,
         index: pd.Index | None = None,
-        axis: Literal[0] = 0,
+        axis: Literal[0, 1] | None = 0,
         fill_value: Any = ...,
+        **kwargs,
     ) -> Self:
         """Reindex the DataFrame-like object to match a new index.
 

From 287a6160587e544d04ad25009669100c94b0f5db Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Wed, 4 Feb 2026 17:08:40 +0100
Subject: [PATCH 04/11] fix: setter

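---
NOTE(review): the setter added below is written as `def _`, so
`@columns.setter` binds the settable property to the class attribute `_` and
leaves `columns` itself read-only. It also makes `_` a member of the
runtime-checkable protocol, which `isinstance(obj, DataFrameLike)` will then
look for on `obj`. The function should be named `columns`. PATCH 05 appears to
move this code to src/anndata/types.py, so the rename is needed there as well.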
 src/anndata/_types.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/anndata/_types.py b/src/anndata/_types.py
index 1a3c2fe76..74c6c2df3 100644
--- a/src/anndata/_types.py
+++ b/src/anndata/_types.py
@@ -109,6 +109,11 @@ def columns(self) -> pd.Index:
         """Column labels of the DataFrame-like object."""
         ...
 
+    @columns.setter
+    def _(self, v: Any) -> None:
+        """Setter for columns"""
+        ...
+
     @property
     def shape(self) -> tuple[int, int]:
         """Shape of the DataFrame-like object as (n_rows, n_columns)."""

From 1fb50d6d64525da8010ddf422ecd19b55008b838 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Wed, 4 Feb 2026 17:29:33 +0100
Subject: [PATCH 05/11] fix: put in `types.py`

---
 src/anndata/_core/aligned_df.py      |   2 +-
 src/anndata/_core/aligned_mapping.py |   2 +-
 src/anndata/_core/anndata.py         |   2 +-
 src/anndata/_core/index.py           |   4 +-
 src/anndata/_core/merge.py           |   2 +-
 src/anndata/_core/storage.py         |   2 +-
 src/anndata/_types.py                | 111 +--------------------------
 src/anndata/types.py                 | 107 ++++++++++++++++++++++++++
 8 files changed, 117 insertions(+), 115 deletions(-)

diff --git a/src/anndata/_core/aligned_df.py b/src/anndata/_core/aligned_df.py
index 82621f542..b35706122 100644
--- a/src/anndata/_core/aligned_df.py
+++ b/src/anndata/_core/aligned_df.py
@@ -7,9 +7,9 @@
 import pandas as pd
 from pandas.api.types import is_string_dtype
 
-from .._types import DataFrameLike
 from .._warnings import ImplicitModificationWarning
 from ..compat import XDataset, pandas_as_str
+from ..types import DataFrameLike
 from ..utils import warn
 from .xarray import Dataset2D
 
diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py
index 32827cb94..70621b433 100644
--- a/src/anndata/_core/aligned_mapping.py
+++ b/src/anndata/_core/aligned_mapping.py
@@ -9,9 +9,9 @@
 import numpy as np
 import pandas as pd
 
-from .._types import DataFrameLike
 from .._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning
 from ..compat import AwkArray, CSArray, CSMatrix, CupyArray, XDataset
+from ..types import DataFrameLike
 from ..utils import (
     axis_len,
     convert_to_dict,
diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py
index 95f79557e..8a9b8f1af 100644
--- a/src/anndata/_core/anndata.py
+++ b/src/anndata/_core/anndata.py
@@ -61,7 +61,7 @@
 
     from zarr.storage import StoreLike
 
-    from .._types import DataFrameLike
+    from ..types import DataFrameLike
     from ..typing import Index1D, _Index1DNorm, _XDataType
     from .aligned_mapping import AxisArraysView, LayersView, PairwiseArraysView
     from .index import Index
diff --git a/src/anndata/_core/index.py b/src/anndata/_core/index.py
index 1b90b4ba7..d1fb4cdd4 100644
--- a/src/anndata/_core/index.py
+++ b/src/anndata/_core/index.py
@@ -10,12 +10,12 @@
 import pandas as pd
 from scipy.sparse import issparse
 
-from .._types import DataFrameLike
 from ..compat import AwkArray, CSArray, CSMatrix, DaskArray, XDataArray
 
 if TYPE_CHECKING:
     from numpy.typing import NDArray
 
+    from ..types import DataFrameLike
     from ..typing import Index, Index1D, _Index1DNorm
 
 
@@ -175,6 +175,8 @@ def _subset(
     subset_idx: tuple[_Index1DNorm] | tuple[_Index1DNorm, _Index1DNorm],
 ):
     # Check for DataFrameLike objects (pd.DataFrame, Dataset2D, etc.)
+    from ..types import DataFrameLike
+
     if isinstance(a, DataFrameLike):
         return a.iloc[subset_idx]
     # Select as combination of indexes, not coordinates
diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py
index 649ab3e6f..4d477ef38 100644
--- a/src/anndata/_core/merge.py
+++ b/src/anndata/_core/merge.py
@@ -20,7 +20,6 @@
 from anndata._core.file_backing import to_memory
 from anndata._warnings import ExperimentalFeatureWarning
 
-from .._types import DataFrameLike
 from ..compat import (
     AwkArray,
     CSArray,
@@ -30,6 +29,7 @@
     CupySparseMatrix,
     DaskArray,
 )
+from ..types import DataFrameLike
 from ..utils import asarray, axis_len, warn, warn_once
 from .anndata import AnnData
 from .index import _subset, make_slice
diff --git a/src/anndata/_core/storage.py b/src/anndata/_core/storage.py
index 22afcebb9..05942fdea 100644
--- a/src/anndata/_core/storage.py
+++ b/src/anndata/_core/storage.py
@@ -8,9 +8,9 @@
 
 from anndata.compat import CSArray, CSMatrix
 
-from .._types import DataFrameLike
 from .._warnings import ImplicitModificationWarning
 from ..compat import XDataset
+from ..types import DataFrameLike
 from ..utils import (
     ensure_df_homogeneous,
     get_union_members,
diff --git a/src/anndata/_types.py b/src/anndata/_types.py
index 74c6c2df3..e278e4041 100644
--- a/src/anndata/_types.py
+++ b/src/anndata/_types.py
@@ -4,16 +4,14 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Literal, Protocol, runtime_checkable
+from typing import TYPE_CHECKING, Literal, Protocol
 
 from .compat import H5Array, H5Group, ZarrArray, ZarrGroup
 from .utils import set_module
 
 if TYPE_CHECKING:
     from collections.abc import Mapping
-    from typing import Any, Self, TypeAlias
-
-    import pandas as pd
+    from typing import Any, TypeAlias
 
     from anndata._core.xarray import Dataset2D
 
@@ -31,8 +29,6 @@
 
 
 __all__ = [
-    "DataFrameLike",
-    "DataFrameLikeIlocIndexer",
     "StorageType",
     "_ArrayStorageType",
     "_GroupStorageType",
@@ -47,109 +43,6 @@
 type StorageType = _ArrayStorageType | _GroupStorageType
 
 
-@runtime_checkable
-class DataFrameLikeIlocIndexer(Protocol):
-    """Protocol for iloc-style indexers on DataFrame-like objects.
-
-    This protocol defines the minimal interface for positional-based indexing
-    that AnnData requires. Both :class:`pandas.DataFrame` and
-    :class:`~anndata.experimental.backed.Dataset2D` provide compatible
-    ``iloc`` accessors.
-
-    Examples
-    --------
-    >>> import pandas as pd
-    >>> from anndata._types import DataFrameLikeIlocIndexer
-    >>> df = pd.DataFrame({"a": [1, 2, 3]})
-    >>> isinstance(df.iloc, DataFrameLikeIlocIndexer)
-    True
-    """
-
-    def __getitem__(self, idx: Any) -> Any: ...
-
-
-@runtime_checkable
-class DataFrameLike(Protocol):
-    """Protocol for DataFrame-like objects usable in AnnData.
-
-    This runtime-checkable protocol defines the minimal DataFrame API that
-    AnnData uses internally for ``obs``, ``var``, and similar dataframe-like
-    data containers. Any class implementing this protocol can be used as a
-    drop-in replacement for :class:`pandas.DataFrame` in these contexts.
-
-    The required interface includes:
-
-    - :attr:`index`: Row labels as a :class:`pandas.Index`
-    - :attr:`columns`: Column labels as a :class:`pandas.Index`
-    - :attr:`shape`: Tuple of (n_rows, n_columns)
-    - :attr:`iloc`: Positional indexer returning a :class:`DataFrameLikeIlocIndexer`
-    - :meth:`reindex`: Method to reindex rows
-
-    Examples
-    --------
-    >>> import pandas as pd
-    >>> from anndata._types import DataFrameLike
-    >>> df = pd.DataFrame({"a": [1, 2, 3]})
-    >>> isinstance(df, DataFrameLike)
-    True
-
-    See Also
-    --------
-    :class:`~anndata.experimental.backed.Dataset2D`
-        An xarray-based implementation of this protocol.
-    """
-
-    @property
-    def index(self) -> pd.Index:
-        """Row labels of the DataFrame-like object."""
-        ...
-
-    @property
-    def columns(self) -> pd.Index:
-        """Column labels of the DataFrame-like object."""
-        ...
-
-    @columns.setter
-    def _(self, v: Any) -> None:
-        """Setter for columns"""
-        ...
-
-    @property
-    def shape(self) -> tuple[int, int]:
-        """Shape of the DataFrame-like object as (n_rows, n_columns)."""
-        ...
-
-    @property
-    def iloc(self) -> DataFrameLikeIlocIndexer:
-        """Positional indexer for the DataFrame-like object."""
-        ...
-
-    def reindex(
-        self,
-        *,
-        index: pd.Index | None = None,
-        axis: Literal[0, 1] | None = 0,
-        fill_value: Any = ...,
-        **kwargs,
-    ) -> Self:
-        """Reindex the DataFrame-like object to match a new index.
-
-        Parameters
-        ----------
-        index
-            New index to conform to.
-        axis
-            Axis to reindex along (only 0 is supported).
-        fill_value
-            Value to use for missing values.
-
-        Returns
-        -------
-        Reindexed DataFrame-like object.
-        """
-        ...
-
-
 @set_module("anndata.experimental")
 class Dataset2DIlocIndexer(Protocol):
     def __getitem__(self, idx: Any) -> Dataset2D: ...
diff --git a/src/anndata/types.py b/src/anndata/types.py
index ed3a293bd..49b75ac80 100644
--- a/src/anndata/types.py
+++ b/src/anndata/types.py
@@ -3,6 +3,10 @@
 from typing import TYPE_CHECKING, Protocol, runtime_checkable
 
 if TYPE_CHECKING:
+    from typing import Any, Literal, Self
+
+    from pandas import Index
+
     from ._core.anndata import AnnData
 
 
@@ -20,3 +24,106 @@ def __init__(self, adata: AnnData) -> None:
         """
         Used to enforce the correct signature for extension namespaces.
         """
+
+
+@runtime_checkable
+class DataFrameLikeIlocIndexer(Protocol):
+    """Protocol for iloc-style indexers on DataFrame-like objects.
+
+    This protocol defines the minimal interface for positional-based indexing
+    that AnnData requires. Both :class:`pandas.DataFrame` and
+    :class:`~anndata.experimental.backed.Dataset2D` provide compatible
+    ``iloc`` accessors.
+
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> from anndata._types import DataFrameLikeIlocIndexer
+    >>> df = pd.DataFrame({"a": [1, 2, 3]})
+    >>> isinstance(df.iloc, DataFrameLikeIlocIndexer)
+    True
+    """
+
+    def __getitem__(self, idx: Any) -> Any: ...
+
+
+@runtime_checkable
+class DataFrameLike(Protocol):
+    """Protocol for DataFrame-like objects usable in AnnData.
+
+    This runtime-checkable protocol defines the minimal DataFrame API that
+    AnnData uses internally for ``obs``, ``var``, and similar dataframe-like
+    data containers. Any class implementing this protocol can be used as a
+    drop-in replacement for :class:`pandas.DataFrame` in these contexts.
+
+    The required interface includes:
+
+    - :attr:`index`: Row labels as a :class:`pandas.Index`
+    - :attr:`columns`: Column labels as a :class:`pandas.Index`
+    - :attr:`shape`: Tuple of (n_rows, n_columns)
+    - :attr:`iloc`: Positional indexer returning a :class:`DataFrameLikeIlocIndexer`
+    - :meth:`reindex`: Method to reindex rows
+
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> from anndata._types import DataFrameLike
+    >>> df = pd.DataFrame({"a": [1, 2, 3]})
+    >>> isinstance(df, DataFrameLike)
+    True
+
+    See Also
+    --------
+    :class:`~anndata.experimental.backed.Dataset2D`
+        An xarray-based implementation of this protocol.
+    """
+
+    @property
+    def index(self) -> Index:
+        """Row labels of the DataFrame-like object."""
+        ...
+
+    @property
+    def columns(self) -> Index:
+        """Column labels of the DataFrame-like object."""
+        ...
+
+    @columns.setter
+    def columns(self, v: Any) -> None:
+        """Setter for columns"""
+        ...
+
+    @property
+    def shape(self) -> tuple[int, int]:
+        """Shape of the DataFrame-like object as (n_rows, n_columns)."""
+        ...
+
+    @property
+    def iloc(self) -> DataFrameLikeIlocIndexer:
+        """Positional indexer for the DataFrame-like object."""
+        ...
+
+    def reindex(
+        self,
+        *,
+        index: Index | None = None,
+        axis: Literal[0, 1] | None = 0,
+        fill_value: Any = ...,
+        **kwargs,
+    ) -> Self:
+        """Reindex the DataFrame-like object to match a new index.
+
+        Parameters
+        ----------
+        index
+            New index to conform to.
+        axis
+            Axis to reindex along (only 0 is supported).
+        fill_value
+            Value to use for missing values.
+
+        Returns
+        -------
+        Reindexed DataFrame-like object.
+        """
+        ...

From 86013ca2c2ff7bcffdce044f956a995c29c5f004 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Wed, 4 Feb 2026 17:32:31 +0100
Subject: [PATCH 06/11] fix: remove AI-generated DataFrameLike protocol tests

---
 tests/test_dataframe_protocol.py | 407 -------------------------------
 1 file changed, 407 deletions(-)
 delete mode 100644 tests/test_dataframe_protocol.py

diff --git a/tests/test_dataframe_protocol.py b/tests/test_dataframe_protocol.py
deleted file mode 100644
index 62b9371b5..000000000
--- a/tests/test_dataframe_protocol.py
+++ /dev/null
@@ -1,407 +0,0 @@
-"""Tests for the DataFrameLike protocol."""
-
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-import numpy as np
-import pandas as pd
-import pytest
-
-from anndata._types import DataFrameLike, DataFrameLikeIlocIndexer
-
-if TYPE_CHECKING:
-    from typing import Any, Literal, Self
-
-
-class MockIlocIndexer:
-    """Mock iloc indexer for testing."""
-
-    def __init__(self, data: pd.DataFrame):
-        self._data = data
-
-    def __getitem__(self, idx: Any) -> MockDataFrame:
-        result = self._data.iloc[idx]
-        if isinstance(result, pd.DataFrame):
-            return MockDataFrame(result)
-        # For single row selection, wrap in DataFrame
-        return MockDataFrame(pd.DataFrame([result]))
-
-
-class MockDataFrame:
-    """A minimal DataFrame-like class for testing the protocol."""
-
-    def __init__(self, data: pd.DataFrame):
-        self._data = data
-
-    @property
-    def index(self) -> pd.Index:
-        return self._data.index
-
-    @property
-    def columns(self) -> pd.Index:
-        return self._data.columns
-
-    @property
-    def shape(self) -> tuple[int, int]:
-        return self._data.shape
-
-    @property
-    def iloc(self) -> MockIlocIndexer:
-        return MockIlocIndexer(self._data)
-
-    def reindex(
-        self,
-        index: pd.Index | None = None,
-        axis: Literal[0] = 0,
-        fill_value: Any = np.nan,
-    ) -> Self:
-        # axis=0 is the default; don't pass it when index is specified
-        # since pandas doesn't allow both keyword arguments together
-        return MockDataFrame(self._data.reindex(index=index, fill_value=fill_value))
-
-
-class TestDataFrameLikeProtocol:
-    """Test the DataFrameLike protocol with different implementations."""
-
-    @pytest.fixture
-    def sample_df(self) -> pd.DataFrame:
-        """Create a sample pandas DataFrame for testing."""
-        return pd.DataFrame(
-            {"a": [1, 2, 3], "b": [4.0, 5.0, 6.0], "c": ["x", "y", "z"]},
-            index=["row1", "row2", "row3"],
-        )
-
-    def test_pandas_dataframe_is_dataframe_like(self, sample_df: pd.DataFrame):
-        """pd.DataFrame should satisfy the DataFrameLike protocol."""
-        assert isinstance(sample_df, DataFrameLike)
-
-    def test_pandas_iloc_is_iloc_indexer(self, sample_df: pd.DataFrame):
-        """pd.DataFrame.iloc should satisfy the DataFrameLikeIlocIndexer protocol."""
-        assert isinstance(sample_df.iloc, DataFrameLikeIlocIndexer)
-
-    def test_mock_dataframe_is_dataframe_like(self, sample_df: pd.DataFrame):
-        """MockDataFrame should satisfy the DataFrameLike protocol."""
-        mock_df = MockDataFrame(sample_df)
-        assert isinstance(mock_df, DataFrameLike)
-
-    def test_mock_iloc_is_iloc_indexer(self, sample_df: pd.DataFrame):
-        """MockDataFrame.iloc should satisfy the DataFrameLikeIlocIndexer protocol."""
-        mock_df = MockDataFrame(sample_df)
-        assert isinstance(mock_df.iloc, DataFrameLikeIlocIndexer)
-
-    def test_dataframe_like_has_required_properties(self, sample_df: pd.DataFrame):
-        """Verify DataFrameLike objects have the required properties."""
-        for df in [sample_df, MockDataFrame(sample_df)]:
-            assert hasattr(df, "index")
-            assert hasattr(df, "columns")
-            assert hasattr(df, "shape")
-            assert hasattr(df, "iloc")
-            assert hasattr(df, "reindex")
-
-    def test_dataframe_like_index(self, sample_df: pd.DataFrame):
-        """Verify index property returns a pd.Index."""
-        mock_df = MockDataFrame(sample_df)
-        assert isinstance(mock_df.index, pd.Index)
-        pd.testing.assert_index_equal(mock_df.index, sample_df.index)
-
-    def test_dataframe_like_columns(self, sample_df: pd.DataFrame):
-        """Verify columns property returns a pd.Index."""
-        mock_df = MockDataFrame(sample_df)
-        assert isinstance(mock_df.columns, pd.Index)
-        pd.testing.assert_index_equal(mock_df.columns, sample_df.columns)
-
-    def test_dataframe_like_shape(self, sample_df: pd.DataFrame):
-        """Verify shape property returns correct tuple."""
-        mock_df = MockDataFrame(sample_df)
-        assert mock_df.shape == sample_df.shape
-        assert mock_df.shape == (3, 3)
-
-    def test_dataframe_like_iloc(self, sample_df: pd.DataFrame):
-        """Verify iloc indexer works correctly."""
-        mock_df = MockDataFrame(sample_df)
-
-        # Test single row selection
-        result = mock_df.iloc[0]
-        assert isinstance(result, DataFrameLike)
-
-        # Test slice selection
-        result = mock_df.iloc[0:2]
-        assert isinstance(result, DataFrameLike)
-        assert result.shape[0] == 2
-
-    def test_dataframe_like_reindex(self, sample_df: pd.DataFrame):
-        """Verify reindex method works correctly."""
-        mock_df = MockDataFrame(sample_df)
-        new_index = pd.Index(["row1", "row2", "row4"])
-
-        result = mock_df.reindex(index=new_index, fill_value=-1)
-        assert isinstance(result, DataFrameLike)
-        pd.testing.assert_index_equal(result.index, new_index)
-
-    def test_non_dataframe_is_not_dataframe_like(self):
-        """Objects that don't implement the protocol should not match."""
-        assert not isinstance([], DataFrameLike)
-        assert not isinstance({}, DataFrameLike)
-        assert not isinstance("string", DataFrameLike)
-        assert not isinstance(42, DataFrameLike)
-        assert not isinstance(np.array([1, 2, 3]), DataFrameLike)
-
-
-@pytest.mark.usefixtures("xr_available")
-class TestDataset2DIsDataFrameLike:
-    """Test that Dataset2D satisfies the DataFrameLike protocol."""
-
-    @pytest.fixture
-    def xr_available(self):
-        """Skip tests if xarray is not available."""
-        pytest.importorskip("xarray")
-
-    @pytest.fixture
-    def sample_dataset2d(self):
-        """Create a sample Dataset2D for testing."""
-        from anndata._core.xarray import Dataset2D
-        from anndata.compat import XDataset
-
-        ds = XDataset(
-            {
-                "a": (["idx"], [1, 2, 3]),
-                "b": (["idx"], [4.0, 5.0, 6.0]),
-            },
-            coords={"idx": ["row1", "row2", "row3"]},
-        )
-        return Dataset2D(ds)
-
-    def test_dataset2d_is_dataframe_like(self, sample_dataset2d):
-        """Dataset2D should satisfy the DataFrameLike protocol."""
-        assert isinstance(sample_dataset2d, DataFrameLike)
-
-    def test_dataset2d_iloc_is_iloc_indexer(self, sample_dataset2d):
-        """Dataset2D.iloc should satisfy the DataFrameLikeIlocIndexer protocol."""
-        assert isinstance(sample_dataset2d.iloc, DataFrameLikeIlocIndexer)
-
-    def test_dataset2d_has_required_properties(self, sample_dataset2d):
-        """Verify Dataset2D has the required properties."""
-        assert hasattr(sample_dataset2d, "index")
-        assert hasattr(sample_dataset2d, "columns")
-        assert hasattr(sample_dataset2d, "shape")
-        assert hasattr(sample_dataset2d, "iloc")
-        assert hasattr(sample_dataset2d, "reindex")
-
-    def test_dataset2d_properties_return_correct_types(self, sample_dataset2d):
-        """Verify Dataset2D properties return correct types."""
-        assert isinstance(sample_dataset2d.index, pd.Index)
-        assert isinstance(sample_dataset2d.columns, pd.Index)
-        assert isinstance(sample_dataset2d.shape, tuple)
-        assert len(sample_dataset2d.shape) == 2
-
-
-class TestDataFrameLikeWithAnnData:
-    """Test that DataFrameLike protocol works correctly with AnnData objects."""
-
-    @pytest.fixture
-    def simple_adata(self):
-        """Create a simple AnnData object for testing."""
-        import anndata as ad
-
-        return ad.AnnData(
-            X=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
-            obs=pd.DataFrame(
-                {"cell_type": ["A", "B", "C"]},
-                index=["cell1", "cell2", "cell3"],
-            ),
-            var=pd.DataFrame(
-                {"gene_name": ["g1", "g2", "g3"]},
-                index=["gene1", "gene2", "gene3"],
-            ),
-        )
-
-    def test_adata_obs_is_dataframe_like(self, simple_adata):
-        """AnnData.obs should satisfy the DataFrameLike protocol."""
-        assert isinstance(simple_adata.obs, DataFrameLike)
-
-    def test_adata_var_is_dataframe_like(self, simple_adata):
-        """AnnData.var should satisfy the DataFrameLike protocol."""
-        assert isinstance(simple_adata.var, DataFrameLike)
-
-    def test_adata_obs_has_required_properties(self, simple_adata):
-        """Verify AnnData.obs has all required DataFrameLike properties."""
-        obs = simple_adata.obs
-        assert hasattr(obs, "index")
-        assert hasattr(obs, "columns")
-        assert hasattr(obs, "shape")
-        assert hasattr(obs, "iloc")
-        assert hasattr(obs, "reindex")
-
-    def test_adata_obs_iloc_subsetting(self, simple_adata):
-        """Verify iloc subsetting works on AnnData.obs."""
-        obs = simple_adata.obs
-        subset = obs.iloc[0:2]
-        assert isinstance(subset, DataFrameLike)
-        assert subset.shape[0] == 2
-
-    def test_adata_subset_preserves_dataframe_like(self, simple_adata):
-        """Verify subsetting AnnData preserves DataFrameLike for obs/var."""
-        adata_subset = simple_adata[0:2, 0:2]
-        assert isinstance(adata_subset.obs, DataFrameLike)
-        assert isinstance(adata_subset.var, DataFrameLike)
-        assert adata_subset.obs.shape[0] == 2
-        assert adata_subset.var.shape[0] == 2
-
-    def test_adata_copy_preserves_dataframe_like(self, simple_adata):
-        """Verify copying AnnData preserves DataFrameLike for obs/var."""
-        adata_copy = simple_adata.copy()
-        assert isinstance(adata_copy.obs, DataFrameLike)
-        assert isinstance(adata_copy.var, DataFrameLike)
-
-    def test_set_obs_with_dataframe(self, simple_adata):
-        """Verify setting obs with pd.DataFrame works."""
-        new_obs = pd.DataFrame(
-            {"new_col": [1, 2, 3]},
-            index=["cell1", "cell2", "cell3"],
-        )
-        simple_adata.obs = new_obs
-        assert isinstance(simple_adata.obs, DataFrameLike)
-        assert "new_col" in simple_adata.obs.columns
-
-    def test_set_var_with_dataframe(self, simple_adata):
-        """Verify setting var with pd.DataFrame works."""
-        new_var = pd.DataFrame(
-            {"new_col": [1, 2, 3]},
-            index=["gene1", "gene2", "gene3"],
-        )
-        simple_adata.var = new_var
-        assert isinstance(simple_adata.var, DataFrameLike)
-        assert "new_col" in simple_adata.var.columns
-
-
-class TestCustomDataFrameLikeWithAnnData:
-    """Test that custom DataFrameLike implementations work with AnnData."""
-
-    def test_init_adata_with_custom_dataframe_like_obs(self):
-        """Verify AnnData can be initialized with a custom DataFrameLike obs."""
-        import anndata as ad
-
-        # Create a custom DataFrameLike object
-        obs_df = pd.DataFrame(
-            {"cell_type": ["A", "B", "C"]},
-            index=["cell1", "cell2", "cell3"],
-        )
-        mock_obs = MockDataFrame(obs_df)
-
-        # Verify MockDataFrame satisfies the protocol
-        assert isinstance(mock_obs, DataFrameLike)
-
-        # Create AnnData with custom DataFrameLike
-        adata = ad.AnnData(
-            X=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
-            obs=mock_obs,
-            var=pd.DataFrame(
-                {"gene_name": ["g1", "g2", "g3"]},
-                index=["gene1", "gene2", "gene3"],
-            ),
-        )
-
-        # Verify obs is the MockDataFrame (unchanged)
-        assert isinstance(adata.obs, DataFrameLike)
-        assert adata.obs.shape[0] == 3
-
-    def test_init_adata_with_custom_dataframe_like_var(self):
-        """Verify AnnData can be initialized with a custom DataFrameLike var."""
-        import anndata as ad
-
-        # Create a custom DataFrameLike object
-        var_df = pd.DataFrame(
-            {"gene_name": ["g1", "g2", "g3"]},
-            index=["gene1", "gene2", "gene3"],
-        )
-        mock_var = MockDataFrame(var_df)
-
-        # Verify MockDataFrame satisfies the protocol
-        assert isinstance(mock_var, DataFrameLike)
-
-        # Create AnnData with custom DataFrameLike var
-        adata = ad.AnnData(
-            X=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
-            obs=pd.DataFrame(
-                {"cell_type": ["A", "B", "C"]},
-                index=["cell1", "cell2", "cell3"],
-            ),
-            var=mock_var,
-        )
-
-        # Verify var is the MockDataFrame (unchanged)
-        assert isinstance(adata.var, DataFrameLike)
-        assert adata.var.shape[0] == 3
-
-    def test_custom_dataframe_like_length_validation(self):
-        """Verify length validation works for custom DataFrameLike."""
-        import anndata as ad
-
-        # Create a custom DataFrameLike with wrong length
-        obs_df = pd.DataFrame(
-            {"cell_type": ["A", "B"]},  # Only 2 rows, but X has 3
-            index=["cell1", "cell2"],
-        )
-        mock_obs = MockDataFrame(obs_df)
-
-        # Should raise ValueError due to length mismatch
-        with pytest.raises(ValueError, match="must have as many rows"):
-            ad.AnnData(
-                X=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
-                obs=mock_obs,
-                var=pd.DataFrame(
-                    {"gene_name": ["g1", "g2", "g3"]},
-                    index=["gene1", "gene2", "gene3"],
-                ),
-            )
-
-
-@pytest.mark.usefixtures("xr_available")
-class TestDataset2DWithAnnData:
-    """Test that Dataset2D works correctly as obs/var in AnnData."""
-
-    @pytest.fixture
-    def xr_available(self):
-        """Skip tests if xarray is not available."""
-        pytest.importorskip("xarray")
-
-    @pytest.fixture
-    def adata_with_dataset2d_obs(self):
-        """Create an AnnData with Dataset2D obs."""
-        import anndata as ad
-        from anndata._core.xarray import Dataset2D
-        from anndata.compat import XDataset
-
-        X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
-
-        # Create Dataset2D for obs
-        obs_ds = XDataset(
-            {"cell_type": (["idx"], ["A", "B", "C"])},
-            coords={"idx": ["cell1", "cell2", "cell3"]},
-        )
-        obs = Dataset2D(obs_ds)
-
-        var = pd.DataFrame(
-            {"gene_name": ["g1", "g2", "g3"]},
-            index=["gene1", "gene2", "gene3"],
-        )
-
-        return ad.AnnData(X=X, obs=obs, var=var)
-
-    def test_adata_with_dataset2d_obs_is_dataframe_like(self, adata_with_dataset2d_obs):
-        """AnnData with Dataset2D obs should satisfy DataFrameLike."""
-        assert isinstance(adata_with_dataset2d_obs.obs, DataFrameLike)
-
-    def test_adata_with_dataset2d_subset(self, adata_with_dataset2d_obs):
-        """Subsetting AnnData with Dataset2D obs should work."""
-        adata_subset = adata_with_dataset2d_obs[0:2]
-        assert isinstance(adata_subset.obs, DataFrameLike)
-        assert adata_subset.obs.shape[0] == 2
-
-    def test_adata_with_dataset2d_obs_index(self, adata_with_dataset2d_obs):
-        """Dataset2D obs should have correct index."""
-        obs = adata_with_dataset2d_obs.obs
-        pd.testing.assert_index_equal(
-            obs.index, pd.Index(["cell1", "cell2", "cell3"]), check_names=False
-        )

From e3491b7c1f8f64a26043496a093c575515e789f5 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Wed, 4 Feb 2026 17:43:46 +0100
Subject: [PATCH 07/11] fix: trim verbose AI-generated protocol docstrings

---
 src/anndata/types.py | 34 +---------------------------------
 1 file changed, 1 insertion(+), 33 deletions(-)

diff --git a/src/anndata/types.py b/src/anndata/types.py
index 49b75ac80..dd678cc5d 100644
--- a/src/anndata/types.py
+++ b/src/anndata/types.py
@@ -30,18 +30,7 @@ def __init__(self, adata: AnnData) -> None:
 class DataFrameLikeIlocIndexer(Protocol):
     """Protocol for iloc-style indexers on DataFrame-like objects.
 
-    This protocol defines the minimal interface for positional-based indexing
-    that AnnData requires. Both :class:`pandas.DataFrame` and
-    :class:`~anndata.experimental.backed.Dataset2D` provide compatible
-    ``iloc`` accessors.
-
-    Examples
-    --------
-    >>> import pandas as pd
-    >>> from anndata._types import DataFrameLikeIlocIndexer
-    >>> df = pd.DataFrame({"a": [1, 2, 3]})
-    >>> isinstance(df.iloc, DataFrameLikeIlocIndexer)
-    True
+    Only requires `__getitem__`.
     """
 
     def __getitem__(self, idx: Any) -> Any: ...
@@ -51,27 +40,6 @@ def __getitem__(self, idx: Any) -> Any: ...
 class DataFrameLike(Protocol):
     """Protocol for DataFrame-like objects usable in AnnData.
 
-    This runtime-checkable protocol defines the minimal DataFrame API that
-    AnnData uses internally for ``obs``, ``var``, and similar dataframe-like
-    data containers. Any class implementing this protocol can be used as a
-    drop-in replacement for :class:`pandas.DataFrame` in these contexts.
-
-    The required interface includes:
-
-    - :attr:`index`: Row labels as a :class:`pandas.Index`
-    - :attr:`columns`: Column labels as a :class:`pandas.Index`
-    - :attr:`shape`: Tuple of (n_rows, n_columns)
-    - :attr:`iloc`: Positional indexer returning a :class:`DataFrameLikeIlocIndexer`
-    - :meth:`reindex`: Method to reindex rows
-
-    Examples
-    --------
-    >>> import pandas as pd
-    >>> from anndata._types import DataFrameLike
-    >>> df = pd.DataFrame({"a": [1, 2, 3]})
-    >>> isinstance(df, DataFrameLike)
-    True
-
     See Also
     --------
     :class:`~anndata.experimental.backed.Dataset2D`

From cb21896323dd85aa2448e99572f5b8ce03272129 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Wed, 4 Feb 2026 17:54:00 +0100
Subject: [PATCH 08/11] fix: make `Dataset2D` public

---
 src/anndata/_core/aligned_df.py      | 14 +-------------
 src/anndata/_core/aligned_mapping.py |  5 +----
 src/anndata/_core/storage.py         |  3 ---
 src/anndata/_core/xarray.py          |  4 +---
 src/anndata/experimental/__init__.py |  3 +++
 5 files changed, 6 insertions(+), 23 deletions(-)

diff --git a/src/anndata/_core/aligned_df.py b/src/anndata/_core/aligned_df.py
index b35706122..4d7789eaa 100644
--- a/src/anndata/_core/aligned_df.py
+++ b/src/anndata/_core/aligned_df.py
@@ -8,7 +8,7 @@
 from pandas.api.types import is_string_dtype
 
 from .._warnings import ImplicitModificationWarning
-from ..compat import XDataset, pandas_as_str
+from ..compat import pandas_as_str
 from ..types import DataFrameLike
 from ..utils import warn
 from .xarray import Dataset2D
@@ -148,15 +148,3 @@ def _gen_dataframe_xr(
     length: int | None = None,
 ):
     return anno
-
-
-@_gen_dataframe.register(XDataset)
-def _gen_dataframe_xdataset(
-    anno: XDataset,
-    index_names: Iterable[str],
-    *,
-    source: Literal["X", "shape"],
-    attr: Literal["obs", "var"],
-    length: int | None = None,
-):
-    return Dataset2D(anno)
diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py
index 70621b433..bfbc2a05b 100644
--- a/src/anndata/_core/aligned_mapping.py
+++ b/src/anndata/_core/aligned_mapping.py
@@ -10,7 +10,7 @@
 import pandas as pd
 
 from .._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning
-from ..compat import AwkArray, CSArray, CSMatrix, CupyArray, XDataset
+from ..compat import AwkArray, CSArray, CSMatrix, CupyArray
 from ..types import DataFrameLike
 from ..utils import (
     axis_len,
@@ -25,7 +25,6 @@
 from .index import _subset
 from .storage import coerce_array
 from .views import as_view, view_update
-from .xarray import Dataset2D
 
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterable, Iterator, Mapping
@@ -75,8 +74,6 @@ def _validate_value(self, val: Value, key: str) -> Value:
             warn_once(msg, ExperimentalFeatureWarning)
         elif isinstance(val, np.ndarray | CupyArray) and len(val.shape) == 1:
             val = val.reshape((val.shape[0], 1))
-        elif isinstance(val, XDataset):
-            val = Dataset2D(val)
         for i, axis in enumerate(self.axes):
             if self.parent.shape[axis] == axis_len(val, i):
                 continue
diff --git a/src/anndata/_core/storage.py b/src/anndata/_core/storage.py
index 05942fdea..4a5503d3f 100644
--- a/src/anndata/_core/storage.py
+++ b/src/anndata/_core/storage.py
@@ -9,7 +9,6 @@
 from anndata.compat import CSArray, CSMatrix
 
 from .._warnings import ImplicitModificationWarning
-from ..compat import XDataset
 from ..types import DataFrameLike
 from ..utils import (
     ensure_df_homogeneous,
@@ -39,8 +38,6 @@ def coerce_array(
         return value
     # If value is one of the allowed types, return it
     array_data_structure_types = get_union_members(_ArrayDataStructureTypes)
-    if isinstance(value, XDataset):
-        value = Dataset2D(value)
     if isinstance(value, (*array_data_structure_types, Dataset2D)):
         if isinstance(value, np.matrix):
             msg = f"{name} should not be a np.matrix, use np.ndarray instead."
diff --git a/src/anndata/_core/xarray.py b/src/anndata/_core/xarray.py
index 0e75d604a..67460bf11 100644
--- a/src/anndata/_core/xarray.py
+++ b/src/anndata/_core/xarray.py
@@ -40,9 +40,7 @@ class Dataset2D(Mapping[Hashable, XDataArray | Self]):
     are respected, namely that there is only one 1d dim and coord with the same name i.e.,
     like a :class:`pandas.DataFrame`.
 
-    You should not have to initiate this class yourself.  Setting an :class:`xarray.Dataset`
-    into a relevant part of the :class:`~anndata.AnnData` object will attempt to wrap that
-    object in this object, trying to enforce the "dataframe-invariants."
+    An :class:`xarray.Dataset` is not wrapped automatically: to use one as :attr:`~anndata.AnnData.obs` or :attr:`~anndata.AnnData.var`, wrap it in this class yourself before setting it.
 
     Because xarray requires :attr:`xarray.Dataset.coords` to be in-memory, this class provides
     handling for an out-of-memory index via :attr:`~anndata.experimental.backed.Dataset2D.true_index`.
diff --git a/src/anndata/experimental/__init__.py b/src/anndata/experimental/__init__.py
index 1271ac9b5..bb0480af8 100644
--- a/src/anndata/experimental/__init__.py
+++ b/src/anndata/experimental/__init__.py
@@ -12,6 +12,7 @@
     Write,
     WriteCallback,
 )
+from ..types import DataFrameLike, DataFrameLikeIlocIndexer
 from ..utils import module_get_attr_redirect
 from ._dispatch_io import read_dispatched, write_dispatched
 from .backed import read_lazy
@@ -53,6 +54,8 @@ def __getattr__(attr_name: str) -> Any:
 __all__ = [
     "AnnCollection",
     "AnnLoader",
+    "DataFrameLike",
+    "DataFrameLikeIlocIndexer",
     "Dataset2DIlocIndexer",
     "IOSpec",
     "Read",

From 44a1fe3b1b3baf40e4a37dfe47942a3bfa0eda08 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Wed, 4 Feb 2026 18:12:47 +0100
Subject: [PATCH 09/11] fix: remove internal uses of `Dataset2D` +
 `pd.DataFrame`

---
 src/anndata/_core/aligned_df.py | 23 ++---------------------
 src/anndata/_core/storage.py    |  3 +--
 src/anndata/_core/views.py      |  6 +++---
 src/anndata/_core/xarray.py     | 18 ++++++++++++------
 src/anndata/tests/helpers.py    | 12 ++++++------
 src/anndata/types.py            |  4 ++++
 6 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/src/anndata/_core/aligned_df.py b/src/anndata/_core/aligned_df.py
index 4d7789eaa..dd6e82bbd 100644
--- a/src/anndata/_core/aligned_df.py
+++ b/src/anndata/_core/aligned_df.py
@@ -11,7 +11,6 @@
 from ..compat import pandas_as_str
 from ..types import DataFrameLike
 from ..utils import warn
-from .xarray import Dataset2D
 
 if TYPE_CHECKING:
     from collections.abc import Iterable
@@ -27,12 +26,6 @@ def _gen_dataframe(
     attr: Literal["obs", "var"],
     length: int | None = None,
 ) -> DataFrameLike:  # pragma: no cover
-    # Check if anno satisfies the DataFrameLike protocol
-    # This allows any DataFrameLike-compliant object to be used as obs/var
-    if isinstance(anno, DataFrameLike):
-        if length is not None and anno.shape[0] != length:
-            raise _mk_df_error(source, attr, length, anno.shape[0])
-        return anno
     msg = f"Cannot convert {type(anno)} to {attr} DataFrame"
     raise ValueError(msg)
 
@@ -76,9 +69,9 @@ def mk_index(l: int) -> pd.Index:
     return df
 
 
-@_gen_dataframe.register(pd.DataFrame)
+@_gen_dataframe.register(DataFrameLike)
 def _gen_dataframe_df(
-    anno: pd.DataFrame,
+    anno: DataFrameLike,
     index_names: Iterable[str],
     *,
     source: Literal["X", "shape"],
@@ -136,15 +129,3 @@ def _mk_df_error(
             f"({actual} {what}s instead of {expected})"
         )
     return ValueError(msg)
-
-
-@_gen_dataframe.register(Dataset2D)
-def _gen_dataframe_xr(
-    anno: Dataset2D,
-    index_names: Iterable[str],
-    *,
-    source: Literal["X", "shape"],
-    attr: Literal["obs", "var"],
-    length: int | None = None,
-):
-    return anno
diff --git a/src/anndata/_core/storage.py b/src/anndata/_core/storage.py
index 4a5503d3f..9e793a4ba 100644
--- a/src/anndata/_core/storage.py
+++ b/src/anndata/_core/storage.py
@@ -17,7 +17,6 @@
     raise_value_error_if_multiindex_columns,
     warn,
 )
-from .xarray import Dataset2D
 
 if TYPE_CHECKING:
     from typing import Any
@@ -38,7 +37,7 @@ def coerce_array(
         return value
     # If value is one of the allowed types, return it
     array_data_structure_types = get_union_members(_ArrayDataStructureTypes)
-    if isinstance(value, (*array_data_structure_types, Dataset2D)):
+    if isinstance(value, (*array_data_structure_types, DataFrameLike)):
         if isinstance(value, np.matrix):
             msg = f"{name} should not be a np.matrix, use np.ndarray instead."
             warn(msg, ImplicitModificationWarning)
diff --git a/src/anndata/_core/views.py b/src/anndata/_core/views.py
index 95054139f..58e0e06a6 100644
--- a/src/anndata/_core/views.py
+++ b/src/anndata/_core/views.py
@@ -11,6 +11,7 @@
 from scipy import sparse
 
 from anndata._warnings import ImplicitModificationWarning
+from anndata.types import DataFrameLike
 
 from .._settings import settings
 from ..compat import (
@@ -23,7 +24,6 @@
 )
 from ..utils import warn
 from .access import ElementRef
-from .xarray import Dataset2D
 
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterable, KeysView, Sequence
@@ -366,8 +366,8 @@ def as_view_cupy_csc(mtx, view_args):
     return CupySparseCSCView(mtx, view_args=view_args)
 
 
-@as_view.register(Dataset2D)
-def _(a: Dataset2D, view_args):
+@as_view.register(DataFrameLike)
+def _(a: DataFrameLike, view_args):
     return a
 
 
diff --git a/src/anndata/_core/xarray.py b/src/anndata/_core/xarray.py
index 67460bf11..0d74e1d05 100644
--- a/src/anndata/_core/xarray.py
+++ b/src/anndata/_core/xarray.py
@@ -1,10 +1,9 @@
 from __future__ import annotations
 
 import warnings
-from collections.abc import Hashable, Mapping
 from dataclasses import dataclass
 from functools import wraps
-from typing import TYPE_CHECKING, Self, overload
+from typing import TYPE_CHECKING, overload
 
 import numpy as np
 import pandas as pd
@@ -14,7 +13,14 @@
 from ..compat import XDataArray, XDataset, XVariable, pandas_as_str
 
 if TYPE_CHECKING:
-    from collections.abc import Callable, Collection, Iterable, Iterator
+    from collections.abc import (
+        Callable,
+        Collection,
+        Hashable,
+        Iterable,
+        Iterator,
+        Mapping,
+    )
     from typing import Any, Literal
 
     from .._types import Dataset2DIlocIndexer
@@ -33,7 +39,7 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
     return wrapper
 
 
-class Dataset2D(Mapping[Hashable, XDataArray | Self]):
+class Dataset2D:
     r"""
     A wrapper class meant to enable working with lazy dataframe data according to
     :class:`~anndata.AnnData`'s internal API.  This class ensures that "dataframe-invariants"
@@ -189,7 +195,7 @@ def shape(self) -> tuple[int, int]:
         -------
         The (2D) shape of the dataframe resolved from :attr:`~xarray.Dataset.sizes`.
         """
-        return (self.ds.sizes[self.index_dim], len(self.ds))
+        return (len(self), len(self.ds))
 
     @property
     def iloc(self) -> Dataset2DIlocIndexer:
@@ -361,7 +367,7 @@ def __iter__(self) -> Iterator[Hashable]:
         return iter(self.ds)
 
     def __len__(self) -> int:
-        return len(self.ds)
+        return self.ds.sizes[self.index_dim]
 
     @property
     def dtypes(self) -> Mapping[Hashable, np.dtype]:
diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py
index 3ff344a16..8079d567f 100644
--- a/src/anndata/tests/helpers.py
+++ b/src/anndata/tests/helpers.py
@@ -337,9 +337,9 @@ def gen_adata(  # noqa: PLR0913
 
     if has_xr := find_spec("xarray"):
         if obs_xdataset:
-            obs = XDataset.from_dataframe(obs)
+            obs = Dataset2D(XDataset.from_dataframe(obs))
         if var_xdataset:
-            var = XDataset.from_dataframe(var)
+            var = Dataset2D(XDataset.from_dataframe(var))
 
     if X_type is None:
         X = None
@@ -361,11 +361,11 @@ def gen_adata(  # noqa: PLR0913
         da=da.random.random((N, 50)),
     )
     if has_xr:
-        obsm["xdataset"] = XDataset.from_dataframe(
-            gen_typed_df(M, obs_names, dtypes=obs_dtypes)
+        obsm["xdataset"] = Dataset2D(
+            XDataset.from_dataframe(gen_typed_df(M, obs_names, dtypes=obs_dtypes))
         )
-        varm["xdataset"] = XDataset.from_dataframe(
-            gen_typed_df(N, var_names, dtypes=var_dtypes)
+        varm["xdataset"] = Dataset2D(
+            XDataset.from_dataframe(gen_typed_df(N, var_names, dtypes=var_dtypes))
         )
     obsm = {k: v for k, v in obsm.items() if type(v) in obsm_types}
     obsm = maybe_add_sparse_array(
diff --git a/src/anndata/types.py b/src/anndata/types.py
index dd678cc5d..a2f6f4953 100644
--- a/src/anndata/types.py
+++ b/src/anndata/types.py
@@ -46,6 +46,10 @@ class DataFrameLike(Protocol):
         An xarray-based implementation of this protocol.
     """
 
+    def __len__(self) -> int:
+        """Number of rows in this object"""
+        ...
+
     @property
     def index(self) -> Index:
         """Row labels of the DataFrame-like object."""

From a69d374aceeb30a580439e4777ee698681b07f54 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Wed, 4 Feb 2026 18:45:38 +0100
Subject: [PATCH 10/11] fix: simplify `DataFrameLike` handling in `coerce_array`

---
 src/anndata/_core/storage.py | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/src/anndata/_core/storage.py b/src/anndata/_core/storage.py
index 9e793a4ba..ac87a6e91 100644
--- a/src/anndata/_core/storage.py
+++ b/src/anndata/_core/storage.py
@@ -3,13 +3,12 @@
 from typing import TYPE_CHECKING
 
 import numpy as np
-import pandas as pd
 from scipy import sparse
 
 from anndata.compat import CSArray, CSMatrix
+from anndata.types import DataFrameLike
 
 from .._warnings import ImplicitModificationWarning
-from ..types import DataFrameLike
 from ..utils import (
     ensure_df_homogeneous,
     get_union_members,
@@ -37,7 +36,7 @@ def coerce_array(
         return value
     # If value is one of the allowed types, return it
     array_data_structure_types = get_union_members(_ArrayDataStructureTypes)
-    if isinstance(value, (*array_data_structure_types, DataFrameLike)):
+    if isinstance(value, array_data_structure_types):
         if isinstance(value, np.matrix):
             msg = f"{name} should not be a np.matrix, use np.ndarray instead."
             warn(msg, ImplicitModificationWarning)
@@ -53,19 +52,10 @@ def coerce_array(
     if any(is_non_csc_r_array_or_matrix):
         msg = f"Only CSR and CSC {'matrices' if isinstance(value, sparse.spmatrix) else 'arrays'} are supported."
         raise ValueError(msg)
-    if isinstance(value, pd.DataFrame):
+    if isinstance(value, DataFrameLike):
         if allow_df:
             raise_value_error_if_multiindex_columns(value, name)
         return value if allow_df else ensure_df_homogeneous(value, name)
-    # Handle other DataFrameLike objects (not pd.DataFrame)
-    if isinstance(value, DataFrameLike):
-        if allow_df:
-            return value
-        # For non-DataFrames, we can't use ensure_df_homogeneous
-        # so we convert to array via iloc
-        msg = f"DataFrameLike object used for {name} will be converted to array."
-        warn(msg, ImplicitModificationWarning)
-        return np.array(value.iloc[:, :])
     # if value is an array-like object, try to convert it
     e = None
     if allow_array_like:

From fc13bf6fa3a49f878eef1ae73effd695c201a5fe Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Wed, 4 Feb 2026 18:58:47 +0100
Subject: [PATCH 11/11] fix: test

---
 tests/test_concatenate.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py
index 62ac5f90f..8110020c6 100644
--- a/tests/test_concatenate.py
+++ b/tests/test_concatenate.py
@@ -269,8 +269,8 @@ def test_concatenate_roundtrip(
     if backwards_compat and use_xdataset:
         import xarray as xr
 
-        result.var = xr.Dataset.from_dataframe(
-            result.var
+        result.var = Dataset2D(
+            xr.Dataset.from_dataframe(result.var)
         )  # backwards compat always returns a dataframe
 
     # Correcting for known differences