From ce88b8618b742479149d3d2b60dfc720f44485a6 Mon Sep 17 00:00:00 2001 From: Ali Ramlaoui Date: Sat, 9 May 2026 16:21:42 +0200 Subject: [PATCH 1/2] feat: support negative database indices --- atompack-py/python/atompack/__init__.pyi | 6 +-- atompack-py/python/atompack/_atompack_rs.pyi | 4 +- atompack-py/src/database.rs | 45 ++++++++++++++------ atompack-py/tests/test_database.py | 32 +++++++++++--- 4 files changed, 63 insertions(+), 24 deletions(-) diff --git a/atompack-py/python/atompack/__init__.pyi b/atompack-py/python/atompack/__init__.pyi index a28c97e..1a62047 100644 --- a/atompack-py/python/atompack/__init__.pyi +++ b/atompack-py/python/atompack/__init__.pyi @@ -538,7 +538,7 @@ class Database: Parameters ---------- index : int - Molecule index (0-based) + Molecule index (0-based). Negative indices are supported. Returns ------- @@ -553,7 +553,7 @@ class Database: Parameters ---------- indices : list of int - Molecule indices (0-based) + Molecule indices (0-based). Negative indices are supported. Returns ------- @@ -612,7 +612,7 @@ class Database: Parameters ---------- index : int - Molecule index (0-based) + Molecule index (0-based). Negative indices are supported. Returns ------- diff --git a/atompack-py/python/atompack/_atompack_rs.pyi b/atompack-py/python/atompack/_atompack_rs.pyi index 99e8730..3b4c284 100644 --- a/atompack-py/python/atompack/_atompack_rs.pyi +++ b/atompack-py/python/atompack/_atompack_rs.pyi @@ -475,7 +475,7 @@ class PyAtomDatabase: Parameters ---------- index : int - Molecule index (0-based) + Molecule index (0-based). Negative indices are supported. Returns ------- @@ -491,7 +491,7 @@ class PyAtomDatabase: Parameters ---------- indices : sequence of int - Molecule indices (0-based) + Molecule indices (0-based). Negative indices are supported. Returns ------- diff --git a/atompack-py/src/database.rs b/atompack-py/src/database.rs index c890ae8..ad9dab4 100644 --- a/atompack-py/src/database.rs +++ b/atompack-py/src/database.rs @@ -39,6 +39,31 @@ impl PyAtomDatabase { SoaMoleculeView::from_owned_bytes(decompressed, ctx) } + fn normalize_index(&self, index: isize) -> PyResult { + let len = self.inner.len(); + let normalized = if index < 0 { + (len as isize) + .checked_add(index) + .ok_or_else(|| PyIndexError::new_err("index underflow"))? + } else { + index + }; + if normalized < 0 || normalized >= len as isize { + return Err(PyIndexError::new_err(format!( + "Index {} out of bounds for database of length {}", + index, len + ))); + } + Ok(normalized as usize) + } + + fn normalize_indices(&self, indices: Vec) -> PyResult> { + indices + .into_iter() + .map(|index| self.normalize_index(index)) + .collect() + } + fn single_molecule_view(&self, py: Python<'_>, index: usize) -> PyResult { let compression = self.inner.compression(); let ctx = self.soa_context()?; @@ -273,15 +298,9 @@ impl PyAtomDatabase { } /// Get a molecule by index as a lazy view-backed molecule. - fn get_molecule(&self, py: Python<'_>, index: usize) -> PyResult { - let len = self.inner.len(); - if index >= len { - return Err(PyIndexError::new_err(format!( - "Index {} out of bounds for database of length {}", - index, len - ))); - } - Ok(PyMolecule::from_view(self.single_molecule_view(py, index)?)) + fn get_molecule(&self, py: Python<'_>, index: isize) -> PyResult { + let normalized = self.normalize_index(index)?; + Ok(PyMolecule::from_view(self.single_molecule_view(py, normalized)?)) } /// Get multiple molecules by indices (parallel batch reading) @@ -294,10 +313,11 @@ impl PyAtomDatabase { /// /// Returns: /// - List of molecules - fn get_molecules(&self, py: Python<'_>, indices: Vec) -> PyResult> { + fn get_molecules(&self, py: Python<'_>, indices: Vec) -> PyResult> { if indices.is_empty() { return Ok(Vec::new()); } + let indices = self.normalize_indices(indices)?; let views = self.molecule_views(py, indices)?; Ok(views.into_iter().map(PyMolecule::from_view).collect()) } @@ -327,8 +347,9 @@ impl PyAtomDatabase { fn get_molecules_flat<'py>( &self, py: Python<'py>, - indices: Vec, + indices: Vec, ) -> PyResult> { + let indices = self.normalize_indices(indices)?; flat::get_molecules_flat_soa_impl(&self.inner, py, indices) } @@ -338,7 +359,7 @@ impl PyAtomDatabase { } /// Enable indexing: db[i] - fn __getitem__(&self, py: Python<'_>, index: usize) -> PyResult { + fn __getitem__(&self, py: Python<'_>, index: isize) -> PyResult { self.get_molecule(py, index) } diff --git a/atompack-py/tests/test_database.py b/atompack-py/tests/test_database.py index 4d83dfd..6dc7af2 100644 --- a/atompack-py/tests/test_database.py +++ b/atompack-py/tests/test_database.py @@ -324,6 +324,28 @@ def test_database_add_arrays_batch_promotes_to_float64_geometry_when_needed( assert flat["forces"].dtype == np.float64 +def test_database_negative_indices_work_across_read_apis(tmp_path: Path) -> None: + path = tmp_path / "negative_indices.atp" + db = atompack.Database(str(path)) + db.add_molecules([_make_molecule(-1.0), _make_molecule(-2.0), _make_molecule(-3.0)]) + db.flush() + + reopened = atompack.Database.open(str(path)) + + assert reopened[-1].energy == pytest.approx(-3.0) + assert reopened.get_molecule(-2).energy == pytest.approx(-2.0) + assert [m.energy for m in reopened.get_molecules([-1, 0, -3])] == pytest.approx( + [-3.0, -1.0, -1.0] + ) + np.testing.assert_allclose( + reopened.get_molecules_flat([-1, -2])["energy"], + np.array([-3.0, -2.0], dtype=np.float64), + ) + + with pytest.raises(IndexError, match="out of bounds"): + reopened.get_molecule(-4) + + @pytest.mark.parametrize("mmap", [False, True]) @pytest.mark.parametrize("compression", ["none", "lz4", "zstd"]) def test_database_single_item_reads_are_view_compatible( @@ -735,19 +757,15 @@ def test_database_open_mmap_populate(tmp_path: Path) -> None: assert db_r[0].energy == pytest.approx(-3.0) -def test_database_negative_indexing_raises_overflow_error(tmp_path: Path) -> None: - # Database does not support negative indexing today. PyO3 extracts the - # index argument as `usize`, so a negative integer raises OverflowError - # at the FFI boundary. If wraparound semantics are ever added, this - # test will fail loudly so the intent is explicit. +def test_database_negative_indexing_out_of_bounds_raises_index_error(tmp_path: Path) -> None: path = tmp_path / "negidx.atp" db = atompack.Database(str(path)) db.add_molecule(_make_molecule(-1.0)) db.flush() db_r = atompack.Database.open(str(path)) - with pytest.raises(OverflowError, match=r"negative"): - _ = db_r[-1] + with pytest.raises(IndexError, match=r"out of bounds"): + _ = db_r[-2] def test_database_empty_molecule_roundtrip(tmp_path: Path) -> None: From 0dad665fa8ecaf7a56977da3807ffb52b052b29e Mon Sep 17 00:00:00 2001 From: Ali Ramlaoui Date: Sun, 10 May 2026 15:26:38 +0200 Subject: [PATCH 2/2] fix: format negative indexing rust bindings --- atompack-py/src/database.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/atompack-py/src/database.rs b/atompack-py/src/database.rs index ad9dab4..6064670 100644 --- a/atompack-py/src/database.rs +++ b/atompack-py/src/database.rs @@ -300,7 +300,9 @@ impl PyAtomDatabase { /// Get a molecule by index as a lazy view-backed molecule. fn get_molecule(&self, py: Python<'_>, index: isize) -> PyResult { let normalized = self.normalize_index(index)?; - Ok(PyMolecule::from_view(self.single_molecule_view(py, normalized)?)) + Ok(PyMolecule::from_view( + self.single_molecule_view(py, normalized)?, + )) } /// Get multiple molecules by indices (parallel batch reading)