From c5bb0e13d4e522d52ea0eed65da10dafd157e5f5 Mon Sep 17 00:00:00 2001 From: Fabian Peddinghaus Date: Sat, 9 May 2026 14:31:14 +0200 Subject: [PATCH] Add 'small' minimalloc subset and switch source to enum selection Replace MinimallocSource(file_path=...) with a MinimallocSubset enum ("examples", "small", "challenging"). Adds the bundled "small" CSVs that previously had no corresponding subset. --- external/minimalloc/README.md | 4 +- external/minimalloc/small/a.6.csv | 6 + external/minimalloc/small/b.6.csv | 6 + external/minimalloc/small/c.8.csv | 6 + external/minimalloc/small/d.9.csv | 7 + external/minimalloc/small/e.12.csv | 6 + external/minimalloc/small/f.14.csv | 7 + external/minimalloc/small/g.7.csv | 7 + src/python/omnimalloc/benchmark/__init__.py | 1 + .../omnimalloc/benchmark/sources/__init__.py | 1 + .../benchmark/sources/minimalloc.py | 48 +++-- .../unit/benchmark/sources/test_minimalloc.py | 178 ++++++++---------- uv.lock | 2 +- 13 files changed, 154 insertions(+), 125 deletions(-) create mode 100644 external/minimalloc/small/a.6.csv create mode 100644 external/minimalloc/small/b.6.csv create mode 100644 external/minimalloc/small/c.8.csv create mode 100644 external/minimalloc/small/d.9.csv create mode 100644 external/minimalloc/small/e.12.csv create mode 100644 external/minimalloc/small/f.14.csv create mode 100644 external/minimalloc/small/g.7.csv diff --git a/external/minimalloc/README.md b/external/minimalloc/README.md index aa123ef..3d479d3 100644 --- a/external/minimalloc/README.md +++ b/external/minimalloc/README.md @@ -1 +1,3 @@ -https://github.com/google/minimalloc +# MiniMalloc Problems + +Contains problems from minimalloc (see https://github.com/google/minimalloc) as well as custom ones using the same format. 
diff --git a/external/minimalloc/small/a.6.csv b/external/minimalloc/small/a.6.csv new file mode 100644 index 0000000..635f96b --- /dev/null +++ b/external/minimalloc/small/a.6.csv @@ -0,0 +1,6 @@ +id,lower,upper,size +0,0,3,2 +1,2,5,2 +2,0,2,3 +3,3,5,3 +4,1,4,1 diff --git a/external/minimalloc/small/b.6.csv b/external/minimalloc/small/b.6.csv new file mode 100644 index 0000000..b73b535 --- /dev/null +++ b/external/minimalloc/small/b.6.csv @@ -0,0 +1,6 @@ +id,lower,upper,size +0,0,4,2 +1,2,6,2 +2,0,2,3 +3,4,6,3 +4,1,5,1 diff --git a/external/minimalloc/small/c.8.csv b/external/minimalloc/small/c.8.csv new file mode 100644 index 0000000..5f55252 --- /dev/null +++ b/external/minimalloc/small/c.8.csv @@ -0,0 +1,6 @@ +id,lower,upper,size +0,0,5,3 +1,3,8,3 +2,0,3,4 +3,5,8,4 +4,2,6,1 diff --git a/external/minimalloc/small/d.9.csv b/external/minimalloc/small/d.9.csv new file mode 100644 index 0000000..9a69403 --- /dev/null +++ b/external/minimalloc/small/d.9.csv @@ -0,0 +1,7 @@ +id,lower,upper,size +0,0,5,3 +1,3,8,3 +2,0,3,4 +3,5,8,4 +4,2,6,1 +5,1,7,1 diff --git a/external/minimalloc/small/e.12.csv b/external/minimalloc/small/e.12.csv new file mode 100644 index 0000000..bf55ff0 --- /dev/null +++ b/external/minimalloc/small/e.12.csv @@ -0,0 +1,6 @@ +id,lower,upper,size +0,10,11,5 +1,3,5,4 +2,7,12,4 +3,5,11,3 +4,1,8,4 diff --git a/external/minimalloc/small/f.14.csv b/external/minimalloc/small/f.14.csv new file mode 100644 index 0000000..42805ed --- /dev/null +++ b/external/minimalloc/small/f.14.csv @@ -0,0 +1,7 @@ +id,lower,upper,size +0,0,5,4 +1,0,6,3 +2,1,6,2 +3,5,8,5 +4,2,8,4 +5,1,2,5 diff --git a/external/minimalloc/small/g.7.csv b/external/minimalloc/small/g.7.csv new file mode 100644 index 0000000..7364789 --- /dev/null +++ b/external/minimalloc/small/g.7.csv @@ -0,0 +1,7 @@ +id,lower,upper,size +0,10,11,3 +1,1,6,2 +2,1,3,5 +3,5,12,2 +4,10,12,1 +5,7,9,5 diff --git a/src/python/omnimalloc/benchmark/__init__.py b/src/python/omnimalloc/benchmark/__init__.py index 
4c25ef9..2afc7e4 100644 --- a/src/python/omnimalloc/benchmark/__init__.py +++ b/src/python/omnimalloc/benchmark/__init__.py @@ -13,6 +13,7 @@ from .sources import HighContentionSource as HighContentionSource from .sources import HuggingfaceSource as HuggingfaceSource from .sources import MinimallocSource as MinimallocSource +from .sources import MinimallocSubset as MinimallocSubset from .sources import PowerOf2Source as PowerOf2Source from .sources import RandomSource as RandomSource from .sources import SequentialSource as SequentialSource diff --git a/src/python/omnimalloc/benchmark/sources/__init__.py b/src/python/omnimalloc/benchmark/sources/__init__.py index f7ca90e..f787fa6 100644 --- a/src/python/omnimalloc/benchmark/sources/__init__.py +++ b/src/python/omnimalloc/benchmark/sources/__init__.py @@ -10,6 +10,7 @@ from .generator import UniformSource as UniformSource from .huggingface import HuggingfaceSource as HuggingfaceSource from .minimalloc import MinimallocSource as MinimallocSource +from .minimalloc import MinimallocSubset as MinimallocSubset from .utils import AVAILABLE_SOURCES as AVAILABLE_SOURCES from .utils import DEFAULT_SOURCE as DEFAULT_SOURCE from .utils import get_available_sources as get_available_sources diff --git a/src/python/omnimalloc/benchmark/sources/minimalloc.py b/src/python/omnimalloc/benchmark/sources/minimalloc.py index b605e1c..10f907d 100644 --- a/src/python/omnimalloc/benchmark/sources/minimalloc.py +++ b/src/python/omnimalloc/benchmark/sources/minimalloc.py @@ -5,6 +5,7 @@ import csv import logging from dataclasses import dataclass +from enum import Enum from pathlib import Path from omnimalloc.common.directories import EXTERNAL_DIR @@ -15,6 +16,14 @@ logger = logging.getLogger(__name__) +class MinimallocSubset(str, Enum): + """Bundled CSV subsets shipped under ``external/minimalloc/``.""" + + EXAMPLES = "examples" + SMALL = "small" + CHALLENGING = "challenging" + + @dataclass(frozen=True) class _MinimallocBuffer: id: IdType @@ 
-70,29 +79,20 @@ def _from_minimalloc_csv(file_path: str | Path) -> Pool: return pool -def _get_minimalloc_pools() -> list[Pool]: - csv_dir = EXTERNAL_DIR / "minimalloc" / "challenging" - csv_files = list(csv_dir.glob("*.csv")) - pools = [_from_minimalloc_csv(file) for file in csv_files] - return pools - - class MinimallocSource(BaseSource): - """Load allocations from Minimalloc CSV format. + """Load allocations from a bundled Minimalloc CSV subset. - This is a fixed source with predetermined pools from Minimalloc benchmarks. - Can be initialized with either a specific CSV file or a directory of CSVs. + This is a fixed source with predetermined pools from the Minimalloc + benchmarks. Pick a bundled ``subset`` to select which pools to load. """ - def __init__(self, file_path: str | Path | None = None) -> None: - self.file_path = Path(file_path) if file_path is not None else None + def __init__( + self, + subset: MinimallocSubset | str = MinimallocSubset.CHALLENGING, + ) -> None: + self.subset = MinimallocSubset(subset) self._cached_pools: list[Pool] | None = None - # Validate path exists if provided - if self.file_path is not None and not self.file_path.exists(): - msg = f"Path does not exist: {self.file_path}" - raise FileNotFoundError(msg) - # Load pools to get actual num_allocations pools = self._pools num_allocs = sum(len(p.allocations) for p in pools) if pools else 1 @@ -103,16 +103,10 @@ def __init__(self, file_path: str | Path | None = None) -> None: @property def _pools(self) -> list[Pool]: if self._cached_pools is None: - if self.file_path is None: - self._cached_pools = _get_minimalloc_pools() - elif self.file_path.is_file(): - self._cached_pools = [_from_minimalloc_csv(self.file_path)] - elif self.file_path.is_dir(): - csv_files = list(self.file_path.glob("*.csv")) - self._cached_pools = [_from_minimalloc_csv(f) for f in csv_files] - else: - msg = f"Path does not exist: {self.file_path}" - raise FileNotFoundError(msg) + csv_dir = EXTERNAL_DIR / 
"minimalloc" / self.subset.value + self._cached_pools = [ + _from_minimalloc_csv(f) for f in csv_dir.glob("*.csv") + ] return self._cached_pools def _all_allocations(self) -> tuple[Allocation, ...]: diff --git a/tests/unit/benchmark/sources/test_minimalloc.py b/tests/unit/benchmark/sources/test_minimalloc.py index 2e291b6..dc5ffc9 100644 --- a/tests/unit/benchmark/sources/test_minimalloc.py +++ b/tests/unit/benchmark/sources/test_minimalloc.py @@ -2,120 +2,106 @@ # SPDX-License-Identifier: Apache-2.0 # -import tempfile -from pathlib import Path - import pytest -from omnimalloc.benchmark.sources.minimalloc import MinimallocSource +from omnimalloc.benchmark.sources.minimalloc import MinimallocSource, MinimallocSubset from omnimalloc.primitives import BufferKind -@pytest.fixture -def sample_csv_path() -> Path: - """Create a temporary CSV file with sample minimalloc data.""" - content = """id,lower,upper,size -0,0,3,4 -1,3,9,4 -2,0,9,8 -3,9,21,4 -4,0,21,16 -""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: - f.write(content) - return Path(f.name) - - -def test_minimalloc_source_basic_creation(sample_csv_path: Path) -> None: - source = MinimallocSource(sample_csv_path) - assert source.file_path == sample_csv_path - assert source.num_allocations == 5 - - -def test_minimalloc_source_get_allocations(sample_csv_path: Path) -> None: - source = MinimallocSource(sample_csv_path) - allocations = source.get_allocations() - assert len(allocations) == 5 - assert allocations[0].id == "0" - assert allocations[0].start == 0 - assert allocations[0].end == 3 - assert allocations[0].size == 4 - assert allocations[0].kind == BufferKind.WORKSPACE - - -def test_minimalloc_source_get_allocations_with_count(sample_csv_path: Path) -> None: - source = MinimallocSource(sample_csv_path) - allocations = source.get_allocations(num_allocations=3) - assert len(allocations) == 3 - assert allocations[0].id == "0" - assert allocations[2].id == "2" - - -def 
test_minimalloc_source_get_allocations_with_skip(sample_csv_path: Path) -> None: - source = MinimallocSource(sample_csv_path) - allocations = source.get_allocations(skip=2) - assert len(allocations) == 3 - assert allocations[0].id == "2" - assert allocations[2].id == "4" - - -def test_minimalloc_source_get_allocations_skip_and_count( - sample_csv_path: Path, -) -> None: - source = MinimallocSource(sample_csv_path) - allocations = source.get_allocations(num_allocations=2, skip=1) - assert len(allocations) == 2 - assert allocations[0].id == "1" - assert allocations[1].id == "2" - - -def test_minimalloc_source_get_allocations_skip_past_end(sample_csv_path: Path) -> None: - source = MinimallocSource(sample_csv_path) - allocations = source.get_allocations(skip=10) - assert len(allocations) == 0 +def test_minimalloc_source_default_subset_is_challenging() -> None: + source = MinimallocSource() + assert source.subset is MinimallocSubset.CHALLENGING + assert source.num_allocations > 0 + + +def test_minimalloc_source_accepts_enum_member() -> None: + source = MinimallocSource(MinimallocSubset.SMALL) + assert source.subset is MinimallocSubset.SMALL + + +def test_minimalloc_source_accepts_string_alias() -> None: + """Raw strings are coerced to the matching enum member.""" + source = MinimallocSource("small") + assert source.subset is MinimallocSubset.SMALL + assert source.subset == "small" + + +def test_minimalloc_source_examples_subset() -> None: + source = MinimallocSource(subset="examples") + assert source.subset == "examples" + variants = source.get_available_variants() + assert len(variants) == 1 # Only one example pool + + +def test_minimalloc_source_small_subset() -> None: + source = MinimallocSource(subset="small") + assert source.subset == "small" + variants = source.get_available_variants() + assert len(variants) > 0 + assert all(v[0].islower() for v in variants) -def test_minimalloc_source_get_pools(sample_csv_path: Path) -> None: - source = 
MinimallocSource(sample_csv_path) - pools = source.get_pools() - assert len(pools) == 1 - assert len(pools[0].allocations) == 5 - assert pools[0].id == sample_csv_path.stem +def test_minimalloc_source_challenging_subset() -> None: + source = MinimallocSource(subset="challenging") + variants = source.get_available_variants() + assert len(variants) > 0 -def test_minimalloc_source_get_pools_with_skip(sample_csv_path: Path) -> None: - source = MinimallocSource(sample_csv_path) - pools = source.get_pools(skip=1) +def test_minimalloc_source_subsets_are_disjoint() -> None: + examples = set(MinimallocSource(subset="examples").get_available_variants()) + small = set(MinimallocSource(subset="small").get_available_variants()) + challenging = set(MinimallocSource(subset="challenging").get_available_variants()) + assert examples + assert small + assert challenging + assert examples.isdisjoint(small) + assert examples.isdisjoint(challenging) + assert small.isdisjoint(challenging) + + +def test_minimalloc_source_invalid_subset() -> None: + with pytest.raises(ValueError, match="not a valid MinimallocSubset"): + MinimallocSource(subset="bogus") # type: ignore[arg-type] + + +def test_minimalloc_source_get_allocations_skip_past_end() -> None: + source = MinimallocSource(subset="examples") + allocations = source.get_allocations(skip=10**9) + assert len(allocations) == 0 + + +def test_minimalloc_source_get_pools_with_skip_past_end() -> None: + source = MinimallocSource(subset="examples") + pools = source.get_pools(skip=10) assert len(pools) == 0 -def test_minimalloc_source_get_pools_count_zero(sample_csv_path: Path) -> None: - source = MinimallocSource(sample_csv_path) +def test_minimalloc_source_get_pools_count_zero() -> None: + source = MinimallocSource(subset="examples") pools = source.get_pools(num_pools=0) assert len(pools) == 0 -def test_minimalloc_source_get_pool(sample_csv_path: Path) -> None: - source = MinimallocSource(sample_csv_path) - pool = source.get_pool() - assert 
len(pool.allocations) == 5 +def test_minimalloc_source_get_allocation_workspace_kind() -> None: + """All loaded allocations are tagged as WORKSPACE buffers.""" + source = MinimallocSource(subset="examples") + allocation = source.get_allocation() + assert allocation.kind == BufferKind.WORKSPACE -def test_minimalloc_source_get_allocation(sample_csv_path: Path) -> None: - source = MinimallocSource(sample_csv_path) - allocation = source.get_allocation() - assert allocation.id == "0" - assert allocation.size == 4 +def test_minimalloc_source_get_variant_by_id() -> None: + source = MinimallocSource(subset="small") + variants = source.get_available_variants() + pool = source.get_variant(variants[0]) + assert pool.id == variants[0] -def test_minimalloc_source_file_not_found() -> None: - """Test that appropriate error is raised for missing file.""" - with pytest.raises(FileNotFoundError): - MinimallocSource("/nonexistent/path.csv") +def test_minimalloc_source_get_variant_by_index() -> None: + source = MinimallocSource(subset="small") + pool = source.get_variant(0) + assert pool.id in source.get_available_variants() -def test_minimalloc_source_str_path(sample_csv_path: Path) -> None: - """Test that string paths are accepted.""" - source = MinimallocSource(str(sample_csv_path)) - assert source.file_path == sample_csv_path - allocations = source.get_allocations() - assert len(allocations) == 5 +def test_minimalloc_source_get_variant_unknown_id() -> None: + source = MinimallocSource(subset="examples") + with pytest.raises(ValueError, match="not found"): + source.get_variant("does-not-exist") diff --git a/uv.lock b/uv.lock index 3ed3c5a..fae105d 100644 --- a/uv.lock +++ b/uv.lock @@ -1551,7 +1551,7 @@ wheels = [ [[package]] name = "omnimalloc" -version = "0.2.0" +version = "0.3.0" source = { editable = "." } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },