Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion external/minimalloc/README.md
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
https://github.com/google/minimalloc
# MiniMalloc Problems

This directory contains benchmark problems from MiniMalloc (see <https://github.com/google/minimalloc>) as well as custom problems that use the same CSV format.
6 changes: 6 additions & 0 deletions external/minimalloc/small/a.6.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
id,lower,upper,size
0,0,3,2
1,2,5,2
2,0,2,3
3,3,5,3
4,1,4,1
6 changes: 6 additions & 0 deletions external/minimalloc/small/b.6.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
id,lower,upper,size
0,0,4,2
1,2,6,2
2,0,2,3
3,4,6,3
4,1,5,1
6 changes: 6 additions & 0 deletions external/minimalloc/small/c.8.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
id,lower,upper,size
0,0,5,3
1,3,8,3
2,0,3,4
3,5,8,4
4,2,6,1
7 changes: 7 additions & 0 deletions external/minimalloc/small/d.9.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
id,lower,upper,size
0,0,5,3
1,3,8,3
2,0,3,4
3,5,8,4
4,2,6,1
5,1,7,1
6 changes: 6 additions & 0 deletions external/minimalloc/small/e.12.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
id,lower,upper,size
0,10,11,5
1,3,5,4
2,7,12,4
3,5,11,3
4,1,8,4
7 changes: 7 additions & 0 deletions external/minimalloc/small/f.14.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
id,lower,upper,size
0,0,5,4
1,0,6,3
2,1,6,2
3,5,8,5
4,2,8,4
5,1,2,5
7 changes: 7 additions & 0 deletions external/minimalloc/small/g.7.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
id,lower,upper,size
0,10,11,3
1,1,6,2
2,1,3,5
3,5,12,2
4,10,12,1
5,7,9,5
1 change: 1 addition & 0 deletions src/python/omnimalloc/benchmark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .sources import HighContentionSource as HighContentionSource
from .sources import HuggingfaceSource as HuggingfaceSource
from .sources import MinimallocSource as MinimallocSource
from .sources import MinimallocSubset as MinimallocSubset
from .sources import PowerOf2Source as PowerOf2Source
from .sources import RandomSource as RandomSource
from .sources import SequentialSource as SequentialSource
Expand Down
1 change: 1 addition & 0 deletions src/python/omnimalloc/benchmark/sources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .generator import UniformSource as UniformSource
from .huggingface import HuggingfaceSource as HuggingfaceSource
from .minimalloc import MinimallocSource as MinimallocSource
from .minimalloc import MinimallocSubset as MinimallocSubset
from .utils import AVAILABLE_SOURCES as AVAILABLE_SOURCES
from .utils import DEFAULT_SOURCE as DEFAULT_SOURCE
from .utils import get_available_sources as get_available_sources
Expand Down
48 changes: 21 additions & 27 deletions src/python/omnimalloc/benchmark/sources/minimalloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import csv
import logging
from dataclasses import dataclass
from enum import Enum
from pathlib import Path

from omnimalloc.common.directories import EXTERNAL_DIR
Expand All @@ -15,6 +16,14 @@
logger = logging.getLogger(__name__)


class MinimallocSubset(str, Enum):
    """Bundled CSV subsets shipped under ``external/minimalloc/<value>``.

    Each member's value is the name of the sub-directory that holds that
    subset's ``*.csv`` files. Because the enum also derives from ``str``,
    a member compares equal to its raw string value, and the matching raw
    string can be coerced back with ``MinimallocSubset("small")``.
    """

    EXAMPLES = "examples"
    SMALL = "small"
    CHALLENGING = "challenging"


@dataclass(frozen=True)
class _MinimallocBuffer:
id: IdType
Expand Down Expand Up @@ -70,29 +79,20 @@ def _from_minimalloc_csv(file_path: str | Path) -> Pool:
return pool


def _get_minimalloc_pools() -> list[Pool]:
    """Load every bundled "challenging" Minimalloc CSV file as a pool.

    Returns:
        One pool per ``*.csv`` file found directly inside
        ``EXTERNAL_DIR / "minimalloc" / "challenging"``, ordered by file
        path. The list is empty when no CSV file matches.
    """
    csv_dir = EXTERNAL_DIR / "minimalloc" / "challenging"
    # Path.glob yields entries in arbitrary, filesystem-dependent order.
    # Sorting keeps the pool order reproducible across machines and runs, so
    # index- or skip-based selection always picks the same pools.
    csv_files = sorted(csv_dir.glob("*.csv"))
    return [_from_minimalloc_csv(csv_file) for csv_file in csv_files]


class MinimallocSource(BaseSource):
"""Load allocations from Minimalloc CSV format.
"""Load allocations from a bundled Minimalloc CSV subset.

This is a fixed source with predetermined pools from Minimalloc benchmarks.
Can be initialized with either a specific CSV file or a directory of CSVs.
This is a fixed source with predetermined pools from the Minimalloc
benchmarks. Pick a bundled ``subset`` to select which pools to load.
"""

def __init__(self, file_path: str | Path | None = None) -> None:
self.file_path = Path(file_path) if file_path is not None else None
def __init__(
self,
subset: MinimallocSubset | str = MinimallocSubset.CHALLENGING,
) -> None:
self.subset = MinimallocSubset(subset)
self._cached_pools: list[Pool] | None = None

# Validate path exists if provided
if self.file_path is not None and not self.file_path.exists():
msg = f"Path does not exist: {self.file_path}"
raise FileNotFoundError(msg)

# Load pools to get actual num_allocations
pools = self._pools
num_allocs = sum(len(p.allocations) for p in pools) if pools else 1
Expand All @@ -103,16 +103,10 @@ def __init__(self, file_path: str | Path | None = None) -> None:
@property
def _pools(self) -> list[Pool]:
if self._cached_pools is None:
if self.file_path is None:
self._cached_pools = _get_minimalloc_pools()
elif self.file_path.is_file():
self._cached_pools = [_from_minimalloc_csv(self.file_path)]
elif self.file_path.is_dir():
csv_files = list(self.file_path.glob("*.csv"))
self._cached_pools = [_from_minimalloc_csv(f) for f in csv_files]
else:
msg = f"Path does not exist: {self.file_path}"
raise FileNotFoundError(msg)
csv_dir = EXTERNAL_DIR / "minimalloc" / self.subset.value
self._cached_pools = [
_from_minimalloc_csv(f) for f in csv_dir.glob("*.csv")
]
return self._cached_pools

def _all_allocations(self) -> tuple[Allocation, ...]:
Expand Down
178 changes: 82 additions & 96 deletions tests/unit/benchmark/sources/test_minimalloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,120 +2,106 @@
# SPDX-License-Identifier: Apache-2.0
#

import tempfile
from pathlib import Path

import pytest
from omnimalloc.benchmark.sources.minimalloc import MinimallocSource
from omnimalloc.benchmark.sources.minimalloc import MinimallocSource, MinimallocSubset
from omnimalloc.primitives import BufferKind


@pytest.fixture
def sample_csv_path(tmp_path: Path) -> Path:
    """Create a temporary CSV file with sample minimalloc data.

    The file is written into pytest's per-test ``tmp_path`` directory, so
    its lifetime is managed by pytest. The previous
    ``NamedTemporaryFile(delete=False)`` approach left one orphaned file in
    the system temp directory for every test that used this fixture.

    Returns:
        Path to a CSV file holding a header row and five buffer rows.
    """
    content = """id,lower,upper,size
0,0,3,4
1,3,9,4
2,0,9,8
3,9,21,4
4,0,21,16
"""
    csv_path = tmp_path / "sample.csv"
    csv_path.write_text(content, encoding="utf-8")
    return csv_path


def test_minimalloc_source_basic_creation(sample_csv_path: Path) -> None:
source = MinimallocSource(sample_csv_path)
assert source.file_path == sample_csv_path
assert source.num_allocations == 5


def test_minimalloc_source_get_allocations(sample_csv_path: Path) -> None:
source = MinimallocSource(sample_csv_path)
allocations = source.get_allocations()
assert len(allocations) == 5
assert allocations[0].id == "0"
assert allocations[0].start == 0
assert allocations[0].end == 3
assert allocations[0].size == 4
assert allocations[0].kind == BufferKind.WORKSPACE


def test_minimalloc_source_get_allocations_with_count(sample_csv_path: Path) -> None:
source = MinimallocSource(sample_csv_path)
allocations = source.get_allocations(num_allocations=3)
assert len(allocations) == 3
assert allocations[0].id == "0"
assert allocations[2].id == "2"


def test_minimalloc_source_get_allocations_with_skip(sample_csv_path: Path) -> None:
source = MinimallocSource(sample_csv_path)
allocations = source.get_allocations(skip=2)
assert len(allocations) == 3
assert allocations[0].id == "2"
assert allocations[2].id == "4"


def test_minimalloc_source_get_allocations_skip_and_count(
sample_csv_path: Path,
) -> None:
source = MinimallocSource(sample_csv_path)
allocations = source.get_allocations(num_allocations=2, skip=1)
assert len(allocations) == 2
assert allocations[0].id == "1"
assert allocations[1].id == "2"


def test_minimalloc_source_get_allocations_skip_past_end(sample_csv_path: Path) -> None:
source = MinimallocSource(sample_csv_path)
allocations = source.get_allocations(skip=10)
assert len(allocations) == 0
def test_minimalloc_source_default_subset_is_challenging() -> None:
source = MinimallocSource()
assert source.subset is MinimallocSubset.CHALLENGING
assert source.num_allocations > 0


def test_minimalloc_source_accepts_enum_member() -> None:
source = MinimallocSource(MinimallocSubset.SMALL)
assert source.subset is MinimallocSubset.SMALL


def test_minimalloc_source_accepts_string_alias() -> None:
"""Raw strings are coerced to the matching enum member."""
source = MinimallocSource("small")
assert source.subset is MinimallocSubset.SMALL
assert source.subset == "small"


def test_minimalloc_source_examples_subset() -> None:
source = MinimallocSource(subset="examples")
assert source.subset == "examples"
variants = source.get_available_variants()
assert len(variants) == 1 # Only one example pool


def test_minimalloc_source_small_subset() -> None:
source = MinimallocSource(subset="small")
assert source.subset == "small"
variants = source.get_available_variants()
assert len(variants) > 0
assert all(v[0].islower() for v in variants)


def test_minimalloc_source_get_pools(sample_csv_path: Path) -> None:
source = MinimallocSource(sample_csv_path)
pools = source.get_pools()
assert len(pools) == 1
assert len(pools[0].allocations) == 5
assert pools[0].id == sample_csv_path.stem
def test_minimalloc_source_challenging_subset() -> None:
source = MinimallocSource(subset="challenging")
variants = source.get_available_variants()
assert len(variants) > 0


def test_minimalloc_source_get_pools_with_skip(sample_csv_path: Path) -> None:
source = MinimallocSource(sample_csv_path)
pools = source.get_pools(skip=1)
def test_minimalloc_source_subsets_are_disjoint() -> None:
    """Every bundled subset is non-empty and shares no variant with another."""
    # Load all three subsets up front, in a fixed order, before asserting.
    variants_by_subset = {
        name: set(MinimallocSource(subset=name).get_available_variants())
        for name in ("examples", "small", "challenging")
    }

    for variants in variants_by_subset.values():
        assert variants

    # Compare each unordered pair of subsets exactly once.
    names = list(variants_by_subset)
    for position, first in enumerate(names):
        for second in names[position + 1 :]:
            assert variants_by_subset[first].isdisjoint(variants_by_subset[second])


def test_minimalloc_source_invalid_subset() -> None:
    """An unknown subset name is rejected when coerced to the enum."""
    # The constructor annotates ``subset`` as ``MinimallocSubset | str``, so a
    # plain string literal type-checks and no type-ignore marker is needed.
    with pytest.raises(ValueError, match="not a valid MinimallocSubset"):
        MinimallocSource(subset="bogus")


def test_minimalloc_source_get_allocations_skip_past_end() -> None:
source = MinimallocSource(subset="examples")
allocations = source.get_allocations(skip=10**9)
assert len(allocations) == 0


def test_minimalloc_source_get_pools_with_skip_past_end() -> None:
source = MinimallocSource(subset="examples")
pools = source.get_pools(skip=10)
assert len(pools) == 0


def test_minimalloc_source_get_pools_count_zero(sample_csv_path: Path) -> None:
source = MinimallocSource(sample_csv_path)
def test_minimalloc_source_get_pools_count_zero() -> None:
source = MinimallocSource(subset="examples")
pools = source.get_pools(num_pools=0)
assert len(pools) == 0


def test_minimalloc_source_get_pool(sample_csv_path: Path) -> None:
source = MinimallocSource(sample_csv_path)
pool = source.get_pool()
assert len(pool.allocations) == 5
def test_minimalloc_source_get_allocation_workspace_kind() -> None:
"""All loaded allocations are tagged as WORKSPACE buffers."""
source = MinimallocSource(subset="examples")
allocation = source.get_allocation()
assert allocation.kind == BufferKind.WORKSPACE


def test_minimalloc_source_get_allocation(sample_csv_path: Path) -> None:
source = MinimallocSource(sample_csv_path)
allocation = source.get_allocation()
assert allocation.id == "0"
assert allocation.size == 4
def test_minimalloc_source_get_variant_by_id() -> None:
source = MinimallocSource(subset="small")
variants = source.get_available_variants()
pool = source.get_variant(variants[0])
assert pool.id == variants[0]


def test_minimalloc_source_file_not_found() -> None:
"""Test that appropriate error is raised for missing file."""
with pytest.raises(FileNotFoundError):
MinimallocSource("/nonexistent/path.csv")
def test_minimalloc_source_get_variant_by_index() -> None:
source = MinimallocSource(subset="small")
pool = source.get_variant(0)
assert pool.id in source.get_available_variants()


def test_minimalloc_source_str_path(sample_csv_path: Path) -> None:
"""Test that string paths are accepted."""
source = MinimallocSource(str(sample_csv_path))
assert source.file_path == sample_csv_path
allocations = source.get_allocations()
assert len(allocations) == 5
def test_minimalloc_source_get_variant_unknown_id() -> None:
source = MinimallocSource(subset="examples")
with pytest.raises(ValueError, match="not found"):
source.get_variant("does-not-exist")
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading