Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/whichllm/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,9 @@ def _auto_min_params_for_profile(hardware: HardwareInfo, profile: str) -> float
return None
if not hardware.gpus:
return 2.0 # CPU-only: tiny is the only practical choice
usable_ram = int(hardware.ram_bytes * 0.80)
from whichllm.hardware.memory import estimate_usable_ram

usable_ram = estimate_usable_ram(hardware.ram_bytes)
best_vram_gb = max(
(usable_ram if g.shared_memory and g.vram_bytes == 0 else g.vram_bytes)
for g in hardware.gpus
Expand Down
4 changes: 2 additions & 2 deletions src/whichllm/engine/compatibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from whichllm.engine.quantization import estimate_weight_bytes
from whichllm.engine.types import CompatibilityResult
from whichllm.engine.vram import estimate_vram
from whichllm.hardware.memory import estimate_usable_ram
from whichllm.hardware.types import GPUInfo, HardwareInfo
from whichllm.models.types import GGUFVariant, ModelInfo

Expand Down Expand Up @@ -56,8 +57,7 @@ def check_compatibility(

vram_required = estimate_vram(model, variant, context_length)

# Reserve 20% of RAM for OS and other processes
usable_ram = int(hardware.ram_bytes * 0.80)
usable_ram = estimate_usable_ram(hardware.ram_bytes)

# Determine best GPU
best_gpu: GPUInfo | None = None
Expand Down
11 changes: 11 additions & 0 deletions src/whichllm/hardware/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,17 @@ def detect_ram_bytes() -> int:
return psutil.virtual_memory().total


def estimate_usable_ram(total: int) -> int:
    """Return the number of RAM bytes left for model loading.

    A reserve for the OS and background processes is subtracted from
    ``total``. The reserve is 15% of ``total``, but never less than 4 GiB
    and never more than 32 GiB. The result is floored at zero, so a
    ``total`` at or below the reserve yields 0.
    """
    gib = 1024**3
    floor_reserve = 4 * gib
    ceiling_reserve = 32 * gib

    # Proportional reserve, then clamp into [floor_reserve, ceiling_reserve].
    held_back = int(total * 0.15)
    if held_back > ceiling_reserve:
        held_back = ceiling_reserve
    if held_back < floor_reserve:
        held_back = floor_reserve

    remaining = total - held_back
    return remaining if remaining > 0 else 0


def detect_disk_free_bytes(path: str | None = None) -> int:
"""Get free disk space in bytes at the given path.

Expand Down
5 changes: 3 additions & 2 deletions tests/test_compatibility.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Tests for compatibility checking."""

from whichllm.engine.compatibility import check_compatibility
from whichllm.hardware.memory import estimate_usable_ram
from whichllm.hardware.types import GPUInfo, HardwareInfo
from whichllm.models.types import GGUFVariant, ModelInfo

Expand Down Expand Up @@ -92,7 +93,7 @@ def test_shared_memory_amd_apu_uses_system_memory_pool():

assert result.can_run is True
assert result.fit_type == "full_gpu"
assert result.vram_available_bytes == int(hw.ram_bytes * 0.80)
assert result.vram_available_bytes == estimate_usable_ram(hw.ram_bytes)
assert not any("offload" in w.lower() for w in result.warnings)
assert not any("cpu only" in w.lower() for w in result.warnings)

Expand Down Expand Up @@ -121,7 +122,7 @@ def test_windows_shared_memory_amd_apu_does_not_emit_rocm_warning():

assert result.can_run is True
assert result.fit_type == "full_gpu"
assert result.vram_available_bytes == int(hw.ram_bytes * 0.80)
assert result.vram_available_bytes == estimate_usable_ram(hw.ram_bytes)
assert not any("rocm" in w.lower() for w in result.warnings)
assert not any("offload" in w.lower() for w in result.warnings)

Expand Down
40 changes: 40 additions & 0 deletions tests/test_memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Tests for hardware.memory — estimate_usable_ram bounded-reserve formula."""

import pytest

from whichllm.hardware.memory import estimate_usable_ram

# Number of bytes in one gibibyte (2**30); the tests below express RAM sizes with it.
_GiB = 1024**3


def _expected_usable(total: int) -> int:
    """Reference calculation: total minus 15% of total clamped to [4 GiB, 32 GiB]."""
    lower_bound = 4 * _GiB
    upper_bound = 32 * _GiB
    proportional = int(total * 0.15)
    clamped = min(max(proportional, lower_bound), upper_bound)
    return total - clamped


@pytest.mark.parametrize(
    "total_gb",
    [16, 32, 64, 128, 1024],
    ids=["16GB", "32GB", "64GB", "128GB", "1TB"],
)
def test_estimate_usable_ram(total_gb):
    """The implementation matches the reference formula at each RAM size."""
    ram_bytes = total_gb * _GiB
    actual = estimate_usable_ram(ram_bytes)
    wanted = _expected_usable(ram_bytes)
    assert actual == wanted


def test_16gb_hits_min_reserve():
    """With 16 GiB of RAM the 4 GiB minimum reserve is what gets subtracted."""
    ram_bytes = 16 * _GiB
    min_reserve = 4 * _GiB
    assert estimate_usable_ram(ram_bytes) == ram_bytes - min_reserve


def test_1tb_hits_max_reserve():
    """With 1 TiB of RAM the 32 GiB maximum reserve is what gets subtracted."""
    ram_bytes = 1024 * _GiB
    max_reserve = 32 * _GiB
    assert estimate_usable_ram(ram_bytes) == ram_bytes - max_reserve


def test_midrange_uses_percentage():
    """With 64 GiB of RAM the unclamped 15% reserve is what gets subtracted."""
    ram_bytes = 64 * _GiB
    pct_reserve = int(ram_bytes * 0.15)
    # Sanity check: the percentage reserve lies strictly between both bounds.
    assert pct_reserve > 4 * _GiB
    assert pct_reserve < 32 * _GiB
    assert estimate_usable_ram(ram_bytes) == ram_bytes - pct_reserve