diff --git a/src/whichllm/cli.py b/src/whichllm/cli.py
index 9db55ce..ff81f01 100644
--- a/src/whichllm/cli.py
+++ b/src/whichllm/cli.py
@@ -127,7 +127,9 @@ def _auto_min_params_for_profile(hardware: HardwareInfo, profile: str) -> float
         return None
     if not hardware.gpus:
         return 2.0  # CPU-only: tiny is the only practical choice
-    usable_ram = int(hardware.ram_bytes * 0.80)
+    from whichllm.hardware.memory import estimate_usable_ram
+
+    usable_ram = estimate_usable_ram(hardware.ram_bytes)
     best_vram_gb = max(
         (usable_ram if g.shared_memory and g.vram_bytes == 0 else g.vram_bytes)
         for g in hardware.gpus
diff --git a/src/whichllm/engine/compatibility.py b/src/whichllm/engine/compatibility.py
index e0676b5..632965f 100644
--- a/src/whichllm/engine/compatibility.py
+++ b/src/whichllm/engine/compatibility.py
@@ -8,6 +8,7 @@
 from whichllm.engine.quantization import estimate_weight_bytes
 from whichllm.engine.types import CompatibilityResult
 from whichllm.engine.vram import estimate_vram
+from whichllm.hardware.memory import estimate_usable_ram
 from whichllm.hardware.types import GPUInfo, HardwareInfo
 from whichllm.models.types import GGUFVariant, ModelInfo
 
@@ -56,8 +57,7 @@ def check_compatibility(
 
     vram_required = estimate_vram(model, variant, context_length)
 
-    # Reserve 20% of RAM for OS and other processes
-    usable_ram = int(hardware.ram_bytes * 0.80)
+    usable_ram = estimate_usable_ram(hardware.ram_bytes)
 
     # Determine best GPU
     best_gpu: GPUInfo | None = None
diff --git a/src/whichllm/hardware/memory.py b/src/whichllm/hardware/memory.py
index dd4ffde..082277c 100644
--- a/src/whichllm/hardware/memory.py
+++ b/src/whichllm/hardware/memory.py
@@ -13,6 +13,25 @@ def detect_ram_bytes() -> int:
     return psutil.virtual_memory().total
 
 
+def estimate_usable_ram(total: int) -> int:
+    """Estimate RAM available for model loading after OS/background reserve.
+
+    Uses a bounded-reserve formula:
+    ``total - min(clamp(total * 0.15, 4 GiB, 32 GiB), total // 2)``.
+
+    The 4 GiB floor alone would consume all memory on hosts with 4 GiB or
+    less, so the reserve is additionally capped at half of ``total``.
+    Non-positive totals yield 0.
+    """
+    if total <= 0:
+        return 0
+    _GiB = 1024**3
+    reserve = max(4 * _GiB, min(int(total * 0.15), 32 * _GiB))
+    # Never reserve more than half the machine, so small hosts stay usable.
+    reserve = min(reserve, total // 2)
+    return total - reserve
+
+
 def detect_disk_free_bytes(path: str | None = None) -> int:
     """Get free disk space in bytes at the given path.
 
diff --git a/tests/test_compatibility.py b/tests/test_compatibility.py
index 0610775..f170f21 100644
--- a/tests/test_compatibility.py
+++ b/tests/test_compatibility.py
@@ -1,6 +1,7 @@
 """Tests for compatibility checking."""
 
 from whichllm.engine.compatibility import check_compatibility
+from whichllm.hardware.memory import estimate_usable_ram
 from whichllm.hardware.types import GPUInfo, HardwareInfo
 from whichllm.models.types import GGUFVariant, ModelInfo
 
@@ -92,7 +93,7 @@ def test_shared_memory_amd_apu_uses_system_memory_pool():
 
     assert result.can_run is True
     assert result.fit_type == "full_gpu"
-    assert result.vram_available_bytes == int(hw.ram_bytes * 0.80)
+    assert result.vram_available_bytes == estimate_usable_ram(hw.ram_bytes)
     assert not any("offload" in w.lower() for w in result.warnings)
     assert not any("cpu only" in w.lower() for w in result.warnings)
 
@@ -121,7 +122,7 @@ def test_windows_shared_memory_amd_apu_does_not_emit_rocm_warning():
 
     assert result.can_run is True
     assert result.fit_type == "full_gpu"
-    assert result.vram_available_bytes == int(hw.ram_bytes * 0.80)
+    assert result.vram_available_bytes == estimate_usable_ram(hw.ram_bytes)
     assert not any("rocm" in w.lower() for w in result.warnings)
     assert not any("offload" in w.lower() for w in result.warnings)
 
diff --git a/tests/test_memory.py b/tests/test_memory.py
new file mode 100644
index 0000000..879acf9
--- /dev/null
+++ b/tests/test_memory.py
@@ -0,0 +1,51 @@
+"""Tests for hardware.memory — estimate_usable_ram bounded-reserve formula."""
+
+import pytest
+
+from whichllm.hardware.memory import estimate_usable_ram
+
+_GiB = 1024**3
+
+
+def _expected_usable(total: int) -> int:
+    """Expected result for totals >= 8 GiB (half-total cap inactive)."""
+    reserve = int(total * 0.15)
+    reserve = max(4 * _GiB, min(reserve, 32 * _GiB))
+    return total - reserve
+
+
+@pytest.mark.parametrize(
+    "total_gb",
+    [16, 32, 64, 128, 1024],
+    ids=["16GB", "32GB", "64GB", "128GB", "1TB"],
+)
+def test_estimate_usable_ram(total_gb):
+    total = total_gb * _GiB
+    assert estimate_usable_ram(total) == _expected_usable(total)
+
+
+def test_16gb_hits_min_reserve():
+    total = 16 * _GiB
+    assert estimate_usable_ram(total) == total - 4 * _GiB
+
+
+def test_1tb_hits_max_reserve():
+    total = 1024 * _GiB
+    assert estimate_usable_ram(total) == total - 32 * _GiB
+
+
+def test_midrange_uses_percentage():
+    total = 64 * _GiB
+    expected_reserve = int(total * 0.15)
+    assert 4 * _GiB < expected_reserve < 32 * _GiB
+    assert estimate_usable_ram(total) == total - expected_reserve
+
+
+def test_small_host_reserve_capped_at_half():
+    total = 4 * _GiB
+    assert estimate_usable_ram(total) == total // 2
+
+
+def test_non_positive_total_yields_zero():
+    assert estimate_usable_ram(0) == 0
+    assert estimate_usable_ram(-1) == 0