diff --git a/Cargo.lock b/Cargo.lock index 795eccc0a..dfcac91a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -183,7 +183,7 @@ dependencies = [ "objc2-foundation", "parking_lot", "percent-encoding", - "windows-sys 0.60.2", + "windows-sys 0.59.0", "x11rb", ] @@ -760,7 +760,7 @@ version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" dependencies = [ - "darling 0.20.11", + "darling 0.23.0", "ident_case", "prettyplease", "proc-macro2", @@ -1041,7 +1041,7 @@ version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -1754,7 +1754,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -1938,7 +1938,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2555,7 +2555,7 @@ dependencies = [ [[package]] name = "hf-hub" version = "1.0.0" -source = "git+https://github.com/Mesh-LLM/hf-hub?branch=mesh-llm#e8fb7ac4e4ed982650f65738c54f661ed83aeddf" +source = "git+https://github.com/Mesh-LLM/hf-hub?branch=mesh-llm#fd3bfcabba1b9b827e685649cbcc8bf45ec6b310" dependencies = [ "base64", "bon", @@ -4763,7 +4763,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -5925,7 +5925,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -6497,7 +6497,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.12.1", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6590,7 +6590,7 @@ dependencies = [ "security-framework 3.7.0", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6611,7 +6611,7 @@ dependencies = [ "security-framework 3.7.0", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -6865,7 +6865,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b55fb86dfd3a2f5f76ea78310a88f96c4ea21a3031f8d212443d56123fd0521" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -7634,7 +7634,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix 1.1.4", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -8980,7 +8980,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] diff --git a/crates/mesh-llm-host-runtime/src/models/remote_catalog.rs b/crates/mesh-llm-host-runtime/src/models/remote_catalog.rs index 0d3a7a4b5..7a3a6506f 100644 --- a/crates/mesh-llm-host-runtime/src/models/remote_catalog.rs +++ b/crates/mesh-llm-host-runtime/src/models/remote_catalog.rs @@ -9,7 +9,7 @@ use std::{ fs, path::{Component, Path, PathBuf}, sync::{Mutex, RwLock}, - time::{Duration, SystemTime}, + time::{Duration, Instant, SystemTime}, }; #[cfg(test)] @@ -44,6 +44,17 @@ pub use model_resolver::{ static CATALOG_ENTRIES: RwLock>> = RwLock::new(None); static CATALOG_ENSURE_LOCK: Mutex<()> = Mutex::new(()); +/// Tracks the most recent failed catalog refresh so we don't re-attempt a slow +/// network refresh on every request when the cache is already loaded but the +/// staleness marker can't be refreshed (e.g. a download error). Without this, +/// a persistently failing refresh turns every `/api/models` call into a fresh +/// multi-second download attempt. +static CATALOG_REFRESH_BACKOFF_UNTIL: Mutex> = Mutex::new(None); + +/// How long to suppress repeated refresh attempts after a failure when a stale +/// cached catalog is already available. +const CATALOG_REFRESH_BACKOFF: Duration = Duration::from_secs(5 * 60); + #[cfg(test)] static CATALOG_ENTRIES_OVERRIDE_ACTIVE: AtomicBool = AtomicBool::new(false); @@ -269,12 +280,25 @@ pub fn ensure_catalog() -> Result<()> { } if is_catalog_stale() { + // If a recent refresh failed and we already have a (stale) catalog + // loaded, don't hammer the network on every request — serve the loaded + // catalog until the backoff window elapses. + if catalog_entries().is_some() && refresh_in_backoff() { + return Ok(()); + } + match refresh_catalog() { - Ok(()) => Ok(()), + Ok(()) => { + clear_refresh_backoff(); + Ok(()) + } Err(refresh_err) => { if catalog_entries().is_some() { + set_refresh_backoff(); tracing::warn!( - "failed to refresh stale meshllm/catalog; using already-loaded stale catalog: {refresh_err:#}" + "failed to refresh stale meshllm/catalog; using already-loaded stale catalog \ + (suppressing retries for {}s): {refresh_err:#}", + CATALOG_REFRESH_BACKOFF.as_secs() ); return Ok(()); } @@ -291,6 +315,31 @@ pub fn ensure_catalog() -> Result<()> { } } +/// Returns true if a recent refresh failure means we should skip another +/// refresh attempt for now. +fn refresh_in_backoff() -> bool { + let guard = CATALOG_REFRESH_BACKOFF_UNTIL.lock(); + match guard { + Ok(until) => until.map(|t| Instant::now() < t).unwrap_or(false), + Err(_) => false, + } +} + +/// Records that a refresh just failed, suppressing retries for the backoff +/// window. +fn set_refresh_backoff() { + if let Ok(mut guard) = CATALOG_REFRESH_BACKOFF_UNTIL.lock() { + *guard = Some(Instant::now() + CATALOG_REFRESH_BACKOFF); + } +} + +/// Clears any active refresh backoff after a successful refresh. +fn clear_refresh_backoff() { + if let Ok(mut guard) = CATALOG_REFRESH_BACKOFF_UNTIL.lock() { + *guard = None; + } +} + /// Searches the cached catalog for a layer-package matching `model_query`. /// /// The query is matched (case-insensitive contains) against: @@ -794,6 +843,33 @@ mod tests { use serial_test::serial; + #[test] + #[serial] + fn refresh_backoff_suppresses_then_clears() { + clear_refresh_backoff(); + assert!(!refresh_in_backoff(), "no backoff initially"); + + set_refresh_backoff(); + assert!(refresh_in_backoff(), "backoff active after a failure"); + + clear_refresh_backoff(); + assert!(!refresh_in_backoff(), "backoff cleared after success"); + } + + /// Hits the network: verifies that the live meshllm/catalog dataset + /// downloads successfully with the patched hf-hub (redirect Content-Length + /// no longer mistaken for the file size). Run with: + /// cargo test -p mesh-llm-host-runtime refresh_catalog_live -- --ignored --nocapture + #[test] + #[ignore = "network: downloads the live meshllm/catalog dataset"] + #[serial] + fn refresh_catalog_live() { + refresh_catalog().expect("live catalog refresh should succeed"); + let entries = catalog_entries().expect("catalog entries loaded"); + assert!(!entries.is_empty(), "expected at least one catalog entry"); + println!("refresh_catalog_live: {} entries", entries.len()); + } + #[test] fn deserializes_catalog_entry() { let json = r#"{ diff --git a/crates/mesh-llm-host-runtime/src/models/resolve/tests.rs b/crates/mesh-llm-host-runtime/src/models/resolve/tests.rs index f7dd74cf9..4077fd82b 100644 --- a/crates/mesh-llm-host-runtime/src/models/resolve/tests.rs +++ b/crates/mesh-llm-host-runtime/src/models/resolve/tests.rs @@ -17,6 +17,16 @@ fn load_gemma_live_fixture() -> HfRepoFixture { .expect("parse live Hugging Face fixture") } +/// Isolates a parser test from the live remote catalog by installing an empty +/// catalog override. `parse_exact_model_ref` consults the catalog before the +/// Hugging Face parser branches, so without this a live catalog entry (e.g. a +/// real `unsloth/gemma-4-31B-it-GGUF` package) would be returned as +/// `ExactModelRef::Catalog` instead of the `HuggingFace` ref these tests +/// assert. Tests using this must be `#[serial]` because the override is global. +fn empty_catalog_guard() -> crate::models::remote_catalog::CatalogEntriesOverrideGuard { + crate::models::remote_catalog::set_catalog_entries_for_test(Vec::new()) +} + fn remote_catalog_entry( variant_name: &str, curated_name: &str, @@ -512,7 +522,9 @@ fn repo_name_can_signal_gguf_intent() { } #[test] +#[serial] fn parse_exact_model_ref_accepts_unsloth_gemma_repo_ref() { + let _catalog_guard = empty_catalog_guard(); let parsed = parse_exact_model_ref("unsloth/gemma-4-31B-it-GGUF").unwrap(); match parsed { ExactModelRef::HuggingFace { @@ -529,7 +541,9 @@ fn parse_exact_model_ref_accepts_unsloth_gemma_repo_ref() { } #[test] +#[serial] fn parse_exact_model_ref_accepts_unsloth_gemma_repo_url() { + let _catalog_guard = empty_catalog_guard(); let parsed = parse_exact_model_ref("https://huggingface.co/unsloth/gemma-4-31B-it-GGUF").unwrap(); match parsed { @@ -547,7 +561,9 @@ fn parse_exact_model_ref_accepts_unsloth_gemma_repo_url() { } #[test] +#[serial] fn parse_exact_model_ref_accepts_unsloth_gemma_quant_selector() { + let _catalog_guard = empty_catalog_guard(); let parsed = parse_exact_model_ref("unsloth/gemma-4-31B-it-GGUF:UD-Q4_K_XL").unwrap(); match parsed { ExactModelRef::HuggingFace { @@ -564,7 +580,9 @@ fn parse_exact_model_ref_accepts_unsloth_gemma_quant_selector() { } #[test] +#[serial] fn parse_exact_model_ref_accepts_revisioned_quant_selector() { + let _catalog_guard = empty_catalog_guard(); let parsed = parse_exact_model_ref("unsloth/gemma-4-31B-it-GGUF@main:UD-Q4_K_XL").unwrap(); match parsed { ExactModelRef::HuggingFace { @@ -605,7 +623,9 @@ fn simulated_name_and_repo_quant_inputs_converge_to_same_ref() { } #[test] +#[serial] fn parse_exact_model_ref_accepts_unsloth_gemma_repo_url_with_quant_selector() { + let _catalog_guard = empty_catalog_guard(); let parsed = parse_exact_model_ref("https://huggingface.co/unsloth/gemma-4-31B-it-GGUF:UD-Q4_K_XL") .unwrap(); @@ -835,7 +855,9 @@ fn format_huggingface_display_ref_prefers_repo_form_for_mlx() { } #[test] +#[serial] fn parse_exact_model_ref_accepts_legacy_mlx_model_path_shape() { + let _catalog_guard = empty_catalog_guard(); let parsed = parse_exact_model_ref("mlx-community/SmolLM-135M-8bit/model").unwrap(); match parsed { ExactModelRef::HuggingFace {