diff --git a/README.md b/README.md
index 5ba9767..9bd7d57 100644
--- a/README.md
+++ b/README.md
@@ -131,16 +131,16 @@ Auto-generated: **`overrides/compute.yml`** (from hardware detection). Do not co
 
 ### Dashboard
 
-Reach the dashboard at `https://${CADDY_TAILNET_HOSTNAME}/dash/` (Google SSO front door; allowlist via `auth/oauth2-proxy/emails.txt`). It lists models (Ollama and ComfyUI), links to other services, dependency health, and searchable model pulls. **`OPS_CONTROLLER_TOKEN`** lets it restart services and run **`POST /api/comfyui/install-node-requirements`**. **`DASHBOARD_AUTH_TOKEN`** is an optional bearer layer for non-browser API access; the browser path is gated by SSO at the proxy level.
+Reach the dashboard at `https://${CADDY_TAILNET_HOSTNAME}/dash/` (Google SSO front door; allowlist via `auth/oauth2-proxy/emails.txt`). It shows models (GGUF/llama.cpp and ComfyUI), links to other services, and dependency health, and lets you pull models from Hugging Face. **`OPS_CONTROLLER_TOKEN`** lets it restart services and run **`POST /api/comfyui/install-node-requirements`**. **`DASHBOARD_AUTH_TOKEN`** is an optional bearer layer for non-browser API access; the browser path is gated by SSO at the proxy level.
 
 After code changes affecting the dashboard image: `.\compose.ps1 build dashboard` then `.\compose.ps1 up -d` (or `./compose` equivalents).
 
-### Ollama models
+### LLM models (GGUF / llama.cpp)
 
-Pull lists and defaults come from **`.env`** (`MODELS`, `DEFAULT_MODEL`). Pull via the dashboard or:
+The stack pulls GGUF files (served by llama.cpp) directly from Hugging Face. Repo lists and defaults come from **`.env`** (`GGUF_MODELS`, `DEFAULT_MODEL`). Pull via the dashboard's **Models** panel (enter a Hugging Face repo id, a `huggingface.co/…` page URL or direct `.gguf` URL, an `hf.co/owner/repo` reference, or the literal text `.env` to pull every repo listed in `GGUF_MODELS`), or from the CLI:
 
 ```bash
-./compose run --rm model-puller
+./compose run --rm gguf-puller
 ```
 
 ### ComfyUI (LTX-2)
diff --git a/dashboard/app.py b/dashboard/app.py
index de63ae1..55970b0 100644
--- a/dashboard/app.py
+++ b/dashboard/app.py
@@ -210,24 +210,9 @@ def _model_gateway_headers() -> dict[str, str]:
         headers["Authorization"] = f"Bearer {MODEL_GATEWAY_API_KEY}"
     return headers
 
-# Ollama library: fetched from community JSON (all pullable model:tag names)
-OLLAMA_LIBRARY_URL = os.environ.get(
-    "OLLAMA_LIBRARY_URL",
-    "https://yuma-shintani.github.io/ollama-model-library/model.json",
-)
-OLLAMA_LIBRARY_CACHE_TTL = float(os.environ.get("OLLAMA_LIBRARY_CACHE_TTL_SEC", "86400"))  # 24h
-_ollama_library_cache: list[str] = []
-_ollama_library_ts: float = 0.0
-
-# Fallback when fetch fails (minimal curated list)
-OLLAMA_LIBRARY_FALLBACK = [
-    "llama3.2", "llama3.1", "deepseek-r1:7b", "qwen2.5:7b", "qwen3:14b", "qwen3:14b-q4_K_M",
-    "mistral", "nomic-embed-text", "phi4", "gemma3",
-]
-
 # Background pull status dicts
 _comfyui_status: dict = {"running": False, "output": "", "done": False, "success": None}
-_ollama_pull_status: dict = {"running": False, "model": "", "output": "", "pct": 0, "done": False, "success": None}
+_gguf_pull_status: dict = {"running": False, "model": "", "output": "", "pct": 0, "done": False, "success": None}
 
 
 
@@ -235,68 +220,7 @@ class PullRequest(BaseModel):
     model: str
 
 
-# --- Ollama ---
-
-
-def _fetch_ollama_library() -> list[str]:
-    """Fetch pullable model names from Ollama registry. Uses community JSON; caches 24h."""
-    global _ollama_library_cache, _ollama_library_ts
-    now = time.monotonic()
-    with _state_lock:
-        if _ollama_library_cache and (now - _ollama_library_ts) < OLLAMA_LIBRARY_CACHE_TTL:
-            return list(_ollama_library_cache)
-
-    urls = [OLLAMA_LIBRARY_URL]
-    for url in urls:
-        try:
-            req = urllib.request.Request(url, headers={"Accept": "application/json"})
-            with urllib.request.urlopen(req, timeout=15) as resp:
-                data = json.loads(resp.read().decode())
-        except Exception as e:
-            logger.warning("Ollama library fetch failed from %s: %s", url, e)
-            continue
-
-        names: set[str] = set()
-        if isinstance(data, list):
-            # yuma-shintani format: [{"name":"llama3.1","tags":[{"name":"llama3.1:8b"},...]}, ...]
-            for item in data:
-                if isinstance(item, dict):
-                    base = (item.get("name") or "").strip()
-                    tags = item.get("tags") or []
-                    for t in tags:
-                        if isinstance(t, dict) and t.get("name"):
-                            names.add(str(t["name"]).strip())
-                    if base:
-                        names.add(base)  # e.g. llama3.1 -> llama3.1:latest
-        elif isinstance(data, dict):
-            # Official format: {"library": {"llama3.1": {"tags": ["8b","70b"]}, ...}}
-            lib = data.get("library") or data
-            if isinstance(lib, dict):
-                for base, meta in lib.items():
-                    if isinstance(meta, dict):
-                        for tag in meta.get("tags") or []:
-                            names.add(f"{base}:{tag}" if tag else base)
-                    else:
-                        names.add(base)
-
-        if names:
-            result = sorted(names)
-            with _state_lock:
-                _ollama_library_cache = result
-                _ollama_library_ts = now
-            return result
-
-    with _state_lock:
-        _ollama_library_cache = OLLAMA_LIBRARY_FALLBACK
-        _ollama_library_ts = now
-    return list(OLLAMA_LIBRARY_FALLBACK)
-
-
-@app.get("/api/ollama/library")
-async def ollama_library():
-    """List models available in the Ollama registry (fetched programmatically, cached 24h)."""
-    models = await asyncio.to_thread(_fetch_ollama_library)
-    return {"models": models, "ok": True}
+# --- LLM (llama.cpp / GGUF) ---
 
 
 _GGUF_MODELS_DIR = Path(os.environ.get("GGUF_MODELS_DIR", "/gguf-models"))
@@ -315,8 +239,8 @@ def _scan_gguf_models() -> list[dict]:
     return models
 
 
-@app.get("/api/ollama/models")
-async def ollama_models():
+@app.get("/api/llm/models")
+async def llm_models():
     """List GGUF models available on disk (primary) merged with gateway active-model info."""
     disk_models = await asyncio.to_thread(_scan_gguf_models)
     if disk_models:
@@ -332,8 +256,8 @@ async def ollama_models():
         return {"models": [], "ok": False, "error": str(e)}
 
 
-@app.post("/api/ollama/delete")
-async def ollama_delete(req: PullRequest):
+@app.post("/api/llm/delete")
+async def llm_delete(req: PullRequest):
     """Delete a GGUF model file from disk."""
     name = (req.model or "").strip()
     if not name or ".." in name or "/" in name:
@@ -355,8 +279,8 @@ async def ollama_delete(req: PullRequest):
     return {"ok": True, "message": f"Deleted '{name}' from disk."}
 
 
-@app.post("/api/ollama/unload")
-async def ollama_unload(req: PullRequest):
+@app.post("/api/llm/unload")
+async def llm_unload(req: PullRequest):
     """Unload the currently active model from the gateway without deleting GGUF files."""
     name = (req.model or "").strip()
     if not name or ".." in name:
@@ -376,7 +300,7 @@ async def ollama_unload(req: PullRequest):
     except HTTPException:
         raise
     except Exception as e:
-        raise HTTPException(status_code=502, detail=f"Ollama request failed: {e}") from e
+        raise HTTPException(status_code=502, detail=f"Model gateway request failed: {e}") from e
 
 
 @app.post("/api/llamacpp/switch")
@@ -452,38 +376,38 @@ async def _do_set_active_model(req: PullRequest, request: Request):
     return {"ok": all_ok, "model": model, "errors": errors, **results}
 
 
-def _run_ollama_pull(model: str):
+def _run_gguf_pull(model: str):
     """Download GGUFs via ops-controller gguf-puller (docker compose --profile models)."""
-    global _ollama_pull_status
+    global _gguf_pull_status
     with _state_lock:
-        _ollama_pull_status = {"running": True, "model": model, "output": "", "pct": 0, "done": False, "success": None}
+        _gguf_pull_status = {"running": True, "model": model, "output": "", "pct": 0, "done": False, "success": None}
 
     repos = _normalize_gguf_pull_repos(model)
     if repos is None:
-        repos = _normalize_gguf_pull_repos(_hf_url_to_ollama(model))
+        repos = _normalize_gguf_pull_repos(_hf_url_to_repo(model))
     if repos is None:
         msg = (
-            "This stack uses GGUF files (llama.cpp), not the Ollama registry.\n\n"
+            "This stack pulls GGUF files (llama.cpp) directly from Hugging Face.\n\n"
             "Enter a Hugging Face repo id (e.g. bartowski/Llama-3.2-3B-Instruct-GGUF), "
             "a huggingface.co/… page or .gguf URL, hf.co/owner/repo, or type .env to pull all "
             "repos listed in GGUF_MODELS in your .env.\n\n"
-            "Names like llama3.2:8b only work with a real Ollama daemon, not this gateway."
+            "Bare tag names like llama3.2:8b are not supported; use a Hugging Face repo id or .gguf URL."
         )
         with _state_lock:
-            _ollama_pull_status["output"] = msg
-            _ollama_pull_status["success"] = False
-            _ollama_pull_status["running"] = False
-            _ollama_pull_status["done"] = True
+            _gguf_pull_status["output"] = msg
+            _gguf_pull_status["success"] = False
+            _gguf_pull_status["running"] = False
+            _gguf_pull_status["done"] = True
         return
 
     ops_url = os.environ.get("OPS_CONTROLLER_URL", "http://ops-controller:9000").rstrip("/")
     token = os.environ.get("OPS_CONTROLLER_TOKEN", "").strip()
     if not token:
         with _state_lock:
-            _ollama_pull_status["output"] = "OPS_CONTROLLER_TOKEN is not set; cannot run gguf-puller from the dashboard."
-            _ollama_pull_status["success"] = False
-            _ollama_pull_status["running"] = False
-            _ollama_pull_status["done"] = True
+            _gguf_pull_status["output"] = "OPS_CONTROLLER_TOKEN is not set; cannot run gguf-puller from the dashboard."
+            _gguf_pull_status["success"] = False
+            _gguf_pull_status["running"] = False
+            _gguf_pull_status["done"] = True
         return
 
     try:
@@ -496,10 +420,10 @@ def _run_ollama_pull(model: str):
             )
             if r.status_code == 409:
                 with _state_lock:
-                    _ollama_pull_status["output"] = "Another model or GGUF pull is already in progress."
-                    _ollama_pull_status["success"] = False
-                    _ollama_pull_status["running"] = False
-                    _ollama_pull_status["done"] = True
+                    _gguf_pull_status["output"] = "Another model or GGUF pull is already in progress."
+                    _gguf_pull_status["success"] = False
+                    _gguf_pull_status["running"] = False
+                    _gguf_pull_status["done"] = True
                 return
             if r.status_code >= 400:
                 try:
@@ -507,10 +431,10 @@ def _run_ollama_pull(model: str):
                 except (ValueError, UnicodeDecodeError):
                     det = r.text
                 with _state_lock:
-                    _ollama_pull_status["output"] = f"Failed to start gguf-puller: {det}"
-                    _ollama_pull_status["success"] = False
-                    _ollama_pull_status["running"] = False
-                    _ollama_pull_status["done"] = True
+                    _gguf_pull_status["output"] = f"Failed to start gguf-puller: {det}"
+                    _gguf_pull_status["success"] = False
+                    _gguf_pull_status["running"] = False
+                    _gguf_pull_status["done"] = True
                 return
 
         deadline = time.time() + 7200  # 2-hour max
@@ -536,44 +460,44 @@ def _run_ollama_pull(model: str):
                         raise RuntimeError(f"Poll failed 20 times: {poll_err}")
                     continue
                 with _state_lock:
-                    _ollama_pull_status["output"] = st.get("output", "")
-                    _ollama_pull_status["pct"] = 50 if st.get("running") else 100
+                    _gguf_pull_status["output"] = st.get("output", "")
+                    _gguf_pull_status["pct"] = 50 if st.get("running") else 100
                 if st.get("done"):
                     with _state_lock:
-                        _ollama_pull_status["success"] = bool(st.get("success"))
-                        _ollama_pull_status["running"] = False
-                        _ollama_pull_status["done"] = True
+                        _gguf_pull_status["success"] = bool(st.get("success"))
+                        _gguf_pull_status["running"] = False
+                        _gguf_pull_status["done"] = True
                     break
             else:
                 raise TimeoutError("GGUF pull timed out after 2 hours")
     except Exception as e:
         logger.error("GGUF pull failed: %s", e)
         with _state_lock:
-            _ollama_pull_status["output"] = (_ollama_pull_status.get("output") or "") + f"\nError: {e}"
-            _ollama_pull_status["success"] = False
-            _ollama_pull_status["running"] = False
-            _ollama_pull_status["done"] = True
+            _gguf_pull_status["output"] = (_gguf_pull_status.get("output") or "") + f"\nError: {e}"
+            _gguf_pull_status["success"] = False
+            _gguf_pull_status["running"] = False
+            _gguf_pull_status["done"] = True
 
 
-@app.post("/api/ollama/pull")
-async def ollama_pull(req: PullRequest):
-    """Start GGUF download (gguf-puller via ops-controller) in background. Poll /api/ollama/pull/status."""
-    global _ollama_pull_status
+@app.post("/api/llm/pull")
+async def llm_pull(req: PullRequest):
+    """Start GGUF download (gguf-puller via ops-controller) in background. Poll /api/llm/pull/status."""
+    global _gguf_pull_status
     with _state_lock:
-        if _ollama_pull_status.get("running"):
+        if _gguf_pull_status.get("running"):
             raise HTTPException(status_code=409, detail="Pull already in progress")
-        _ollama_pull_status["running"] = True
-        _ollama_pull_status["model"] = req.model
-    thread = threading.Thread(target=_run_ollama_pull, args=(req.model,), daemon=True)
+        _gguf_pull_status["running"] = True
+        _gguf_pull_status["model"] = req.model
+    thread = threading.Thread(target=_run_gguf_pull, args=(req.model,), daemon=True)
     thread.start()
     return {"status": "started", "model": req.model}
 
 
-@app.get("/api/ollama/pull/status")
-async def ollama_pull_status():
-    """Get Ollama pull progress."""
+@app.get("/api/llm/pull/status")
+async def llm_pull_status():
+    """Get GGUF pull progress."""
     with _state_lock:
-        return dict(_ollama_pull_status)
+        return dict(_gguf_pull_status)
 
 
 # --- ComfyUI ---
@@ -957,7 +881,7 @@ class ModelPullRequest(BaseModel):
 def _normalize_gguf_pull_repos(model: str) -> str | None:
     """Return comma-separated Hugging Face repo ids for gguf-puller, or '' to use .env GGUF_MODELS.
 
-    None means the string is not suitable (e.g. Ollama-style ``llama3.2:8b``).
+    None means the string is not suitable (e.g. a bare tag like ``llama3.2:8b``).
     """
     def _normalize_repo_ref(raw: str) -> str | None:
         candidate = raw.strip()
@@ -999,8 +923,8 @@ def _normalize_repo_ref(raw: str) -> str | None:
     return _normalize_repo_ref(s)
 
 
-def _hf_url_to_ollama(raw: str) -> str:
-    """Convert a HuggingFace GGUF URL to Ollama's hf.co/owner/repo format.
+def _hf_url_to_repo(raw: str) -> str:
+    """Convert a HuggingFace GGUF URL to hf.co/owner/repo form for the gguf-puller.
     Non-HF strings (model names, hf.co/ refs) are returned as-is.
     """
     if "huggingface.co/" in raw:
@@ -1016,7 +940,7 @@ def _hf_url_to_ollama(raw: str) -> str:
 @app.post("/api/models/download")
 async def models_download(req: ModelDownloadRequest, request: Request):
     """Unified model download.
-    - GGUF / HF repo → background gguf-puller via ops (same as ``/api/ollama/pull``); poll ``/api/ollama/pull/status``.
+    - GGUF / HF repo → background gguf-puller via ops (same as ``/api/llm/pull``); poll ``/api/llm/pull/status``.
     - safetensors / ckpt / pt / bin → proxied to ops-controller for file download.
     """
     raw = req.url.strip()
@@ -1039,15 +963,15 @@ async def models_download(req: ModelDownloadRequest, request: Request):
         return {**data, "target": "comfyui"}
     else:
         with _state_lock:
-            if _ollama_pull_status.get("running"):
+            if _gguf_pull_status.get("running"):
                 raise HTTPException(status_code=409, detail="Pull already in progress")
-            _ollama_pull_status["running"] = True
-        thread = threading.Thread(target=_run_ollama_pull, args=(raw,), daemon=True)
+            _gguf_pull_status["running"] = True
+        thread = threading.Thread(target=_run_gguf_pull, args=(raw,), daemon=True)
         thread.start()
         return {
             "status": "started",
             "target": "gguf",
-            "message": "Poll /api/ollama/pull/status for progress.",
+            "message": "Poll /api/llm/pull/status for progress.",
         }
 
 
@@ -1571,8 +1495,8 @@ async def performance_summary():
     }
 
 
-@app.get("/api/ollama/ps")
-async def ollama_ps():
+@app.get("/api/llm/ps")
+async def llm_ps():
     """List models currently advertised by model-gateway."""
     try:
         r = await _get_http_client().get(
@@ -1814,7 +1738,7 @@ def _open_webui_default_model(name: str) -> str:
 @app.post("/api/config/default-model")
 async def set_default_model(req: DefaultModelRequest, request: Request):
     """Write DEFAULT_MODEL and OPEN_WEBUI_DEFAULT_MODEL to .env and recreate open-webui."""
-    # Ollama allows namespaced ids: owner/model:tag (slashes required). Only reject empty / traversal.
+    # Model ids may be namespaced: owner/model:tag (slashes allowed). Only reject empty / traversal.
     name = (req.model or "").strip()
     if not name or ".." in name:
         raise HTTPException(status_code=400, detail="Invalid model name")
diff --git a/dashboard/static/index.html b/dashboard/static/index.html
index a02ba3a..7183c97 100644
--- a/dashboard/static/index.html
+++ b/dashboard/static/index.html
@@ -851,56 +851,13 @@
     .pill:hover { border-color: var(--accent); color: var(--accent); }
     .mcp-remove-btn:hover { border-color: var(--danger); color: var(--danger); }
     /* ── Model select dropdown ── */
-    .model-select-wrap { position: relative; flex: 1; min-width: 200px; }
-    .model-select-trigger {
-      width: 100%; display: flex; align-items: center; justify-content: space-between;
-      padding: var(--space-2) var(--space-4); background: var(--bg);
-      border: 1px solid var(--border); border-radius: var(--radius-sm);
+    .llm-model-input {
+      flex: 1; min-width: 200px; padding: var(--space-2) var(--space-4);
+      background: var(--bg); border: 1px solid var(--border); border-radius: var(--radius-sm);
       color: var(--fg); font-family: var(--font-mono); font-size: .88rem;
-      cursor: pointer; transition: all .2s;
-    }
-    .model-select-trigger:hover, .model-select-trigger.open { border-color: var(--accent); background: var(--accent-dim); }
-    .model-select-trigger .chevron { transition: transform .2s; opacity: .6; }
-    .model-select-trigger.open .chevron { transform: rotate(180deg); }
-    .model-select-dropdown {
-      position: absolute; top: 100%; left: 0; right: 0; margin-top: var(--space-1);
-      background: var(--surface); border: 1px solid var(--border);
-      border-radius: var(--radius-sm); max-height: 320px;
-      overflow: hidden; display: flex; flex-direction: column;
-      z-index: 1000; box-shadow: var(--shadow-lg);
-    }
-    .model-select-search { padding: var(--space-2) var(--space-3); border-bottom: 1px solid var(--border-subtle); }
-    .model-select-search input {
-      width: 100%; padding: .4rem .65rem; background: var(--bg);
-      border: 1px solid var(--border); border-radius: var(--radius-sm);
-      color: var(--fg); font-family: var(--font-mono); font-size: .82rem;
-    }
-    .model-select-search input:focus-visible { outline: 1.5px solid var(--accent); outline-offset: 2px; border-color: var(--accent); }
-    .model-select-search input:focus:not(:focus-visible) { outline: none; border-color: var(--accent); }
-    .model-select-dropdown .section-label {
-      padding: var(--space-2) var(--space-4); font-size: var(--text-xs);
-      text-transform: uppercase; letter-spacing: .06em; color: var(--muted);
-      border-bottom: 1px solid var(--border-subtle);
-    }
-    .model-select-option {
-      padding: var(--space-2) var(--space-4); font-family: var(--font-mono); font-size: .82rem;
-      cursor: pointer; display: flex; justify-content: space-between;
-      align-items: center; transition: background .12s;
-    }
-    .model-select-option:hover { background: var(--surface-hover); }
-    .model-select-option.installed { color: var(--muted); cursor: default; }
-    .model-select-option.installed:hover { background: transparent; }
-    .model-select-option .badge { font-size: .65rem; color: var(--success); }
-    .model-select-option .size { font-size: .75rem; color: var(--muted); }
-    #ollama-select-options { overflow-y: auto; max-height: 220px; }
-    .model-select-custom { padding: var(--space-3) var(--space-4); border-top: 1px solid var(--border-subtle); }
-    .model-select-custom input {
-      width: 100%; padding: .4rem .65rem; background: var(--bg);
-      border: 1px solid var(--border); border-radius: var(--radius-sm);
-      color: var(--fg); font-family: var(--font-mono); font-size: .82rem;
     }
-    .model-select-custom input:focus-visible { outline: 1.5px solid var(--accent); outline-offset: 2px; border-color: var(--accent); }
-    .model-select-custom input:focus:not(:focus-visible) { outline: none; border-color: var(--accent); }
+    .llm-model-input:focus-visible { outline: 1.5px solid var(--accent); outline-offset: 2px; border-color: var(--accent); }
+    .llm-model-input:focus:not(:focus-visible) { outline: none; border-color: var(--accent); }
     .last-updated { font-size: var(--text-xs); color: var(--muted); margin-top: var(--space-2); }
     /* ── Inputs ── */
     input[type="text"] {
@@ -1020,10 +977,8 @@
       .model-item .name { word-break: break-word; }
       .pull-area { padding: var(--space-4); }
       .pull-row { flex-direction: row; flex-wrap: wrap; gap: var(--space-3); }
-      .pull-row .model-select-wrap { flex: 1 1 100%; min-width: 0; }
+      .pull-row .llm-model-input { flex: 1 1 100%; min-width: 0; }
       .pull-row button { flex: 1 1 120px; min-width: 0; }
-      .model-select-wrap { min-width: 0; }
-      .model-select-trigger { font-size: .85rem; }
       input[type="text"] { min-width: 0; }
       button { min-height: 44px; padding: .75rem 1.25rem; }
       .mcp-howto { padding: var(--space-2) var(--space-3); }
@@ -1043,7 +998,6 @@
       h1 { font-size: 1.75rem; }
       section { padding: var(--space-4) var(--space-3); }
       .model-list { max-height: 160px; }
-      .model-select-dropdown { max-height: 60vh; left: var(--space-2); right: var(--space-2); width: auto; }
       .quick-pills { gap: var(--space-1); }
       .pill { padding: 2px var(--space-3); }
     }
@@ -1210,7 +1164,6 @@
     }
     /* Override arbitrary z-indexes with scale tokens */
     .modal-overlay { z-index: var(--z-modal); }
-    .model-select-dropdown { z-index: var(--z-dropdown); }
     header::before { z-index: var(--z-sticky); }
     .toast-container { z-index: var(--z-overlay); }
     .skip-link { z-index: calc(var(--z-modal) + 1); }
@@ -1901,36 +1854,21 @@ <h3>LLM — llama.cpp</h3>
             </div>
             <div id="llm-active-status" class="llm-active-status"></div>
           </div>
-          <div class="model-list" id="ollama-models">
+          <div class="model-list" id="llm-models">
             <div class="skeleton skeleton-line"></div>
             <div class="skeleton skeleton-line short"></div>
             <div class="skeleton skeleton-line"></div>
           </div>
           <div class="pull-area">
             <div class="pull-row">
-              <div class="model-select-wrap">
-                <div class="model-select-trigger" id="ollama-select-trigger" role="combobox" aria-expanded="false" aria-haspopup="listbox" aria-controls="ollama-select-dropdown" aria-labelledby="ollama-select-label" tabindex="0">
-                  <span id="ollama-select-label">Select model to pull...</span>
-                  <span class="chevron" aria-hidden="true">&#9662;</span>
-                </div>
-                <div class="model-select-dropdown" id="ollama-select-dropdown" role="listbox" aria-labelledby="ollama-select-label" style="display:none;">
-                  <div class="model-select-search">
-                    <input type="text" id="ollama-library-search" placeholder="Search catalog..." autocomplete="off" aria-label="Search model catalog">
-                  </div>
-                  <div class="section-label">Registry (pull uses HF repo ids; see custom field)</div>
-                  <div id="ollama-select-options"></div>
-                  <div class="model-select-custom">
-                    <input type="text" id="ollama-model-custom" placeholder="HF repo (org/name), .env, or huggingface.co URL" aria-label="Hugging Face repo or URL">
-                  </div>
-                </div>
-              </div>
-              <button id="ollama-starter-pack" class="secondary" title="Runs gguf-puller with GGUF_MODELS from .env">Pull .env Models</button>
-              <button id="ollama-pull">Pull</button>
+              <input type="text" id="llm-model-input" class="llm-model-input" placeholder="HF repo (org/name), .env, or huggingface.co URL" aria-label="Hugging Face repo or URL to pull" autocomplete="off">
+              <button id="llm-starter-pack" class="secondary" title="Runs gguf-puller with GGUF_MODELS from .env">Pull .env Models</button>
+              <button id="llm-pull">Pull</button>
             </div>
-            <div class="last-updated" id="ollama-last-updated"></div>
-            <div id="ollama-progress" class="progress-area" style="display:none;" role="region" aria-label="Pull progress">
-              <div class="progress-bar"><div class="fill" id="ollama-progress-bar" style="width:0%" role="progressbar" aria-valuemin="0" aria-valuemax="100" aria-valuenow="0"></div></div>
-              <div class="log" id="ollama-log" role="log" aria-live="polite"></div>
+            <div class="last-updated" id="llm-last-updated"></div>
+            <div id="llm-progress" class="progress-area" style="display:none;" role="region" aria-label="Pull progress">
+              <div class="progress-bar"><div class="fill" id="llm-progress-bar" style="width:0%" role="progressbar" aria-valuemin="0" aria-valuemax="100" aria-valuenow="0"></div></div>
+              <div class="log" id="llm-log" role="log" aria-live="polite"></div>
             </div>
           </div>
         </div>
@@ -2090,8 +2028,7 @@ <h2 id="auth-modal-title">Dashboard login</h2>
       });
     };
 
-    let ollamaModels = [];
-    let ollamaLibrary = [];
+    let llmModels = [];
 
     function toast(msg, type = '') {
       const el = document.createElement('div');
@@ -2213,7 +2150,6 @@ <h2 id="auth-modal-title">Dashboard login</h2>
     }
 
     const SERVICE_ICONS = {
-      'ollama': '🦙',
       'model-gateway': '⇌',
       'webui': '💬',
       'mcp': '🔌',
@@ -2838,62 +2774,33 @@ <h2 id="auth-modal-title">Dashboard login</h2>
       } catch (e) { jobsEl.textContent = "Jobs unavailable: " + e; }
     }
 
-    function buildOllamaDropdown(filter = '') {
-      const installed = new Set(ollamaModels.map(m => m.name));
-      const optionsEl = document.getElementById('ollama-select-options');
-      if (!optionsEl) return;
-      const q = filter.toLowerCase().trim();
-      const models = q ? ollamaLibrary.filter(m => m.toLowerCase().includes(q)) : ollamaLibrary;
-      optionsEl.innerHTML = models.slice(0, 80).map(name => {
-        const isInstalled = installed.has(name);
-        return `<div class="model-select-option ${isInstalled ? 'installed' : ''}" data-model="${name}" role="option" ${isInstalled ? 'aria-disabled="true"' : ''} tabindex="-1">
-          <span>${name}</span>
-          ${isInstalled ? '<span class="badge badge-success">✓</span>' : '<span class="size">pull</span>'}
-        </div>`;
-      }).join('');
-      if (models.length > 80) {
-        optionsEl.innerHTML += `<div class="section-label" style="border:none;padding:0.5rem;">+ ${models.length - 80} more — type to search</div>`;
-      }
-    }
-
-    async function loadOllamaLibrary() {
-      try {
-        const r = await api('/api/ollama/library');
-        const d = await r.json();
-        ollamaLibrary = d.models || [];
-      } catch (_) {
-        ollamaLibrary = ['llama3.2', 'deepseek-r1:7b', 'qwen2.5:7b', 'mistral', 'nomic-embed-text'];
-      }
-    }
-
-    async function loadOllamaModels() {
-      const el = document.getElementById('ollama-models');
+    async function loadModels() {
+      const el = document.getElementById('llm-models');
       try {
-        const r = await api('/api/ollama/models');
+        const r = await api('/api/llm/models');
         const d = await r.json();
-        ollamaModels = d.models || [];
-        buildOllamaDropdown(document.getElementById('ollama-library-search')?.value || '');
+        llmModels = d.models || [];
         if (!d.ok) {
           el.innerHTML = '<div class="empty">Model gateway unreachable.<span class="hint">Start with: docker compose up -d</span></div>';
           return;
         }
-        if (!ollamaModels.length) {
-          el.innerHTML = '<div class="empty">No models yet.<span class="hint">Select one below and click Pull.</span></div>';
-          document.getElementById('ollama-last-updated').textContent = '';
+        if (!llmModels.length) {
+          el.innerHTML = '<div class="empty">No models yet.<span class="hint">Enter a Hugging Face repo below and click Pull.</span></div>';
+          document.getElementById('llm-last-updated').textContent = '';
           populateThroughputModelSelect();
           return;
         }
-        document.getElementById('ollama-last-updated').textContent = 'Updated ' + new Date().toLocaleTimeString();
+        document.getElementById('llm-last-updated').textContent = 'Updated ' + new Date().toLocaleTimeString();
         // Fetch currently loaded model to show active badge
         let activeModel = '';
         try {
-          const ps = await api('/api/ollama/ps');
+          const ps = await api('/api/llm/ps');
           if (ps.ok) {
             const psData = await ps.json();
             activeModel = (psData.models?.[0]?.name || '').replace(/\.gguf$/i, '').split(':')[0];
           }
         } catch (_) {}
-        el.innerHTML = ollamaModels.map(m => {
+        el.innerHTML = llmModels.map(m => {
           const bareName = m.name.replace(/\.gguf$/i, '');
           const isActive = activeModel && (bareName === activeModel || bareName.split(':')[0] === activeModel);
           return `
@@ -2908,7 +2815,7 @@ <h2 id="auth-modal-title">Dashboard login</h2>
         const activeArea = document.getElementById('llm-active-area');
         const activeSel = document.getElementById('llm-active-select');
         if (activeSel) {
-          const llms = ollamaModels.filter(m => !isEmbeddingModel(m.name));
+          const llms = llmModels.filter(m => !isEmbeddingModel(m.name));
           if (llms.length) {
             activeSel.innerHTML = llms.map(m => `<option value="${escapeHtml(m.name)}">${escapeHtml(m.name)}</option>`).join('');
             const active = llms.find(m => {
@@ -2943,7 +2850,7 @@ <h2 id="auth-modal-title">Dashboard login</h2>
     function populateThroughputModelSelect() {
       const sel = document.getElementById('throughput-model-select');
       if (!sel) return;
-      const llms = ollamaModels.filter(m => !isEmbeddingModel(m.name));
+      const llms = llmModels.filter(m => !isEmbeddingModel(m.name));
       if (!llms.length) {
         sel.innerHTML = '<option value="">No LLMs — pull one (embedding models excluded)</option>';
         return;
@@ -3042,8 +2949,8 @@ <h2 id="auth-modal-title">Dashboard login</h2>
         ['perf-rail-p50','perf-rail-p95','perf-rail-p99','perf-rail-peak','perf-rail-ttft-p50','perf-rail-ttft-p95'].forEach(id => setText(id, '—'));
         heroEl?.classList.remove('loaded');
         // Bench target — show the configured local model name if available
-        const targetName = (typeof ollamaModels !== 'undefined' && ollamaModels?.length)
-          ? (ollamaModels.find(m => !isEmbeddingModel?.(m.name))?.name || ollamaModels[0]?.name) : '';
+        const targetName = (typeof llmModels !== 'undefined' && llmModels?.length)
+          ? (llmModels.find(m => !isEmbeddingModel?.(m.name))?.name || llmModels[0]?.name) : '';
         setText('bench-target', targetName || 'Run a benchmark to seed the dashboard');
         return;
       }
@@ -3119,7 +3026,7 @@ <h2 id="auth-modal-title">Dashboard login</h2>
       // No model selector in the simplified single-model UI — pick the first
       // non-embedding LLM the gateway reports. Fall back to the hero's current
       // target text (whatever the dashboard is actively showing).
-      const llms = ollamaModels.filter(m => !isEmbeddingModel(m.name));
+      const llms = llmModels.filter(m => !isEmbeddingModel(m.name));
       const heroTarget = document.getElementById('bench-target')?.textContent?.trim();
       const model = llms[0]?.name || (heroTarget && heroTarget !== '—' ? heroTarget : 'llama3.2');
       resultsEl.style.display = 'none';
@@ -3308,86 +3215,19 @@ <h2 id="auth-modal-title">Dashboard login</h2>
       }
     }
 
-    function getSelectedOllamaModel() {
-      const custom = document.getElementById('ollama-model-custom').value.trim();
-      if (custom) return custom;
-      const label = document.getElementById('ollama-select-label').textContent;
-      return label !== 'Select model to pull...' ? label : '';
-    }
-
-    function setSelectedOllamaModel(name) {
-      document.getElementById('ollama-model-custom').value = '';
-      document.getElementById('ollama-select-label').textContent = name || 'Select model to pull...';
+    function getSelectedModel() {  // Current text of #llm-model-input, whitespace-trimmed; '' when blank.
+      return document.getElementById('llm-model-input').value.trim();
     }
 
-    function toggleOllamaDropdown(open) {
-      const trigger = document.getElementById('ollama-select-trigger');
-      const dd = document.getElementById('ollama-select-dropdown');
-      dd.style.display = open ? 'flex' : 'none';
-      trigger.classList.toggle('open', open);
-      trigger.setAttribute('aria-expanded', open);
-      if (open) {
-        document.getElementById('ollama-library-search').value = '';
-        document.getElementById('ollama-library-search').focus();
-        buildOllamaDropdown('');
-      }
+    function setSelectedModel(name) {  // Overwrites #llm-model-input; a falsy name clears it.
+      document.getElementById('llm-model-input').value = name || '';
     }
-    document.getElementById('ollama-select-trigger').onclick = (e) => {
-      e.stopPropagation();
-      const dd = document.getElementById('ollama-select-dropdown');
-      toggleOllamaDropdown(dd.style.display !== 'flex');
-    };
-    document.getElementById('ollama-select-trigger').onkeydown = (e) => {
-      if (e.key === 'Enter' || e.key === ' ') {
-        e.preventDefault();
-        const dd = document.getElementById('ollama-select-dropdown');
-        toggleOllamaDropdown(dd.style.display !== 'flex');
-      }
-    };
-
-    document.getElementById('ollama-select-options').addEventListener('click', (e) => {
-      const opt = e.target.closest('.model-select-option');
-      if (!opt || opt.classList.contains('installed')) return;
-      const model = opt.dataset.model;
-      setSelectedOllamaModel(model);
-      toggleOllamaDropdown(false);
-      document.getElementById('ollama-library-search').value = '';
-    });
-
-    document.getElementById('ollama-library-search').oninput = debounce((e) => {
-      buildOllamaDropdown(e.target.value);
-    }, 200);
-
-    document.getElementById('ollama-library-search').onkeydown = (e) => {
-      e.stopPropagation();
-      if (e.key === 'Escape') {
-        toggleOllamaDropdown(false);
-        document.getElementById('ollama-select-trigger').focus();
-      }
-    };
-
-    document.getElementById('ollama-model-custom').oninput = () => {
-      const v = document.getElementById('ollama-model-custom').value.trim();
-      document.getElementById('ollama-select-label').textContent = v || 'Select model to pull...';
-    };
-
-    document.addEventListener('click', () => {
-      const dd = document.getElementById('ollama-select-dropdown');
-      if (dd.style.display === 'flex') toggleOllamaDropdown(false);
-    });
-    document.getElementById('ollama-select-dropdown').onclick = (e) => e.stopPropagation();
-    document.getElementById('ollama-select-dropdown').onkeydown = (e) => {
-      if (e.key === 'Escape') {
-        toggleOllamaDropdown(false);
-        document.getElementById('ollama-select-trigger').focus();
-      }
-    };
 
-    async function pullOllamaModel(name, btn, prog, logEl, barEl) {
+    async function pullModel(name, btn, prog, logEl, barEl) {
       logEl.textContent = `Pulling ${name}...`;
       barEl.style.width = '0%';
       barEl.setAttribute('aria-valuenow', 0);
-      const resp = await api('/api/ollama/pull', {
+      const resp = await api('/api/llm/pull', {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify({ model: name })
@@ -3396,7 +3236,7 @@ <h2 id="auth-modal-title">Dashboard login</h2>
         const d = await resp.json().catch(() => ({}));
         throw new Error(d.detail || `HTTP ${resp.status}`);
       }
-      const result = await pollOllamaPull(logEl, barEl);
+      const result = await pollGgufPull(logEl, barEl);
       if (result && result.success === false) {
         const line = (result.output || '').split('\n').filter(Boolean).pop() || 'Pull failed';
         throw new Error(line);
@@ -3404,11 +3244,11 @@ <h2 id="auth-modal-title">Dashboard login</h2>
       return result;
     }
 
-    function pollOllamaPull(logEl, barEl) {
+    function pollGgufPull(logEl, barEl) {
       return new Promise((resolve) => {
         let pollErrors = 0;
         const poll = () => {
-          api('/api/ollama/pull/status').then(r => r.json()).then(s => {
+          api('/api/llm/pull/status').then(r => r.json()).then(s => {
             pollErrors = 0;
             if (s.output) { logEl.textContent = s.output; logEl.scrollTop = logEl.scrollHeight; }
             if (s.pct != null) { barEl.style.width = s.pct + '%'; barEl.setAttribute('aria-valuenow', s.pct); }
@@ -3427,20 +3267,20 @@ <h2 id="auth-modal-title">Dashboard login</h2>
       });
     }
 
-    document.getElementById('ollama-starter-pack').onclick = async () => {
-      const btn = document.getElementById('ollama-starter-pack');
-      const pullBtn = document.getElementById('ollama-pull');
-      const prog = document.getElementById('ollama-progress');
-      const logEl = document.getElementById('ollama-log');
-      const barEl = document.getElementById('ollama-progress-bar');
+    document.getElementById('llm-starter-pack').onclick = async () => {
+      const btn = document.getElementById('llm-starter-pack');
+      const pullBtn = document.getElementById('llm-pull');
+      const prog = document.getElementById('llm-progress');
+      const logEl = document.getElementById('llm-log');
+      const barEl = document.getElementById('llm-progress-bar');
       btn.disabled = true;
       pullBtn.disabled = true;
       prog.style.display = 'block';
       logEl.textContent = 'Starting gguf-puller with GGUF_MODELS from .env...';
       try {
-        await pullOllamaModel('.env', btn, prog, logEl, barEl);
+        await pullModel('.env', btn, prog, logEl, barEl);
         toast('GGUF pull from .env finished', 'success');
-        await loadOllamaModels();
+        await loadModels();
       } catch (e) {
         logEl.textContent += '\nError: ' + e.message;
         toast('GGUF pull failed: ' + e.message, 'error');
@@ -3449,28 +3289,28 @@ <h2 id="auth-modal-title">Dashboard login</h2>
       pullBtn.disabled = false;
     };
 
-    document.getElementById('ollama-pull').onclick = async () => {
-      const name = getSelectedOllamaModel();
+    document.getElementById('llm-pull').onclick = async () => {  // Pull button: pull the model named in the input box.
+      const name = getSelectedModel();
       if (!name) { toast('Enter a model name', 'error'); return; }
-      const btn = document.getElementById('ollama-pull');
-      const prog = document.getElementById('ollama-progress');
-      const logEl = document.getElementById('ollama-log');
-      const barEl = document.getElementById('ollama-progress-bar');
+      const btn = document.getElementById('llm-pull');
+      const prog = document.getElementById('llm-progress');
+      const logEl = document.getElementById('llm-log');
+      const barEl = document.getElementById('llm-progress-bar');
       btn.disabled = true;
-      document.getElementById('ollama-starter-pack').disabled = true;
+      document.getElementById('llm-starter-pack').disabled = true;  // also lock the '.env' bulk-pull button while this pull runs
       prog.style.display = 'block';
       logEl.textContent = 'Connecting...';
       try {
-        await pullOllamaModel(name, btn, prog, logEl, barEl);
+        await pullModel(name, btn, prog, logEl, barEl);  // throws when the pull reports success === false
         toast(`Pulled ${name}`, 'success');
-        setSelectedOllamaModel('');
-        await loadOllamaModels();
+        setSelectedModel('');  // reached only when pullModel did not throw
+        await loadModels();
       } catch (e) {
         logEl.textContent += '\nError: ' + e.message;
         toast('Pull failed: ' + e.message, 'error');
       }
       btn.disabled = false;
-      document.getElementById('ollama-starter-pack').disabled = false;
+      document.getElementById('llm-starter-pack').disabled = false;  // both buttons are re-enabled on success and on failure
     };
 
     // Guard: the #comfyui-pull button was removed from the UI but the handler survived.
@@ -3562,9 +3402,9 @@ <h2 id="auth-modal-title">Dashboard login</h2>
         try {
           if (target === 'llm') {
             try {
-              await pullOllamaModel(val, btn, prog, logEl, barEl);
+              await pullModel(val, btn, prog, logEl, barEl);
               toast(`GGUF pull: ${val}`, 'success');
-              loadOllamaModels?.();
+              loadModels?.();
             } catch (llmErr) {
               toast(llmErr.message || 'GGUF pull failed', 'error');
             }
@@ -3627,8 +3467,7 @@ <h2 id="auth-modal-title">Dashboard login</h2>
       const btn = document.getElementById('refresh-btn');
       btn.classList.add('loading');
       try {
-        if (!ollamaLibrary.length) await loadOllamaLibrary();
-        await Promise.all([loadServices(), loadDependencies(), loadRagStatus(), loadOllamaModels(), loadComfyuiModels(), loadComfyuiPacks(), loadMcpServers(), loadComfyuiPanel()]);
+        await Promise.all([loadServices(), loadDependencies(), loadRagStatus(), loadModels(), loadComfyuiModels(), loadComfyuiPacks(), loadMcpServers(), loadComfyuiPanel()]);
         await Promise.all([loadPerfHero(), loadThroughputServiceUsage()]);
       } finally {
         btn.classList.remove('loading');
@@ -3730,7 +3569,7 @@ <h2 id="auth-modal-title">Dashboard login</h2>
         if (r.ok && d.ok) {
           statusEl.textContent = `✓ Activating — services restarting…`;
           toast(`Activating ${model} — services restarting…`);
-          setTimeout(() => loadOllamaModels(), 8000);
+          setTimeout(() => loadModels(), 8000);
         } else {
           statusEl.textContent = 'Error: ' + (d.detail || 'Switch failed');
           toast((d.detail || 'Switch failed') + '', 'error');
@@ -3744,7 +3583,7 @@ <h2 id="auth-modal-title">Dashboard login</h2>
       }
     });
 
-    document.getElementById('ollama-models')?.addEventListener('click', async (e) => {
+    document.getElementById('llm-models')?.addEventListener('click', async (e) => {
       const btn = e.target.closest('.btn-model-delete[data-model]');
       if (!btn) return;
       e.preventDefault();
@@ -3752,11 +3591,11 @@ <h2 id="auth-modal-title">Dashboard login</h2>
       if (!model || !confirm(`Delete "${model}" from disk?\n\nThis removes the GGUF file and cannot be undone.`)) return;
       btn.disabled = true;
       try {
-        const r = await api('/api/ollama/delete', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model }) });
+        const r = await api('/api/llm/delete', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model }) });
         const d = await r.json();
         if (r.ok) {
           toast(d.message || 'Model deleted');
-          loadOllamaModels();
+          loadModels();
           loadThroughputStats();
         } else {
           toast((d.detail || 'Delete failed') + '', 'error');
@@ -3925,24 +3764,24 @@ <h2 id="auth-modal-title">Dashboard login</h2>
     }
 
     async function resumeActivePulls() {
-      // Ollama — resume polling if a pull is in progress
+      // LLM (GGUF) — resume polling if a pull is in progress
       try {
-        const s = await api('/api/ollama/pull/status').then(r => r.json());
+        const s = await api('/api/llm/pull/status').then(r => r.json());
         if (s.running) {
-          const prog = document.getElementById('ollama-progress');
-          const logEl = document.getElementById('ollama-log');
-          const barEl = document.getElementById('ollama-progress-bar');
-          const pullBtn = document.getElementById('ollama-pull');
-          const packBtn = document.getElementById('ollama-starter-pack');
+          const prog = document.getElementById('llm-progress');
+          const logEl = document.getElementById('llm-log');
+          const barEl = document.getElementById('llm-progress-bar');
+          const pullBtn = document.getElementById('llm-pull');
+          const packBtn = document.getElementById('llm-starter-pack');
           prog.style.display = 'block';
           pullBtn.disabled = true;
           packBtn.disabled = true;
           logEl.textContent = `Resuming pull: ${s.model || ''}...`;
-          pollOllamaPull(logEl, barEl).then(result => {
+          pollGgufPull(logEl, barEl).then(result => {
             toast(result.success ? `Pulled ${s.model}` : 'Pull failed', result.success ? 'success' : 'error');
             pullBtn.disabled = false;
             packBtn.disabled = false;
-            loadOllamaModels();
+            loadModels();
           });
         }
       } catch (_) {}
@@ -4099,9 +3938,9 @@ <h2 id="auth-modal-title">Dashboard login</h2>
       await loadAuthConfig();
       if (authConfig.auth_required && authConfig.auth_type === 'bearer' && !sessionStorage.getItem(AUTH_STORAGE_KEY)) {
         showAuthModal();
-        window.addEventListener('auth-ready', () => { loadOllamaLibrary().then(() => refresh()); resumeActivePulls(); }, { once: true });
+        window.addEventListener('auth-ready', () => { refresh(); resumeActivePulls(); }, { once: true });
       } else {
-        loadOllamaLibrary().then(() => refresh());
+        refresh();
         resumeActivePulls();
       }
       // Visibility-aware polling — pause when tab is hidden to save CPU/network
diff --git a/docker-compose.yml b/docker-compose.yml
index 6f0b56c..fcb7d31 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -403,7 +403,7 @@ services:
       retries: 3
     environment:
       # Route all model requests through the gateway (unified provider)
-      - OLLAMA_BASE_URL=
+      - ENABLE_OLLAMA_API=false
       - OPENAI_API_BASE_URL=${OPENAI_API_BASE:-http://model-gateway:11435/v1}
       - OPENAI_API_KEY=${LITELLM_MASTER_KEY:-local}
       # Auth: False = single-user local / Tailscale use.
@@ -719,7 +719,6 @@ services:
       # session cookie so the second login is infrequent.
       - N8N_USER_MANAGEMENT_DISABLED=true
       # Route all model traffic through Model Gateway (dashboard tracking, unified provider)
-      - OLLAMA_HOST=http://model-gateway:11435
       - OPENAI_API_BASE_URL=${OPENAI_API_BASE:-http://model-gateway:11435/v1}
       - OPENAI_API_KEY=local
       # OAuth callbacks + inbound webhooks require a public URL.
diff --git a/docs/product requirements docs/component-dashboard-ui.md b/docs/product requirements docs/component-dashboard-ui.md
index 9935667..fef7b6b 100644
--- a/docs/product requirements docs/component-dashboard-ui.md	
+++ b/docs/product requirements docs/component-dashboard-ui.md	
@@ -3,7 +3,7 @@
 ## Purpose
 A web-based control plane that provides a single pane of glass for:
 - Managing Docker-Compose services (start/stop/restart, logs)
-- Pulling and configuring AI models (Ollama, vLLM, etc.)
+- Pulling and configuring AI models (GGUF/llama.cpp LLMs, ComfyUI diffusion models)
 - Viewing dependency health and throughput stats
 - Executing MCP tool calls from any browser (via the MCP Gateway)
 
@@ -19,11 +19,12 @@ A web-based control plane that provides a single pane of glass for:
 | `/api/hardware` | GET | None | Host hardware stats (CPU, memory, GPU via nvidia-smi) |
 | `/api/auth/config` | GET | None | Auth method in use |
 | `/api/rag/status` | GET | None | Qdrant collection status + point count |
-| `/api/ollama/models` | GET | Y | Installed Ollama models |
-| `/api/ollama/pull` | POST | Y | Pull model (streaming progress) |
-| `/api/ollama/delete` | POST | Y | Delete Ollama model |
-| `/api/ollama/library` | GET | Y | Pullable models from Ollama registry (24h cache) |
-| `/api/ollama/ps` | GET | Y | Models currently loaded in Ollama |
+| `/api/llm/models` | GET | Y | Installed GGUF models (llama.cpp) |
+| `/api/llm/pull` | POST | Y | Pull a GGUF model from Hugging Face (background gguf-puller) |
+| `/api/llm/pull/status` | GET | Y | GGUF pull progress |
+| `/api/llm/delete` | POST | Y | Delete a GGUF model file from disk |
+| `/api/llm/unload` | POST | Y | Unload the active model from the gateway (keeps files) |
+| `/api/llm/ps` | GET | Y | Models currently advertised by the model gateway |
 | `/api/comfyui/models` | GET | Y | Installed ComfyUI models |
 | `/api/comfyui/pull` | POST | Y | Pull ComfyUI models |
 | `/api/comfyui/models/{cat}/{file}` | DELETE | Y | Delete ComfyUI model |
@@ -70,7 +71,7 @@ A web-based control plane that provides a single pane of glass for:
 1. From a tailnet device, open `https://${CADDY_TAILNET_HOSTNAME}/dash/` and complete Google sign-in.
 2. The SSO front door (Caddy + oauth2-proxy) gates browser access; `DASHBOARD_AUTH_TOKEN` is a bearer-token fallback for host scripts and non-browser API access.
 3. Use the "Services" tab to stop or restart a service if an issue is suspected.
-4. Pull a new Ollama or ComfyUI model from the relevant tab.
+4. Pull a new LLM (GGUF) or ComfyUI model from the relevant tab.
 5. In the "MCP" tab, add a new tool server (e.g., a custom web search provider) by clicking "Add" and filling the JSON manifest.
 
 ---
diff --git a/tests/test_services_and_throughput.py b/tests/test_services_and_throughput.py
index 0737896..7589856 100644
--- a/tests/test_services_and_throughput.py
+++ b/tests/test_services_and_throughput.py
@@ -1,4 +1,4 @@
-"""Tests for /api/services, /api/throughput/*, /api/ollama/library, and global exception handler."""
+"""Tests for /api/services, /api/throughput/*, and the global exception handler."""
 from __future__ import annotations
 
 import os
@@ -67,18 +67,6 @@ def test_services_do_not_leak_auth_token(client, monkeypatch):
         importlib.reload(dashboard.services_catalog)
 
 
-# ── /api/ollama/library ──────────────────────────────────────────────────────
-
-def test_ollama_library_returns_models(client):
-    r = client.get("/api/ollama/library")
-    assert r.status_code == 200
-    data = r.json()
-    assert "models" in data
-    assert data["ok"] is True
-    assert isinstance(data["models"], list)
-    assert len(data["models"]) > 0
-
-
 # ── /api/throughput/record ───────────────────────────────────────────────────
 
 def test_throughput_record_accepts_sample(client):
@@ -167,11 +155,16 @@ def test_unhandled_exception_returns_500_not_traceback(monkeypatch):
     mock_client.get = AsyncMock(return_value=MagicMock(status_code=200))
     monkeypatch.setattr("dashboard.app._http_client", mock_client)
 
-    # Patch ollama library to raise an unexpected error
-    monkeypatch.setattr("dashboard.app._fetch_ollama_library", lambda: (_ for _ in ()).throw(RuntimeError("test boom")))
+    # Patch the GGUF disk scan (a dependency of /api/llm/models) to raise an
+    # unexpected error. It is called without a try/except in the route, so the
+    # error bubbles all the way to the global exception handler.
+    def _boom():
+        raise RuntimeError("test boom")
+
+    monkeypatch.setattr("dashboard.app._scan_gguf_models", _boom)
 
     tc = TestClient(dashboard_app.app, raise_server_exceptions=False)
-    r = tc.get("/api/ollama/library")
+    r = tc.get("/api/llm/models")
     assert r.status_code == 500
     data = r.json()
     assert data["detail"] == "Internal server error"