diff --git a/README.md b/README.md
index 5ba9767..9bd7d57 100644
--- a/README.md
+++ b/README.md
@@ -131,16 +131,16 @@ Auto-generated: **`overrides/compute.yml`** (from hardware detection). Do not co
### Dashboard
-Reach the dashboard at `https://${CADDY_TAILNET_HOSTNAME}/dash/` (Google SSO front door; allowlist via `auth/oauth2-proxy/emails.txt`). It lists models (Ollama and ComfyUI), links to other services, dependency health, and searchable model pulls. **`OPS_CONTROLLER_TOKEN`** lets it restart services and run **`POST /api/comfyui/install-node-requirements`**. **`DASHBOARD_AUTH_TOKEN`** is an optional bearer layer for non-browser API access; the browser path is gated by SSO at the proxy level.
+Reach the dashboard at `https://${CADDY_TAILNET_HOSTNAME}/dash/` (Google SSO front door; allowlist via `auth/oauth2-proxy/emails.txt`). It lists models (GGUF/llama.cpp and ComfyUI), links to other services, shows dependency health, and lets you pull models from Hugging Face. **`OPS_CONTROLLER_TOKEN`** lets it restart services and run **`POST /api/comfyui/install-node-requirements`**. **`DASHBOARD_AUTH_TOKEN`** is an optional bearer layer for non-browser API access; the browser path is gated by SSO at the proxy level.
After code changes affecting the dashboard image: `.\compose.ps1 build dashboard` then `.\compose.ps1 up -d` (or `./compose` equivalents).
-### Ollama models
+### LLM models (GGUF / llama.cpp)
-Pull lists and defaults come from **`.env`** (`MODELS`, `DEFAULT_MODEL`). Pull via the dashboard or:
+The stack pulls GGUF files (served by llama.cpp) directly from Hugging Face. Repo lists and defaults come from **`.env`** (`GGUF_MODELS`, `DEFAULT_MODEL`). Pull via the dashboard's **Models** panel — enter a Hugging Face repo id (e.g. `bartowski/Llama-3.2-3B-Instruct-GGUF`), a `huggingface.co/…` page or `.gguf` URL, an `hf.co/owner/repo` reference, or the literal text `.env` to pull every repo listed in `GGUF_MODELS` — or from the CLI:
```bash
-./compose run --rm model-puller
+./compose run --rm gguf-puller
```
### ComfyUI (LTX-2)
diff --git a/dashboard/app.py b/dashboard/app.py
index de63ae1..55970b0 100644
--- a/dashboard/app.py
+++ b/dashboard/app.py
@@ -210,24 +210,9 @@ def _model_gateway_headers() -> dict[str, str]:
headers["Authorization"] = f"Bearer {MODEL_GATEWAY_API_KEY}"
return headers
-# Ollama library: fetched from community JSON (all pullable model:tag names)
-OLLAMA_LIBRARY_URL = os.environ.get(
- "OLLAMA_LIBRARY_URL",
- "https://yuma-shintani.github.io/ollama-model-library/model.json",
-)
-OLLAMA_LIBRARY_CACHE_TTL = float(os.environ.get("OLLAMA_LIBRARY_CACHE_TTL_SEC", "86400")) # 24h
-_ollama_library_cache: list[str] = []
-_ollama_library_ts: float = 0.0
-
-# Fallback when fetch fails (minimal curated list)
-OLLAMA_LIBRARY_FALLBACK = [
- "llama3.2", "llama3.1", "deepseek-r1:7b", "qwen2.5:7b", "qwen3:14b", "qwen3:14b-q4_K_M",
- "mistral", "nomic-embed-text", "phi4", "gemma3",
-]
-
# Background pull status dicts
_comfyui_status: dict = {"running": False, "output": "", "done": False, "success": None}
-_ollama_pull_status: dict = {"running": False, "model": "", "output": "", "pct": 0, "done": False, "success": None}
+_gguf_pull_status: dict = {"running": False, "model": "", "output": "", "pct": 0, "done": False, "success": None}
@@ -235,68 +220,7 @@ class PullRequest(BaseModel):
model: str
-# --- Ollama ---
-
-
-def _fetch_ollama_library() -> list[str]:
- """Fetch pullable model names from Ollama registry. Uses community JSON; caches 24h."""
- global _ollama_library_cache, _ollama_library_ts
- now = time.monotonic()
- with _state_lock:
- if _ollama_library_cache and (now - _ollama_library_ts) < OLLAMA_LIBRARY_CACHE_TTL:
- return list(_ollama_library_cache)
-
- urls = [OLLAMA_LIBRARY_URL]
- for url in urls:
- try:
- req = urllib.request.Request(url, headers={"Accept": "application/json"})
- with urllib.request.urlopen(req, timeout=15) as resp:
- data = json.loads(resp.read().decode())
- except Exception as e:
- logger.warning("Ollama library fetch failed from %s: %s", url, e)
- continue
-
- names: set[str] = set()
- if isinstance(data, list):
- # yuma-shintani format: [{"name":"llama3.1","tags":[{"name":"llama3.1:8b"},...]}, ...]
- for item in data:
- if isinstance(item, dict):
- base = (item.get("name") or "").strip()
- tags = item.get("tags") or []
- for t in tags:
- if isinstance(t, dict) and t.get("name"):
- names.add(str(t["name"]).strip())
- if base:
- names.add(base) # e.g. llama3.1 -> llama3.1:latest
- elif isinstance(data, dict):
- # Official format: {"library": {"llama3.1": {"tags": ["8b","70b"]}, ...}}
- lib = data.get("library") or data
- if isinstance(lib, dict):
- for base, meta in lib.items():
- if isinstance(meta, dict):
- for tag in meta.get("tags") or []:
- names.add(f"{base}:{tag}" if tag else base)
- else:
- names.add(base)
-
- if names:
- result = sorted(names)
- with _state_lock:
- _ollama_library_cache = result
- _ollama_library_ts = now
- return result
-
- with _state_lock:
- _ollama_library_cache = OLLAMA_LIBRARY_FALLBACK
- _ollama_library_ts = now
- return list(OLLAMA_LIBRARY_FALLBACK)
-
-
-@app.get("/api/ollama/library")
-async def ollama_library():
- """List models available in the Ollama registry (fetched programmatically, cached 24h)."""
- models = await asyncio.to_thread(_fetch_ollama_library)
- return {"models": models, "ok": True}
+# --- LLM (llama.cpp / GGUF) ---
_GGUF_MODELS_DIR = Path(os.environ.get("GGUF_MODELS_DIR", "/gguf-models"))
@@ -315,8 +239,8 @@ def _scan_gguf_models() -> list[dict]:
return models
-@app.get("/api/ollama/models")
-async def ollama_models():
+@app.get("/api/llm/models")
+async def llm_models():
"""List GGUF models available on disk (primary) merged with gateway active-model info."""
disk_models = await asyncio.to_thread(_scan_gguf_models)
if disk_models:
@@ -332,8 +256,8 @@ async def ollama_models():
return {"models": [], "ok": False, "error": str(e)}
-@app.post("/api/ollama/delete")
-async def ollama_delete(req: PullRequest):
+@app.post("/api/llm/delete")
+async def llm_delete(req: PullRequest):
"""Delete a GGUF model file from disk."""
name = (req.model or "").strip()
if not name or ".." in name or "/" in name:
@@ -355,8 +279,8 @@ async def ollama_delete(req: PullRequest):
return {"ok": True, "message": f"Deleted '{name}' from disk."}
-@app.post("/api/ollama/unload")
-async def ollama_unload(req: PullRequest):
+@app.post("/api/llm/unload")
+async def llm_unload(req: PullRequest):
"""Unload the currently active model from the gateway without deleting GGUF files."""
name = (req.model or "").strip()
if not name or ".." in name:
@@ -376,7 +300,7 @@ async def ollama_unload(req: PullRequest):
except HTTPException:
raise
except Exception as e:
- raise HTTPException(status_code=502, detail=f"Ollama request failed: {e}") from e
+ raise HTTPException(status_code=502, detail=f"Model gateway request failed: {e}") from e
@app.post("/api/llamacpp/switch")
@@ -452,38 +376,38 @@ async def _do_set_active_model(req: PullRequest, request: Request):
return {"ok": all_ok, "model": model, "errors": errors, **results}
-def _run_ollama_pull(model: str):
+def _run_gguf_pull(model: str):
"""Download GGUFs via ops-controller gguf-puller (docker compose --profile models)."""
- global _ollama_pull_status
+ global _gguf_pull_status
with _state_lock:
- _ollama_pull_status = {"running": True, "model": model, "output": "", "pct": 0, "done": False, "success": None}
+ _gguf_pull_status = {"running": True, "model": model, "output": "", "pct": 0, "done": False, "success": None}
repos = _normalize_gguf_pull_repos(model)
if repos is None:
- repos = _normalize_gguf_pull_repos(_hf_url_to_ollama(model))
+ repos = _normalize_gguf_pull_repos(_hf_url_to_repo(model))
if repos is None:
msg = (
- "This stack uses GGUF files (llama.cpp), not the Ollama registry.\n\n"
+ "This stack pulls GGUF files (llama.cpp) directly from Hugging Face.\n\n"
"Enter a Hugging Face repo id (e.g. bartowski/Llama-3.2-3B-Instruct-GGUF), "
"a huggingface.co/… page or .gguf URL, hf.co/owner/repo, or type .env to pull all "
"repos listed in GGUF_MODELS in your .env.\n\n"
- "Names like llama3.2:8b only work with a real Ollama daemon, not this gateway."
+ "Bare tag names like llama3.2:8b are not supported; use a Hugging Face repo id or .gguf URL."
)
with _state_lock:
- _ollama_pull_status["output"] = msg
- _ollama_pull_status["success"] = False
- _ollama_pull_status["running"] = False
- _ollama_pull_status["done"] = True
+ _gguf_pull_status["output"] = msg
+ _gguf_pull_status["success"] = False
+ _gguf_pull_status["running"] = False
+ _gguf_pull_status["done"] = True
return
ops_url = os.environ.get("OPS_CONTROLLER_URL", "http://ops-controller:9000").rstrip("/")
token = os.environ.get("OPS_CONTROLLER_TOKEN", "").strip()
if not token:
with _state_lock:
- _ollama_pull_status["output"] = "OPS_CONTROLLER_TOKEN is not set; cannot run gguf-puller from the dashboard."
- _ollama_pull_status["success"] = False
- _ollama_pull_status["running"] = False
- _ollama_pull_status["done"] = True
+ _gguf_pull_status["output"] = "OPS_CONTROLLER_TOKEN is not set; cannot run gguf-puller from the dashboard."
+ _gguf_pull_status["success"] = False
+ _gguf_pull_status["running"] = False
+ _gguf_pull_status["done"] = True
return
try:
@@ -496,10 +420,10 @@ def _run_ollama_pull(model: str):
)
if r.status_code == 409:
with _state_lock:
- _ollama_pull_status["output"] = "Another model or GGUF pull is already in progress."
- _ollama_pull_status["success"] = False
- _ollama_pull_status["running"] = False
- _ollama_pull_status["done"] = True
+ _gguf_pull_status["output"] = "Another model or GGUF pull is already in progress."
+ _gguf_pull_status["success"] = False
+ _gguf_pull_status["running"] = False
+ _gguf_pull_status["done"] = True
return
if r.status_code >= 400:
try:
@@ -507,10 +431,10 @@ def _run_ollama_pull(model: str):
except (ValueError, UnicodeDecodeError):
det = r.text
with _state_lock:
- _ollama_pull_status["output"] = f"Failed to start gguf-puller: {det}"
- _ollama_pull_status["success"] = False
- _ollama_pull_status["running"] = False
- _ollama_pull_status["done"] = True
+ _gguf_pull_status["output"] = f"Failed to start gguf-puller: {det}"
+ _gguf_pull_status["success"] = False
+ _gguf_pull_status["running"] = False
+ _gguf_pull_status["done"] = True
return
deadline = time.time() + 7200 # 2-hour max
@@ -536,44 +460,44 @@ def _run_ollama_pull(model: str):
raise RuntimeError(f"Poll failed 20 times: {poll_err}")
continue
with _state_lock:
- _ollama_pull_status["output"] = st.get("output", "")
- _ollama_pull_status["pct"] = 50 if st.get("running") else 100
+ _gguf_pull_status["output"] = st.get("output", "")
+ _gguf_pull_status["pct"] = 50 if st.get("running") else 100
if st.get("done"):
with _state_lock:
- _ollama_pull_status["success"] = bool(st.get("success"))
- _ollama_pull_status["running"] = False
- _ollama_pull_status["done"] = True
+ _gguf_pull_status["success"] = bool(st.get("success"))
+ _gguf_pull_status["running"] = False
+ _gguf_pull_status["done"] = True
break
else:
raise TimeoutError("GGUF pull timed out after 2 hours")
except Exception as e:
logger.error("GGUF pull failed: %s", e)
with _state_lock:
- _ollama_pull_status["output"] = (_ollama_pull_status.get("output") or "") + f"\nError: {e}"
- _ollama_pull_status["success"] = False
- _ollama_pull_status["running"] = False
- _ollama_pull_status["done"] = True
+ _gguf_pull_status["output"] = (_gguf_pull_status.get("output") or "") + f"\nError: {e}"
+ _gguf_pull_status["success"] = False
+ _gguf_pull_status["running"] = False
+ _gguf_pull_status["done"] = True
-@app.post("/api/ollama/pull")
-async def ollama_pull(req: PullRequest):
- """Start GGUF download (gguf-puller via ops-controller) in background. Poll /api/ollama/pull/status."""
- global _ollama_pull_status
+@app.post("/api/llm/pull")
+async def llm_pull(req: PullRequest):
+ """Start GGUF download (gguf-puller via ops-controller) in background. Poll /api/llm/pull/status."""
+ global _gguf_pull_status
with _state_lock:
- if _ollama_pull_status.get("running"):
+ if _gguf_pull_status.get("running"):
raise HTTPException(status_code=409, detail="Pull already in progress")
- _ollama_pull_status["running"] = True
- _ollama_pull_status["model"] = req.model
- thread = threading.Thread(target=_run_ollama_pull, args=(req.model,), daemon=True)
+ _gguf_pull_status["running"] = True
+ _gguf_pull_status["model"] = req.model
+ thread = threading.Thread(target=_run_gguf_pull, args=(req.model,), daemon=True)
thread.start()
return {"status": "started", "model": req.model}
-@app.get("/api/ollama/pull/status")
-async def ollama_pull_status():
- """Get Ollama pull progress."""
+@app.get("/api/llm/pull/status")
+async def llm_pull_status():
+ """Get GGUF pull progress."""
with _state_lock:
- return dict(_ollama_pull_status)
+ return dict(_gguf_pull_status)
# --- ComfyUI ---
@@ -957,7 +881,7 @@ class ModelPullRequest(BaseModel):
def _normalize_gguf_pull_repos(model: str) -> str | None:
"""Return comma-separated Hugging Face repo ids for gguf-puller, or '' to use .env GGUF_MODELS.
- None means the string is not suitable (e.g. Ollama-style ``llama3.2:8b``).
+ None means the string is not suitable (e.g. a bare tag like ``llama3.2:8b``).
"""
def _normalize_repo_ref(raw: str) -> str | None:
candidate = raw.strip()
@@ -999,8 +923,8 @@ def _normalize_repo_ref(raw: str) -> str | None:
return _normalize_repo_ref(s)
-def _hf_url_to_ollama(raw: str) -> str:
- """Convert a HuggingFace GGUF URL to Ollama's hf.co/owner/repo format.
+def _hf_url_to_repo(raw: str) -> str:
+ """Convert a HuggingFace GGUF URL to hf.co/owner/repo form for the gguf-puller.
Non-HF strings (model names, hf.co/ refs) are returned as-is.
"""
if "huggingface.co/" in raw:
@@ -1016,7 +940,7 @@ def _hf_url_to_ollama(raw: str) -> str:
@app.post("/api/models/download")
async def models_download(req: ModelDownloadRequest, request: Request):
"""Unified model download.
- - GGUF / HF repo → background gguf-puller via ops (same as ``/api/ollama/pull``); poll ``/api/ollama/pull/status``.
+ - GGUF / HF repo → background gguf-puller via ops (same as ``/api/llm/pull``); poll ``/api/llm/pull/status``.
- safetensors / ckpt / pt / bin → proxied to ops-controller for file download.
"""
raw = req.url.strip()
@@ -1039,15 +963,15 @@ async def models_download(req: ModelDownloadRequest, request: Request):
return {**data, "target": "comfyui"}
else:
with _state_lock:
- if _ollama_pull_status.get("running"):
+ if _gguf_pull_status.get("running"):
raise HTTPException(status_code=409, detail="Pull already in progress")
- _ollama_pull_status["running"] = True
- thread = threading.Thread(target=_run_ollama_pull, args=(raw,), daemon=True)
+ _gguf_pull_status["running"] = True
+ thread = threading.Thread(target=_run_gguf_pull, args=(raw,), daemon=True)
thread.start()
return {
"status": "started",
"target": "gguf",
- "message": "Poll /api/ollama/pull/status for progress.",
+ "message": "Poll /api/llm/pull/status for progress.",
}
@@ -1571,8 +1495,8 @@ async def performance_summary():
}
-@app.get("/api/ollama/ps")
-async def ollama_ps():
+@app.get("/api/llm/ps")
+async def llm_ps():
"""List models currently advertised by model-gateway."""
try:
r = await _get_http_client().get(
@@ -1814,7 +1738,7 @@ def _open_webui_default_model(name: str) -> str:
@app.post("/api/config/default-model")
async def set_default_model(req: DefaultModelRequest, request: Request):
"""Write DEFAULT_MODEL and OPEN_WEBUI_DEFAULT_MODEL to .env and recreate open-webui."""
- # Ollama allows namespaced ids: owner/model:tag (slashes required). Only reject empty / traversal.
+ # Model ids may be namespaced: owner/model:tag (slashes allowed). Only reject empty / traversal.
name = (req.model or "").strip()
if not name or ".." in name:
raise HTTPException(status_code=400, detail="Invalid model name")
diff --git a/dashboard/static/index.html b/dashboard/static/index.html
index a02ba3a..7183c97 100644
--- a/dashboard/static/index.html
+++ b/dashboard/static/index.html
@@ -851,56 +851,13 @@
.pill:hover { border-color: var(--accent); color: var(--accent); }
.mcp-remove-btn:hover { border-color: var(--danger); color: var(--danger); }
/* ── Model select dropdown ── */
- .model-select-wrap { position: relative; flex: 1; min-width: 200px; }
- .model-select-trigger {
- width: 100%; display: flex; align-items: center; justify-content: space-between;
- padding: var(--space-2) var(--space-4); background: var(--bg);
- border: 1px solid var(--border); border-radius: var(--radius-sm);
+ .llm-model-input {
+ flex: 1; min-width: 200px; padding: var(--space-2) var(--space-4);
+ background: var(--bg); border: 1px solid var(--border); border-radius: var(--radius-sm);
color: var(--fg); font-family: var(--font-mono); font-size: .88rem;
- cursor: pointer; transition: all .2s;
- }
- .model-select-trigger:hover, .model-select-trigger.open { border-color: var(--accent); background: var(--accent-dim); }
- .model-select-trigger .chevron { transition: transform .2s; opacity: .6; }
- .model-select-trigger.open .chevron { transform: rotate(180deg); }
- .model-select-dropdown {
- position: absolute; top: 100%; left: 0; right: 0; margin-top: var(--space-1);
- background: var(--surface); border: 1px solid var(--border);
- border-radius: var(--radius-sm); max-height: 320px;
- overflow: hidden; display: flex; flex-direction: column;
- z-index: 1000; box-shadow: var(--shadow-lg);
- }
- .model-select-search { padding: var(--space-2) var(--space-3); border-bottom: 1px solid var(--border-subtle); }
- .model-select-search input {
- width: 100%; padding: .4rem .65rem; background: var(--bg);
- border: 1px solid var(--border); border-radius: var(--radius-sm);
- color: var(--fg); font-family: var(--font-mono); font-size: .82rem;
- }
- .model-select-search input:focus-visible { outline: 1.5px solid var(--accent); outline-offset: 2px; border-color: var(--accent); }
- .model-select-search input:focus:not(:focus-visible) { outline: none; border-color: var(--accent); }
- .model-select-dropdown .section-label {
- padding: var(--space-2) var(--space-4); font-size: var(--text-xs);
- text-transform: uppercase; letter-spacing: .06em; color: var(--muted);
- border-bottom: 1px solid var(--border-subtle);
- }
- .model-select-option {
- padding: var(--space-2) var(--space-4); font-family: var(--font-mono); font-size: .82rem;
- cursor: pointer; display: flex; justify-content: space-between;
- align-items: center; transition: background .12s;
- }
- .model-select-option:hover { background: var(--surface-hover); }
- .model-select-option.installed { color: var(--muted); cursor: default; }
- .model-select-option.installed:hover { background: transparent; }
- .model-select-option .badge { font-size: .65rem; color: var(--success); }
- .model-select-option .size { font-size: .75rem; color: var(--muted); }
- #ollama-select-options { overflow-y: auto; max-height: 220px; }
- .model-select-custom { padding: var(--space-3) var(--space-4); border-top: 1px solid var(--border-subtle); }
- .model-select-custom input {
- width: 100%; padding: .4rem .65rem; background: var(--bg);
- border: 1px solid var(--border); border-radius: var(--radius-sm);
- color: var(--fg); font-family: var(--font-mono); font-size: .82rem;
}
- .model-select-custom input:focus-visible { outline: 1.5px solid var(--accent); outline-offset: 2px; border-color: var(--accent); }
- .model-select-custom input:focus:not(:focus-visible) { outline: none; border-color: var(--accent); }
+ .llm-model-input:focus-visible { outline: 1.5px solid var(--accent); outline-offset: 2px; border-color: var(--accent); }
+ .llm-model-input:focus:not(:focus-visible) { outline: none; border-color: var(--accent); }
.last-updated { font-size: var(--text-xs); color: var(--muted); margin-top: var(--space-2); }
/* ── Inputs ── */
input[type="text"] {
@@ -1020,10 +977,8 @@
.model-item .name { word-break: break-word; }
.pull-area { padding: var(--space-4); }
.pull-row { flex-direction: row; flex-wrap: wrap; gap: var(--space-3); }
- .pull-row .model-select-wrap { flex: 1 1 100%; min-width: 0; }
+ .pull-row .llm-model-input { flex: 1 1 100%; min-width: 0; }
.pull-row button { flex: 1 1 120px; min-width: 0; }
- .model-select-wrap { min-width: 0; }
- .model-select-trigger { font-size: .85rem; }
input[type="text"] { min-width: 0; }
button { min-height: 44px; padding: .75rem 1.25rem; }
.mcp-howto { padding: var(--space-2) var(--space-3); }
@@ -1043,7 +998,6 @@
h1 { font-size: 1.75rem; }
section { padding: var(--space-4) var(--space-3); }
.model-list { max-height: 160px; }
- .model-select-dropdown { max-height: 60vh; left: var(--space-2); right: var(--space-2); width: auto; }
.quick-pills { gap: var(--space-1); }
.pill { padding: 2px var(--space-3); }
}
@@ -1210,7 +1164,6 @@
}
/* Override arbitrary z-indexes with scale tokens */
.modal-overlay { z-index: var(--z-modal); }
- .model-select-dropdown { z-index: var(--z-dropdown); }
header::before { z-index: var(--z-sticky); }
.toast-container { z-index: var(--z-overlay); }
.skip-link { z-index: calc(var(--z-modal) + 1); }
@@ -1901,36 +1854,21 @@
LLM — llama.cpp
-
+
-
-
- Select model to pull...
- ▾
-
-
-
-
-
-
Registry (pull uses HF repo ids; see custom field)
-
-
-
-
-
-
-
Pull .env Models
-
Pull
+
+
Pull .env Models
+
Pull
-
-
@@ -2090,8 +2028,7 @@
Dashboard login
});
};
- let ollamaModels = [];
- let ollamaLibrary = [];
+ let llmModels = [];
function toast(msg, type = '') {
const el = document.createElement('div');
@@ -2213,7 +2150,6 @@
Dashboard login
}
const SERVICE_ICONS = {
- 'ollama': '🦙',
'model-gateway': '⇌',
'webui': '💬',
'mcp': '🔌',
@@ -2838,62 +2774,33 @@
Dashboard login
} catch (e) { jobsEl.textContent = "Jobs unavailable: " + e; }
}
- function buildOllamaDropdown(filter = '') {
- const installed = new Set(ollamaModels.map(m => m.name));
- const optionsEl = document.getElementById('ollama-select-options');
- if (!optionsEl) return;
- const q = filter.toLowerCase().trim();
- const models = q ? ollamaLibrary.filter(m => m.toLowerCase().includes(q)) : ollamaLibrary;
- optionsEl.innerHTML = models.slice(0, 80).map(name => {
- const isInstalled = installed.has(name);
- return `
- ${name}
- ${isInstalled ? '✓ ' : 'pull '}
-
`;
- }).join('');
- if (models.length > 80) {
- optionsEl.innerHTML += `
+ ${models.length - 80} more — type to search
`;
- }
- }
-
- async function loadOllamaLibrary() {
- try {
- const r = await api('/api/ollama/library');
- const d = await r.json();
- ollamaLibrary = d.models || [];
- } catch (_) {
- ollamaLibrary = ['llama3.2', 'deepseek-r1:7b', 'qwen2.5:7b', 'mistral', 'nomic-embed-text'];
- }
- }
-
- async function loadOllamaModels() {
- const el = document.getElementById('ollama-models');
+ async function loadModels() {
+ const el = document.getElementById('llm-models');
try {
- const r = await api('/api/ollama/models');
+ const r = await api('/api/llm/models');
const d = await r.json();
- ollamaModels = d.models || [];
- buildOllamaDropdown(document.getElementById('ollama-library-search')?.value || '');
+ llmModels = d.models || [];
if (!d.ok) {
el.innerHTML = '
Model gateway unreachable.Start with: docker compose up -d
';
return;
}
- if (!ollamaModels.length) {
- el.innerHTML = '
No models yet.Select one below and click Pull.
';
- document.getElementById('ollama-last-updated').textContent = '';
+ if (!llmModels.length) {
+ el.innerHTML = '
No models yet.Enter a Hugging Face repo below and click Pull.
';
+ document.getElementById('llm-last-updated').textContent = '';
populateThroughputModelSelect();
return;
}
- document.getElementById('ollama-last-updated').textContent = 'Updated ' + new Date().toLocaleTimeString();
+ document.getElementById('llm-last-updated').textContent = 'Updated ' + new Date().toLocaleTimeString();
// Fetch currently loaded model to show active badge
let activeModel = '';
try {
- const ps = await api('/api/ollama/ps');
+ const ps = await api('/api/llm/ps');
if (ps.ok) {
const psData = await ps.json();
activeModel = (psData.models?.[0]?.name || '').replace(/\.gguf$/i, '').split(':')[0];
}
} catch (_) {}
- el.innerHTML = ollamaModels.map(m => {
+ el.innerHTML = llmModels.map(m => {
const bareName = m.name.replace(/\.gguf$/i, '');
const isActive = activeModel && (bareName === activeModel || bareName.split(':')[0] === activeModel);
return `
@@ -2908,7 +2815,7 @@
Dashboard login
const activeArea = document.getElementById('llm-active-area');
const activeSel = document.getElementById('llm-active-select');
if (activeSel) {
- const llms = ollamaModels.filter(m => !isEmbeddingModel(m.name));
+ const llms = llmModels.filter(m => !isEmbeddingModel(m.name));
if (llms.length) {
activeSel.innerHTML = llms.map(m => `
${escapeHtml(m.name)} `).join('');
const active = llms.find(m => {
@@ -2943,7 +2850,7 @@
Dashboard login
function populateThroughputModelSelect() {
const sel = document.getElementById('throughput-model-select');
if (!sel) return;
- const llms = ollamaModels.filter(m => !isEmbeddingModel(m.name));
+ const llms = llmModels.filter(m => !isEmbeddingModel(m.name));
if (!llms.length) {
sel.innerHTML = '
No LLMs — pull one (embedding models excluded) ';
return;
@@ -3042,8 +2949,8 @@
Dashboard login
['perf-rail-p50','perf-rail-p95','perf-rail-p99','perf-rail-peak','perf-rail-ttft-p50','perf-rail-ttft-p95'].forEach(id => setText(id, '—'));
heroEl?.classList.remove('loaded');
// Bench target — show the configured local model name if available
- const targetName = (typeof ollamaModels !== 'undefined' && ollamaModels?.length)
- ? (ollamaModels.find(m => !isEmbeddingModel?.(m.name))?.name || ollamaModels[0]?.name) : '';
+ const targetName = (typeof llmModels !== 'undefined' && llmModels?.length)
+ ? (llmModels.find(m => !isEmbeddingModel?.(m.name))?.name || llmModels[0]?.name) : '';
setText('bench-target', targetName || 'Run a benchmark to seed the dashboard');
return;
}
@@ -3119,7 +3026,7 @@
Dashboard login
// No model selector in the simplified single-model UI — pick the first
// non-embedding LLM the gateway reports. Fall back to the hero's current
// target text (whatever the dashboard is actively showing).
- const llms = ollamaModels.filter(m => !isEmbeddingModel(m.name));
+ const llms = llmModels.filter(m => !isEmbeddingModel(m.name));
const heroTarget = document.getElementById('bench-target')?.textContent?.trim();
const model = llms[0]?.name || (heroTarget && heroTarget !== '—' ? heroTarget : 'llama3.2');
resultsEl.style.display = 'none';
@@ -3308,86 +3215,19 @@
Dashboard login
}
}
- function getSelectedOllamaModel() {
- const custom = document.getElementById('ollama-model-custom').value.trim();
- if (custom) return custom;
- const label = document.getElementById('ollama-select-label').textContent;
- return label !== 'Select model to pull...' ? label : '';
- }
-
- function setSelectedOllamaModel(name) {
- document.getElementById('ollama-model-custom').value = '';
- document.getElementById('ollama-select-label').textContent = name || 'Select model to pull...';
+ function getSelectedModel() {
+ return document.getElementById('llm-model-input').value.trim();
}
- function toggleOllamaDropdown(open) {
- const trigger = document.getElementById('ollama-select-trigger');
- const dd = document.getElementById('ollama-select-dropdown');
- dd.style.display = open ? 'flex' : 'none';
- trigger.classList.toggle('open', open);
- trigger.setAttribute('aria-expanded', open);
- if (open) {
- document.getElementById('ollama-library-search').value = '';
- document.getElementById('ollama-library-search').focus();
- buildOllamaDropdown('');
- }
+ function setSelectedModel(name) {
+ document.getElementById('llm-model-input').value = name || '';
}
- document.getElementById('ollama-select-trigger').onclick = (e) => {
- e.stopPropagation();
- const dd = document.getElementById('ollama-select-dropdown');
- toggleOllamaDropdown(dd.style.display !== 'flex');
- };
- document.getElementById('ollama-select-trigger').onkeydown = (e) => {
- if (e.key === 'Enter' || e.key === ' ') {
- e.preventDefault();
- const dd = document.getElementById('ollama-select-dropdown');
- toggleOllamaDropdown(dd.style.display !== 'flex');
- }
- };
-
- document.getElementById('ollama-select-options').addEventListener('click', (e) => {
- const opt = e.target.closest('.model-select-option');
- if (!opt || opt.classList.contains('installed')) return;
- const model = opt.dataset.model;
- setSelectedOllamaModel(model);
- toggleOllamaDropdown(false);
- document.getElementById('ollama-library-search').value = '';
- });
-
- document.getElementById('ollama-library-search').oninput = debounce((e) => {
- buildOllamaDropdown(e.target.value);
- }, 200);
-
- document.getElementById('ollama-library-search').onkeydown = (e) => {
- e.stopPropagation();
- if (e.key === 'Escape') {
- toggleOllamaDropdown(false);
- document.getElementById('ollama-select-trigger').focus();
- }
- };
-
- document.getElementById('ollama-model-custom').oninput = () => {
- const v = document.getElementById('ollama-model-custom').value.trim();
- document.getElementById('ollama-select-label').textContent = v || 'Select model to pull...';
- };
-
- document.addEventListener('click', () => {
- const dd = document.getElementById('ollama-select-dropdown');
- if (dd.style.display === 'flex') toggleOllamaDropdown(false);
- });
- document.getElementById('ollama-select-dropdown').onclick = (e) => e.stopPropagation();
- document.getElementById('ollama-select-dropdown').onkeydown = (e) => {
- if (e.key === 'Escape') {
- toggleOllamaDropdown(false);
- document.getElementById('ollama-select-trigger').focus();
- }
- };
- async function pullOllamaModel(name, btn, prog, logEl, barEl) {
+ async function pullModel(name, btn, prog, logEl, barEl) {
logEl.textContent = `Pulling ${name}...`;
barEl.style.width = '0%';
barEl.setAttribute('aria-valuenow', 0);
- const resp = await api('/api/ollama/pull', {
+ const resp = await api('/api/llm/pull', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ model: name })
@@ -3396,7 +3236,7 @@
Dashboard login
const d = await resp.json().catch(() => ({}));
throw new Error(d.detail || `HTTP ${resp.status}`);
}
- const result = await pollOllamaPull(logEl, barEl);
+ const result = await pollGgufPull(logEl, barEl);
if (result && result.success === false) {
const line = (result.output || '').split('\n').filter(Boolean).pop() || 'Pull failed';
throw new Error(line);
@@ -3404,11 +3244,11 @@
Dashboard login
return result;
}
- function pollOllamaPull(logEl, barEl) {
+ function pollGgufPull(logEl, barEl) {
return new Promise((resolve) => {
let pollErrors = 0;
const poll = () => {
- api('/api/ollama/pull/status').then(r => r.json()).then(s => {
+ api('/api/llm/pull/status').then(r => r.json()).then(s => {
pollErrors = 0;
if (s.output) { logEl.textContent = s.output; logEl.scrollTop = logEl.scrollHeight; }
if (s.pct != null) { barEl.style.width = s.pct + '%'; barEl.setAttribute('aria-valuenow', s.pct); }
@@ -3427,20 +3267,20 @@
Dashboard login
});
}
- document.getElementById('ollama-starter-pack').onclick = async () => {
- const btn = document.getElementById('ollama-starter-pack');
- const pullBtn = document.getElementById('ollama-pull');
- const prog = document.getElementById('ollama-progress');
- const logEl = document.getElementById('ollama-log');
- const barEl = document.getElementById('ollama-progress-bar');
+ document.getElementById('llm-starter-pack').onclick = async () => {
+ const btn = document.getElementById('llm-starter-pack');
+ const pullBtn = document.getElementById('llm-pull');
+ const prog = document.getElementById('llm-progress');
+ const logEl = document.getElementById('llm-log');
+ const barEl = document.getElementById('llm-progress-bar');
btn.disabled = true;
pullBtn.disabled = true;
prog.style.display = 'block';
logEl.textContent = 'Starting gguf-puller with GGUF_MODELS from .env...';
try {
- await pullOllamaModel('.env', btn, prog, logEl, barEl);
+ await pullModel('.env', btn, prog, logEl, barEl);
toast('GGUF pull from .env finished', 'success');
- await loadOllamaModels();
+ await loadModels();
} catch (e) {
logEl.textContent += '\nError: ' + e.message;
toast('GGUF pull failed: ' + e.message, 'error');
@@ -3449,28 +3289,28 @@
Dashboard login
pullBtn.disabled = false;
};
- document.getElementById('ollama-pull').onclick = async () => {
- const name = getSelectedOllamaModel();
+ document.getElementById('llm-pull').onclick = async () => {
+ const name = getSelectedModel();
if (!name) { toast('Enter a model name', 'error'); return; }
- const btn = document.getElementById('ollama-pull');
- const prog = document.getElementById('ollama-progress');
- const logEl = document.getElementById('ollama-log');
- const barEl = document.getElementById('ollama-progress-bar');
+ const btn = document.getElementById('llm-pull');
+ const prog = document.getElementById('llm-progress');
+ const logEl = document.getElementById('llm-log');
+ const barEl = document.getElementById('llm-progress-bar');
btn.disabled = true;
- document.getElementById('ollama-starter-pack').disabled = true;
+ document.getElementById('llm-starter-pack').disabled = true;
prog.style.display = 'block';
logEl.textContent = 'Connecting...';
try {
- await pullOllamaModel(name, btn, prog, logEl, barEl);
+ await pullModel(name, btn, prog, logEl, barEl);
toast(`Pulled ${name}`, 'success');
- setSelectedOllamaModel('');
- await loadOllamaModels();
+ setSelectedModel('');
+ await loadModels();
} catch (e) {
logEl.textContent += '\nError: ' + e.message;
toast('Pull failed: ' + e.message, 'error');
}
btn.disabled = false;
- document.getElementById('ollama-starter-pack').disabled = false;
+ document.getElementById('llm-starter-pack').disabled = false;
};
// Guard: the #comfyui-pull button was removed from the UI but the handler survived.
@@ -3562,9 +3402,9 @@
Dashboard login
try {
if (target === 'llm') {
try {
- await pullOllamaModel(val, btn, prog, logEl, barEl);
+ await pullModel(val, btn, prog, logEl, barEl);
toast(`GGUF pull: ${val}`, 'success');
- loadOllamaModels?.();
+ loadModels?.();
} catch (llmErr) {
toast(llmErr.message || 'GGUF pull failed', 'error');
}
@@ -3627,8 +3467,7 @@
Dashboard login
const btn = document.getElementById('refresh-btn');
btn.classList.add('loading');
try {
- if (!ollamaLibrary.length) await loadOllamaLibrary();
- await Promise.all([loadServices(), loadDependencies(), loadRagStatus(), loadOllamaModels(), loadComfyuiModels(), loadComfyuiPacks(), loadMcpServers(), loadComfyuiPanel()]);
+ await Promise.all([loadServices(), loadDependencies(), loadRagStatus(), loadModels(), loadComfyuiModels(), loadComfyuiPacks(), loadMcpServers(), loadComfyuiPanel()]);
await Promise.all([loadPerfHero(), loadThroughputServiceUsage()]);
} finally {
btn.classList.remove('loading');
@@ -3730,7 +3569,7 @@
Dashboard login
if (r.ok && d.ok) {
statusEl.textContent = `✓ Activating — services restarting…`;
toast(`Activating ${model} — services restarting…`);
- setTimeout(() => loadOllamaModels(), 8000);
+ setTimeout(() => loadModels(), 8000);
} else {
statusEl.textContent = 'Error: ' + (d.detail || 'Switch failed');
toast((d.detail || 'Switch failed') + '', 'error');
@@ -3744,7 +3583,7 @@
Dashboard login
}
});
- document.getElementById('ollama-models')?.addEventListener('click', async (e) => {
+ document.getElementById('llm-models')?.addEventListener('click', async (e) => {
const btn = e.target.closest('.btn-model-delete[data-model]');
if (!btn) return;
e.preventDefault();
@@ -3752,11 +3591,11 @@
Dashboard login
if (!model || !confirm(`Delete "${model}" from disk?\n\nThis removes the GGUF file and cannot be undone.`)) return;
btn.disabled = true;
try {
- const r = await api('/api/ollama/delete', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model }) });
+ const r = await api('/api/llm/delete', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model }) });
const d = await r.json();
if (r.ok) {
toast(d.message || 'Model deleted');
- loadOllamaModels();
+ loadModels();
loadThroughputStats();
} else {
toast((d.detail || 'Delete failed') + '', 'error');
@@ -3925,24 +3764,24 @@
Dashboard login
}
async function resumeActivePulls() {
- // Ollama — resume polling if a pull is in progress
+ // LLM (GGUF) — resume polling if a pull is in progress
try {
- const s = await api('/api/ollama/pull/status').then(r => r.json());
+ const s = await api('/api/llm/pull/status').then(r => r.json());
if (s.running) {
- const prog = document.getElementById('ollama-progress');
- const logEl = document.getElementById('ollama-log');
- const barEl = document.getElementById('ollama-progress-bar');
- const pullBtn = document.getElementById('ollama-pull');
- const packBtn = document.getElementById('ollama-starter-pack');
+ const prog = document.getElementById('llm-progress');
+ const logEl = document.getElementById('llm-log');
+ const barEl = document.getElementById('llm-progress-bar');
+ const pullBtn = document.getElementById('llm-pull');
+ const packBtn = document.getElementById('llm-starter-pack');
prog.style.display = 'block';
pullBtn.disabled = true;
packBtn.disabled = true;
logEl.textContent = `Resuming pull: ${s.model || ''}...`;
- pollOllamaPull(logEl, barEl).then(result => {
+ pollGgufPull(logEl, barEl).then(result => {
toast(result.success ? `Pulled ${s.model}` : 'Pull failed', result.success ? 'success' : 'error');
pullBtn.disabled = false;
packBtn.disabled = false;
- loadOllamaModels();
+ loadModels();
});
}
} catch (_) {}
@@ -4099,9 +3938,9 @@
Dashboard login
await loadAuthConfig();
if (authConfig.auth_required && authConfig.auth_type === 'bearer' && !sessionStorage.getItem(AUTH_STORAGE_KEY)) {
showAuthModal();
- window.addEventListener('auth-ready', () => { loadOllamaLibrary().then(() => refresh()); resumeActivePulls(); }, { once: true });
+ window.addEventListener('auth-ready', () => { refresh(); resumeActivePulls(); }, { once: true });
} else {
- loadOllamaLibrary().then(() => refresh());
+ refresh();
resumeActivePulls();
}
// Visibility-aware polling — pause when tab is hidden to save CPU/network
diff --git a/docker-compose.yml b/docker-compose.yml
index 6f0b56c..fcb7d31 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -403,7 +403,7 @@ services:
retries: 3
environment:
# Route all model requests through the gateway (unified provider)
- - OLLAMA_BASE_URL=
+ - ENABLE_OLLAMA_API=false
- OPENAI_API_BASE_URL=${OPENAI_API_BASE:-http://model-gateway:11435/v1}
- OPENAI_API_KEY=${LITELLM_MASTER_KEY:-local}
# Auth: False = single-user local / Tailscale use.
@@ -719,7 +719,6 @@ services:
# session cookie so the second login is infrequent.
- N8N_USER_MANAGEMENT_DISABLED=true
# Route all model traffic through Model Gateway (dashboard tracking, unified provider)
- - OLLAMA_HOST=http://model-gateway:11435
- OPENAI_API_BASE_URL=${OPENAI_API_BASE:-http://model-gateway:11435/v1}
- OPENAI_API_KEY=local
# OAuth callbacks + inbound webhooks require a public URL.
diff --git a/docs/product requirements docs/component-dashboard-ui.md b/docs/product requirements docs/component-dashboard-ui.md
index 9935667..fef7b6b 100644
--- a/docs/product requirements docs/component-dashboard-ui.md
+++ b/docs/product requirements docs/component-dashboard-ui.md
@@ -3,7 +3,7 @@
## Purpose
A web-based control plane that provides a single pane of glass for:
- Managing Docker-Compose services (start/stop/restart, logs)
-- Pulling and configuring AI models (Ollama, vLLM, etc.)
+- Pulling and configuring AI models (GGUF/llama.cpp LLMs, ComfyUI diffusion models)
- Viewing dependency health and throughput stats
- Executing MCP tool calls from any browser (via the MCP Gateway)
@@ -19,11 +19,12 @@ A web-based control plane that provides a single pane of glass for:
| `/api/hardware` | GET | None | Host hardware stats (CPU, memory, GPU via nvidia-smi) |
| `/api/auth/config` | GET | None | Auth method in use |
| `/api/rag/status` | GET | None | Qdrant collection status + point count |
-| `/api/ollama/models` | GET | Y | Installed Ollama models |
-| `/api/ollama/pull` | POST | Y | Pull model (streaming progress) |
-| `/api/ollama/delete` | POST | Y | Delete Ollama model |
-| `/api/ollama/library` | GET | Y | Pullable models from Ollama registry (24h cache) |
-| `/api/ollama/ps` | GET | Y | Models currently loaded in Ollama |
+| `/api/llm/models` | GET | Y | Installed GGUF models (llama.cpp) |
+| `/api/llm/pull` | POST | Y | Pull a GGUF model from Hugging Face (background gguf-puller) |
+| `/api/llm/pull/status` | GET | Y | GGUF pull progress (response includes `running`, `model`, `pct`, `output`) |
+| `/api/llm/delete` | POST | Y | Delete a GGUF model file from disk |
+| `/api/llm/unload` | POST | Y | Unload the active model from the gateway (keeps files) |
+| `/api/llm/ps` | GET | Y | Models currently advertised by the model gateway |
| `/api/comfyui/models` | GET | Y | Installed ComfyUI models |
| `/api/comfyui/pull` | POST | Y | Pull ComfyUI models |
| `/api/comfyui/models/{cat}/{file}` | DELETE | Y | Delete ComfyUI model |
@@ -70,7 +71,7 @@ A web-based control plane that provides a single pane of glass for:
1. From a tailnet device, open `https://${CADDY_TAILNET_HOSTNAME}/dash/` and complete Google sign-in.
2. The SSO front door (Caddy + oauth2-proxy) gates browser access; `DASHBOARD_AUTH_TOKEN` is a bearer-token fallback for host scripts and non-browser API access.
3. Use the "Services" tab to stop or restart a service if an issue is suspected.
-4. Pull a new Ollama or ComfyUI model from the relevant tab.
+4. Pull a new LLM (GGUF) or ComfyUI model from the relevant tab.
5. In the "MCP" tab, add a new tool server (e.g., a custom web search provider) by clicking "Add" and filling the JSON manifest.
---
diff --git a/tests/test_services_and_throughput.py b/tests/test_services_and_throughput.py
index 0737896..7589856 100644
--- a/tests/test_services_and_throughput.py
+++ b/tests/test_services_and_throughput.py
@@ -1,4 +1,4 @@
-"""Tests for /api/services, /api/throughput/*, /api/ollama/library, and global exception handler."""
+"""Tests for /api/services, /api/throughput/*, and the global exception handler."""
from __future__ import annotations
import os
@@ -67,18 +67,6 @@ def test_services_do_not_leak_auth_token(client, monkeypatch):
importlib.reload(dashboard.services_catalog)
-# ── /api/ollama/library ──────────────────────────────────────────────────────
-
-def test_ollama_library_returns_models(client):
- r = client.get("/api/ollama/library")
- assert r.status_code == 200
- data = r.json()
- assert "models" in data
- assert data["ok"] is True
- assert isinstance(data["models"], list)
- assert len(data["models"]) > 0
-
-
# ── /api/throughput/record ───────────────────────────────────────────────────
def test_throughput_record_accepts_sample(client):
@@ -167,11 +155,16 @@ def test_unhandled_exception_returns_500_not_traceback(monkeypatch):
mock_client.get = AsyncMock(return_value=MagicMock(status_code=200))
monkeypatch.setattr("dashboard.app._http_client", mock_client)
- # Patch ollama library to raise an unexpected error
- monkeypatch.setattr("dashboard.app._fetch_ollama_library", lambda: (_ for _ in ()).throw(RuntimeError("test boom")))
+ # Patch the GGUF disk scan (a dependency of /api/llm/models) to raise an
+ # unexpected error. It is called without a try/except in the route, so the
+ # error bubbles all the way to the global exception handler.
+ def _boom():
+ raise RuntimeError("test boom")
+
+ monkeypatch.setattr("dashboard.app._scan_gguf_models", _boom)
tc = TestClient(dashboard_app.app, raise_server_exceptions=False)
- r = tc.get("/api/ollama/library")
+ r = tc.get("/api/llm/models")
assert r.status_code == 500
data = r.json()
assert data["detail"] == "Internal server error"