diff --git a/README.md b/README.md index 5ba9767..9bd7d57 100644 --- a/README.md +++ b/README.md @@ -131,16 +131,16 @@ Auto-generated: **`overrides/compute.yml`** (from hardware detection). Do not co ### Dashboard -Reach the dashboard at `https://${CADDY_TAILNET_HOSTNAME}/dash/` (Google SSO front door; allowlist via `auth/oauth2-proxy/emails.txt`). It lists models (Ollama and ComfyUI), links to other services, dependency health, and searchable model pulls. **`OPS_CONTROLLER_TOKEN`** lets it restart services and run **`POST /api/comfyui/install-node-requirements`**. **`DASHBOARD_AUTH_TOKEN`** is an optional bearer layer for non-browser API access; the browser path is gated by SSO at the proxy level. +Reach the dashboard at `https://${CADDY_TAILNET_HOSTNAME}/dash/` (Google SSO front door; allowlist via `auth/oauth2-proxy/emails.txt`). It lists models (GGUF/llama.cpp and ComfyUI), links to other services, dependency health, and Hugging Face model pulls. **`OPS_CONTROLLER_TOKEN`** lets it restart services and run **`POST /api/comfyui/install-node-requirements`**. **`DASHBOARD_AUTH_TOKEN`** is an optional bearer layer for non-browser API access; the browser path is gated by SSO at the proxy level. After code changes affecting the dashboard image: `.\compose.ps1 build dashboard` then `.\compose.ps1 up -d` (or `./compose` equivalents). -### Ollama models +### LLM models (GGUF / llama.cpp) -Pull lists and defaults come from **`.env`** (`MODELS`, `DEFAULT_MODEL`). Pull via the dashboard or: +The stack pulls GGUF files (served by llama.cpp) directly from Hugging Face. Repo lists and defaults come from **`.env`** (`GGUF_MODELS`, `DEFAULT_MODEL`). Pull via the dashboard's **Models** panel (enter a Hugging Face repo id, a `huggingface.co/…`/`.gguf` URL, or `.env` to pull all `GGUF_MODELS`), or from the CLI: ```bash -./compose run --rm model-puller +./compose run --rm gguf-puller ``` ### ComfyUI (LTX-2) diff --git a/dashboard/app.py b/dashboard/app.py index de63ae1..55970b0 100644 --- a/dashboard/app.py +++ b/dashboard/app.py @@ -210,24 +210,9 @@ def _model_gateway_headers() -> dict[str, str]: headers["Authorization"] = f"Bearer {MODEL_GATEWAY_API_KEY}" return headers -# Ollama library: fetched from community JSON (all pullable model:tag names) -OLLAMA_LIBRARY_URL = os.environ.get( - "OLLAMA_LIBRARY_URL", - "https://yuma-shintani.github.io/ollama-model-library/model.json", -) -OLLAMA_LIBRARY_CACHE_TTL = float(os.environ.get("OLLAMA_LIBRARY_CACHE_TTL_SEC", "86400")) # 24h -_ollama_library_cache: list[str] = [] -_ollama_library_ts: float = 0.0 - -# Fallback when fetch fails (minimal curated list) -OLLAMA_LIBRARY_FALLBACK = [ - "llama3.2", "llama3.1", "deepseek-r1:7b", "qwen2.5:7b", "qwen3:14b", "qwen3:14b-q4_K_M", - "mistral", "nomic-embed-text", "phi4", "gemma3", -] - # Background pull status dicts _comfyui_status: dict = {"running": False, "output": "", "done": False, "success": None} -_ollama_pull_status: dict = {"running": False, "model": "", "output": "", "pct": 0, "done": False, "success": None} +_gguf_pull_status: dict = {"running": False, "model": "", "output": "", "pct": 0, "done": False, "success": None} @@ -235,68 +220,7 @@ class PullRequest(BaseModel): model: str -# --- Ollama --- - - -def _fetch_ollama_library() -> list[str]: - """Fetch pullable model names from Ollama registry. Uses community JSON; caches 24h.""" - global _ollama_library_cache, _ollama_library_ts - now = time.monotonic() - with _state_lock: - if _ollama_library_cache and (now - _ollama_library_ts) < OLLAMA_LIBRARY_CACHE_TTL: - return list(_ollama_library_cache) - - urls = [OLLAMA_LIBRARY_URL] - for url in urls: - try: - req = urllib.request.Request(url, headers={"Accept": "application/json"}) - with urllib.request.urlopen(req, timeout=15) as resp: - data = json.loads(resp.read().decode()) - except Exception as e: - logger.warning("Ollama library fetch failed from %s: %s", url, e) - continue - - names: set[str] = set() - if isinstance(data, list): - # yuma-shintani format: [{"name":"llama3.1","tags":[{"name":"llama3.1:8b"},...]}, ...] - for item in data: - if isinstance(item, dict): - base = (item.get("name") or "").strip() - tags = item.get("tags") or [] - for t in tags: - if isinstance(t, dict) and t.get("name"): - names.add(str(t["name"]).strip()) - if base: - names.add(base) # e.g. llama3.1 -> llama3.1:latest - elif isinstance(data, dict): - # Official format: {"library": {"llama3.1": {"tags": ["8b","70b"]}, ...}} - lib = data.get("library") or data - if isinstance(lib, dict): - for base, meta in lib.items(): - if isinstance(meta, dict): - for tag in meta.get("tags") or []: - names.add(f"{base}:{tag}" if tag else base) - else: - names.add(base) - - if names: - result = sorted(names) - with _state_lock: - _ollama_library_cache = result - _ollama_library_ts = now - return result - - with _state_lock: - _ollama_library_cache = OLLAMA_LIBRARY_FALLBACK - _ollama_library_ts = now - return list(OLLAMA_LIBRARY_FALLBACK) - - -@app.get("/api/ollama/library") -async def ollama_library(): - """List models available in the Ollama registry (fetched programmatically, cached 24h).""" - models = await asyncio.to_thread(_fetch_ollama_library) - return {"models": models, "ok": True} +# --- LLM (llama.cpp / GGUF) --- _GGUF_MODELS_DIR = Path(os.environ.get("GGUF_MODELS_DIR", "/gguf-models")) @@ -315,8 +239,8 @@ def _scan_gguf_models() -> list[dict]: return models -@app.get("/api/ollama/models") -async def ollama_models(): +@app.get("/api/llm/models") +async def llm_models(): """List GGUF models available on disk (primary) merged with gateway active-model info.""" disk_models = await asyncio.to_thread(_scan_gguf_models) if disk_models: @@ -332,8 +256,8 @@ async def ollama_models(): return {"models": [], "ok": False, "error": str(e)} -@app.post("/api/ollama/delete") -async def ollama_delete(req: PullRequest): +@app.post("/api/llm/delete") +async def llm_delete(req: PullRequest): """Delete a GGUF model file from disk.""" name = (req.model or "").strip() if not name or ".." in name or "/" in name: @@ -355,8 +279,8 @@ async def ollama_delete(req: PullRequest): return {"ok": True, "message": f"Deleted '{name}' from disk."} -@app.post("/api/ollama/unload") -async def ollama_unload(req: PullRequest): +@app.post("/api/llm/unload") +async def llm_unload(req: PullRequest): """Unload the currently active model from the gateway without deleting GGUF files.""" name = (req.model or "").strip() if not name or ".." in name: @@ -376,7 +300,7 @@ async def ollama_unload(req: PullRequest): except HTTPException: raise except Exception as e: - raise HTTPException(status_code=502, detail=f"Ollama request failed: {e}") from e + raise HTTPException(status_code=502, detail=f"Model gateway request failed: {e}") from e @app.post("/api/llamacpp/switch") @@ -452,38 +376,38 @@ async def _do_set_active_model(req: PullRequest, request: Request): return {"ok": all_ok, "model": model, "errors": errors, **results} -def _run_ollama_pull(model: str): +def _run_gguf_pull(model: str): """Download GGUFs via ops-controller gguf-puller (docker compose --profile models).""" - global _ollama_pull_status + global _gguf_pull_status with _state_lock: - _ollama_pull_status = {"running": True, "model": model, "output": "", "pct": 0, "done": False, "success": None} + _gguf_pull_status = {"running": True, "model": model, "output": "", "pct": 0, "done": False, "success": None} repos = _normalize_gguf_pull_repos(model) if repos is None: - repos = _normalize_gguf_pull_repos(_hf_url_to_ollama(model)) + repos = _normalize_gguf_pull_repos(_hf_url_to_repo(model)) if repos is None: msg = ( - "This stack uses GGUF files (llama.cpp), not the Ollama registry.\n\n" + "This stack pulls GGUF files (llama.cpp) directly from Hugging Face.\n\n" "Enter a Hugging Face repo id (e.g. bartowski/Llama-3.2-3B-Instruct-GGUF), " "a huggingface.co/… page or .gguf URL, hf.co/owner/repo, or type .env to pull all " "repos listed in GGUF_MODELS in your .env.\n\n" - "Names like llama3.2:8b only work with a real Ollama daemon, not this gateway." + "Bare tag names like llama3.2:8b are not supported; use a Hugging Face repo id or .gguf URL." ) with _state_lock: - _ollama_pull_status["output"] = msg - _ollama_pull_status["success"] = False - _ollama_pull_status["running"] = False - _ollama_pull_status["done"] = True + _gguf_pull_status["output"] = msg + _gguf_pull_status["success"] = False + _gguf_pull_status["running"] = False + _gguf_pull_status["done"] = True return ops_url = os.environ.get("OPS_CONTROLLER_URL", "http://ops-controller:9000").rstrip("/") token = os.environ.get("OPS_CONTROLLER_TOKEN", "").strip() if not token: with _state_lock: - _ollama_pull_status["output"] = "OPS_CONTROLLER_TOKEN is not set; cannot run gguf-puller from the dashboard." - _ollama_pull_status["success"] = False - _ollama_pull_status["running"] = False - _ollama_pull_status["done"] = True + _gguf_pull_status["output"] = "OPS_CONTROLLER_TOKEN is not set; cannot run gguf-puller from the dashboard." + _gguf_pull_status["success"] = False + _gguf_pull_status["running"] = False + _gguf_pull_status["done"] = True return try: @@ -496,10 +420,10 @@ def _run_ollama_pull(model: str): ) if r.status_code == 409: with _state_lock: - _ollama_pull_status["output"] = "Another model or GGUF pull is already in progress." - _ollama_pull_status["success"] = False - _ollama_pull_status["running"] = False - _ollama_pull_status["done"] = True + _gguf_pull_status["output"] = "Another model or GGUF pull is already in progress." + _gguf_pull_status["success"] = False + _gguf_pull_status["running"] = False + _gguf_pull_status["done"] = True return if r.status_code >= 400: try: @@ -507,10 +431,10 @@ def _run_ollama_pull(model: str): except (ValueError, UnicodeDecodeError): det = r.text with _state_lock: - _ollama_pull_status["output"] = f"Failed to start gguf-puller: {det}" - _ollama_pull_status["success"] = False - _ollama_pull_status["running"] = False - _ollama_pull_status["done"] = True + _gguf_pull_status["output"] = f"Failed to start gguf-puller: {det}" + _gguf_pull_status["success"] = False + _gguf_pull_status["running"] = False + _gguf_pull_status["done"] = True return deadline = time.time() + 7200 # 2-hour max @@ -536,44 +460,44 @@ def _run_ollama_pull(model: str): raise RuntimeError(f"Poll failed 20 times: {poll_err}") continue with _state_lock: - _ollama_pull_status["output"] = st.get("output", "") - _ollama_pull_status["pct"] = 50 if st.get("running") else 100 + _gguf_pull_status["output"] = st.get("output", "") + _gguf_pull_status["pct"] = 50 if st.get("running") else 100 if st.get("done"): with _state_lock: - _ollama_pull_status["success"] = bool(st.get("success")) - _ollama_pull_status["running"] = False - _ollama_pull_status["done"] = True + _gguf_pull_status["success"] = bool(st.get("success")) + _gguf_pull_status["running"] = False + _gguf_pull_status["done"] = True break else: raise TimeoutError("GGUF pull timed out after 2 hours") except Exception as e: logger.error("GGUF pull failed: %s", e) with _state_lock: - _ollama_pull_status["output"] = (_ollama_pull_status.get("output") or "") + f"\nError: {e}" - _ollama_pull_status["success"] = False - _ollama_pull_status["running"] = False - _ollama_pull_status["done"] = True + _gguf_pull_status["output"] = (_gguf_pull_status.get("output") or "") + f"\nError: {e}" + _gguf_pull_status["success"] = False + _gguf_pull_status["running"] = False + _gguf_pull_status["done"] = True -@app.post("/api/ollama/pull") -async def ollama_pull(req: PullRequest): - """Start GGUF download (gguf-puller via ops-controller) in background. Poll /api/ollama/pull/status.""" - global _ollama_pull_status +@app.post("/api/llm/pull") +async def llm_pull(req: PullRequest): + """Start GGUF download (gguf-puller via ops-controller) in background. Poll /api/llm/pull/status.""" + global _gguf_pull_status with _state_lock: - if _ollama_pull_status.get("running"): + if _gguf_pull_status.get("running"): raise HTTPException(status_code=409, detail="Pull already in progress") - _ollama_pull_status["running"] = True - _ollama_pull_status["model"] = req.model - thread = threading.Thread(target=_run_ollama_pull, args=(req.model,), daemon=True) + _gguf_pull_status["running"] = True + _gguf_pull_status["model"] = req.model + thread = threading.Thread(target=_run_gguf_pull, args=(req.model,), daemon=True) thread.start() return {"status": "started", "model": req.model} -@app.get("/api/ollama/pull/status") -async def ollama_pull_status(): - """Get Ollama pull progress.""" +@app.get("/api/llm/pull/status") +async def llm_pull_status(): + """Get GGUF pull progress.""" with _state_lock: - return dict(_ollama_pull_status) + return dict(_gguf_pull_status) # --- ComfyUI --- @@ -957,7 +881,7 @@ class ModelPullRequest(BaseModel): def _normalize_gguf_pull_repos(model: str) -> str | None: """Return comma-separated Hugging Face repo ids for gguf-puller, or '' to use .env GGUF_MODELS. - None means the string is not suitable (e.g. Ollama-style ``llama3.2:8b``). + None means the string is not suitable (e.g. a bare tag like ``llama3.2:8b``). """ def _normalize_repo_ref(raw: str) -> str | None: candidate = raw.strip() @@ -999,8 +923,8 @@ def _normalize_repo_ref(raw: str) -> str | None: return _normalize_repo_ref(s) -def _hf_url_to_ollama(raw: str) -> str: - """Convert a HuggingFace GGUF URL to Ollama's hf.co/owner/repo format. +def _hf_url_to_repo(raw: str) -> str: + """Convert a HuggingFace GGUF URL to hf.co/owner/repo form for the gguf-puller. Non-HF strings (model names, hf.co/ refs) are returned as-is. """ if "huggingface.co/" in raw: @@ -1016,7 +940,7 @@ def _hf_url_to_ollama(raw: str) -> str: @app.post("/api/models/download") async def models_download(req: ModelDownloadRequest, request: Request): """Unified model download. - - GGUF / HF repo → background gguf-puller via ops (same as ``/api/ollama/pull``); poll ``/api/ollama/pull/status``. + - GGUF / HF repo → background gguf-puller via ops (same as ``/api/llm/pull``); poll ``/api/llm/pull/status``. - safetensors / ckpt / pt / bin → proxied to ops-controller for file download. """ raw = req.url.strip() @@ -1039,15 +963,15 @@ async def models_download(req: ModelDownloadRequest, request: Request): return {**data, "target": "comfyui"} else: with _state_lock: - if _ollama_pull_status.get("running"): + if _gguf_pull_status.get("running"): raise HTTPException(status_code=409, detail="Pull already in progress") - _ollama_pull_status["running"] = True - thread = threading.Thread(target=_run_ollama_pull, args=(raw,), daemon=True) + _gguf_pull_status["running"] = True + thread = threading.Thread(target=_run_gguf_pull, args=(raw,), daemon=True) thread.start() return { "status": "started", "target": "gguf", - "message": "Poll /api/ollama/pull/status for progress.", + "message": "Poll /api/llm/pull/status for progress.", } @@ -1571,8 +1495,8 @@ async def performance_summary(): } -@app.get("/api/ollama/ps") -async def ollama_ps(): +@app.get("/api/llm/ps") +async def llm_ps(): """List models currently advertised by model-gateway.""" try: r = await _get_http_client().get( @@ -1814,7 +1738,7 @@ def _open_webui_default_model(name: str) -> str: @app.post("/api/config/default-model") async def set_default_model(req: DefaultModelRequest, request: Request): """Write DEFAULT_MODEL and OPEN_WEBUI_DEFAULT_MODEL to .env and recreate open-webui.""" - # Ollama allows namespaced ids: owner/model:tag (slashes required). Only reject empty / traversal. + # Model ids may be namespaced: owner/model:tag (slashes allowed). Only reject empty / traversal. name = (req.model or "").strip() if not name or ".." in name: raise HTTPException(status_code=400, detail="Invalid model name") diff --git a/dashboard/static/index.html b/dashboard/static/index.html index a02ba3a..7183c97 100644 --- a/dashboard/static/index.html +++ b/dashboard/static/index.html @@ -851,56 +851,13 @@ .pill:hover { border-color: var(--accent); color: var(--accent); } .mcp-remove-btn:hover { border-color: var(--danger); color: var(--danger); } /* ── Model select dropdown ── */ - .model-select-wrap { position: relative; flex: 1; min-width: 200px; } - .model-select-trigger { - width: 100%; display: flex; align-items: center; justify-content: space-between; - padding: var(--space-2) var(--space-4); background: var(--bg); - border: 1px solid var(--border); border-radius: var(--radius-sm); + .llm-model-input { + flex: 1; min-width: 200px; padding: var(--space-2) var(--space-4); + background: var(--bg); border: 1px solid var(--border); border-radius: var(--radius-sm); color: var(--fg); font-family: var(--font-mono); font-size: .88rem; - cursor: pointer; transition: all .2s; - } - .model-select-trigger:hover, .model-select-trigger.open { border-color: var(--accent); background: var(--accent-dim); } - .model-select-trigger .chevron { transition: transform .2s; opacity: .6; } - .model-select-trigger.open .chevron { transform: rotate(180deg); } - .model-select-dropdown { - position: absolute; top: 100%; left: 0; right: 0; margin-top: var(--space-1); - background: var(--surface); border: 1px solid var(--border); - border-radius: var(--radius-sm); max-height: 320px; - overflow: hidden; display: flex; flex-direction: column; - z-index: 1000; box-shadow: var(--shadow-lg); - } - .model-select-search { padding: var(--space-2) var(--space-3); border-bottom: 1px solid var(--border-subtle); } - .model-select-search input { - width: 100%; padding: .4rem .65rem; background: var(--bg); - border: 1px solid var(--border); border-radius: var(--radius-sm); - color: var(--fg); font-family: var(--font-mono); font-size: .82rem; - } - .model-select-search input:focus-visible { outline: 1.5px solid var(--accent); outline-offset: 2px; border-color: var(--accent); } - .model-select-search input:focus:not(:focus-visible) { outline: none; border-color: var(--accent); } - .model-select-dropdown .section-label { - padding: var(--space-2) var(--space-4); font-size: var(--text-xs); - text-transform: uppercase; letter-spacing: .06em; color: var(--muted); - border-bottom: 1px solid var(--border-subtle); - } - .model-select-option { - padding: var(--space-2) var(--space-4); font-family: var(--font-mono); font-size: .82rem; - cursor: pointer; display: flex; justify-content: space-between; - align-items: center; transition: background .12s; - } - .model-select-option:hover { background: var(--surface-hover); } - .model-select-option.installed { color: var(--muted); cursor: default; } - .model-select-option.installed:hover { background: transparent; } - .model-select-option .badge { font-size: .65rem; color: var(--success); } - .model-select-option .size { font-size: .75rem; color: var(--muted); } - #ollama-select-options { overflow-y: auto; max-height: 220px; } - .model-select-custom { padding: var(--space-3) var(--space-4); border-top: 1px solid var(--border-subtle); } - .model-select-custom input { - width: 100%; padding: .4rem .65rem; background: var(--bg); - border: 1px solid var(--border); border-radius: var(--radius-sm); - color: var(--fg); font-family: var(--font-mono); font-size: .82rem; } - .model-select-custom input:focus-visible { outline: 1.5px solid var(--accent); outline-offset: 2px; border-color: var(--accent); } - .model-select-custom input:focus:not(:focus-visible) { outline: none; border-color: var(--accent); } + .llm-model-input:focus-visible { outline: 1.5px solid var(--accent); outline-offset: 2px; border-color: var(--accent); } + .llm-model-input:focus:not(:focus-visible) { outline: none; border-color: var(--accent); } .last-updated { font-size: var(--text-xs); color: var(--muted); margin-top: var(--space-2); } /* ── Inputs ── */ input[type="text"] { @@ -1020,10 +977,8 @@ .model-item .name { word-break: break-word; } .pull-area { padding: var(--space-4); } .pull-row { flex-direction: row; flex-wrap: wrap; gap: var(--space-3); } - .pull-row .model-select-wrap { flex: 1 1 100%; min-width: 0; } + .pull-row .llm-model-input { flex: 1 1 100%; min-width: 0; } .pull-row button { flex: 1 1 120px; min-width: 0; } - .model-select-wrap { min-width: 0; } - .model-select-trigger { font-size: .85rem; } input[type="text"] { min-width: 0; } button { min-height: 44px; padding: .75rem 1.25rem; } .mcp-howto { padding: var(--space-2) var(--space-3); } @@ -1043,7 +998,6 @@ h1 { font-size: 1.75rem; } section { padding: var(--space-4) var(--space-3); } .model-list { max-height: 160px; } - .model-select-dropdown { max-height: 60vh; left: var(--space-2); right: var(--space-2); width: auto; } .quick-pills { gap: var(--space-1); } .pill { padding: 2px var(--space-3); } } @@ -1210,7 +1164,6 @@ } /* Override arbitrary z-indexes with scale tokens */ .modal-overlay { z-index: var(--z-modal); } - .model-select-dropdown { z-index: var(--z-dropdown); } header::before { z-index: var(--z-sticky); } .toast-container { z-index: var(--z-overlay); } .skip-link { z-index: calc(var(--z-modal) + 1); } @@ -1901,36 +1854,21 @@

LLM — llama.cpp

-
+
-
- - -
- - + + +
-
-
@@ -2090,8 +2028,7 @@

Dashboard login

}); }; - let ollamaModels = []; - let ollamaLibrary = []; + let llmModels = []; function toast(msg, type = '') { const el = document.createElement('div'); @@ -2213,7 +2150,6 @@

Dashboard login

} const SERVICE_ICONS = { - 'ollama': '🦙', 'model-gateway': '⇌', 'webui': '💬', 'mcp': '🔌', @@ -2838,62 +2774,33 @@

Dashboard login

} catch (e) { jobsEl.textContent = "Jobs unavailable: " + e; } } - function buildOllamaDropdown(filter = '') { - const installed = new Set(ollamaModels.map(m => m.name)); - const optionsEl = document.getElementById('ollama-select-options'); - if (!optionsEl) return; - const q = filter.toLowerCase().trim(); - const models = q ? ollamaLibrary.filter(m => m.toLowerCase().includes(q)) : ollamaLibrary; - optionsEl.innerHTML = models.slice(0, 80).map(name => { - const isInstalled = installed.has(name); - return `
- ${name} - ${isInstalled ? '' : 'pull'} -
`; - }).join(''); - if (models.length > 80) { - optionsEl.innerHTML += `
+ ${models.length - 80} more — type to search
`; - } - } - - async function loadOllamaLibrary() { - try { - const r = await api('/api/ollama/library'); - const d = await r.json(); - ollamaLibrary = d.models || []; - } catch (_) { - ollamaLibrary = ['llama3.2', 'deepseek-r1:7b', 'qwen2.5:7b', 'mistral', 'nomic-embed-text']; - } - } - - async function loadOllamaModels() { - const el = document.getElementById('ollama-models'); + async function loadModels() { + const el = document.getElementById('llm-models'); try { - const r = await api('/api/ollama/models'); + const r = await api('/api/llm/models'); const d = await r.json(); - ollamaModels = d.models || []; - buildOllamaDropdown(document.getElementById('ollama-library-search')?.value || ''); + llmModels = d.models || []; if (!d.ok) { el.innerHTML = '
Model gateway unreachable.Start with: docker compose up -d
'; return; } - if (!ollamaModels.length) { - el.innerHTML = '
No models yet.Select one below and click Pull.
'; - document.getElementById('ollama-last-updated').textContent = ''; + if (!llmModels.length) { + el.innerHTML = '
No models yet.Enter a Hugging Face repo below and click Pull.
'; + document.getElementById('llm-last-updated').textContent = ''; populateThroughputModelSelect(); return; } - document.getElementById('ollama-last-updated').textContent = 'Updated ' + new Date().toLocaleTimeString(); + document.getElementById('llm-last-updated').textContent = 'Updated ' + new Date().toLocaleTimeString(); // Fetch currently loaded model to show active badge let activeModel = ''; try { - const ps = await api('/api/ollama/ps'); + const ps = await api('/api/llm/ps'); if (ps.ok) { const psData = await ps.json(); activeModel = (psData.models?.[0]?.name || '').replace(/\.gguf$/i, '').split(':')[0]; } } catch (_) {} - el.innerHTML = ollamaModels.map(m => { + el.innerHTML = llmModels.map(m => { const bareName = m.name.replace(/\.gguf$/i, ''); const isActive = activeModel && (bareName === activeModel || bareName.split(':')[0] === activeModel); return ` @@ -2908,7 +2815,7 @@

Dashboard login

const activeArea = document.getElementById('llm-active-area'); const activeSel = document.getElementById('llm-active-select'); if (activeSel) { - const llms = ollamaModels.filter(m => !isEmbeddingModel(m.name)); + const llms = llmModels.filter(m => !isEmbeddingModel(m.name)); if (llms.length) { activeSel.innerHTML = llms.map(m => ``).join(''); const active = llms.find(m => { @@ -2943,7 +2850,7 @@

Dashboard login

function populateThroughputModelSelect() { const sel = document.getElementById('throughput-model-select'); if (!sel) return; - const llms = ollamaModels.filter(m => !isEmbeddingModel(m.name)); + const llms = llmModels.filter(m => !isEmbeddingModel(m.name)); if (!llms.length) { sel.innerHTML = ''; return; @@ -3042,8 +2949,8 @@

Dashboard login

['perf-rail-p50','perf-rail-p95','perf-rail-p99','perf-rail-peak','perf-rail-ttft-p50','perf-rail-ttft-p95'].forEach(id => setText(id, '—')); heroEl?.classList.remove('loaded'); // Bench target — show the configured local model name if available - const targetName = (typeof ollamaModels !== 'undefined' && ollamaModels?.length) - ? (ollamaModels.find(m => !isEmbeddingModel?.(m.name))?.name || ollamaModels[0]?.name) : ''; + const targetName = (typeof llmModels !== 'undefined' && llmModels?.length) + ? (llmModels.find(m => !isEmbeddingModel?.(m.name))?.name || llmModels[0]?.name) : ''; setText('bench-target', targetName || 'Run a benchmark to seed the dashboard'); return; } @@ -3119,7 +3026,7 @@

Dashboard login

// No model selector in the simplified single-model UI — pick the first // non-embedding LLM the gateway reports. Fall back to the hero's current // target text (whatever the dashboard is actively showing). - const llms = ollamaModels.filter(m => !isEmbeddingModel(m.name)); + const llms = llmModels.filter(m => !isEmbeddingModel(m.name)); const heroTarget = document.getElementById('bench-target')?.textContent?.trim(); const model = llms[0]?.name || (heroTarget && heroTarget !== '—' ? heroTarget : 'llama3.2'); resultsEl.style.display = 'none'; @@ -3308,86 +3215,19 @@

Dashboard login

} } - function getSelectedOllamaModel() { - const custom = document.getElementById('ollama-model-custom').value.trim(); - if (custom) return custom; - const label = document.getElementById('ollama-select-label').textContent; - return label !== 'Select model to pull...' ? label : ''; - } - - function setSelectedOllamaModel(name) { - document.getElementById('ollama-model-custom').value = ''; - document.getElementById('ollama-select-label').textContent = name || 'Select model to pull...'; + function getSelectedModel() { + return document.getElementById('llm-model-input').value.trim(); } - function toggleOllamaDropdown(open) { - const trigger = document.getElementById('ollama-select-trigger'); - const dd = document.getElementById('ollama-select-dropdown'); - dd.style.display = open ? 'flex' : 'none'; - trigger.classList.toggle('open', open); - trigger.setAttribute('aria-expanded', open); - if (open) { - document.getElementById('ollama-library-search').value = ''; - document.getElementById('ollama-library-search').focus(); - buildOllamaDropdown(''); - } + function setSelectedModel(name) { + document.getElementById('llm-model-input').value = name || ''; } - document.getElementById('ollama-select-trigger').onclick = (e) => { - e.stopPropagation(); - const dd = document.getElementById('ollama-select-dropdown'); - toggleOllamaDropdown(dd.style.display !== 'flex'); - }; - document.getElementById('ollama-select-trigger').onkeydown = (e) => { - if (e.key === 'Enter' || e.key === ' ') { - e.preventDefault(); - const dd = document.getElementById('ollama-select-dropdown'); - toggleOllamaDropdown(dd.style.display !== 'flex'); - } - }; - - document.getElementById('ollama-select-options').addEventListener('click', (e) => { - const opt = e.target.closest('.model-select-option'); - if (!opt || opt.classList.contains('installed')) return; - const model = opt.dataset.model; - setSelectedOllamaModel(model); - toggleOllamaDropdown(false); - document.getElementById('ollama-library-search').value = ''; - }); - - document.getElementById('ollama-library-search').oninput = debounce((e) => { - buildOllamaDropdown(e.target.value); - }, 200); - - document.getElementById('ollama-library-search').onkeydown = (e) => { - e.stopPropagation(); - if (e.key === 'Escape') { - toggleOllamaDropdown(false); - document.getElementById('ollama-select-trigger').focus(); - } - }; - - document.getElementById('ollama-model-custom').oninput = () => { - const v = document.getElementById('ollama-model-custom').value.trim(); - document.getElementById('ollama-select-label').textContent = v || 'Select model to pull...'; - }; - - document.addEventListener('click', () => { - const dd = document.getElementById('ollama-select-dropdown'); - if (dd.style.display === 'flex') toggleOllamaDropdown(false); - }); - document.getElementById('ollama-select-dropdown').onclick = (e) => e.stopPropagation(); - document.getElementById('ollama-select-dropdown').onkeydown = (e) => { - if (e.key === 'Escape') { - toggleOllamaDropdown(false); - document.getElementById('ollama-select-trigger').focus(); - } - }; - async function pullOllamaModel(name, btn, prog, logEl, barEl) { + async function pullModel(name, btn, prog, logEl, barEl) { logEl.textContent = `Pulling ${name}...`; barEl.style.width = '0%'; barEl.setAttribute('aria-valuenow', 0); - const resp = await api('/api/ollama/pull', { + const resp = await api('/api/llm/pull', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model: name }) @@ -3396,7 +3236,7 @@

Dashboard login

const d = await resp.json().catch(() => ({})); throw new Error(d.detail || `HTTP ${resp.status}`); } - const result = await pollOllamaPull(logEl, barEl); + const result = await pollGgufPull(logEl, barEl); if (result && result.success === false) { const line = (result.output || '').split('\n').filter(Boolean).pop() || 'Pull failed'; throw new Error(line); @@ -3404,11 +3244,11 @@

Dashboard login

return result; } - function pollOllamaPull(logEl, barEl) { + function pollGgufPull(logEl, barEl) { return new Promise((resolve) => { let pollErrors = 0; const poll = () => { - api('/api/ollama/pull/status').then(r => r.json()).then(s => { + api('/api/llm/pull/status').then(r => r.json()).then(s => { pollErrors = 0; if (s.output) { logEl.textContent = s.output; logEl.scrollTop = logEl.scrollHeight; } if (s.pct != null) { barEl.style.width = s.pct + '%'; barEl.setAttribute('aria-valuenow', s.pct); } @@ -3427,20 +3267,20 @@

Dashboard login

}); } - document.getElementById('ollama-starter-pack').onclick = async () => { - const btn = document.getElementById('ollama-starter-pack'); - const pullBtn = document.getElementById('ollama-pull'); - const prog = document.getElementById('ollama-progress'); - const logEl = document.getElementById('ollama-log'); - const barEl = document.getElementById('ollama-progress-bar'); + document.getElementById('llm-starter-pack').onclick = async () => { + const btn = document.getElementById('llm-starter-pack'); + const pullBtn = document.getElementById('llm-pull'); + const prog = document.getElementById('llm-progress'); + const logEl = document.getElementById('llm-log'); + const barEl = document.getElementById('llm-progress-bar'); btn.disabled = true; pullBtn.disabled = true; prog.style.display = 'block'; logEl.textContent = 'Starting gguf-puller with GGUF_MODELS from .env...'; try { - await pullOllamaModel('.env', btn, prog, logEl, barEl); + await pullModel('.env', btn, prog, logEl, barEl); toast('GGUF pull from .env finished', 'success'); - await loadOllamaModels(); + await loadModels(); } catch (e) { logEl.textContent += '\nError: ' + e.message; toast('GGUF pull failed: ' + e.message, 'error'); @@ -3449,28 +3289,28 @@

Dashboard login

pullBtn.disabled = false; }; - document.getElementById('ollama-pull').onclick = async () => { - const name = getSelectedOllamaModel(); + document.getElementById('llm-pull').onclick = async () => { + const name = getSelectedModel(); if (!name) { toast('Enter a model name', 'error'); return; } - const btn = document.getElementById('ollama-pull'); - const prog = document.getElementById('ollama-progress'); - const logEl = document.getElementById('ollama-log'); - const barEl = document.getElementById('ollama-progress-bar'); + const btn = document.getElementById('llm-pull'); + const prog = document.getElementById('llm-progress'); + const logEl = document.getElementById('llm-log'); + const barEl = document.getElementById('llm-progress-bar'); btn.disabled = true; - document.getElementById('ollama-starter-pack').disabled = true; + document.getElementById('llm-starter-pack').disabled = true; prog.style.display = 'block'; logEl.textContent = 'Connecting...'; try { - await pullOllamaModel(name, btn, prog, logEl, barEl); + await pullModel(name, btn, prog, logEl, barEl); toast(`Pulled ${name}`, 'success'); - setSelectedOllamaModel(''); - await loadOllamaModels(); + setSelectedModel(''); + await loadModels(); } catch (e) { logEl.textContent += '\nError: ' + e.message; toast('Pull failed: ' + e.message, 'error'); } btn.disabled = false; - document.getElementById('ollama-starter-pack').disabled = false; + document.getElementById('llm-starter-pack').disabled = false; }; // Guard: the #comfyui-pull button was removed from the UI but the handler survived. @@ -3562,9 +3402,9 @@

Dashboard login

try { if (target === 'llm') { try { - await pullOllamaModel(val, btn, prog, logEl, barEl); + await pullModel(val, btn, prog, logEl, barEl); toast(`GGUF pull: ${val}`, 'success'); - loadOllamaModels?.(); + loadModels?.(); } catch (llmErr) { toast(llmErr.message || 'GGUF pull failed', 'error'); } @@ -3627,8 +3467,7 @@

Dashboard login

const btn = document.getElementById('refresh-btn'); btn.classList.add('loading'); try { - if (!ollamaLibrary.length) await loadOllamaLibrary(); - await Promise.all([loadServices(), loadDependencies(), loadRagStatus(), loadOllamaModels(), loadComfyuiModels(), loadComfyuiPacks(), loadMcpServers(), loadComfyuiPanel()]); + await Promise.all([loadServices(), loadDependencies(), loadRagStatus(), loadModels(), loadComfyuiModels(), loadComfyuiPacks(), loadMcpServers(), loadComfyuiPanel()]); await Promise.all([loadPerfHero(), loadThroughputServiceUsage()]); } finally { btn.classList.remove('loading'); @@ -3730,7 +3569,7 @@

Dashboard login

if (r.ok && d.ok) { statusEl.textContent = `✓ Activating — services restarting…`; toast(`Activating ${model} — services restarting…`); - setTimeout(() => loadOllamaModels(), 8000); + setTimeout(() => loadModels(), 8000); } else { statusEl.textContent = 'Error: ' + (d.detail || 'Switch failed'); toast((d.detail || 'Switch failed') + '', 'error'); @@ -3744,7 +3583,7 @@

Dashboard login

} }); - document.getElementById('ollama-models')?.addEventListener('click', async (e) => { + document.getElementById('llm-models')?.addEventListener('click', async (e) => { const btn = e.target.closest('.btn-model-delete[data-model]'); if (!btn) return; e.preventDefault(); @@ -3752,11 +3591,11 @@

Dashboard login

if (!model || !confirm(`Delete "${model}" from disk?\n\nThis removes the GGUF file and cannot be undone.`)) return; btn.disabled = true; try { - const r = await api('/api/ollama/delete', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model }) }); + const r = await api('/api/llm/delete', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model }) }); const d = await r.json(); if (r.ok) { toast(d.message || 'Model deleted'); - loadOllamaModels(); + loadModels(); loadThroughputStats(); } else { toast((d.detail || 'Delete failed') + '', 'error'); @@ -3925,24 +3764,24 @@

Dashboard login

} async function resumeActivePulls() { - // Ollama — resume polling if a pull is in progress + // LLM (GGUF) — resume polling if a pull is in progress try { - const s = await api('/api/ollama/pull/status').then(r => r.json()); + const s = await api('/api/llm/pull/status').then(r => r.json()); if (s.running) { - const prog = document.getElementById('ollama-progress'); - const logEl = document.getElementById('ollama-log'); - const barEl = document.getElementById('ollama-progress-bar'); - const pullBtn = document.getElementById('ollama-pull'); - const packBtn = document.getElementById('ollama-starter-pack'); + const prog = document.getElementById('llm-progress'); + const logEl = document.getElementById('llm-log'); + const barEl = document.getElementById('llm-progress-bar'); + const pullBtn = document.getElementById('llm-pull'); + const packBtn = document.getElementById('llm-starter-pack'); prog.style.display = 'block'; pullBtn.disabled = true; packBtn.disabled = true; logEl.textContent = `Resuming pull: ${s.model || ''}...`; - pollOllamaPull(logEl, barEl).then(result => { + pollGgufPull(logEl, barEl).then(result => { toast(result.success ? `Pulled ${s.model}` : 'Pull failed', result.success ? 'success' : 'error'); pullBtn.disabled = false; packBtn.disabled = false; - loadOllamaModels(); + loadModels(); }); } } catch (_) {} @@ -4099,9 +3938,9 @@

Dashboard login

await loadAuthConfig(); if (authConfig.auth_required && authConfig.auth_type === 'bearer' && !sessionStorage.getItem(AUTH_STORAGE_KEY)) { showAuthModal(); - window.addEventListener('auth-ready', () => { loadOllamaLibrary().then(() => refresh()); resumeActivePulls(); }, { once: true }); + window.addEventListener('auth-ready', () => { refresh(); resumeActivePulls(); }, { once: true }); } else { - loadOllamaLibrary().then(() => refresh()); + refresh(); resumeActivePulls(); } // Visibility-aware polling — pause when tab is hidden to save CPU/network diff --git a/docker-compose.yml b/docker-compose.yml index 6f0b56c..fcb7d31 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -403,7 +403,7 @@ services: retries: 3 environment: # Route all model requests through the gateway (unified provider) - - OLLAMA_BASE_URL= + - ENABLE_OLLAMA_API=false - OPENAI_API_BASE_URL=${OPENAI_API_BASE:-http://model-gateway:11435/v1} - OPENAI_API_KEY=${LITELLM_MASTER_KEY:-local} # Auth: False = single-user local / Tailscale use. @@ -719,7 +719,6 @@ services: # session cookie so the second login is infrequent. - N8N_USER_MANAGEMENT_DISABLED=true # Route all model traffic through Model Gateway (dashboard tracking, unified provider) - - OLLAMA_HOST=http://model-gateway:11435 - OPENAI_API_BASE_URL=${OPENAI_API_BASE:-http://model-gateway:11435/v1} - OPENAI_API_KEY=local # OAuth callbacks + inbound webhooks require a public URL. diff --git a/docs/product requirements docs/component-dashboard-ui.md b/docs/product requirements docs/component-dashboard-ui.md index 9935667..fef7b6b 100644 --- a/docs/product requirements docs/component-dashboard-ui.md +++ b/docs/product requirements docs/component-dashboard-ui.md @@ -3,7 +3,7 @@ ## Purpose A web-based control plane that provides a single pane of glass for: - Managing Docker-Compose services (start/stop/restart, logs) -- Pulling and configuring AI models (Ollama, vLLM, etc.) +- Pulling and configuring AI models (GGUF/llama.cpp LLMs, ComfyUI diffusion models) - Viewing dependency health and throughput stats - Executing MCP tool calls from any browser (via the MCP Gateway) @@ -19,11 +19,12 @@ A web-based control plane that provides a single pane of glass for: | `/api/hardware` | GET | None | Host hardware stats (CPU, memory, GPU via nvidia-smi) | | `/api/auth/config` | GET | None | Auth method in use | | `/api/rag/status` | GET | None | Qdrant collection status + point count | -| `/api/ollama/models` | GET | Y | Installed Ollama models | -| `/api/ollama/pull` | POST | Y | Pull model (streaming progress) | -| `/api/ollama/delete` | POST | Y | Delete Ollama model | -| `/api/ollama/library` | GET | Y | Pullable models from Ollama registry (24h cache) | -| `/api/ollama/ps` | GET | Y | Models currently loaded in Ollama | +| `/api/llm/models` | GET | Y | Installed GGUF models (llama.cpp) | +| `/api/llm/pull` | POST | Y | Pull a GGUF model from Hugging Face (background gguf-puller) | +| `/api/llm/pull/status` | GET | Y | GGUF pull progress | +| `/api/llm/delete` | POST | Y | Delete a GGUF model file from disk | +| `/api/llm/unload` | POST | Y | Unload the active model from the gateway (keeps files) | +| `/api/llm/ps` | GET | Y | Models currently advertised by the model gateway | | `/api/comfyui/models` | GET | Y | Installed ComfyUI models | | `/api/comfyui/pull` | POST | Y | Pull ComfyUI models | | `/api/comfyui/models/{cat}/{file}` | DELETE | Y | Delete ComfyUI model | @@ -70,7 +71,7 @@ A web-based control plane that provides a single pane of glass for: 1. From a tailnet device, open `https://${CADDY_TAILNET_HOSTNAME}/dash/` and complete Google sign-in. 2. The SSO front door (Caddy + oauth2-proxy) gates browser access; `DASHBOARD_AUTH_TOKEN` is a bearer-token fallback for host scripts and non-browser API access. 3. Use the "Services" tab to stop or restart a service if an issue is suspected. -4. Pull a new Ollama or ComfyUI model from the relevant tab. +4. Pull a new LLM (GGUF) or ComfyUI model from the relevant tab. 5. In the "MCP" tab, add a new tool server (e.g., a custom web search provider) by clicking "Add" and filling the JSON manifest. --- diff --git a/tests/test_services_and_throughput.py b/tests/test_services_and_throughput.py index 0737896..7589856 100644 --- a/tests/test_services_and_throughput.py +++ b/tests/test_services_and_throughput.py @@ -1,4 +1,4 @@ -"""Tests for /api/services, /api/throughput/*, /api/ollama/library, and global exception handler.""" +"""Tests for /api/services, /api/throughput/*, and the global exception handler.""" from __future__ import annotations import os @@ -67,18 +67,6 @@ def test_services_do_not_leak_auth_token(client, monkeypatch): importlib.reload(dashboard.services_catalog) -# ── /api/ollama/library ────────────────────────────────────────────────────── - -def test_ollama_library_returns_models(client): - r = client.get("/api/ollama/library") - assert r.status_code == 200 - data = r.json() - assert "models" in data - assert data["ok"] is True - assert isinstance(data["models"], list) - assert len(data["models"]) > 0 - - # ── /api/throughput/record ─────────────────────────────────────────────────── def test_throughput_record_accepts_sample(client): @@ -167,11 +155,16 @@ def test_unhandled_exception_returns_500_not_traceback(monkeypatch): mock_client.get = AsyncMock(return_value=MagicMock(status_code=200)) monkeypatch.setattr("dashboard.app._http_client", mock_client) - # Patch ollama library to raise an unexpected error - monkeypatch.setattr("dashboard.app._fetch_ollama_library", lambda: (_ for _ in ()).throw(RuntimeError("test boom"))) + # Patch the GGUF disk scan (a dependency of /api/llm/models) to raise an + # unexpected error. It is called without a try/except in the route, so the + # error bubbles all the way to the global exception handler. + def _boom(): + raise RuntimeError("test boom") + + monkeypatch.setattr("dashboard.app._scan_gguf_models", _boom) tc = TestClient(dashboard_app.app, raise_server_exceptions=False) - r = tc.get("/api/ollama/library") + r = tc.get("/api/llm/models") assert r.status_code == 500 data = r.json() assert data["detail"] == "Internal server error"