From df500298985c19ff5cbcac382b98200d8e2cc935 Mon Sep 17 00:00:00 2001 From: Hermes Bot Date: Sun, 28 Jun 2026 07:14:15 -0400 Subject: [PATCH] feat(caddy): expose LiteLLM at /llm/* for tailnet API clients (Cline/Cursor) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Programmatic OpenAI-compatible clients on the tailnet (Cline, Cursor, scripts) had no way to reach the model gateway: it's bound 127.0.0.1-only and Caddy had no route for it (the compose comment claiming "tailnet peers reach this via Caddy" was aspirational — no such route existed, and SSO can't gate a non-interactive API client anyway). - auth/caddy/Caddyfile: add a `/llm/*` bypass handle (no forward_auth) that strips the prefix and reverse-proxies to model-gateway:11435. Auth is LiteLLM's own Bearer master key, which the gateway enforces (401 without it) — safe over the tailnet, which is the only interface Caddy binds. - docker-compose.yml: add model-gateway to proxy-net so Caddy can reach it; correct the previously-stale ports comment to describe the real /llm/* path. Clients use: base URL https://<tailnet-host>/llm/v1 , key = LITELLM_MASTER_KEY, model = local-chat. No new host port, no 0.0.0.0 exposure. Validated live: GET /llm/v1/models with the key returns local-chat/local-embed; without the key returns 401 (LiteLLM), not a 302 SSO redirect. Co-Authored-By: Claude Opus 4.8 (1M context) --- auth/caddy/Caddyfile | 10 ++++++++++ docker-compose.yml | 11 +++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/auth/caddy/Caddyfile b/auth/caddy/Caddyfile index ad6418c..9944d6f 100644 --- a/auth/caddy/Caddyfile +++ b/auth/caddy/Caddyfile @@ -45,6 +45,16 @@ reverse_proxy n8n:5678 } + # ---- LiteLLM model gateway (OpenAI-compatible API) for tailnet API clients ---- + # Bypasses SSO: programmatic clients (Cline, Cursor, scripts) can't do an + # interactive Google login. 
Auth is LiteLLM's own Bearer key + # (LITELLM_MASTER_KEY), which model-gateway enforces — so this is safe to + # expose over the tailnet (Caddy binds the tailnet interface only). + # handle_path strips /llm so LiteLLM sees /v1/... (e.g. /llm/v1/chat/completions). + handle_path /llm/* { + reverse_proxy model-gateway:11435 + } + # ---- Open WebUI at ROOT (no auth needed for the redirect) ---- # Open WebUI's upstream v0.9.2 image is a prebuilt SvelteKit SPA with # base="" (assets are root-absolute: /_app, /static, /api, /ws). It MUST be diff --git a/docker-compose.yml b/docker-compose.yml index 5867597..a6dbd50 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -160,10 +160,11 @@ services: - DASHBOARD_URL=http://dashboard:8080 - THROUGHPUT_RECORD_TOKEN=${THROUGHPUT_RECORD_TOKEN:-} ports: - # 127.0.0.1 bind: localhost-only host publish. Tailnet peers reach this - # service through Caddy (see auth/caddy/Caddyfile); host apps (Cline, - # VS Code, MCP clients, Hermes auth.json) keep their `localhost:11435` - # connectivity. Removes the prior 0.0.0.0 LAN exposure. + # 127.0.0.1 bind: localhost-only host publish. Tailnet peers reach the + # OpenAI-compatible API via Caddy at https://<tailnet-host>/llm/* — gated + # by the LiteLLM master key, no SSO (see auth/caddy/Caddyfile). Host apps + # (Cline, VS Code, MCP clients, Hermes auth.json) keep their + # `localhost:11435` connectivity. Removes the prior 0.0.0.0 LAN exposure. - "127.0.0.1:${MODEL_GATEWAY_PORT:-11435}:11435" healthcheck: test: ["CMD-SHELL", "python3 -c \"import os, urllib.request; req = urllib.request.Request('http://localhost:11435/v1/models', headers={'Authorization': 'Bearer ' + os.environ.get('LITELLM_MASTER_KEY', 'local')}); urllib.request.urlopen(req)\""] @@ -178,6 +179,8 @@ services: networks: - frontend - backend + # proxy-net: lets Caddy (front door) reach this for the /llm/* API route. + - proxy-net ops-controller: build: ./ops-controller