From df500298985c19ff5cbcac382b98200d8e2cc935 Mon Sep 17 00:00:00 2001
From: Hermes Bot <hermes@ordo-ai-stack.local>
Date: Sun, 28 Jun 2026 07:14:15 -0400
Subject: [PATCH] feat(caddy): expose LiteLLM at /llm/* for tailnet API clients
 (Cline/Cursor)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Programmatic OpenAI-compatible clients on the tailnet (Cline, Cursor, scripts)
had no way to reach the model gateway: its host port is published on 127.0.0.1
only, and Caddy had no route for it. The compose comment claiming "tailnet peers
reach this via Caddy" was aspirational — no such route existed — and SSO can't
gate a non-interactive API client anyway.

- auth/caddy/Caddyfile: add a `handle_path /llm/*` block with no forward_auth (an
  SSO bypass) that strips the prefix and reverse-proxies to model-gateway:11435.
  Auth is LiteLLM's own Bearer master key, which the gateway enforces (401 without
  it) — safe over the tailnet, which is the only interface Caddy binds.
- docker-compose.yml: add model-gateway to proxy-net so Caddy can reach it; correct
  the previously-stale ports comment to describe the real /llm/* path.

Clients use: base URL https://<tailnet-host>/llm/v1, key = LITELLM_MASTER_KEY,
model = local-chat. No new host port, no 0.0.0.0 exposure.

Validated live: GET /llm/v1/models with the key returns local-chat/local-embed;
without the key returns 401 (LiteLLM), not a 302 SSO redirect.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 auth/caddy/Caddyfile | 10 ++++++++++
 docker-compose.yml   | 11 +++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/auth/caddy/Caddyfile b/auth/caddy/Caddyfile
index ad6418c..9944d6f 100644
--- a/auth/caddy/Caddyfile
+++ b/auth/caddy/Caddyfile
@@ -45,6 +45,16 @@
         reverse_proxy n8n:5678
     }
 
+    # ---- LiteLLM model gateway (OpenAI-compatible API) for tailnet API clients ----
+    # Bypasses SSO: programmatic clients (Cline, Cursor, scripts) can't do an
+    # interactive Google login. Auth is LiteLLM's own Bearer key
+    # (LITELLM_MASTER_KEY), which model-gateway enforces — so this is safe to
+    # expose over the tailnet (Caddy binds the tailnet interface only).
+    # handle_path strips /llm: /llm/v1/chat/completions reaches LiteLLM as /v1/chat/completions.
+    handle_path /llm/* {
+        reverse_proxy model-gateway:11435
+    }
+
     # ---- Open WebUI at ROOT (no auth needed for the redirect) ----
     # Open WebUI's upstream v0.9.2 image is a prebuilt SvelteKit SPA with
     # base="" (assets are root-absolute: /_app, /static, /api, /ws). It MUST be
diff --git a/docker-compose.yml b/docker-compose.yml
index 5867597..a6dbd50 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -160,10 +160,11 @@ services:
       - DASHBOARD_URL=http://dashboard:8080
       - THROUGHPUT_RECORD_TOKEN=${THROUGHPUT_RECORD_TOKEN:-}
     ports:
-      # 127.0.0.1 bind: localhost-only host publish. Tailnet peers reach this
-      # service through Caddy (see auth/caddy/Caddyfile); host apps (Cline,
-      # VS Code, MCP clients, Hermes auth.json) keep their `localhost:11435`
-      # connectivity. Removes the prior 0.0.0.0 LAN exposure.
+      # 127.0.0.1 bind: localhost-only host publish. Tailnet peers reach the
+      # OpenAI-compatible API via Caddy at https://<tailnet-host>/llm/* — gated
+      # by the LiteLLM master key, no SSO (see auth/caddy/Caddyfile). Host apps
+      # (Cline, VS Code, MCP clients, Hermes auth.json) keep their
+      # `localhost:11435` connectivity. Removes the prior 0.0.0.0 LAN exposure.
       - "127.0.0.1:${MODEL_GATEWAY_PORT:-11435}:11435"
     healthcheck:
       test: ["CMD-SHELL", "python3 -c \"import os, urllib.request; req = urllib.request.Request('http://localhost:11435/v1/models', headers={'Authorization': 'Bearer ' + os.environ.get('LITELLM_MASTER_KEY', 'local')}); urllib.request.urlopen(req)\""]
@@ -178,6 +179,8 @@ services:
     networks:
       - frontend
       - backend
+      # proxy-net: lets Caddy (front door) reach this for the /llm/* API route.
+      - proxy-net
 
   ops-controller:
     build: ./ops-controller