From ae7c2f78c778cd5f66ee783379cd6a15072ae51d Mon Sep 17 00:00:00 2001 From: Hermes Bot Date: Tue, 30 Jun 2026 22:25:59 +0000 Subject: [PATCH] chore: update stack versions (oauth2-proxy, n8n, open-webui, qdrant, caddy) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - oauth2-proxy: v7.15.2-alpine → v7.15.3-alpine (CVE-2026-33811 fix) - n8n: 2.20.6 → 2.28.3 - Open WebUI: v0.9.2 → v0.10.1 - Qdrant: v1.17.1 → v1.18.2 - Caddy: 2.11.2-alpine → 2.11.4-alpine --- docker-compose.yml | 2490 +++++++++++++++++++------------------- scripts/stack_monitor.py | 1366 ++++++++++----------- 2 files changed, 1928 insertions(+), 1928 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index b2e5136..6f0b56c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,1245 +1,1245 @@ -name: ordo-ai-stack - -# All services start by default. Run: docker compose up -d -services: - # Self-hosted meta-search engine. Replaces the prior Tavily integration as the - # core search method exposed to Hermes / the MCP gateway. Internal-only - # (backend network, no host port); the MCP wrapper `searxng` in - # mcp/gateway/registry-custom.yaml queries this instance at http://searxng:8080. - searxng: - image: searxng/searxng:latest - restart: unless-stopped - cap_drop: [ALL] - cap_add: [CHOWN, SETGID, SETUID] # required by uwsgi worker init - security_opt: [no-new-privileges:true] - environment: - - SEARXNG_SETTINGS_PATH=/etc/searxng/settings.yml - - SEARXNG_BASE_URL=http://searxng:8080/ - # NOTE on secret_key plumbing: the upstream image reads server.secret_key - # directly from settings.yml — it does not honour $SEARXNG_SECRET in env. - # Because the bind mount runs as the unprivileged `searxng` user inside - # the container, a runtime entrypoint-sed has no write permission. The - # real secret therefore lives in data/searxng/settings.yml (gitignored, - # same protection model as .env). 
$SEARXNG_SECRET in .env exists as the - # canonical operator-facing source — keep it in sync with settings.yml - # when rotating (scripts/rotate-searxng-secret.sh can automate later). - volumes: - - ${DATA_PATH:-${BASE_PATH:-.}/data}/searxng:/etc/searxng - healthcheck: - test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/healthz | grep -q OK || exit 1"] - start_period: 30s - interval: 30s - timeout: 5s - retries: 3 - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - networks: - - backend - - llamacpp: - # Pinned mainline llama.cpp (ggml-org) by DIGEST for reproducibility — - # build 9765 (73618f27a), the first build that loads Qwen3.6 MTP + qwen35moe - # GGUFs natively (upstream PR #22673). This is the single source of truth for - # the image; bump the digest deliberately (stack_monitor tracks the build). - # Override LLAMACPP_IMAGE in .env only to test a different build. - image: ${LLAMACPP_IMAGE:-ghcr.io/ggml-org/llama.cpp@sha256:44cd08334f85c1fcb363e3798302f2986cdb78cdfc8c1cf56bdad44041595a32} - restart: unless-stopped - platform: linux/amd64 - entrypoint: ["/bin/sh", "/llamacpp-scripts/run-llama-server.sh"] - environment: - - LLAMACPP_MODEL=${LLAMACPP_MODEL:-model.gguf} - - LLAMACPP_CTX_SIZE=${LLAMACPP_CTX_SIZE:-262144} - - LLAMACPP_PARALLEL=${LLAMACPP_PARALLEL:-1} - - LLAMACPP_ROPE_SCALING=${LLAMACPP_ROPE_SCALING:-none} - - LLAMACPP_ROPE_SCALE=${LLAMACPP_ROPE_SCALE:-1} - - LLAMACPP_YARN_ORIG_CTX=${LLAMACPP_YARN_ORIG_CTX:-0} - - LLAMACPP_OVERRIDE_KV=${LLAMACPP_OVERRIDE_KV:-} - - LLAMACPP_GPU_LAYERS=${LLAMACPP_GPU_LAYERS:--1} - - LLAMACPP_FLASH_ATTN=${LLAMACPP_FLASH_ATTN:-auto} - # Hard ceiling on tokens per request (defense-in-depth against - # runaway-reasoning loops where --reasoning-budget fails to close - # the block). 64K is plenty for any legitimate response. - - LLAMACPP_N_PREDICT=${LLAMACPP_N_PREDICT:-65536} - # Cap on tokens spent inside .... Hoisted from - # LLAMACPP_EXTRA_ARGS so it's monitorable. 
Reliability depends on the - # model emitting a recognizable end-of-thinking token; N_PREDICT above - # is the unconditional ceiling that fires regardless. - - LLAMACPP_REASONING_BUDGET=${LLAMACPP_REASONING_BUDGET:-32768} - - LLAMACPP_ENABLE_KV_CACHE_QUANTIZATION=${LLAMACPP_ENABLE_KV_CACHE_QUANTIZATION:-0} - - LLAMACPP_KV_CACHE_TYPE_K=${LLAMACPP_KV_CACHE_TYPE_K:-q4_0} - - LLAMACPP_KV_CACHE_TYPE_V=${LLAMACPP_KV_CACHE_TYPE_V:-q4_0} - - LLAMACPP_EXTRA_ARGS=${LLAMACPP_EXTRA_ARGS:-} - # Optional vision projector (mmproj GGUF). Path is INSIDE the container — - # bind-mount maps host models/gguf/ to /models, so set - # LLAMACPP_MMPROJ=/models/.gguf. Empty = no vision. - - LLAMACPP_MMPROJ=${LLAMACPP_MMPROJ:-} - volumes: - - ${BASE_PATH:-.}/models/gguf:/models:ro - - ${BASE_PATH:-.}/scripts/llamacpp:/llamacpp-scripts:ro - # Large GGUFs can take many minutes before /health returns 200; 503 during load fails curl -f. - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8080/health"] - start_period: 1800s - interval: 15s - timeout: 10s - retries: 40 - # GPU config: overridden by overrides/compute.yml (run scripts/detect_hardware.py) - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - networks: - - backend - - # Use CUDA image so linux/amd64 is available (plain :server manifest can resolve to arm64 on Docker Desktop). - # Pinned by digest to the same mainline build as llamacpp (build 9765 / 73618f27a). - llamacpp-embed: - image: ${LLAMACPP_EMBED_IMAGE:-ghcr.io/ggml-org/llama.cpp@sha256:44cd08334f85c1fcb363e3798302f2986cdb78cdfc8c1cf56bdad44041595a32} - restart: unless-stopped - platform: linux/amd64 - # The upstream :server-cuda is a rolling tag that has flipped its - # ENTRYPOINT/CMD shape at least twice this week (sometimes empty - # ENTRYPOINT with binary in CMD, sometimes ENTRYPOINT=["/app/llama-server"] - # with CMD=[]). 
Pin both explicitly so neither variant breaks us: - # entrypoint always invokes the binary; compose's command: is its argv. - entrypoint: ["/app/llama-server"] - command: > - --host 0.0.0.0 --port 8080 - --model /models/${LLAMACPP_EMBED_MODEL:-nomic-embed-text-v1.5.Q4_K_M.gguf} - --ctx-size 8192 --embeddings - volumes: - - ${BASE_PATH:-.}/models/gguf:/models:ro - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8080/health"] - start_period: 60s - interval: 15s - timeout: 10s - retries: 5 - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - networks: - - backend - - model-gateway: - build: ./model-gateway - image: ordo-ai-stack-model-gateway:latest - pull_policy: build - restart: unless-stopped - user: "1000:1000" - read_only: true - tmpfs: - - /tmp - cap_drop: [ALL] - security_opt: [no-new-privileges:true] - depends_on: - llamacpp: - condition: service_started - llamacpp-embed: - condition: service_started - dashboard: - condition: service_started - environment: - - LLAMACPP_URL=http://llamacpp:8080 - - LLAMACPP_EMBED_URL=http://llamacpp-embed:8080 - - LLAMACPP_MODEL=${LLAMACPP_MODEL:-model.gguf} - - LLAMACPP_EMBED_MODEL=${LLAMACPP_EMBED_MODEL:-nomic-embed-text-v1.5.Q4_K_M.gguf} - - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-local} - - LLAMACPP_CTX_SIZE=${LLAMACPP_CTX_SIZE:-262144} - # Local model used when a Claude-compatible client sends a "claude-*" model name - - CLAUDE_CODE_LOCAL_MODEL=${CLAUDE_CODE_LOCAL_MODEL:-} - # throughput_callback.py — posts per-completion samples to the dashboard. - # Must share at least one docker network with the dashboard service (the - # `backend` membership below covers it). - - DASHBOARD_URL=http://dashboard:8080 - - THROUGHPUT_RECORD_TOKEN=${THROUGHPUT_RECORD_TOKEN:-} - ports: - # 127.0.0.1 bind: localhost-only host publish. Tailnet peers reach the - # OpenAI-compatible API via Caddy at https:///llm/* — gated - # by the LiteLLM master key, no SSO (see auth/caddy/Caddyfile). 
Host apps - # (Cline, VS Code, MCP clients, Hermes auth.json) keep their - # `localhost:11435` connectivity. Removes the prior 0.0.0.0 LAN exposure. - - "127.0.0.1:${MODEL_GATEWAY_PORT:-11435}:11435" - healthcheck: - test: ["CMD-SHELL", "python3 -c \"import os, urllib.request; req = urllib.request.Request('http://localhost:11435/v1/models', headers={'Authorization': 'Bearer ' + os.environ.get('LITELLM_MASTER_KEY', 'local')}); urllib.request.urlopen(req)\""] - interval: 30s - timeout: 10s - retries: 3 - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - networks: - - frontend - - backend - # proxy-net: lets Caddy (front door) reach this for the /llm/* API route. - - proxy-net - - ops-controller: - build: ./ops-controller - image: ordo-ai-stack-ops-controller:latest - pull_policy: build - restart: unless-stopped - cap_drop: [ALL] - security_opt: [no-new-privileges:true] - # Add appuser to root group so it can read /var/run/docker.sock (root:root on Docker Desktop). - # Avoids needing root-at-start or a chmod-on-entry script. - group_add: ["0"] - environment: - - COMPOSE_PROJECT_DIR=/workspace - - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:-} - # Belt-and-suspenders: pin the in-container .env path so REGISTRY never - # falls back to the host-side BASE_PATH value (which resolves to a path - # that does not exist inside the Linux container). - - OPS_ENV_PATH=/workspace/.env - # Docker container name for ComfyUI, used by /comfyui/install-node-requirements - # to docker-exec a pip install. The code default is "comfyui" (assumes - # container_name: comfyui in compose), but this stack relies on Compose's - # auto-naming (project-service-N). Without this override, the endpoint - # silently returns 'Container "comfyui" not found'. 
- - COMFYUI_CONTAINER_NAME=ordo-ai-stack-comfyui-1 - # Pass through the operator's $HOME so this container's docker-compose - # subprocesses (POST /compose/* endpoints) interpolate ${HOME} the same - # way the operator's shell would. Without this, secret bind sources at - # ${HOME}/.ai-toolkit/runtime/secrets/* resolve against /home/appuser - # inside the container — a path that doesn't exist on the docker host - # — and `compose up` aborts on "bind source path does not exist". - - OPERATOR_HOME=${HOME} - # Read-only view of the SOPS-decrypted runtime env so compose subprocesses - # (POST /compose/*, /services/*/recreate) interpolate REAL secret values for - # secret-dependent services (oauth2-proxy, caddy, searxng, n8n, …) instead of - # leaving them unset. Path only — no secret value lives in this compose file. - # See ops-controller/main.py:_load_runtime_env. Decryption stays host-only. - - RUNTIME_ENV_FILE=/run/runtime.env - - HF_TOKEN_FILE=/run/secrets/hf_token - - AUDIT_LOG_PATH=/data/audit.log - - BASE_PATH=${BASE_PATH:-.} - - DATA_PATH=${DATA_PATH:-${BASE_PATH:-.}/data} - - COMPOSE_FILE=${COMPOSE_FILE:-docker-compose.yml} - - DEFAULT_MODEL=${DEFAULT_MODEL:-} - - COMFYUI_MODELS_DIR=/models/comfyui - # ComfyUI ↔ llamacpp VRAM serialization guardian (see ops-controller/main.py) - - COMFYUI_URL=http://comfyui:8188 - - COMFYUI_SERIALIZE_LLAMACPP=${COMFYUI_SERIALIZE_LLAMACPP:-0} - - COMFYUI_QUEUE_POLL_SECONDS=${COMFYUI_QUEUE_POLL_SECONDS:-2} - - COMFYUI_DRAIN_SECONDS=${COMFYUI_DRAIN_SECONDS:-20} - - COMFYUI_GUARDIAN_TARGET=${COMFYUI_GUARDIAN_TARGET:-llamacpp} - # Phase 1: after drain, POST ComfyUI /free so PyTorch's caching allocator - # releases. Default ON; harmless 200 OK when nothing is held. - - COMFYUI_FREE_AFTER_DRAIN=${COMFYUI_FREE_AFTER_DRAIN:-1} - # Phase 2: VRAM-pressure watchdog. Independent of the queue. When total - # used VRAM exceeds OPS_VRAM_PRESSURE_GB, call ComfyUI /free; recheck - # until below OPS_VRAM_RECOVERY_GB (or pressure-4 if unset). 
Disabled - # while OPS_VRAM_PRESSURE_GB <= 0. - - OPS_VRAM_PRESSURE_GB=${OPS_VRAM_PRESSURE_GB:-0} - - OPS_VRAM_RECOVERY_GB=${OPS_VRAM_RECOVERY_GB:-0} - - OPS_VRAM_POLL_SECONDS=${OPS_VRAM_POLL_SECONDS:-30} - # Self-heal watchdog (opt-in): restart any exited compose service after a - # grace window, except those in OPS_WATCHDOG_EXCLUDE. Disabled by default. - - OPS_HERMES_WATCHDOG_ENABLED=${OPS_HERMES_WATCHDOG_ENABLED:-0} - - OPS_HERMES_WATCHDOG_INTERVAL_SECONDS=${OPS_HERMES_WATCHDOG_INTERVAL_SECONDS:-30} - - OPS_HERMES_WATCHDOG_GRACE_SECONDS=${OPS_HERMES_WATCHDOG_GRACE_SECONDS:-60} - - OPS_HERMES_WATCHDOG_PAUSE_FILE=${OPS_HERMES_WATCHDOG_PAUSE_FILE:-/data/watchdog.paused} - volumes: - - /var/run/docker.sock:/var/run/docker.sock - - ${BASE_PATH:-.}:/workspace - - ${DATA_PATH:-${BASE_PATH:-.}/data}/ops-controller:/data - - ${BASE_PATH:-.}/models/comfyui:/models/comfyui - # Read-only: decrypted runtime secrets for compose interpolation (see - # RUNTIME_ENV_FILE above). Same host path the top-level `secrets:` block uses; - # `make up` runs decrypt-secrets first, so this file exists before compose up. - - ${HOME}/.ai-toolkit/runtime/.env:/run/runtime.env:ro - secrets: - - hf_token - healthcheck: - # Socket-only check — verifies the port is bound without paying for - # urllib.request's huge import graph (saved 2-5s on Docker Desktop where - # the urllib-based check was flaking past 30s). App-level health is - # already covered by every dependent service calling real endpoints. - test: ["CMD", "python3", "-c", "import socket; s=socket.create_connection(('127.0.0.1',9000),2); s.close()"] - start_period: 15s - interval: 30s - timeout: 5s - retries: 3 - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - # No host port - dashboard calls internally. Add "9000:9000" for debugging. 
- networks: - - backend - - dashboard: - build: ./dashboard - restart: unless-stopped - # Image entrypoint runs as root briefly to chmod bind-mounted /models, then gosu appuser. - # Do not set user: here or ComfyUI pulls fail with Permission denied on /models. - # gosu needs SETUID/SETGID; no-new-privileges breaks user switching (EPERM). - tmpfs: - - /tmp - cap_drop: [ALL] - cap_add: - - SETUID - - SETGID - depends_on: - llamacpp: - condition: service_started - extra_hosts: - - "host.docker.internal:host-gateway" - healthcheck: - test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/api/health')"] - interval: 30s - timeout: 10s - retries: 3 - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - environment: - - LLAMACPP_URL=http://llamacpp:8080 - - MODEL_GATEWAY_API_KEY=${LITELLM_MASTER_KEY:-local} - - MODELS_DIR=/models - - GGUF_MODELS_DIR=/gguf-models - - SCRIPTS_DIR=/scripts - - MCP_CONFIG_PATH=/mcp-config/servers.txt - - MCP_GATEWAY_URL=http://mcp-gateway:8811 - # Must include comfyui so mcp-gateway loads ComfyUI tools (registry-custom.yaml). Matches data/mcp/servers.txt default. - # Web search is the self-hosted searxng MCP (see services.searxng). playwright is stack-pinned in registry-custom.yaml. 
- - MCP_GATEWAY_SERVERS=${MCP_GATEWAY_SERVERS:-duckduckgo,n8n,searxng,comfyui,orchestration,playwright} - # Read-only: ComfyUI user workflows (host: data/comfyui-storage/ComfyUI/user/default/workflows) - - COMFYUI_WORKFLOWS_DIR=/comfyui-workflows - - OPS_CONTROLLER_URL=http://ops-controller:9000 - - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:-} - - DASHBOARD_AUTH_TOKEN=${DASHBOARD_AUTH_TOKEN:-} - - THROUGHPUT_RECORD_TOKEN=${THROUGHPUT_RECORD_TOKEN:-} - - HF_TOKEN_FILE=/run/secrets/hf_token - - COMFYUI_URL=http://comfyui:8188 - - MODEL_GATEWAY_URL=http://model-gateway:11435 - - LLAMACPP_CTX_SIZE=${LLAMACPP_CTX_SIZE:-262144} - - DASHBOARD_DATA_PATH=/data/dashboard - # n8n webhook for publish_enqueue (or pass per-request); n8n owns retries/OAuth - - N8N_PUBLISH_WEBHOOK_URL=${N8N_PUBLISH_WEBHOOK_URL:-} - - COMFYUI_OUTPUT_DIR=/comfyui-output - - DASHBOARD_TRUST_PROXY_HEADERS=true - - DASHBOARD_TRUSTED_PROXY_NET=172.24.0.0/16 - volumes: - - ${BASE_PATH:-.}/models/comfyui:/models - - ${BASE_PATH:-.}/models/gguf:/gguf-models - - ${BASE_PATH:-.}/scripts:/scripts:ro - - ${DATA_PATH:-${BASE_PATH:-.}/data}/dashboard:/data/dashboard - - ${DATA_PATH:-${BASE_PATH:-.}/data}/mcp:/mcp-config - - ${BASE_PATH:-.}/data/comfyui-storage/ComfyUI/user/default/workflows:/comfyui-workflows:ro - - ${DATA_PATH:-${BASE_PATH:-.}/data}/comfyui-output:/comfyui-output:ro - secrets: - - hf_token - networks: - - frontend - - backend - - proxy-net - - worker: - build: - context: . 
- dockerfile: worker/Dockerfile - restart: unless-stopped - healthcheck: - test: ["CMD-SHELL", "[ -f /tmp/worker.heartbeat ] && [ $(($(date +%s) - $(cat /tmp/worker.heartbeat))) -lt 120 ]"] - start_period: 30s - interval: 30s - timeout: 5s - retries: 3 - depends_on: - dashboard: - condition: service_started - comfyui: - condition: service_started - environment: - - DASHBOARD_DATA_PATH=/data/dashboard - - COMFYUI_URL=http://comfyui:8188 - - COMFYUI_WORKFLOWS_DIR=/comfyui-workflows - - COMFYUI_OUTPUT_DIR=/comfyui-output - - WORKER_POLL_INTERVAL_SEC=${WORKER_POLL_INTERVAL_SEC:-0.5} - - WORKER_CONCURRENCY=${WORKER_CONCURRENCY:-2} - - WORKER_SCHEDULE_CHECK_SEC=30 - - WORKER_MAX_JOB_RETRIES=2 - - WORKER_PUBLISH_MAX_ATTEMPTS=5 - - N8N_PUBLISH_WEBHOOK_URL=${N8N_PUBLISH_WEBHOOK_URL:-} - volumes: - - ${DATA_PATH:-${BASE_PATH:-.}/data}/dashboard:/data/dashboard - - ${BASE_PATH:-.}/data/comfyui-storage/ComfyUI/user/default/workflows:/comfyui-workflows:ro - - ${DATA_PATH:-${BASE_PATH:-.}/data}/comfyui-output:/comfyui-output:ro - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - networks: - - backend - - open-webui: - image: ${OPEN_WEBUI_IMAGE:-ghcr.io/open-webui/open-webui:v0.9.2} - restart: unless-stopped - depends_on: - llamacpp: - condition: service_started - model-gateway: - condition: service_started - qdrant: - condition: service_started - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8080/"] - start_period: 120s - interval: 30s - timeout: 10s - retries: 3 - environment: - # Route all model requests through the gateway (unified provider) - - OLLAMA_BASE_URL= - - OPENAI_API_BASE_URL=${OPENAI_API_BASE:-http://model-gateway:11435/v1} - - OPENAI_API_KEY=${LITELLM_MASTER_KEY:-local} - # Auth: False = single-user local / Tailscale use. - - WEBUI_AUTH=${WEBUI_AUTH:-False} - # Default model shown in chat UI. Dashboard writes OPEN_WEBUI_DEFAULT_MODEL to prefer the low-context :chat alias. 
- - DEFAULT_MODELS=${OPEN_WEBUI_DEFAULT_MODEL:-${DEFAULT_MODEL:-}} - # RAG: use Qdrant for vector storage - - VECTOR_DB=qdrant - - QDRANT_URI=http://qdrant:6333 - - QDRANT_URL=http://qdrant:6333 - - RAG_EMBEDDING_ENGINE=openai - - RAG_OPENAI_API_BASE_URL=http://model-gateway:11435/v1 - - RAG_OPENAI_API_KEY=${LITELLM_MASTER_KEY:-local} - - RAG_EMBEDDING_MODEL=${EMBED_MODEL:-${LLAMACPP_EMBED_MODEL:-nomic-embed-text-v1.5.Q4_K_M.gguf}} - # MCP tools: connect Open WebUI to the shared mcp-gateway (streamable HTTP), - # exposing every stack MCP (n8n, comfyui, searxng, orchestration, blog, - # playwright, qdrant-rag) as callable tools. Seeds tool_server.connections - # (the DB has none yet). One aggregated endpoint = all servers' tools. - - 'TOOL_SERVER_CONNECTIONS=[{"type":"mcp","url":"http://mcp-gateway:8811/mcp","auth_type":"none","info":{"id":"ordo-mcp","name":"Ordo MCP Gateway","description":"Shared stack tools: n8n, comfyui, searxng, orchestration, blog, playwright, qdrant-rag"}}]' - volumes: - - ${DATA_PATH:-${BASE_PATH:-.}/data}/open-webui:/app/backend/data - networks: - - frontend - - backend - - proxy-net - - gguf-puller: - profiles: [models] - image: python:3.12-slim - restart: "no" - environment: - # Optional HF token via env (set HF_TOKEN in .env for gated repos); empty by - # default so public repos pull token-free. Replaces the file secret, which - # made `compose run gguf-puller` hard-fail whenever the SOPS-managed secret - # file was absent or its ${HOME} source mis-resolved under a Hermes-invoked - # compose subprocess. pull_gguf_models.py reads HF_TOKEN when the file is absent. 
- - HF_TOKEN=${HF_TOKEN:-} - - GGUF_MODELS=${GGUF_MODELS:-} - volumes: - - ${BASE_PATH:-.}/models/gguf:/models - - ${BASE_PATH:-.}/scripts:/scripts:ro - command: ["sh", "-c", "pip install -q huggingface_hub && python3 /scripts/pull_gguf_models.py"] - networks: - - frontend - - comfyui-model-puller: - profiles: [comfyui-models] - image: python:3.12.8-slim - restart: "no" - user: "0:0" - environment: - - MODELS_DIR=/models - - HF_TOKEN_FILE=/run/secrets/hf_token - - CIVITAI_TOKEN_FILE=/run/secrets/civitai_token - # Host: $env:COMFYUI_PACKS="flux1-dev-gguf" (PowerShell) — forwarded into the container - - COMFYUI_PACKS=${COMFYUI_PACKS:-} - - COMFYUI_QUANT=${COMFYUI_QUANT:-} - volumes: - - ${BASE_PATH:-.}/models/comfyui:/models - - ${BASE_PATH:-.}/scripts:/scripts:ro - secrets: - - hf_token - - civitai_token - # chmod first — ensures write access on Docker Desktop/Windows bind mounts - command: ["sh", "-c", "chmod -R a+w /models && python3 /scripts/comfyui/pull_comfyui_models.py"] - networks: - - frontend - - # Ensures ComfyUI-Manager is cloned before ComfyUI starts. Safe to re-run — skips if already present. - comfyui-manager-setup: - image: alpine:3.21 - restart: "no" - volumes: - - ${BASE_PATH:-.}/data/comfyui-storage:/root - command: - - sh - - -c - - | - set -eu - apk add --no-cache git >/dev/null 2>&1 - TARGET=/root/ComfyUI/custom_nodes/ComfyUI-Manager - if [ ! -d "$$TARGET/.git" ]; then - echo "Cloning ComfyUI-Manager..." - mkdir -p /root/ComfyUI/custom_nodes - git clone --depth=1 https://github.com/Comfy-Org/ComfyUI-Manager.git "$$TARGET" - echo "ComfyUI-Manager installed." - else - echo "ComfyUI-Manager already present, skipping." - fi - - comfyui: - image: ${COMFYUI_IMAGE:-yanwk/comfyui-boot:cpu} - # No fixed container_name — avoids "name already in use" when another project - # owns `comfyui`; Docker DNS still resolves the service name `comfyui` on this network. 
- restart: unless-stopped - depends_on: - comfyui-manager-setup: - condition: service_completed_successfully - # Backend network so MCP gateway-spawned comfyui container can reach it - # ComfyUI: run scripts/detect_hardware.py to auto-configure GPU (NVIDIA/AMD/Intel) or CPU - # Custom nodes + ComfyRegistry can take several minutes before :8188 serves; short grace marks healthy deps as failed. - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8188/"] - start_period: 420s - interval: 20s - timeout: 15s - retries: 12 - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - environment: - # --enable-manager: ComfyUI-Manager (node installs, model UI) — see docs.comfy.org/manager/install - # Override via COMFYUI_CLI_ARGS in .env (GPU defaults in overrides/compute.yml include --normalvram) - - CLI_ARGS=${COMFYUI_CLI_ARGS:---cpu --enable-manager} - - PYTORCH_CUDA_ALLOC_CONF= - # Hugging Face downloads (gated models) from Manager or built-in fetchers - - HF_TOKEN_FILE=/run/secrets/hf_token - # ComfyUI-Manager: GitHub API rate limits for custom node installs (optional; same token as GitHub MCP). - # ComfyUI's Manager reads GITHUB_TOKEN (not GITHUB_PERSONAL_ACCESS_TOKEN); _FILE pointer matches. - - GITHUB_TOKEN_FILE=/run/secrets/github_pat - # JunoLLMRefine talks to model-gateway:11435 (LiteLLM master key 'local') and - # may need to wake llamacpp via ops-controller when the guardian has paused it. 
- - OPS_CONTROLLER_URL=http://ops-controller:9000 - - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:-} - - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-local} - volumes: - - ${BASE_PATH:-.}/data/comfyui-storage:/root - - ${BASE_PATH:-.}/models/comfyui:/root/ComfyUI/models - - ${DATA_PATH:-${BASE_PATH:-.}/data}/comfyui-output:/root/ComfyUI/output - secrets: - - hf_token - - github_pat - # Bridge Docker secrets *_FILE pointers to the plaintext env vars the - # upstream image's runner-scripts/entrypoint.sh + ComfyUI custom nodes - # (HF SDK, ComfyUI-Manager) read directly. The upstream image's default - # CMD is `bash /runner-scripts/entrypoint.sh`; we replace it with a shim - # that exports HF_TOKEN / GITHUB_TOKEN from the mounted secret files, - # installs every custom node's pip requirements (the image's writable - # layer is wiped on container recreate, so a manual `pip install` for a - # custom-node dep doesn't survive the next `compose up`), then exec's - # the original entrypoint. Idempotent: pip skips already-satisfied - # specifiers so warm-cache restarts add only seconds. 
- command: - - bash - - -c - - | - if [ -f "$${HF_TOKEN_FILE:-/run/secrets/hf_token}" ]; then - export HF_TOKEN="$$(cat "$${HF_TOKEN_FILE:-/run/secrets/hf_token}")" - fi - if [ -f "$${GITHUB_TOKEN_FILE:-/run/secrets/github_pat}" ]; then - export GITHUB_TOKEN="$$(cat "$${GITHUB_TOKEN_FILE:-/run/secrets/github_pat}")" - fi - for r in /root/ComfyUI/custom_nodes/*/requirements.txt; do - [ -f "$$r" ] || continue - echo "[deps] installing $$r" - pip install --no-cache-dir --no-warn-script-location -q -r "$$r" || echo "[deps] WARN failed $$r" - done - exec bash /runner-scripts/entrypoint.sh - networks: - - frontend - - backend - - proxy-net - - # Build ComfyUI MCP image for gateway-spawned containers (exits immediately after build) - comfyui-mcp-image: - build: ./comfyui-mcp - image: ordo-ai-stack-comfyui-mcp:latest - pull_policy: build - restart: "no" - command: ["true"] - - # Stable orchestration MCP (fixed verbs → dashboard HTTP). Optional build for gateway catalog. - orchestration-mcp-image: - build: ./orchestration-mcp - image: ordo-ai-stack-orchestration-mcp:latest - pull_policy: build - restart: "no" - command: ["true"] - - # Qdrant RAG MCP image (gateway-spawned). Semantic search over the `documents` - # collection; embeds queries via llamacpp-embed to match rag-ingestion's vectors. - qdrant-rag-mcp-image: - build: ./qdrant-rag-mcp - image: ordo-ai-stack-qdrant-rag-mcp:latest - pull_policy: build - restart: "no" - command: ["true"] - - # Codebase-Memory MCP image (gateway-spawned, stdio). Opt-in (heavy: bundles an - # embedding model). Build with: - # docker compose --profile codebase-memory build codebase-memory-mcp-image - # then enable in the gateway with: ./scripts/mcp_add.sh codebase-memory - # (requires CODE_ROOT set to your host code root). 
- codebase-memory-mcp-image: - build: ./codebase-memory-mcp - image: ordo-ai-stack-codebase-memory-mcp:latest - pull_policy: build - restart: "no" - command: ["true"] - profiles: ["codebase-memory"] - - # Codebase-Memory 3D graph visualization UI (long-lived; opt-in). Visualizes the - # code knowledge-graph it indexes in its own process (mounts /c/dev:ro). The - # upstream UI binds 127.0.0.1:9749 and is an absolute-asset SPA with no base path, - # so the image runs nginx (on :9750) which proxies to it and rewrites its baked - # /assets,/api,/rpc paths to the /codebase-memory/ prefix — letting Caddy serve it - # at https:///codebase-memory/ on the shared :443 SSO origin (no extra port). - # Build with: docker compose --profile codebase-memory build codebase-memory-ui - codebase-memory-ui: - build: ./codebase-memory-ui - image: ordo-ai-stack-codebase-memory-ui:latest - pull_policy: build - restart: unless-stopped - profiles: ["codebase-memory"] - volumes: - - codebase-memory-cache:/cache - # Source tree (read-only) so the UI's own long-lived process can index and - # visualize it. The graph index lives in-process (it is not reliably flushed - # to the cache volume across container exits), so the UI indexes its own - # graph rather than depending on the gateway-spawned MCP's index. 
- - ${CODE_ROOT:-/c/dev}:/c/dev:ro - healthcheck: - test: ["CMD-SHELL", "curl -fsS -o /dev/null http://localhost:9750/codebase-memory/ || exit 1"] - interval: 30s - timeout: 5s - retries: 5 - start_period: 20s - networks: - - proxy-net - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - - comfyui-mcp: - image: ordo-ai-stack-comfyui-mcp:latest - pull_policy: build - restart: unless-stopped - healthcheck: - test: ["CMD-SHELL", "python3 -c \"import socket; s=socket.create_connection(('127.0.0.1',9000),2); s.close()\""] - start_period: 30s - interval: 30s - timeout: 5s - retries: 3 - depends_on: - comfyui: - condition: service_started - command: ["python", "server.py"] - environment: - - COMFYUI_URL=http://comfyui:8188 - - COMFY_MCP_WORKFLOW_DIR=/workflows - # Host: data/comfyui-storage/ComfyUI/user/default/workflows (seeded API graphs under mcp-api/) - # When the agent omits workflow_id but sends prompt/width/etc. at top level, use this workflow id (path under workflows dir). - - COMFY_MCP_DEFAULT_WORKFLOW_ID=${COMFY_MCP_DEFAULT_WORKFLOW_ID:-mcp-api/generate_image} - # Require explicit workflow_id for autonomous runs (no silent default) — set to 1 to allow legacy default. - - COMFY_MCP_ALLOW_DEFAULT_WORKFLOW_ID=${COMFY_MCP_ALLOW_DEFAULT_WORKFLOW_ID:-0} - - COMFY_MCP_DEFAULT_MODEL=${COMFY_MCP_DEFAULT_MODEL:-flux1-schnell-fp8.safetensors} - - OPS_CONTROLLER_URL=http://ops-controller:9000 - - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:-} - volumes: - # Same host tree as ComfyUI user/default/workflows. API-format JSON for /prompt; UI exports are listed but cannot run via MCP. 
- - ${BASE_PATH:-.}/data/comfyui-storage/ComfyUI/user/default/workflows:/workflows:ro - networks: - - backend - - n8n: - image: ${N8N_IMAGE:-docker.n8n.io/n8nio/n8n:2.20.6} - restart: unless-stopped - # Run as non-root (n8n image uses node user; 1000:1000 matches typical node uid) - user: "1000:1000" - depends_on: - model-gateway: - condition: service_started - mcp-gateway: - condition: service_started - healthcheck: - test: ["CMD", "wget", "-q", "-O", "/dev/null", "http://localhost:5678/"] - interval: 30s - timeout: 10s - retries: 3 - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - environment: - - N8N_HOST=0.0.0.0 - - N8N_PORT=5678 - # n8n is mounted at /n8n/ via Caddy's handle_path (prefix-strip). Without - # N8N_PATH, n8n thinks it lives at root and emits absolute /assets/... - # URLs that 404 at Caddy. With it set, n8n emits /n8n/assets/... which - # Caddy strips back to /assets/... and serves correctly. - - N8N_PATH=/n8n/ - # Caddy is one reverse-proxy hop in front; this lets n8n honour the - # X-Forwarded-* headers oauth2-proxy + Caddy inject so cookies, CSRF - # tokens, and outbound redirect URLs all use the public scheme/host. - - N8N_PROXY_HOPS=1 - # Skip the first-run owner-setup wizard. This flag does NOT bypass the - # email/password login form — in n8n 2.x `authenticationMethod` is - # constrained to [email, ldap, saml] (see dist/config/schema.js) and - # there is no `none` option. /rest/settings still reports - # `authenticationMethod: "email"` with this flag set, and the SPA shows - # the login screen accordingly. - # - # Operator workflow: oauth2-proxy at Caddy gates the /n8n/* URL with the - # single-Gmail allowlist (auth/oauth2-proxy/emails.txt), then n8n's own - # login form requires an owner account's credentials. 
Bootstrap the owner - # once via the first-run UI (or POST /rest/owner/setup), store the creds - # in your own secret store outside this repo, and rely on the ~7-day - # session cookie so the second login is infrequent. - - N8N_USER_MANAGEMENT_DISABLED=true - # Route all model traffic through Model Gateway (dashboard tracking, unified provider) - - OLLAMA_HOST=http://model-gateway:11435 - - OPENAI_API_BASE_URL=${OPENAI_API_BASE:-http://model-gateway:11435/v1} - - OPENAI_API_KEY=local - # OAuth callbacks + inbound webhooks require a public URL. - # Recommended: tailscale funnel --set-path /rest/oauth2-credential/callback 5678 - # tailscale funnel --set-path /webhook 5678 - # Then set N8N_WEBHOOK_URL=https://your-machine.your-tailnet.ts.net in .env - - WEBHOOK_URL=${N8N_WEBHOOK_URL:-} - - N8N_EDITOR_BASE_URL=${N8N_WEBHOOK_URL:-} - # Only the callback and webhook paths need to be reachable without a session cookie - - N8N_AUTH_EXCLUDE_ENDPOINTS=rest/oauth2-credential/callback,webhook - - N8N_SECURE_COOKIE=false - volumes: - - ${DATA_PATH:-${BASE_PATH:-.}/data}/n8n-data:/home/node/.n8n - - ${DATA_PATH:-${BASE_PATH:-.}/data}/n8n-files:/files - networks: - - frontend - - backend - - proxy-net - - mcp-gateway: - build: ./mcp - image: ordo-ai-stack-mcp-gateway:latest - pull_policy: build - # No fixed container_name — avoids conflicts when another stack already uses `mcp-gateway`. 
- restart: unless-stopped - depends_on: - comfyui-mcp-image: - # Build-only container; failure here means ComfyUI MCP won't work but other MCPs should still start - condition: service_completed_successfully - required: false - orchestration-mcp-image: - condition: service_completed_successfully - required: false - environment: - - MCP_CONFIG_FILE=/mcp-config/servers.txt - - MCP_GATEWAY_PORT=8811 - # Set to 1 for docker/mcp-gateway --verbose (see TROUBLESHOOTING — ComfyUI tools missing) - - MCP_GATEWAY_VERBOSE=${MCP_GATEWAY_VERBOSE:-0} - # MCP server API keys — sourced from Docker secrets (file-form). The - # gateway-wrapper.sh entrypoint bridges the *_FILE pointers back to - # plaintext env vars for the spawned MCP servers (which read the - # canonical names directly). - - GITHUB_PERSONAL_ACCESS_TOKEN_FILE=/run/secrets/github_pat - # n8n MCP server (mcp/n8n) — for workflow tools when n8n API key is set. - # `n8n.api_key` is a Docker secret mounted at /run/secrets/n8n_api_key; - # gateway-wrapper.sh reads the file and exports it as the canonical - # N8N_API_KEY env var that mcp/n8n expects. - - N8N_API_URL=${N8N_API_URL:-http://n8n:5678} - - N8N_API_KEY_FILE=/run/secrets/n8n_api_key - # ComfyUI MCP (custom registry) — passed to spawned comfyui container - - COMFYUI_URL=http://comfyui:8188 - - COMFY_MCP_DEFAULT_MODEL=${COMFY_MCP_DEFAULT_MODEL:-flux1-schnell-fp8.safetensors} - - OPS_CONTROLLER_URL=http://ops-controller:9000 - - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:-} - # Injected into registry-custom.yaml for orchestration MCP (dashboard Bearer) - - DASHBOARD_AUTH_TOKEN=${DASHBOARD_AUTH_TOKEN:-} - # Codebase-Memory MCP — host code root (read-only mount source for the - # spawned container) + bind-mount allowlist. CODE_ROOT must be the HOST path - # that contains your repos (what Hermes sees as /c/dev). Allow-listing it lets - # the gateway's hardened bind logic accept the read-only /c/dev mount. 
- - CODE_ROOT=${CODE_ROOT:-} - - MCP_GATEWAY_DOCKER_BIND_ALLOWED_PATHS=${CODE_ROOT:-} - volumes: - - /var/run/docker.sock:/var/run/docker.sock - - ${DATA_PATH:-${BASE_PATH:-.}/data}/mcp:/mcp-config - secrets: - - github_pat - - n8n_api_key - # Published on host so external MCP clients (e.g. Cursor, Claude Desktop) - # can reach it. Backend services still address it as - # http://mcp-gateway:8811 over the docker network. - ports: - # 127.0.0.1 bind: localhost-only. Keeps .mcp.json / .cline / VS Code - # MCP clients working on the host; removes LAN exposure. - - "127.0.0.1:${MCP_GATEWAY_PORT:-8811}:8811" - healthcheck: - # Verify gateway is listening AND has loaded its tool catalog (tools/list returns >0 tools). - # Falls back to port check if curl is unavailable. - test: ["CMD-SHELL", "sh /mcp-scripts/healthcheck.sh"] - start_period: 60s - interval: 15s - timeout: 10s - retries: 5 - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - networks: - - backend - # proxy-net: lets Caddy (front door) reach the gateway for the /mcp/* route. - - proxy-net - - oauth2-proxy: - # alpine variant ships with wget for the in-container healthcheck below. - # The default :latest is distroless and has no shell or HTTP probe tools. - image: quay.io/oauth2-proxy/oauth2-proxy:v7.15.2-alpine - restart: unless-stopped - # oauth2-proxy is used purely as an authn endpoint via Caddy `forward_auth` - # (Caddy calls /oauth2/auth, oauth2-proxy returns 202 on valid session, - # 401 otherwise). It never proxies to a real upstream, so --upstream is a - # static 202 placeholder. Do not change without redesigning the front door. 
- command: - - --provider=google - - --http-address=0.0.0.0:4180 - - --reverse-proxy=true - - --set-xauthrequest=true - - --upstream=static://202 - - --redirect-url=https://${CADDY_TAILNET_HOSTNAME}/oauth2/callback - - --whitelist-domain=.${CADDY_TAILNET_DOMAIN} - - --cookie-domain=.${CADDY_TAILNET_DOMAIN} - - --cookie-secure=true - - --cookie-samesite=lax - - --cookie-expire=24h - # NOTE: do NOT add --email-domain=* alongside --authenticated-emails-file. - # Either condition allows the user in, so the wildcard would defeat the - # allowlist. The file is the only gate. - - --authenticated-emails-file=/etc/oauth2-proxy/emails.txt - - --skip-provider-button=true - environment: - - OAUTH2_PROXY_CLIENT_ID=${OAUTH2_PROXY_CLIENT_ID} - - OAUTH2_PROXY_CLIENT_SECRET=${OAUTH2_PROXY_CLIENT_SECRET} - - OAUTH2_PROXY_COOKIE_SECRET=${OAUTH2_PROXY_COOKIE_SECRET} - volumes: - - ./auth/oauth2-proxy/emails.txt:/etc/oauth2-proxy/emails.txt:ro - healthcheck: - test: ["CMD", "wget", "-q", "--spider", "http://localhost:4180/ping"] - interval: 30s - timeout: 5s - retries: 3 - networks: - - proxy-net - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - - caddy: - image: caddy:2.11.2-alpine - restart: unless-stopped - # CADDY_BIND must be the host's tailnet IP. The :? failsafe makes - # `docker compose config` exit non-zero if it's empty or unset, since - # an empty bind silently degrades to 0.0.0.0:443 (compose only warns). - ports: - - "${CADDY_BIND:?CADDY_BIND must be set to the host tailnet IP — never empty or 0.0.0.0}:443:443" - environment: - - CADDY_TAILNET_HOSTNAME=${CADDY_TAILNET_HOSTNAME} - - CADDY_TAILNET_DOMAIN=${CADDY_TAILNET_DOMAIN} - # Bearer token (SOPS) gating the /mcp/* route for remote MCP clients - # (Cline/Cursor). The gateway has no auth of its own; Caddy enforces this. 
- - MCP_GATEWAY_TOKEN=${MCP_GATEWAY_TOKEN:-} - volumes: - - ./auth/caddy/Caddyfile:/etc/caddy/Caddyfile:ro - - ${TAILSCALE_CERT_DIR:-./auth/caddy/certs}:/etc/caddy/certs:ro - - caddy_data:/data - - caddy_config:/config - depends_on: - oauth2-proxy: - condition: service_started - healthcheck: - test: ["CMD", "wget", "-q", "--spider", "http://localhost/healthz"] - interval: 30s - timeout: 5s - retries: 3 - networks: - # caddy is the SSO ingress — it only needs proxy-net to reach upstreams. - # All upstreams it proxies (oauth2-proxy, dashboard, open-webui, n8n, - # comfyui, hermes-dashboard) are on proxy-net. Membership on `frontend` - # caused Docker DNS to return frontend IPs for the dashboard, putting - # caddy's source IP outside DASHBOARD_TRUSTED_PROXY_NET=172.24.0.0/16 - # and 401-ing every /api/* call from the SSO front door. - - proxy-net - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - - # --- Voice (STT / TTS) --- - - stt: - # Speech-to-text (faster-whisper, OpenAI-compatible /v1/audio/transcriptions). - # Opt-in: docker compose --profile voice up -d. GPU pin via the model registry - # (defaults to the secondary GPU). Reached internally at http://stt:8000/v1. - profiles: ["voice"] - # sha-pinned for reproducibility. NOTE: must run on a Pascal-class GPU (the 1070); - # the registry pins it to the secondary GPU. CTranslate2 int8 has no Blackwell kernels. 
- image: fedirz/faster-whisper-server@sha256:0b64050ad0b9244745746b652473ee42a8d5454d501877a252c3e65f631ffc99 - restart: unless-stopped - environment: - - WHISPER__MODEL=${STT_MODEL:-Systran/faster-whisper-small} - - WHISPER__INFERENCE_DEVICE=cuda - - WHISPER__COMPUTE_TYPE=${STT_COMPUTE_TYPE:-int8} - volumes: - - ${DATA_PATH:-${BASE_PATH:-.}/data}/voice/hf-cache:/root/.cache/huggingface - healthcheck: - test: ["CMD-SHELL", "python3 -c \"import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:8000/v1/models',timeout=5).status==200 else 1)\""] - interval: 30s - timeout: 10s - retries: 5 - start_period: 90s - logging: - driver: json-file - options: { max-size: "10m", max-file: "3" } - networks: [backend] - - tts: - # Text-to-speech (Kokoro, OpenAI-compatible /v1/audio/speech). Opt-in profile. - # Reached internally at http://tts:8880/v1. Voice chosen per-request (af_bella default). - profiles: ["voice"] - # sha-pinned. NOTE: Kokoro's PyTorch build has no Blackwell kernels — must run on the - # Pascal 1070 (the registry pins it to the secondary GPU). It will crash on the 5090. - image: ghcr.io/remsky/kokoro-fastapi-gpu@sha256:63176e12e476470f020e29dfb3203bac249fa66c8fdf95e44b7482546eb4e974 - restart: unless-stopped - healthcheck: - test: ["CMD-SHELL", "python3 -c \"import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:8880/v1/audio/voices',timeout=5).status==200 else 1)\""] - interval: 30s - timeout: 10s - retries: 5 - start_period: 90s - logging: - driver: json-file - options: { max-size: "10m", max-file: "3" } - networks: [backend] - - # --- RAG --- - - qdrant: - image: qdrant/qdrant:v1.17.1 - restart: unless-stopped - ports: - # 127.0.0.1 bind: localhost-only. Internal services use http://qdrant:6333 - # over the docker network; this publish exists for host-side debugging / - # one-off scripts only. Removes LAN exposure of an unauthenticated vector DB. 
- - "127.0.0.1:${QDRANT_PORT:-6333}:6333" - volumes: - - ${DATA_PATH:-${BASE_PATH:-.}/data}/qdrant:/qdrant/storage - healthcheck: - test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/6333'"] - interval: 30s - timeout: 10s - retries: 3 - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - networks: - - backend - - rag-ingestion: - profiles: [rag] - build: ./rag-ingestion - restart: unless-stopped - healthcheck: - test: ["CMD", "test", "-f", "/tmp/rag-ingestion.heartbeat"] - start_period: 60s - interval: 30s - timeout: 5s - retries: 3 - environment: - # Embed directly against the raw llama.cpp embedding server. litellm's - # /v1/embeddings route 500s for the local embed model; llama-server works - # and ignores the model field. This keeps ingest + qdrant-rag-mcp queries - # in the SAME 768-dim nomic space. - - MODEL_GATEWAY_URL=http://llamacpp-embed:8080 - - EMBED_MODEL=${EMBED_MODEL:-${LLAMACPP_EMBED_MODEL:-nomic-embed-text-v1.5.Q4_K_M.gguf}} - - QDRANT_URL=http://qdrant:6333 - - QDRANT_COLLECTION=${RAG_COLLECTION:-documents} - - WATCH_DIR=/watch - - CHUNK_SIZE=${RAG_CHUNK_SIZE:-400} - - CHUNK_OVERLAP=${RAG_CHUNK_OVERLAP:-50} - - SCAN_INTERVAL_SEC=${RAG_SCAN_INTERVAL_SEC:-15} - volumes: - - ${DATA_PATH:-${BASE_PATH:-.}/data}/rag-input:/watch - depends_on: - llamacpp-embed: - condition: service_started - qdrant: - condition: service_started - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - networks: - - backend - - hermes-gateway: - build: - context: ./hermes - dockerfile: Dockerfile - image: ordo-ai-stack-hermes:latest - pull_policy: build - restart: unless-stopped - # Hermes used to hold /var/run/docker.sock (and group_add ["0"]) for its - # built-in docker tools. Plan C narrowed that surface: Hermes no longer - # has the socket. Privileged container ops route through ops-controller's - # HTTP API at OPS_CONTROLLER_URL with OPS_CONTROLLER_TOKEN. 
See - # docs/runbooks/bounded-hermes.md for the new verb surface. - depends_on: - # Hermes specifically requires these peers to be HEALTHY (not just - # started) — otherwise the gateway spams 5xx on every request while - # model-gateway is still loading LiteLLM config or mcp-gateway is - # still warming the catalog. Enforced by - # tests/test_hermes_docker.py::test_hermes_services_depend_on_stack; - # don't relax to `service_started` without updating that test. - model-gateway: - condition: service_healthy - mcp-gateway: - condition: service_healthy - dashboard: - condition: service_healthy - ops-controller: - condition: service_started - environment: - - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-local} - - PYTHONIOENCODING=utf-8 - # Voice: STT openai-provider base URL points at the local faster-whisper - # service (profile: voice). TTS base URL is set in hermes config.yaml - # (tts.openai.base_url). Auto-TTS-on-voice-input replies in voice. See - # docs/configuration.md "Voice (STT/TTS)". - - STT_OPENAI_BASE_URL=${STT_OPENAI_BASE_URL:-http://stt:8000/v1} - # ops-controller HTTP API replaces the raw Docker socket Hermes used - # to hold. Privileged verbs: /containers/*, /compose/*. See - # hermes/ops_client.py for the in-process wrapper. - - OPS_CONTROLLER_URL=http://ops-controller:9000 - - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:?required for Hermes to reach ops-controller} - # Single source of truth for context window across the stack — llamacpp - # allocates KV for this; model-gateway templates it into litellm_config - # as max_input_tokens; the hermes entrypoint seeds model.context_length - # into $HERMES_HOME/config.yaml so the dashboard progress bar matches. - - LLAMACPP_CTX_SIZE=${LLAMACPP_CTX_SIZE:-262144} - # Per-turn budgets seeded into $HERMES_HOME/config.yaml by - # hermes/entrypoint.sh on startup. Override in .env to monitor or tune - # from a single place rather than editing the in-container yaml. 
- - HERMES_MAX_TOKENS=${HERMES_MAX_TOKENS:-65536} - - HERMES_MAX_TURNS=${HERMES_MAX_TURNS:-90} - - HERMES_GATEWAY_TIMEOUT=${HERMES_GATEWAY_TIMEOUT:-3600} - # API retry budget — covers transient 5xx + network errors, especially - # the 503 "Loading model" window when COMFYUI_SERIALIZE_LLAMACPP stops - # llamacpp during a ComfyUI generation. Default 10 ≈ 12 min cumulative - # wait (jittered exponential backoff capped at 120s per attempt). - - HERMES_API_MAX_RETRIES=${HERMES_API_MAX_RETRIES:-10} - # 24h — effectively "never timeout". Hermes's run_agent.py:6606 reads - # HERMES_STREAM_STALE_TIMEOUT (env-only, not a config key) with default 180s, - # which kills streaming responses mid-flight on slow local-model turns. - # Agent-level gateway_timeout (above) is the real upper bound; this just - # keeps the streaming stale-detector from firing first. - - HERMES_STREAM_STALE_TIMEOUT=86400 - # Discord bot token sourced from Docker secrets (file-form). The - # hermes/entrypoint.sh bridges DISCORD_BOT_TOKEN_FILE to the env var - # discord.py expects. Legacy DISCORD_TOKEN inline alias is dropped — - # use SOPS at secrets/discord_token.sops. - - DISCORD_BOT_TOKEN_FILE=/run/secrets/discord_token - # Backup-repo PAT (git push to ordo-hermes-backup). SOPS-managed; the - # entrypoint bridges GITHUB_BACKUP_PAT_FILE -> GITHUB_BACKUP_PAT env var. 
- - GITHUB_BACKUP_PAT_FILE=/run/secrets/github_backup_pat - - DISCORD_ALLOWED_USERS=${DISCORD_ALLOWED_USERS:-} - - DISCORD_ALLOWED_CHANNELS=${DISCORD_ALLOWED_CHANNELS:-} - - DISCORD_ALLOWED_ROLES=${DISCORD_ALLOWED_ROLES:-} - - DISCORD_REQUIRE_MENTION=${DISCORD_REQUIRE_MENTION:-true} - - DISCORD_FREE_RESPONSE_CHANNELS=${DISCORD_FREE_RESPONSE_CHANNELS:-} - - DISCORD_HOME_CHANNEL=${DISCORD_HOME_CHANNEL:-} - - DISCORD_AUTO_THREAD=${DISCORD_AUTO_THREAD:-true} - - DISCORD_REACTIONS=${DISCORD_REACTIONS:-true} - - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN:-} - volumes: - - ${BASE_PATH:-.}:/workspace:rw - # Bind-mount: data lives at host path data/hermes/ for direct host visibility. - # Windows Docker Desktop note: bind mounts have SQLite journaling quirks — - # the Dockerfile patches journal_mode WAL→DELETE which mitigates most issues. - # If "database is locked" errors appear, revert to a named volume (see the - # volumes: block at the bottom of this file for the rollback path). - - ${BASE_PATH:-.}/data/hermes:/home/hermes/.hermes - # Mount the parent dev directory into Hermes so it can read/write files - # across sibling repos. /workspace stays scoped to the ordo-ai-stack project - # root. Default target is /projects. Override HERMES_HOST_DEV_MOUNT to a - # path that mirrors your host filesystem (e.g. /c/dev on Windows where - # dev lives at C:\dev) — the historical reason was to make sibling-stack - # bind-mounts resolve identically when Hermes shelled out to `docker - # compose`. Plan C removes that shell-out path; the override is still - # useful for any future tool that does its own filesystem path - # rewriting against the host. - - ${BASE_PATH:-.}/..:${HERMES_HOST_DEV_MOUNT:-/projects}:rw - secrets: - - discord_token - - github_backup_pat - healthcheck: - # gateway_state.json is written by `hermes gateway` on startup (Docker-mode - # doesn't create a gateway.pid — that's only for systemd/launchd installs). 
- test: ["CMD-SHELL", "test -f /home/hermes/.hermes/gateway_state.json"] - start_period: 60s - interval: 30s - timeout: 5s - retries: 3 - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - networks: - - frontend - - backend - command: ["hermes", "gateway"] - - hermes-dashboard: - build: - context: ./hermes - dockerfile: Dockerfile - image: ordo-ai-stack-hermes:latest - pull_policy: build - restart: unless-stopped - depends_on: - # Same as hermes-gateway: must be healthy (not just started). Enforced - # by tests/test_hermes_docker.py::test_hermes_services_depend_on_stack. - model-gateway: - condition: service_healthy - mcp-gateway: - condition: service_healthy - dashboard: - condition: service_healthy - ops-controller: - condition: service_started - environment: - - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-local} - - PYTHONIOENCODING=utf-8 - # Same ops-controller plumbing as hermes-gateway — see Plan C runbook. - - OPS_CONTROLLER_URL=http://ops-controller:9000 - - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:?required for Hermes to reach ops-controller} - # Same single-source plumbing as hermes-gateway — dashboard shares - # the bind-mounted config.yaml so either service's entrypoint seeds it. - - LLAMACPP_CTX_SIZE=${LLAMACPP_CTX_SIZE:-262144} - - HERMES_MAX_TOKENS=${HERMES_MAX_TOKENS:-65536} - - HERMES_MAX_TURNS=${HERMES_MAX_TURNS:-90} - - HERMES_GATEWAY_TIMEOUT=${HERMES_GATEWAY_TIMEOUT:-3600} - - HERMES_API_MAX_RETRIES=${HERMES_API_MAX_RETRIES:-10} - # 24h — see hermes-gateway for rationale. Dashboard streams chat too. - - HERMES_STREAM_STALE_TIMEOUT=86400 - # Point Hermes at the pre-built SPA (Dockerfile stage 1). Without this env var, - # `hermes dashboard` tries to rebuild from web/ source, which requires npm (not - # present in the runtime image). 
- - HERMES_WEB_DIST=/opt/hermes-agent/hermes_cli/web_dist - volumes: - - ${BASE_PATH:-.}:/workspace:rw - # Bind-mount + host-dev mount: see hermes-gateway above for rationale, - # HERMES_HOST_DEV_MOUNT override, and rollback notes. - - ${BASE_PATH:-.}/data/hermes:/home/hermes/.hermes - - ${BASE_PATH:-.}/..:${HERMES_HOST_DEV_MOUNT:-/projects}:rw - healthcheck: - test: ["CMD", "curl", "-sf", "http://localhost:9119/"] - start_period: 30s - interval: 30s - timeout: 5s - retries: 3 - logging: - driver: json-file - options: - max-size: "10m" - max-file: "3" - networks: - - frontend - - backend - - proxy-net - # --insecure: Hermes rejects 0.0.0.0 binding without it. Safe here because - # the host-port publish was dropped (Plan A Task 13) — hermes-dashboard is - # only reachable on the internal Docker network via Caddy at /hermes/. - command: ["hermes", "dashboard", "--port", "9119", "--host", "0.0.0.0", "--no-open", "--insecure"] - -volumes: - caddy_data: - caddy_config: - # Per-container config/cache for codebase-memory (holds _config.db + config.json). - # NOTE: the graph index itself lives IN-PROCESS and is not reliably flushed here - # across container exits, so this is NOT a shared index — the gateway-spawned MCP - # and the UI each index in their own process. The volume is still shared/mounted - # by both; `name:` pins the literal name (no compose project prefix) to match the - # raw name the mcp-gateway uses when it spawns the MCP (-v codebase-memory-cache:/cache). - codebase-memory-cache: - name: codebase-memory-cache -# Hermes data is now bind-mounted from data/hermes/ (see hermes-gateway/dashboard above). -# The legacy `ordo-ai-stack_hermes-data` named volume still exists in Docker for -# rollback. To revert: re-add `hermes-data:` here, then switch the hermes services' -# data mount back from `${BASE_PATH:-.}/data/hermes:/home/hermes/.hermes` to -# `hermes-data:/home/hermes/.hermes` and `docker compose up -d`. 
- -networks: - frontend: - name: ordo-ai-stack-frontend - backend: - name: ordo-ai-stack-backend - # internal: false required for llama.cpp and HuggingFace model downloads. - # Set internal: true for air-gapped security (model pulls will fail). - internal: false - proxy-net: - driver: bridge - # Pinned so DASHBOARD_TRUSTED_PROXY_NET (set in dashboard's env block) - # stays in lockstep across rebuilds. Any RFC1918 /16 inside Docker's - # default address pool (172.17–172.30) works; 172.24 was the auto-assigned - # value at first boot and was kept to avoid a one-time renumber. - ipam: - config: - - subnet: 172.24.0.0/16 - -# High-value tokens are managed via SOPS (encrypted at rest under secrets/*.sops) -# and decrypted by scripts/secrets/decrypt.sh into ${HOME}/.ai-toolkit/runtime/secrets/ -# before `docker compose up`. Compose mounts each file at /run/secrets/ -# inside its consumer; service entrypoints bridge *_FILE → plaintext env where -# the app SDK doesn't natively support the _FILE pattern. -secrets: - discord_token: - file: ${HOME}/.ai-toolkit/runtime/secrets/discord_token - github_pat: - file: ${HOME}/.ai-toolkit/runtime/secrets/github_pat - github_backup_pat: - file: ${HOME}/.ai-toolkit/runtime/secrets/github_backup_pat - hf_token: - file: ${HOME}/.ai-toolkit/runtime/secrets/hf_token - civitai_token: - file: ${HOME}/.ai-toolkit/runtime/secrets/civitai_token - n8n_api_key: - file: ${HOME}/.ai-toolkit/runtime/secrets/n8n_api_key +name: ordo-ai-stack + +# All services start by default. Run: docker compose up -d +services: + # Self-hosted meta-search engine. Replaces the prior Tavily integration as the + # core search method exposed to Hermes / the MCP gateway. Internal-only + # (backend network, no host port); the MCP wrapper `searxng` in + # mcp/gateway/registry-custom.yaml queries this instance at http://searxng:8080. 
+ searxng: + image: searxng/searxng:latest + restart: unless-stopped + cap_drop: [ALL] + cap_add: [CHOWN, SETGID, SETUID] # required by uwsgi worker init + security_opt: [no-new-privileges:true] + environment: + - SEARXNG_SETTINGS_PATH=/etc/searxng/settings.yml + - SEARXNG_BASE_URL=http://searxng:8080/ + # NOTE on secret_key plumbing: the upstream image reads server.secret_key + # directly from settings.yml — it does not honour $SEARXNG_SECRET in env. + # Because the bind mount runs as the unprivileged `searxng` user inside + # the container, a runtime entrypoint-sed has no write permission. The + # real secret therefore lives in data/searxng/settings.yml (gitignored, + # same protection model as .env). $SEARXNG_SECRET in .env exists as the + # canonical operator-facing source — keep it in sync with settings.yml + # when rotating (scripts/rotate-searxng-secret.sh can automate later). + volumes: + - ${DATA_PATH:-${BASE_PATH:-.}/data}/searxng:/etc/searxng + healthcheck: + test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/healthz | grep -q OK || exit 1"] + start_period: 30s + interval: 30s + timeout: 5s + retries: 3 + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + networks: + - backend + + llamacpp: + # Pinned mainline llama.cpp (ggml-org) by DIGEST for reproducibility — + # build 9765 (73618f27a), the first build that loads Qwen3.6 MTP + qwen35moe + # GGUFs natively (upstream PR #22673). This is the single source of truth for + # the image; bump the digest deliberately (stack_monitor tracks the build). + # Override LLAMACPP_IMAGE in .env only to test a different build. 
+ image: ${LLAMACPP_IMAGE:-ghcr.io/ggml-org/llama.cpp@sha256:44cd08334f85c1fcb363e3798302f2986cdb78cdfc8c1cf56bdad44041595a32} + restart: unless-stopped + platform: linux/amd64 + entrypoint: ["/bin/sh", "/llamacpp-scripts/run-llama-server.sh"] + environment: + - LLAMACPP_MODEL=${LLAMACPP_MODEL:-model.gguf} + - LLAMACPP_CTX_SIZE=${LLAMACPP_CTX_SIZE:-262144} + - LLAMACPP_PARALLEL=${LLAMACPP_PARALLEL:-1} + - LLAMACPP_ROPE_SCALING=${LLAMACPP_ROPE_SCALING:-none} + - LLAMACPP_ROPE_SCALE=${LLAMACPP_ROPE_SCALE:-1} + - LLAMACPP_YARN_ORIG_CTX=${LLAMACPP_YARN_ORIG_CTX:-0} + - LLAMACPP_OVERRIDE_KV=${LLAMACPP_OVERRIDE_KV:-} + - LLAMACPP_GPU_LAYERS=${LLAMACPP_GPU_LAYERS:--1} + - LLAMACPP_FLASH_ATTN=${LLAMACPP_FLASH_ATTN:-auto} + # Hard ceiling on tokens per request (defense-in-depth against + # runaway-reasoning loops where --reasoning-budget fails to close + # the block). 64K is plenty for any legitimate response. + - LLAMACPP_N_PREDICT=${LLAMACPP_N_PREDICT:-65536} + # Cap on tokens spent inside .... Hoisted from + # LLAMACPP_EXTRA_ARGS so it's monitorable. Reliability depends on the + # model emitting a recognizable end-of-thinking token; N_PREDICT above + # is the unconditional ceiling that fires regardless. + - LLAMACPP_REASONING_BUDGET=${LLAMACPP_REASONING_BUDGET:-32768} + - LLAMACPP_ENABLE_KV_CACHE_QUANTIZATION=${LLAMACPP_ENABLE_KV_CACHE_QUANTIZATION:-0} + - LLAMACPP_KV_CACHE_TYPE_K=${LLAMACPP_KV_CACHE_TYPE_K:-q4_0} + - LLAMACPP_KV_CACHE_TYPE_V=${LLAMACPP_KV_CACHE_TYPE_V:-q4_0} + - LLAMACPP_EXTRA_ARGS=${LLAMACPP_EXTRA_ARGS:-} + # Optional vision projector (mmproj GGUF). Path is INSIDE the container — + # bind-mount maps host models/gguf/ to /models, so set + # LLAMACPP_MMPROJ=/models/.gguf. Empty = no vision. + - LLAMACPP_MMPROJ=${LLAMACPP_MMPROJ:-} + volumes: + - ${BASE_PATH:-.}/models/gguf:/models:ro + - ${BASE_PATH:-.}/scripts/llamacpp:/llamacpp-scripts:ro + # Large GGUFs can take many minutes before /health returns 200; 503 during load fails curl -f. 
+ healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/health"] + start_period: 1800s + interval: 15s + timeout: 10s + retries: 40 + # GPU config: overridden by overrides/compute.yml (run scripts/detect_hardware.py) + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + networks: + - backend + + # Use CUDA image so linux/amd64 is available (plain :server manifest can resolve to arm64 on Docker Desktop). + # Pinned by digest to the same mainline build as llamacpp (build 9765 / 73618f27a). + llamacpp-embed: + image: ${LLAMACPP_EMBED_IMAGE:-ghcr.io/ggml-org/llama.cpp@sha256:44cd08334f85c1fcb363e3798302f2986cdb78cdfc8c1cf56bdad44041595a32} + restart: unless-stopped + platform: linux/amd64 + # The upstream :server-cuda is a rolling tag that has flipped its + # ENTRYPOINT/CMD shape at least twice this week (sometimes empty + # ENTRYPOINT with binary in CMD, sometimes ENTRYPOINT=["/app/llama-server"] + # with CMD=[]). Pin both explicitly so neither variant breaks us: + # entrypoint always invokes the binary; compose's command: is its argv. 
+ entrypoint: ["/app/llama-server"] + command: > + --host 0.0.0.0 --port 8080 + --model /models/${LLAMACPP_EMBED_MODEL:-nomic-embed-text-v1.5.Q4_K_M.gguf} + --ctx-size 8192 --embeddings + volumes: + - ${BASE_PATH:-.}/models/gguf:/models:ro + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/health"] + start_period: 60s + interval: 15s + timeout: 10s + retries: 5 + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + networks: + - backend + + model-gateway: + build: ./model-gateway + image: ordo-ai-stack-model-gateway:latest + pull_policy: build + restart: unless-stopped + user: "1000:1000" + read_only: true + tmpfs: + - /tmp + cap_drop: [ALL] + security_opt: [no-new-privileges:true] + depends_on: + llamacpp: + condition: service_started + llamacpp-embed: + condition: service_started + dashboard: + condition: service_started + environment: + - LLAMACPP_URL=http://llamacpp:8080 + - LLAMACPP_EMBED_URL=http://llamacpp-embed:8080 + - LLAMACPP_MODEL=${LLAMACPP_MODEL:-model.gguf} + - LLAMACPP_EMBED_MODEL=${LLAMACPP_EMBED_MODEL:-nomic-embed-text-v1.5.Q4_K_M.gguf} + - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-local} + - LLAMACPP_CTX_SIZE=${LLAMACPP_CTX_SIZE:-262144} + # Local model used when a Claude-compatible client sends a "claude-*" model name + - CLAUDE_CODE_LOCAL_MODEL=${CLAUDE_CODE_LOCAL_MODEL:-} + # throughput_callback.py — posts per-completion samples to the dashboard. + # Must share at least one docker network with the dashboard service (the + # `backend` membership below covers it). + - DASHBOARD_URL=http://dashboard:8080 + - THROUGHPUT_RECORD_TOKEN=${THROUGHPUT_RECORD_TOKEN:-} + ports: + # 127.0.0.1 bind: localhost-only host publish. Tailnet peers reach the + # OpenAI-compatible API via Caddy at https:///llm/* — gated + # by the LiteLLM master key, no SSO (see auth/caddy/Caddyfile). Host apps + # (Cline, VS Code, MCP clients, Hermes auth.json) keep their + # `localhost:11435` connectivity. 
Removes the prior 0.0.0.0 LAN exposure. + - "127.0.0.1:${MODEL_GATEWAY_PORT:-11435}:11435" + healthcheck: + test: ["CMD-SHELL", "python3 -c \"import os, urllib.request; req = urllib.request.Request('http://localhost:11435/v1/models', headers={'Authorization': 'Bearer ' + os.environ.get('LITELLM_MASTER_KEY', 'local')}); urllib.request.urlopen(req)\""] + interval: 30s + timeout: 10s + retries: 3 + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + networks: + - frontend + - backend + # proxy-net: lets Caddy (front door) reach this for the /llm/* API route. + - proxy-net + + ops-controller: + build: ./ops-controller + image: ordo-ai-stack-ops-controller:latest + pull_policy: build + restart: unless-stopped + cap_drop: [ALL] + security_opt: [no-new-privileges:true] + # Add appuser to root group so it can read /var/run/docker.sock (root:root on Docker Desktop). + # Avoids needing root-at-start or a chmod-on-entry script. + group_add: ["0"] + environment: + - COMPOSE_PROJECT_DIR=/workspace + - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:-} + # Belt-and-suspenders: pin the in-container .env path so REGISTRY never + # falls back to the host-side BASE_PATH value (which resolves to a path + # that does not exist inside the Linux container). + - OPS_ENV_PATH=/workspace/.env + # Docker container name for ComfyUI, used by /comfyui/install-node-requirements + # to docker-exec a pip install. The code default is "comfyui" (assumes + # container_name: comfyui in compose), but this stack relies on Compose's + # auto-naming (project-service-N). Without this override, the endpoint + # silently returns 'Container "comfyui" not found'. + - COMFYUI_CONTAINER_NAME=ordo-ai-stack-comfyui-1 + # Pass through the operator's $HOME so this container's docker-compose + # subprocesses (POST /compose/* endpoints) interpolate ${HOME} the same + # way the operator's shell would. 
Without this, secret bind sources at + # ${HOME}/.ai-toolkit/runtime/secrets/* resolve against /home/appuser + # inside the container — a path that doesn't exist on the docker host + # — and `compose up` aborts on "bind source path does not exist". + - OPERATOR_HOME=${HOME} + # Read-only view of the SOPS-decrypted runtime env so compose subprocesses + # (POST /compose/*, /services/*/recreate) interpolate REAL secret values for + # secret-dependent services (oauth2-proxy, caddy, searxng, n8n, …) instead of + # leaving them unset. Path only — no secret value lives in this compose file. + # See ops-controller/main.py:_load_runtime_env. Decryption stays host-only. + - RUNTIME_ENV_FILE=/run/runtime.env + - HF_TOKEN_FILE=/run/secrets/hf_token + - AUDIT_LOG_PATH=/data/audit.log + - BASE_PATH=${BASE_PATH:-.} + - DATA_PATH=${DATA_PATH:-${BASE_PATH:-.}/data} + - COMPOSE_FILE=${COMPOSE_FILE:-docker-compose.yml} + - DEFAULT_MODEL=${DEFAULT_MODEL:-} + - COMFYUI_MODELS_DIR=/models/comfyui + # ComfyUI ↔ llamacpp VRAM serialization guardian (see ops-controller/main.py) + - COMFYUI_URL=http://comfyui:8188 + - COMFYUI_SERIALIZE_LLAMACPP=${COMFYUI_SERIALIZE_LLAMACPP:-0} + - COMFYUI_QUEUE_POLL_SECONDS=${COMFYUI_QUEUE_POLL_SECONDS:-2} + - COMFYUI_DRAIN_SECONDS=${COMFYUI_DRAIN_SECONDS:-20} + - COMFYUI_GUARDIAN_TARGET=${COMFYUI_GUARDIAN_TARGET:-llamacpp} + # Phase 1: after drain, POST ComfyUI /free so PyTorch's caching allocator + # releases. Default ON; harmless 200 OK when nothing is held. + - COMFYUI_FREE_AFTER_DRAIN=${COMFYUI_FREE_AFTER_DRAIN:-1} + # Phase 2: VRAM-pressure watchdog. Independent of the queue. When total + # used VRAM exceeds OPS_VRAM_PRESSURE_GB, call ComfyUI /free; recheck + # until below OPS_VRAM_RECOVERY_GB (or pressure-4 if unset). Disabled + # while OPS_VRAM_PRESSURE_GB <= 0. 
+ - OPS_VRAM_PRESSURE_GB=${OPS_VRAM_PRESSURE_GB:-0} + - OPS_VRAM_RECOVERY_GB=${OPS_VRAM_RECOVERY_GB:-0} + - OPS_VRAM_POLL_SECONDS=${OPS_VRAM_POLL_SECONDS:-30} + # Self-heal watchdog (opt-in): restart any exited compose service after a + # grace window, except those in OPS_WATCHDOG_EXCLUDE. Disabled by default. + - OPS_HERMES_WATCHDOG_ENABLED=${OPS_HERMES_WATCHDOG_ENABLED:-0} + - OPS_HERMES_WATCHDOG_INTERVAL_SECONDS=${OPS_HERMES_WATCHDOG_INTERVAL_SECONDS:-30} + - OPS_HERMES_WATCHDOG_GRACE_SECONDS=${OPS_HERMES_WATCHDOG_GRACE_SECONDS:-60} + - OPS_HERMES_WATCHDOG_PAUSE_FILE=${OPS_HERMES_WATCHDOG_PAUSE_FILE:-/data/watchdog.paused} + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - ${BASE_PATH:-.}:/workspace + - ${DATA_PATH:-${BASE_PATH:-.}/data}/ops-controller:/data + - ${BASE_PATH:-.}/models/comfyui:/models/comfyui + # Read-only: decrypted runtime secrets for compose interpolation (see + # RUNTIME_ENV_FILE above). Same host path the top-level `secrets:` block uses; + # `make up` runs decrypt-secrets first, so this file exists before compose up. + - ${HOME}/.ai-toolkit/runtime/.env:/run/runtime.env:ro + secrets: + - hf_token + healthcheck: + # Socket-only check — verifies the port is bound without paying for + # urllib.request's huge import graph (saved 2-5s on Docker Desktop where + # the urllib-based check was flaking past 30s). App-level health is + # already covered by every dependent service calling real endpoints. + test: ["CMD", "python3", "-c", "import socket; s=socket.create_connection(('127.0.0.1',9000),2); s.close()"] + start_period: 15s + interval: 30s + timeout: 5s + retries: 3 + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + # No host port - dashboard calls internally. Add "9000:9000" for debugging. + networks: + - backend + + dashboard: + build: ./dashboard + restart: unless-stopped + # Image entrypoint runs as root briefly to chmod bind-mounted /models, then gosu appuser. 
+ # Do not set user: here or ComfyUI pulls fail with Permission denied on /models. + # gosu needs SETUID/SETGID; no-new-privileges breaks user switching (EPERM). + tmpfs: + - /tmp + cap_drop: [ALL] + cap_add: + - SETUID + - SETGID + depends_on: + llamacpp: + condition: service_started + extra_hosts: + - "host.docker.internal:host-gateway" + healthcheck: + test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/api/health')"] + interval: 30s + timeout: 10s + retries: 3 + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + environment: + - LLAMACPP_URL=http://llamacpp:8080 + - MODEL_GATEWAY_API_KEY=${LITELLM_MASTER_KEY:-local} + - MODELS_DIR=/models + - GGUF_MODELS_DIR=/gguf-models + - SCRIPTS_DIR=/scripts + - MCP_CONFIG_PATH=/mcp-config/servers.txt + - MCP_GATEWAY_URL=http://mcp-gateway:8811 + # Must include comfyui so mcp-gateway loads ComfyUI tools (registry-custom.yaml). Matches data/mcp/servers.txt default. + # Web search is the self-hosted searxng MCP (see services.searxng). playwright is stack-pinned in registry-custom.yaml. 
+ - MCP_GATEWAY_SERVERS=${MCP_GATEWAY_SERVERS:-duckduckgo,n8n,searxng,comfyui,orchestration,playwright} + # Read-only: ComfyUI user workflows (host: data/comfyui-storage/ComfyUI/user/default/workflows) + - COMFYUI_WORKFLOWS_DIR=/comfyui-workflows + - OPS_CONTROLLER_URL=http://ops-controller:9000 + - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:-} + - DASHBOARD_AUTH_TOKEN=${DASHBOARD_AUTH_TOKEN:-} + - THROUGHPUT_RECORD_TOKEN=${THROUGHPUT_RECORD_TOKEN:-} + - HF_TOKEN_FILE=/run/secrets/hf_token + - COMFYUI_URL=http://comfyui:8188 + - MODEL_GATEWAY_URL=http://model-gateway:11435 + - LLAMACPP_CTX_SIZE=${LLAMACPP_CTX_SIZE:-262144} + - DASHBOARD_DATA_PATH=/data/dashboard + # n8n webhook for publish_enqueue (or pass per-request); n8n owns retries/OAuth + - N8N_PUBLISH_WEBHOOK_URL=${N8N_PUBLISH_WEBHOOK_URL:-} + - COMFYUI_OUTPUT_DIR=/comfyui-output + - DASHBOARD_TRUST_PROXY_HEADERS=true + - DASHBOARD_TRUSTED_PROXY_NET=172.24.0.0/16 + volumes: + - ${BASE_PATH:-.}/models/comfyui:/models + - ${BASE_PATH:-.}/models/gguf:/gguf-models + - ${BASE_PATH:-.}/scripts:/scripts:ro + - ${DATA_PATH:-${BASE_PATH:-.}/data}/dashboard:/data/dashboard + - ${DATA_PATH:-${BASE_PATH:-.}/data}/mcp:/mcp-config + - ${BASE_PATH:-.}/data/comfyui-storage/ComfyUI/user/default/workflows:/comfyui-workflows:ro + - ${DATA_PATH:-${BASE_PATH:-.}/data}/comfyui-output:/comfyui-output:ro + secrets: + - hf_token + networks: + - frontend + - backend + - proxy-net + + worker: + build: + context: . 
+ dockerfile: worker/Dockerfile + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "[ -f /tmp/worker.heartbeat ] && [ $(($(date +%s) - $(cat /tmp/worker.heartbeat))) -lt 120 ]"] + start_period: 30s + interval: 30s + timeout: 5s + retries: 3 + depends_on: + dashboard: + condition: service_started + comfyui: + condition: service_started + environment: + - DASHBOARD_DATA_PATH=/data/dashboard + - COMFYUI_URL=http://comfyui:8188 + - COMFYUI_WORKFLOWS_DIR=/comfyui-workflows + - COMFYUI_OUTPUT_DIR=/comfyui-output + - WORKER_POLL_INTERVAL_SEC=${WORKER_POLL_INTERVAL_SEC:-0.5} + - WORKER_CONCURRENCY=${WORKER_CONCURRENCY:-2} + - WORKER_SCHEDULE_CHECK_SEC=30 + - WORKER_MAX_JOB_RETRIES=2 + - WORKER_PUBLISH_MAX_ATTEMPTS=5 + - N8N_PUBLISH_WEBHOOK_URL=${N8N_PUBLISH_WEBHOOK_URL:-} + volumes: + - ${DATA_PATH:-${BASE_PATH:-.}/data}/dashboard:/data/dashboard + - ${BASE_PATH:-.}/data/comfyui-storage/ComfyUI/user/default/workflows:/comfyui-workflows:ro + - ${DATA_PATH:-${BASE_PATH:-.}/data}/comfyui-output:/comfyui-output:ro + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + networks: + - backend + + open-webui: + image: ${OPEN_WEBUI_IMAGE:-ghcr.io/open-webui/open-webui:v0.10.1} + restart: unless-stopped + depends_on: + llamacpp: + condition: service_started + model-gateway: + condition: service_started + qdrant: + condition: service_started + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/"] + start_period: 120s + interval: 30s + timeout: 10s + retries: 3 + environment: + # Route all model requests through the gateway (unified provider) + - OLLAMA_BASE_URL= + - OPENAI_API_BASE_URL=${OPENAI_API_BASE:-http://model-gateway:11435/v1} + - OPENAI_API_KEY=${LITELLM_MASTER_KEY:-local} + # Auth: False = single-user local / Tailscale use. + - WEBUI_AUTH=${WEBUI_AUTH:-False} + # Default model shown in chat UI. Dashboard writes OPEN_WEBUI_DEFAULT_MODEL to prefer the low-context :chat alias. 
+ - DEFAULT_MODELS=${OPEN_WEBUI_DEFAULT_MODEL:-${DEFAULT_MODEL:-}} + # RAG: use Qdrant for vector storage + - VECTOR_DB=qdrant + - QDRANT_URI=http://qdrant:6333 + - QDRANT_URL=http://qdrant:6333 + - RAG_EMBEDDING_ENGINE=openai + - RAG_OPENAI_API_BASE_URL=http://model-gateway:11435/v1 + - RAG_OPENAI_API_KEY=${LITELLM_MASTER_KEY:-local} + - RAG_EMBEDDING_MODEL=${EMBED_MODEL:-${LLAMACPP_EMBED_MODEL:-nomic-embed-text-v1.5.Q4_K_M.gguf}} + # MCP tools: connect Open WebUI to the shared mcp-gateway (streamable HTTP), + # exposing every stack MCP (n8n, comfyui, searxng, orchestration, blog, + # playwright, qdrant-rag) as callable tools. Seeds tool_server.connections + # (the DB has none yet). One aggregated endpoint = all servers' tools. + - 'TOOL_SERVER_CONNECTIONS=[{"type":"mcp","url":"http://mcp-gateway:8811/mcp","auth_type":"none","info":{"id":"ordo-mcp","name":"Ordo MCP Gateway","description":"Shared stack tools: n8n, comfyui, searxng, orchestration, blog, playwright, qdrant-rag"}}]' + volumes: + - ${DATA_PATH:-${BASE_PATH:-.}/data}/open-webui:/app/backend/data + networks: + - frontend + - backend + - proxy-net + + gguf-puller: + profiles: [models] + image: python:3.12-slim + restart: "no" + environment: + # Optional HF token via env (set HF_TOKEN in .env for gated repos); empty by + # default so public repos pull token-free. Replaces the file secret, which + # made `compose run gguf-puller` hard-fail whenever the SOPS-managed secret + # file was absent or its ${HOME} source mis-resolved under a Hermes-invoked + # compose subprocess. pull_gguf_models.py reads HF_TOKEN when the file is absent. 
+ - HF_TOKEN=${HF_TOKEN:-} + - GGUF_MODELS=${GGUF_MODELS:-} + volumes: + - ${BASE_PATH:-.}/models/gguf:/models + - ${BASE_PATH:-.}/scripts:/scripts:ro + command: ["sh", "-c", "pip install -q huggingface_hub && python3 /scripts/pull_gguf_models.py"] + networks: + - frontend + + comfyui-model-puller: + profiles: [comfyui-models] + image: python:3.12.8-slim + restart: "no" + user: "0:0" + environment: + - MODELS_DIR=/models + - HF_TOKEN_FILE=/run/secrets/hf_token + - CIVITAI_TOKEN_FILE=/run/secrets/civitai_token + # Host: $env:COMFYUI_PACKS="flux1-dev-gguf" (PowerShell) — forwarded into the container + - COMFYUI_PACKS=${COMFYUI_PACKS:-} + - COMFYUI_QUANT=${COMFYUI_QUANT:-} + volumes: + - ${BASE_PATH:-.}/models/comfyui:/models + - ${BASE_PATH:-.}/scripts:/scripts:ro + secrets: + - hf_token + - civitai_token + # chmod first — ensures write access on Docker Desktop/Windows bind mounts + command: ["sh", "-c", "chmod -R a+w /models && python3 /scripts/comfyui/pull_comfyui_models.py"] + networks: + - frontend + + # Ensures ComfyUI-Manager is cloned before ComfyUI starts. Safe to re-run — skips if already present. + comfyui-manager-setup: + image: alpine:3.21 + restart: "no" + volumes: + - ${BASE_PATH:-.}/data/comfyui-storage:/root + command: + - sh + - -c + - | + set -eu + apk add --no-cache git >/dev/null 2>&1 + TARGET=/root/ComfyUI/custom_nodes/ComfyUI-Manager + if [ ! -d "$$TARGET/.git" ]; then + echo "Cloning ComfyUI-Manager..." + mkdir -p /root/ComfyUI/custom_nodes + git clone --depth=1 https://github.com/Comfy-Org/ComfyUI-Manager.git "$$TARGET" + echo "ComfyUI-Manager installed." + else + echo "ComfyUI-Manager already present, skipping." + fi + + comfyui: + image: ${COMFYUI_IMAGE:-yanwk/comfyui-boot:cpu} + # No fixed container_name — avoids "name already in use" when another project + # owns `comfyui`; Docker DNS still resolves the service name `comfyui` on this network. 
+ restart: unless-stopped + depends_on: + comfyui-manager-setup: + condition: service_completed_successfully + # Backend network so MCP gateway-spawned comfyui container can reach it + # ComfyUI: run scripts/detect_hardware.py to auto-configure GPU (NVIDIA/AMD/Intel) or CPU + # Custom nodes + ComfyRegistry can take several minutes before :8188 serves; short grace marks healthy deps as failed. + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8188/"] + start_period: 420s + interval: 20s + timeout: 15s + retries: 12 + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + environment: + # --enable-manager: ComfyUI-Manager (node installs, model UI) — see docs.comfy.org/manager/install + # Override via COMFYUI_CLI_ARGS in .env (GPU defaults in overrides/compute.yml include --normalvram) + - CLI_ARGS=${COMFYUI_CLI_ARGS:---cpu --enable-manager} + - PYTORCH_CUDA_ALLOC_CONF= + # Hugging Face downloads (gated models) from Manager or built-in fetchers + - HF_TOKEN_FILE=/run/secrets/hf_token + # ComfyUI-Manager: GitHub API rate limits for custom node installs (optional; same token as GitHub MCP). + # ComfyUI's Manager reads GITHUB_TOKEN (not GITHUB_PERSONAL_ACCESS_TOKEN); _FILE pointer matches. + - GITHUB_TOKEN_FILE=/run/secrets/github_pat + # JunoLLMRefine talks to model-gateway:11435 (LiteLLM master key 'local') and + # may need to wake llamacpp via ops-controller when the guardian has paused it. 
+ - OPS_CONTROLLER_URL=http://ops-controller:9000 + - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:-} + - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-local} + volumes: + - ${BASE_PATH:-.}/data/comfyui-storage:/root + - ${BASE_PATH:-.}/models/comfyui:/root/ComfyUI/models + - ${DATA_PATH:-${BASE_PATH:-.}/data}/comfyui-output:/root/ComfyUI/output + secrets: + - hf_token + - github_pat + # Bridge Docker secrets *_FILE pointers to the plaintext env vars the + # upstream image's runner-scripts/entrypoint.sh + ComfyUI custom nodes + # (HF SDK, ComfyUI-Manager) read directly. The upstream image's default + # CMD is `bash /runner-scripts/entrypoint.sh`; we replace it with a shim + # that exports HF_TOKEN / GITHUB_TOKEN from the mounted secret files, + # installs every custom node's pip requirements (the image's writable + # layer is wiped on container recreate, so a manual `pip install` for a + # custom-node dep doesn't survive the next `compose up`), then exec's + # the original entrypoint. Idempotent: pip skips already-satisfied + # specifiers so warm-cache restarts add only seconds. 
+ command: + - bash + - -c + - | + if [ -f "$${HF_TOKEN_FILE:-/run/secrets/hf_token}" ]; then + export HF_TOKEN="$$(cat "$${HF_TOKEN_FILE:-/run/secrets/hf_token}")" + fi + if [ -f "$${GITHUB_TOKEN_FILE:-/run/secrets/github_pat}" ]; then + export GITHUB_TOKEN="$$(cat "$${GITHUB_TOKEN_FILE:-/run/secrets/github_pat}")" + fi + for r in /root/ComfyUI/custom_nodes/*/requirements.txt; do + [ -f "$$r" ] || continue + echo "[deps] installing $$r" + pip install --no-cache-dir --no-warn-script-location -q -r "$$r" || echo "[deps] WARN failed $$r" + done + exec bash /runner-scripts/entrypoint.sh + networks: + - frontend + - backend + - proxy-net + + # Build ComfyUI MCP image for gateway-spawned containers (exits immediately after build) + comfyui-mcp-image: + build: ./comfyui-mcp + image: ordo-ai-stack-comfyui-mcp:latest + pull_policy: build + restart: "no" + command: ["true"] + + # Stable orchestration MCP (fixed verbs → dashboard HTTP). Optional build for gateway catalog. + orchestration-mcp-image: + build: ./orchestration-mcp + image: ordo-ai-stack-orchestration-mcp:latest + pull_policy: build + restart: "no" + command: ["true"] + + # Qdrant RAG MCP image (gateway-spawned). Semantic search over the `documents` + # collection; embeds queries via llamacpp-embed to match rag-ingestion's vectors. + qdrant-rag-mcp-image: + build: ./qdrant-rag-mcp + image: ordo-ai-stack-qdrant-rag-mcp:latest + pull_policy: build + restart: "no" + command: ["true"] + + # Codebase-Memory MCP image (gateway-spawned, stdio). Opt-in (heavy: bundles an + # embedding model). Build with: + # docker compose --profile codebase-memory build codebase-memory-mcp-image + # then enable in the gateway with: ./scripts/mcp_add.sh codebase-memory + # (requires CODE_ROOT set to your host code root). 
+ codebase-memory-mcp-image: + build: ./codebase-memory-mcp + image: ordo-ai-stack-codebase-memory-mcp:latest + pull_policy: build + restart: "no" + command: ["true"] + profiles: ["codebase-memory"] + + # Codebase-Memory 3D graph visualization UI (long-lived; opt-in). Visualizes the + # code knowledge-graph it indexes in its own process (mounts /c/dev:ro). The + # upstream UI binds 127.0.0.1:9749 and is an absolute-asset SPA with no base path, + # so the image runs nginx (on :9750) which proxies to it and rewrites its baked + # /assets,/api,/rpc paths to the /codebase-memory/ prefix — letting Caddy serve it + # at https://${CADDY_TAILNET_HOSTNAME}/codebase-memory/ on the shared :443 SSO origin (no extra port). + # Build with: docker compose --profile codebase-memory build codebase-memory-ui + codebase-memory-ui: + build: ./codebase-memory-ui + image: ordo-ai-stack-codebase-memory-ui:latest + pull_policy: build + restart: unless-stopped + profiles: ["codebase-memory"] + volumes: + - codebase-memory-cache:/cache + # Source tree (read-only) so the UI's own long-lived process can index and + # visualize it. The graph index lives in-process (it is not reliably flushed + # to the cache volume across container exits), so the UI indexes its own + # graph rather than depending on the gateway-spawned MCP's index.
+ - ${CODE_ROOT:-/c/dev}:/c/dev:ro + healthcheck: + test: ["CMD-SHELL", "curl -fsS -o /dev/null http://localhost:9750/codebase-memory/ || exit 1"] + interval: 30s + timeout: 5s + retries: 5 + start_period: 20s + networks: + - proxy-net + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + + comfyui-mcp: + image: ordo-ai-stack-comfyui-mcp:latest + pull_policy: build + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "python3 -c \"import socket; s=socket.create_connection(('127.0.0.1',9000),2); s.close()\""] + start_period: 30s + interval: 30s + timeout: 5s + retries: 3 + depends_on: + comfyui: + condition: service_started + command: ["python", "server.py"] + environment: + - COMFYUI_URL=http://comfyui:8188 + - COMFY_MCP_WORKFLOW_DIR=/workflows + # Host: data/comfyui-storage/ComfyUI/user/default/workflows (seeded API graphs under mcp-api/) + # When the agent omits workflow_id but sends prompt/width/etc. at top level, use this workflow id (path under workflows dir). + - COMFY_MCP_DEFAULT_WORKFLOW_ID=${COMFY_MCP_DEFAULT_WORKFLOW_ID:-mcp-api/generate_image} + # Require explicit workflow_id for autonomous runs (no silent default) — set to 1 to allow legacy default. + - COMFY_MCP_ALLOW_DEFAULT_WORKFLOW_ID=${COMFY_MCP_ALLOW_DEFAULT_WORKFLOW_ID:-0} + - COMFY_MCP_DEFAULT_MODEL=${COMFY_MCP_DEFAULT_MODEL:-flux1-schnell-fp8.safetensors} + - OPS_CONTROLLER_URL=http://ops-controller:9000 + - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:-} + volumes: + # Same host tree as ComfyUI user/default/workflows. API-format JSON for /prompt; UI exports are listed but cannot run via MCP. 
+ - ${BASE_PATH:-.}/data/comfyui-storage/ComfyUI/user/default/workflows:/workflows:ro + networks: + - backend + + n8n: + image: ${N8N_IMAGE:-docker.n8n.io/n8nio/n8n:2.28.3} + restart: unless-stopped + # Run as non-root (n8n image uses node user; 1000:1000 matches typical node uid) + user: "1000:1000" + depends_on: + model-gateway: + condition: service_started + mcp-gateway: + condition: service_started + healthcheck: + test: ["CMD", "wget", "-q", "-O", "/dev/null", "http://localhost:5678/"] + interval: 30s + timeout: 10s + retries: 3 + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + environment: + - N8N_HOST=0.0.0.0 + - N8N_PORT=5678 + # n8n is mounted at /n8n/ via Caddy's handle_path (prefix-strip). Without + # N8N_PATH, n8n thinks it lives at root and emits absolute /assets/... + # URLs that 404 at Caddy. With it set, n8n emits /n8n/assets/... which + # Caddy strips back to /assets/... and serves correctly. + - N8N_PATH=/n8n/ + # Caddy is one reverse-proxy hop in front; this lets n8n honour the + # X-Forwarded-* headers oauth2-proxy + Caddy inject so cookies, CSRF + # tokens, and outbound redirect URLs all use the public scheme/host. + - N8N_PROXY_HOPS=1 + # Skip the first-run owner-setup wizard. This flag does NOT bypass the + # email/password login form — in n8n 2.x `authenticationMethod` is + # constrained to [email, ldap, saml] (see dist/config/schema.js) and + # there is no `none` option. /rest/settings still reports + # `authenticationMethod: "email"` with this flag set, and the SPA shows + # the login screen accordingly. + # + # Operator workflow: oauth2-proxy at Caddy gates the /n8n/* URL with the + # single-Gmail allowlist (auth/oauth2-proxy/emails.txt), then n8n's own + # login form requires an owner account's credentials. 
Bootstrap the owner + # once via the first-run UI (or POST /rest/owner/setup), store the creds + # in your own secret store outside this repo, and rely on the ~7-day + # session cookie so the second login is infrequent. + - N8N_USER_MANAGEMENT_DISABLED=true + # Route all model traffic through Model Gateway (dashboard tracking, unified provider) + - OLLAMA_HOST=http://model-gateway:11435 + - OPENAI_API_BASE_URL=${OPENAI_API_BASE:-http://model-gateway:11435/v1} + - OPENAI_API_KEY=local + # OAuth callbacks + inbound webhooks require a public URL. + # Recommended: tailscale funnel --set-path /rest/oauth2-credential/callback 5678 + # tailscale funnel --set-path /webhook 5678 + # Then set N8N_WEBHOOK_URL=https://your-machine.your-tailnet.ts.net in .env + - WEBHOOK_URL=${N8N_WEBHOOK_URL:-} + - N8N_EDITOR_BASE_URL=${N8N_WEBHOOK_URL:-} + # Only the callback and webhook paths need to be reachable without a session cookie + - N8N_AUTH_EXCLUDE_ENDPOINTS=rest/oauth2-credential/callback,webhook + - N8N_SECURE_COOKIE=false + volumes: + - ${DATA_PATH:-${BASE_PATH:-.}/data}/n8n-data:/home/node/.n8n + - ${DATA_PATH:-${BASE_PATH:-.}/data}/n8n-files:/files + networks: + - frontend + - backend + - proxy-net + + mcp-gateway: + build: ./mcp + image: ordo-ai-stack-mcp-gateway:latest + pull_policy: build + # No fixed container_name — avoids conflicts when another stack already uses `mcp-gateway`. 
+ restart: unless-stopped + depends_on: + comfyui-mcp-image: + # Build-only container; failure here means ComfyUI MCP won't work but other MCPs should still start + condition: service_completed_successfully + required: false + orchestration-mcp-image: + condition: service_completed_successfully + required: false + environment: + - MCP_CONFIG_FILE=/mcp-config/servers.txt + - MCP_GATEWAY_PORT=8811 + # Set to 1 for docker/mcp-gateway --verbose (see TROUBLESHOOTING — ComfyUI tools missing) + - MCP_GATEWAY_VERBOSE=${MCP_GATEWAY_VERBOSE:-0} + # MCP server API keys — sourced from Docker secrets (file-form). The + # gateway-wrapper.sh entrypoint bridges the *_FILE pointers back to + # plaintext env vars for the spawned MCP servers (which read the + # canonical names directly). + - GITHUB_PERSONAL_ACCESS_TOKEN_FILE=/run/secrets/github_pat + # n8n MCP server (mcp/n8n) — for workflow tools when n8n API key is set. + # `n8n.api_key` is a Docker secret mounted at /run/secrets/n8n_api_key; + # gateway-wrapper.sh reads the file and exports it as the canonical + # N8N_API_KEY env var that mcp/n8n expects. + - N8N_API_URL=${N8N_API_URL:-http://n8n:5678} + - N8N_API_KEY_FILE=/run/secrets/n8n_api_key + # ComfyUI MCP (custom registry) — passed to spawned comfyui container + - COMFYUI_URL=http://comfyui:8188 + - COMFY_MCP_DEFAULT_MODEL=${COMFY_MCP_DEFAULT_MODEL:-flux1-schnell-fp8.safetensors} + - OPS_CONTROLLER_URL=http://ops-controller:9000 + - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:-} + # Injected into registry-custom.yaml for orchestration MCP (dashboard Bearer) + - DASHBOARD_AUTH_TOKEN=${DASHBOARD_AUTH_TOKEN:-} + # Codebase-Memory MCP — host code root (read-only mount source for the + # spawned container) + bind-mount allowlist. CODE_ROOT must be the HOST path + # that contains your repos (what Hermes sees as /c/dev). Allow-listing it lets + # the gateway's hardened bind logic accept the read-only /c/dev mount. 
+ - CODE_ROOT=${CODE_ROOT:-} + - MCP_GATEWAY_DOCKER_BIND_ALLOWED_PATHS=${CODE_ROOT:-} + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - ${DATA_PATH:-${BASE_PATH:-.}/data}/mcp:/mcp-config + secrets: + - github_pat + - n8n_api_key + # Published on host so external MCP clients (e.g. Cursor, Claude Desktop) + # can reach it. Backend services still address it as + # http://mcp-gateway:8811 over the docker network. + ports: + # 127.0.0.1 bind: localhost-only. Keeps .mcp.json / .cline / VS Code + # MCP clients working on the host; removes LAN exposure. + - "127.0.0.1:${MCP_GATEWAY_PORT:-8811}:8811" + healthcheck: + # Verify gateway is listening AND has loaded its tool catalog (tools/list returns >0 tools). + # Falls back to port check if curl is unavailable. + test: ["CMD-SHELL", "sh /mcp-scripts/healthcheck.sh"] + start_period: 60s + interval: 15s + timeout: 10s + retries: 5 + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + networks: + - backend + # proxy-net: lets Caddy (front door) reach the gateway for the /mcp/* route. + - proxy-net + + oauth2-proxy: + # alpine variant ships with wget for the in-container healthcheck below. + # The default :latest is distroless and has no shell or HTTP probe tools. + image: quay.io/oauth2-proxy/oauth2-proxy:v7.15.3-alpine + restart: unless-stopped + # oauth2-proxy is used purely as an authn endpoint via Caddy `forward_auth` + # (Caddy calls /oauth2/auth, oauth2-proxy returns 202 on valid session, + # 401 otherwise). It never proxies to a real upstream, so --upstream is a + # static 202 placeholder. Do not change without redesigning the front door. 
+ command: + - --provider=google + - --http-address=0.0.0.0:4180 + - --reverse-proxy=true + - --set-xauthrequest=true + - --upstream=static://202 + - --redirect-url=https://${CADDY_TAILNET_HOSTNAME}/oauth2/callback + - --whitelist-domain=.${CADDY_TAILNET_DOMAIN} + - --cookie-domain=.${CADDY_TAILNET_DOMAIN} + - --cookie-secure=true + - --cookie-samesite=lax + - --cookie-expire=24h + # NOTE: do NOT add --email-domain=* alongside --authenticated-emails-file. + # Either condition allows the user in, so the wildcard would defeat the + # allowlist. The file is the only gate. + - --authenticated-emails-file=/etc/oauth2-proxy/emails.txt + - --skip-provider-button=true + environment: + - OAUTH2_PROXY_CLIENT_ID=${OAUTH2_PROXY_CLIENT_ID} + - OAUTH2_PROXY_CLIENT_SECRET=${OAUTH2_PROXY_CLIENT_SECRET} + - OAUTH2_PROXY_COOKIE_SECRET=${OAUTH2_PROXY_COOKIE_SECRET} + volumes: + - ./auth/oauth2-proxy/emails.txt:/etc/oauth2-proxy/emails.txt:ro + healthcheck: + test: ["CMD", "wget", "-q", "--spider", "http://localhost:4180/ping"] + interval: 30s + timeout: 5s + retries: 3 + networks: + - proxy-net + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + + caddy: + image: caddy:2.11.4-alpine + restart: unless-stopped + # CADDY_BIND must be the host's tailnet IP. The :? failsafe makes + # `docker compose config` exit non-zero if it's empty or unset, since + # an empty bind silently degrades to 0.0.0.0:443 (compose only warns). + ports: + - "${CADDY_BIND:?CADDY_BIND must be set to the host tailnet IP — never empty or 0.0.0.0}:443:443" + environment: + - CADDY_TAILNET_HOSTNAME=${CADDY_TAILNET_HOSTNAME} + - CADDY_TAILNET_DOMAIN=${CADDY_TAILNET_DOMAIN} + # Bearer token (SOPS) gating the /mcp/* route for remote MCP clients + # (Cline/Cursor). The gateway has no auth of its own; Caddy enforces this. 
+ - MCP_GATEWAY_TOKEN=${MCP_GATEWAY_TOKEN:-} + volumes: + - ./auth/caddy/Caddyfile:/etc/caddy/Caddyfile:ro + - ${TAILSCALE_CERT_DIR:-./auth/caddy/certs}:/etc/caddy/certs:ro + - caddy_data:/data + - caddy_config:/config + depends_on: + oauth2-proxy: + condition: service_started + healthcheck: + test: ["CMD", "wget", "-q", "--spider", "http://localhost/healthz"] + interval: 30s + timeout: 5s + retries: 3 + networks: + # caddy is the SSO ingress — it only needs proxy-net to reach upstreams. + # All upstreams it proxies (oauth2-proxy, dashboard, open-webui, n8n, + # comfyui, hermes-dashboard) are on proxy-net. Membership on `frontend` + # caused Docker DNS to return frontend IPs for the dashboard, putting + # caddy's source IP outside DASHBOARD_TRUSTED_PROXY_NET=172.24.0.0/16 + # and 401-ing every /api/* call from the SSO front door. + - proxy-net + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + + # --- Voice (STT / TTS) --- + + stt: + # Speech-to-text (faster-whisper, OpenAI-compatible /v1/audio/transcriptions). + # Opt-in: docker compose --profile voice up -d. GPU pin via the model registry + # (defaults to the secondary GPU). Reached internally at http://stt:8000/v1. + profiles: ["voice"] + # sha-pinned for reproducibility. NOTE: must run on a Pascal-class GPU (the 1070); + # the registry pins it to the secondary GPU. CTranslate2 int8 has no Blackwell kernels. 
+ image: fedirz/faster-whisper-server@sha256:0b64050ad0b9244745746b652473ee42a8d5454d501877a252c3e65f631ffc99 + restart: unless-stopped + environment: + - WHISPER__MODEL=${STT_MODEL:-Systran/faster-whisper-small} + - WHISPER__INFERENCE_DEVICE=cuda + - WHISPER__COMPUTE_TYPE=${STT_COMPUTE_TYPE:-int8} + volumes: + - ${DATA_PATH:-${BASE_PATH:-.}/data}/voice/hf-cache:/root/.cache/huggingface + healthcheck: + test: ["CMD-SHELL", "python3 -c \"import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:8000/v1/models',timeout=5).status==200 else 1)\""] + interval: 30s + timeout: 10s + retries: 5 + start_period: 90s + logging: + driver: json-file + options: { max-size: "10m", max-file: "3" } + networks: [backend] + + tts: + # Text-to-speech (Kokoro, OpenAI-compatible /v1/audio/speech). Opt-in profile. + # Reached internally at http://tts:8880/v1. Voice chosen per-request (af_bella default). + profiles: ["voice"] + # sha-pinned. NOTE: Kokoro's PyTorch build has no Blackwell kernels — must run on the + # Pascal 1070 (the registry pins it to the secondary GPU). It will crash on the 5090. + image: ghcr.io/remsky/kokoro-fastapi-gpu@sha256:63176e12e476470f020e29dfb3203bac249fa66c8fdf95e44b7482546eb4e974 + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "python3 -c \"import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:8880/v1/audio/voices',timeout=5).status==200 else 1)\""] + interval: 30s + timeout: 10s + retries: 5 + start_period: 90s + logging: + driver: json-file + options: { max-size: "10m", max-file: "3" } + networks: [backend] + + # --- RAG --- + + qdrant: + image: qdrant/qdrant:v1.18.2 + restart: unless-stopped + ports: + # 127.0.0.1 bind: localhost-only. Internal services use http://qdrant:6333 + # over the docker network; this publish exists for host-side debugging / + # one-off scripts only. Removes LAN exposure of an unauthenticated vector DB. 
+ - "127.0.0.1:${QDRANT_PORT:-6333}:6333" + volumes: + - ${DATA_PATH:-${BASE_PATH:-.}/data}/qdrant:/qdrant/storage + healthcheck: + test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/6333'"] + interval: 30s + timeout: 10s + retries: 3 + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + networks: + - backend + + rag-ingestion: + profiles: [rag] + build: ./rag-ingestion + restart: unless-stopped + healthcheck: + test: ["CMD", "test", "-f", "/tmp/rag-ingestion.heartbeat"] + start_period: 60s + interval: 30s + timeout: 5s + retries: 3 + environment: + # Embed directly against the raw llama.cpp embedding server. litellm's + # /v1/embeddings route 500s for the local embed model; llama-server works + # and ignores the model field. This keeps ingest + qdrant-rag-mcp queries + # in the SAME 768-dim nomic space. + - MODEL_GATEWAY_URL=http://llamacpp-embed:8080 + - EMBED_MODEL=${EMBED_MODEL:-${LLAMACPP_EMBED_MODEL:-nomic-embed-text-v1.5.Q4_K_M.gguf}} + - QDRANT_URL=http://qdrant:6333 + - QDRANT_COLLECTION=${RAG_COLLECTION:-documents} + - WATCH_DIR=/watch + - CHUNK_SIZE=${RAG_CHUNK_SIZE:-400} + - CHUNK_OVERLAP=${RAG_CHUNK_OVERLAP:-50} + - SCAN_INTERVAL_SEC=${RAG_SCAN_INTERVAL_SEC:-15} + volumes: + - ${DATA_PATH:-${BASE_PATH:-.}/data}/rag-input:/watch + depends_on: + llamacpp-embed: + condition: service_started + qdrant: + condition: service_started + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + networks: + - backend + + hermes-gateway: + build: + context: ./hermes + dockerfile: Dockerfile + image: ordo-ai-stack-hermes:latest + pull_policy: build + restart: unless-stopped + # Hermes used to hold /var/run/docker.sock (and group_add ["0"]) for its + # built-in docker tools. Plan C narrowed that surface: Hermes no longer + # has the socket. Privileged container ops route through ops-controller's + # HTTP API at OPS_CONTROLLER_URL with OPS_CONTROLLER_TOKEN. 
See + # docs/runbooks/bounded-hermes.md for the new verb surface. + depends_on: + # Hermes specifically requires these peers to be HEALTHY (not just + # started) — otherwise the gateway spams 5xx on every request while + # model-gateway is still loading LiteLLM config or mcp-gateway is + # still warming the catalog. Enforced by + # tests/test_hermes_docker.py::test_hermes_services_depend_on_stack; + # don't relax to `service_started` without updating that test. + model-gateway: + condition: service_healthy + mcp-gateway: + condition: service_healthy + dashboard: + condition: service_healthy + ops-controller: + condition: service_started + environment: + - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-local} + - PYTHONIOENCODING=utf-8 + # Voice: STT openai-provider base URL points at the local faster-whisper + # service (profile: voice). TTS base URL is set in hermes config.yaml + # (tts.openai.base_url). Auto-TTS-on-voice-input replies in voice. See + # docs/configuration.md "Voice (STT/TTS)". + - STT_OPENAI_BASE_URL=${STT_OPENAI_BASE_URL:-http://stt:8000/v1} + # ops-controller HTTP API replaces the raw Docker socket Hermes used + # to hold. Privileged verbs: /containers/*, /compose/*. See + # hermes/ops_client.py for the in-process wrapper. + - OPS_CONTROLLER_URL=http://ops-controller:9000 + - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:?required for Hermes to reach ops-controller} + # Single source of truth for context window across the stack — llamacpp + # allocates KV for this; model-gateway templates it into litellm_config + # as max_input_tokens; the hermes entrypoint seeds model.context_length + # into $HERMES_HOME/config.yaml so the dashboard progress bar matches. + - LLAMACPP_CTX_SIZE=${LLAMACPP_CTX_SIZE:-262144} + # Per-turn budgets seeded into $HERMES_HOME/config.yaml by + # hermes/entrypoint.sh on startup. Override in .env to monitor or tune + # from a single place rather than editing the in-container yaml. 
+ - HERMES_MAX_TOKENS=${HERMES_MAX_TOKENS:-65536} + - HERMES_MAX_TURNS=${HERMES_MAX_TURNS:-90} + - HERMES_GATEWAY_TIMEOUT=${HERMES_GATEWAY_TIMEOUT:-3600} + # API retry budget — covers transient 5xx + network errors, especially + # the 503 "Loading model" window when COMFYUI_SERIALIZE_LLAMACPP stops + # llamacpp during a ComfyUI generation. Default 10 ≈ 12 min cumulative + # wait (jittered exponential backoff capped at 120s per attempt). + - HERMES_API_MAX_RETRIES=${HERMES_API_MAX_RETRIES:-10} + # 24h — effectively "never timeout". Hermes's run_agent.py:6606 reads + # HERMES_STREAM_STALE_TIMEOUT (env-only, not a config key) with default 180s, + # which kills streaming responses mid-flight on slow local-model turns. + # Agent-level gateway_timeout (above) is the real upper bound; this just + # keeps the streaming stale-detector from firing first. + - HERMES_STREAM_STALE_TIMEOUT=86400 + # Discord bot token sourced from Docker secrets (file-form). The + # hermes/entrypoint.sh bridges DISCORD_BOT_TOKEN_FILE to the env var + # discord.py expects. Legacy DISCORD_TOKEN inline alias is dropped — + # use SOPS at secrets/discord_token.sops. + - DISCORD_BOT_TOKEN_FILE=/run/secrets/discord_token + # Backup-repo PAT (git push to ordo-hermes-backup). SOPS-managed; the + # entrypoint bridges GITHUB_BACKUP_PAT_FILE -> GITHUB_BACKUP_PAT env var. 
+ - GITHUB_BACKUP_PAT_FILE=/run/secrets/github_backup_pat + - DISCORD_ALLOWED_USERS=${DISCORD_ALLOWED_USERS:-} + - DISCORD_ALLOWED_CHANNELS=${DISCORD_ALLOWED_CHANNELS:-} + - DISCORD_ALLOWED_ROLES=${DISCORD_ALLOWED_ROLES:-} + - DISCORD_REQUIRE_MENTION=${DISCORD_REQUIRE_MENTION:-true} + - DISCORD_FREE_RESPONSE_CHANNELS=${DISCORD_FREE_RESPONSE_CHANNELS:-} + - DISCORD_HOME_CHANNEL=${DISCORD_HOME_CHANNEL:-} + - DISCORD_AUTO_THREAD=${DISCORD_AUTO_THREAD:-true} + - DISCORD_REACTIONS=${DISCORD_REACTIONS:-true} + - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN:-} + volumes: + - ${BASE_PATH:-.}:/workspace:rw + # Bind-mount: data lives at host path data/hermes/ for direct host visibility. + # Windows Docker Desktop note: bind mounts have SQLite journaling quirks — + # the Dockerfile patches journal_mode WAL→DELETE which mitigates most issues. + # If "database is locked" errors appear, revert to a named volume (see the + # volumes: block at the bottom of this file for the rollback path). + - ${BASE_PATH:-.}/data/hermes:/home/hermes/.hermes + # Mount the parent dev directory into Hermes so it can read/write files + # across sibling repos. /workspace stays scoped to the ordo-ai-stack project + # root. Default target is /projects. Override HERMES_HOST_DEV_MOUNT to a + # path that mirrors your host filesystem (e.g. /c/dev on Windows where + # dev lives at C:\dev) — the historical reason was to make sibling-stack + # bind-mounts resolve identically when Hermes shelled out to `docker + # compose`. Plan C removes that shell-out path; the override is still + # useful for any future tool that does its own filesystem path + # rewriting against the host. + - ${BASE_PATH:-.}/..:${HERMES_HOST_DEV_MOUNT:-/projects}:rw + secrets: + - discord_token + - github_backup_pat + healthcheck: + # gateway_state.json is written by `hermes gateway` on startup (Docker-mode + # doesn't create a gateway.pid — that's only for systemd/launchd installs). 
+ test: ["CMD-SHELL", "test -f /home/hermes/.hermes/gateway_state.json"] + start_period: 60s + interval: 30s + timeout: 5s + retries: 3 + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + networks: + - frontend + - backend + command: ["hermes", "gateway"] + + hermes-dashboard: + build: + context: ./hermes + dockerfile: Dockerfile + image: ordo-ai-stack-hermes:latest + pull_policy: build + restart: unless-stopped + depends_on: + # Same as hermes-gateway: must be healthy (not just started). Enforced + # by tests/test_hermes_docker.py::test_hermes_services_depend_on_stack. + model-gateway: + condition: service_healthy + mcp-gateway: + condition: service_healthy + dashboard: + condition: service_healthy + ops-controller: + condition: service_started + environment: + - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-local} + - PYTHONIOENCODING=utf-8 + # Same ops-controller plumbing as hermes-gateway — see Plan C runbook. + - OPS_CONTROLLER_URL=http://ops-controller:9000 + - OPS_CONTROLLER_TOKEN=${OPS_CONTROLLER_TOKEN:?required for Hermes to reach ops-controller} + # Same single-source plumbing as hermes-gateway — dashboard shares + # the bind-mounted config.yaml so either service's entrypoint seeds it. + - LLAMACPP_CTX_SIZE=${LLAMACPP_CTX_SIZE:-262144} + - HERMES_MAX_TOKENS=${HERMES_MAX_TOKENS:-65536} + - HERMES_MAX_TURNS=${HERMES_MAX_TURNS:-90} + - HERMES_GATEWAY_TIMEOUT=${HERMES_GATEWAY_TIMEOUT:-3600} + - HERMES_API_MAX_RETRIES=${HERMES_API_MAX_RETRIES:-10} + # 24h — see hermes-gateway for rationale. Dashboard streams chat too. + - HERMES_STREAM_STALE_TIMEOUT=86400 + # Point Hermes at the pre-built SPA (Dockerfile stage 1). Without this env var, + # `hermes dashboard` tries to rebuild from web/ source, which requires npm (not + # present in the runtime image). 
+ - HERMES_WEB_DIST=/opt/hermes-agent/hermes_cli/web_dist + volumes: + - ${BASE_PATH:-.}:/workspace:rw + # Bind-mount + host-dev mount: see hermes-gateway above for rationale, + # HERMES_HOST_DEV_MOUNT override, and rollback notes. + - ${BASE_PATH:-.}/data/hermes:/home/hermes/.hermes + - ${BASE_PATH:-.}/..:${HERMES_HOST_DEV_MOUNT:-/projects}:rw + healthcheck: + test: ["CMD", "curl", "-sf", "http://localhost:9119/"] + start_period: 30s + interval: 30s + timeout: 5s + retries: 3 + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + networks: + - frontend + - backend + - proxy-net + # --insecure: Hermes rejects 0.0.0.0 binding without it. Safe here because + # the host-port publish was dropped (Plan A Task 13) — hermes-dashboard is + # only reachable on the internal Docker network via Caddy at /hermes/. + command: ["hermes", "dashboard", "--port", "9119", "--host", "0.0.0.0", "--no-open", "--insecure"] + +volumes: + caddy_data: + caddy_config: + # Per-container config/cache for codebase-memory (holds _config.db + config.json). + # NOTE: the graph index itself lives IN-PROCESS and is not reliably flushed here + # across container exits, so this is NOT a shared index — the gateway-spawned MCP + # and the UI each index in their own process. The volume is still shared/mounted + # by both; `name:` pins the literal name (no compose project prefix) to match the + # raw name the mcp-gateway uses when it spawns the MCP (-v codebase-memory-cache:/cache). + codebase-memory-cache: + name: codebase-memory-cache +# Hermes data is now bind-mounted from data/hermes/ (see hermes-gateway/dashboard above). +# The legacy `ordo-ai-stack_hermes-data` named volume still exists in Docker for +# rollback. To revert: re-add `hermes-data:` here, then switch the hermes services' +# data mount back from `${BASE_PATH:-.}/data/hermes:/home/hermes/.hermes` to +# `hermes-data:/home/hermes/.hermes` and `docker compose up -d`. 
+ +networks: + frontend: + name: ordo-ai-stack-frontend + backend: + name: ordo-ai-stack-backend + # internal: false required for llama.cpp and HuggingFace model downloads. + # Set internal: true for air-gapped security (model pulls will fail). + internal: false + proxy-net: + driver: bridge + # Pinned so DASHBOARD_TRUSTED_PROXY_NET (set in dashboard's env block) + # stays in lockstep across rebuilds. Any RFC1918 /16 inside Docker's + # default address pool (172.17–172.30) works; 172.24 was the auto-assigned + # value at first boot and was kept to avoid a one-time renumber. + ipam: + config: + - subnet: 172.24.0.0/16 + +# High-value tokens are managed via SOPS (encrypted at rest under secrets/*.sops) +# and decrypted by scripts/secrets/decrypt.sh into ${HOME}/.ai-toolkit/runtime/secrets/ +# before `docker compose up`. Compose mounts each file at /run/secrets/ +# inside its consumer; service entrypoints bridge *_FILE → plaintext env where +# the app SDK doesn't natively support the _FILE pattern. +secrets: + discord_token: + file: ${HOME}/.ai-toolkit/runtime/secrets/discord_token + github_pat: + file: ${HOME}/.ai-toolkit/runtime/secrets/github_pat + github_backup_pat: + file: ${HOME}/.ai-toolkit/runtime/secrets/github_backup_pat + hf_token: + file: ${HOME}/.ai-toolkit/runtime/secrets/hf_token + civitai_token: + file: ${HOME}/.ai-toolkit/runtime/secrets/civitai_token + n8n_api_key: + file: ${HOME}/.ai-toolkit/runtime/secrets/n8n_api_key diff --git a/scripts/stack_monitor.py b/scripts/stack_monitor.py index 167131f..19f895d 100644 --- a/scripts/stack_monitor.py +++ b/scripts/stack_monitor.py @@ -1,686 +1,686 @@ -#!/usr/bin/env python3 -""" -Ordo-AI-Stack — Package Audit & Update Manager -══════════════════════════════════════════════════ -Comprehensive monitor that: - 1. Checks ALL services in docker-compose.yml against their latest releases - 2. Classifies severity: CRITICAL (security), HIGH (major), MEDIUM (minor), LOW (patch) - 3. 
Outputs structured JSON for the cron job to consume - 4. Can also APPLY updates if called with --apply - -Usage: - python3 stack_monitor.py # Audit only, outputs JSON to stdout - python3 stack_monitor.py --apply # Audit + apply approved updates (see APPROVED_UPDATES) - python3 stack_monitor.py --json # JSON output to stdout -""" - -import json -import os -import re -import subprocess -import sys -import unicodedata -from datetime import UTC, datetime -from pathlib import Path - - -def _strip_invisible(text: str) -> str: - """Remove zero-width / invisible Unicode 'format' (Cf) characters. - - GitHub release names and commit messages routinely embed these — e.g. the - zero-width joiner U+200D inside emoji sequences like 👨‍💻, or a stray U+200B. - When this report is fed back into Hermes to format for Discord, that - invisible unicode trips the prompt-injection scanner and the whole daily - cron is blocked ("prompt contains invisible unicode U+200D"). Stripping Cf - characters keeps the report scanner-safe; visible text and emoji are - unaffected (a ZWJ emoji simply renders as its component glyphs). - """ - return "".join(ch for ch in text if unicodedata.category(ch) != "Cf") - - -def _scrub_invisible(obj): - """Recursively apply _strip_invisible to every string in a JSON-like value.""" - if isinstance(obj, str): - return _strip_invisible(obj) - if isinstance(obj, dict): - return {k: _scrub_invisible(v) for k, v in obj.items()} - if isinstance(obj, list): - return [_scrub_invisible(v) for v in obj] - return obj - - -STACK_ROOT = Path(__file__).resolve().parent.parent -COMPOSE = STACK_ROOT / "docker-compose.yml" -MONITOR = STACK_ROOT / "data" / "hermes" / "scripts" / "github_monitor.py" -HERMES_DOCKERFILE = STACK_ROOT / "hermes" / "Dockerfile" -# ComfyUI ships no version in docker-compose.yml (it runs from a 3rd-party boot -# image); the real installed version is build-stamped in this file. 
-COMFYUI_VERSION_FILE = STACK_ROOT / "data" / "comfyui-storage" / "ComfyUI" / "comfyui_version.py" -# LiteLLM has no version pin anywhere (model-gateway is FROM -# ghcr.io/berriai/litellm:main-stable, a rolling tag) — read it live from the -# running container instead. -MODEL_GATEWAY_CONTAINER = os.environ.get("MODEL_GATEWAY_CONTAINER", "ordo-ai-stack-model-gateway-1") - -# All services to monitor (sources of truth). -# -# pin_source: "compose" (default) reads the version string from docker-compose.yml. -# "dockerfile" reads HERMES_PINNED_SHA from hermes/Dockerfile and -# compares SHAs against the upstream tag. -SERVICES = { - # GitHub-backed (API releases) - "n8n": {"repo": "n8n-io/n8n", "compose_key": "n8n", "type": "github"}, - "Open WebUI": {"repo": "open-webui/open-webui", "compose_key": "open-webui", "type": "github"}, - "Qdrant": {"repo": "qdrant/qdrant", "compose_key": "qdrant", "type": "github"}, - "Caddy": {"repo": "caddyserver/caddy", "compose_key": "caddy", "type": "github"}, - "llama.cpp": {"repo": "ggml-org/llama.cpp", "compose_key": "llamacpp-embed", "type": "github"}, - "LiteLLM": {"repo": "BerriAI/litellm", "compose_key": None, "type": "github"}, # Docker-only - "ComfyUI": {"repo": "Comfy-Org/ComfyUI", "compose_key": None, "type": "github"}, # Managed via comfyui-boot - # Docker images without GitHub releases - "ComfyUI-Manager": {"repo": "ltdrdata/ComfyUI-Manager", "compose_key": None, "type": "atom"}, - "ComfyUI-KJNodes": {"repo": "kijai/ComfyUI-KJNodes", "compose_key": None, "type": "atom"}, - "ComfyUI-VideoHelperSuite": {"repo": "Kosinkadink/ComfyUI-VideoHelperSuite", "compose_key": None, "type": "atom"}, - "oauth2-proxy": {"repo": "oauth2-proxy/oauth2-proxy", "compose_key": "oauth2-proxy", "type": "github"}, - # Source-built image — pinned by SHA in hermes/Dockerfile, not in docker-compose.yml. 
- "Hermes Agent": {"repo": "NousResearch/hermes-agent", "compose_key": None, "type": "github", - "pin_source": "dockerfile"}, -} - -# Last-resort fallbacks if a version can't be read from its real source. -# NOTE: ComfyUI and LiteLLM are intentionally absent — they are resolved live -# (see resolve_current_version). Do NOT add stale hardcodes for them; a wrong -# value here silently produces a misleading audit (the old "v0.20.1" ComfyUI pin -# was compared against upstream while the box actually ran 0.17.0). +#!/usr/bin/env python3 +""" +Ordo-AI-Stack — Package Audit & Update Manager +══════════════════════════════════════════════════ +Comprehensive monitor that: + 1. Checks ALL services in docker-compose.yml against their latest releases + 2. Classifies severity: CRITICAL (security), HIGH (major), MEDIUM (minor), LOW (patch) + 3. Outputs structured JSON for the cron job to consume + 4. Can also APPLY updates if called with --apply + +Usage: + python3 stack_monitor.py # Audit only, outputs JSON to stdout + python3 stack_monitor.py --apply # Audit + apply approved updates (see APPROVED_UPDATES) + python3 stack_monitor.py --json # JSON output to stdout +""" + +import json +import os +import re +import subprocess +import sys +import unicodedata +from datetime import UTC, datetime +from pathlib import Path + + +def _strip_invisible(text: str) -> str: + """Remove zero-width / invisible Unicode 'format' (Cf) characters. + + GitHub release names and commit messages routinely embed these — e.g. the + zero-width joiner U+200D inside emoji sequences like 👨‍💻, or a stray U+200B. + When this report is fed back into Hermes to format for Discord, that + invisible unicode trips the prompt-injection scanner and the whole daily + cron is blocked ("prompt contains invisible unicode U+200D"). Stripping Cf + characters keeps the report scanner-safe; visible text and emoji are + unaffected (a ZWJ emoji simply renders as its component glyphs). 
+ """ + return "".join(ch for ch in text if unicodedata.category(ch) != "Cf") + + +def _scrub_invisible(obj): + """Recursively apply _strip_invisible to every string in a JSON-like value.""" + if isinstance(obj, str): + return _strip_invisible(obj) + if isinstance(obj, dict): + return {k: _scrub_invisible(v) for k, v in obj.items()} + if isinstance(obj, list): + return [_scrub_invisible(v) for v in obj] + return obj + + +STACK_ROOT = Path(__file__).resolve().parent.parent +COMPOSE = STACK_ROOT / "docker-compose.yml" +MONITOR = STACK_ROOT / "data" / "hermes" / "scripts" / "github_monitor.py" +HERMES_DOCKERFILE = STACK_ROOT / "hermes" / "Dockerfile" +# ComfyUI ships no version in docker-compose.yml (it runs from a 3rd-party boot +# image); the real installed version is build-stamped in this file. +COMFYUI_VERSION_FILE = STACK_ROOT / "data" / "comfyui-storage" / "ComfyUI" / "comfyui_version.py" +# LiteLLM has no version pin anywhere (model-gateway is FROM +# ghcr.io/berriai/litellm:main-stable, a rolling tag) — read it live from the +# running container instead. +MODEL_GATEWAY_CONTAINER = os.environ.get("MODEL_GATEWAY_CONTAINER", "ordo-ai-stack-model-gateway-1") + +# All services to monitor (sources of truth). +# +# pin_source: "compose" (default) reads the version string from docker-compose.yml. +# "dockerfile" reads HERMES_PINNED_SHA from hermes/Dockerfile and +# compares SHAs against the upstream tag. 
+SERVICES = { + # GitHub-backed (API releases) + "n8n": {"repo": "n8n-io/n8n", "compose_key": "n8n", "type": "github"}, + "Open WebUI": {"repo": "open-webui/open-webui", "compose_key": "open-webui", "type": "github"}, + "Qdrant": {"repo": "qdrant/qdrant", "compose_key": "qdrant", "type": "github"}, + "Caddy": {"repo": "caddyserver/caddy", "compose_key": "caddy", "type": "github"}, + "llama.cpp": {"repo": "ggml-org/llama.cpp", "compose_key": "llamacpp-embed", "type": "github"}, + "LiteLLM": {"repo": "BerriAI/litellm", "compose_key": None, "type": "github"}, # Docker-only + "ComfyUI": {"repo": "Comfy-Org/ComfyUI", "compose_key": None, "type": "github"}, # Managed via comfyui-boot + # Docker images without GitHub releases + "ComfyUI-Manager": {"repo": "ltdrdata/ComfyUI-Manager", "compose_key": None, "type": "atom"}, + "ComfyUI-KJNodes": {"repo": "kijai/ComfyUI-KJNodes", "compose_key": None, "type": "atom"}, + "ComfyUI-VideoHelperSuite": {"repo": "Kosinkadink/ComfyUI-VideoHelperSuite", "compose_key": None, "type": "atom"}, + "oauth2-proxy": {"repo": "oauth2-proxy/oauth2-proxy", "compose_key": "oauth2-proxy", "type": "github"}, + # Source-built image — pinned by SHA in hermes/Dockerfile, not in docker-compose.yml. + "Hermes Agent": {"repo": "NousResearch/hermes-agent", "compose_key": None, "type": "github", + "pin_source": "dockerfile"}, +} + +# Last-resort fallbacks if a version can't be read from its real source. +# NOTE: ComfyUI and LiteLLM are intentionally absent — they are resolved live +# (see resolve_current_version). Do NOT add stale hardcodes for them; a wrong +# value here silently produces a misleading audit (the old "v0.20.1" ComfyUI pin +# was compared against upstream while the box actually ran 0.17.0). 
PINNED = { - "n8n": "2.20.0", - "Open WebUI": "v0.9.2", - "Qdrant": "v1.17.1", - "Caddy": "2.11.2", + "n8n": "2.28.3", + "Open WebUI": "v0.10.1", + "Qdrant": "v1.18.2", + "Caddy": "2.11.4", "llama.cpp": "server-cuda", # rolling tag — classifies as ROLLING (manual review) - "oauth2-proxy":"latest-alpine", + "oauth2-proxy":"v7.15.3-alpine", } - - -def run_cmd(cmd, timeout=30): - """Run a command and return (stdout, stderr, returncode). - - Force UTF-8 decoding with replacement: GitHub release bodies routinely carry - non-ASCII bytes, and on a non-UTF-8 locale (e.g. a Windows host's cp1252) - the default decode raises mid-read, leaving stdout=None and crashing callers. - """ - try: - result = subprocess.run(cmd, capture_output=True, text=True, - encoding="utf-8", errors="replace", timeout=timeout) - return (result.stdout or ""), (result.stderr or ""), result.returncode - except subprocess.TimeoutExpired: - return "", "timeout", 1 - - -def read_hermes_pin(): - """Read HERMES_PINNED_SHA from hermes/Dockerfile (None if missing/malformed).""" - if not HERMES_DOCKERFILE.exists(): - return None - text = HERMES_DOCKERFILE.read_text() - m = re.search(r"^ARG HERMES_PINNED_SHA=([a-f0-9]+)", text, re.MULTILINE) - return m.group(1) if m else None - - -def read_comfyui_version(): - """Installed ComfyUI version, build-stamped in comfyui_version.py (e.g. 0.17.0). - - ComfyUI has no pin in docker-compose.yml, so without this the monitor used a - hardcoded guess that drifted from reality. Returns None if the file is - missing/unreadable (caller falls back to ROLLING/manual). - """ - if not COMFYUI_VERSION_FILE.exists(): - return None - try: - m = re.search(r'__version__\s*=\s*["\']([\d.]+)["\']', - COMFYUI_VERSION_FILE.read_text()) - except OSError: - return None - return m.group(1) if m else None - - -def read_litellm_version(): - """Live LiteLLM version from the running model-gateway container (e.g. 1.82.3). 
- - LiteLLM is pinned only by the rolling `main-stable` image tag, so the - installed package is the single source of truth. Returns None if the - container is down or docker is unavailable (caller falls back to ROLLING). - """ - cmd = ["docker", "exec", MODEL_GATEWAY_CONTAINER, "python", "-c", - "import importlib.metadata as m; print(m.version('litellm'))"] - stdout, _, rc = run_cmd(cmd, timeout=20) - if rc != 0 or not stdout.strip(): - return None - version = stdout.strip().splitlines()[-1].strip() - return version if re.match(r"^\d", version) else None - - -def resolve_current_version(name, compose_versions): - """Best source of truth for a service's currently-deployed version. - - Most services read from docker-compose.yml. ComfyUI and LiteLLM have no - usable pin there and are read from their live/build-stamped source instead. - """ - if name == "ComfyUI": - live = read_comfyui_version() - if live: - return live - if name == "LiteLLM": - live = read_litellm_version() - if live: - return live - return compose_versions.get(name, PINNED.get(name, "unknown")) - - -def fetch_tag_sha(repo, tag): - """Resolve a tag name to its commit SHA via the GitHub API. - - Handles both lightweight tags (object points directly at the commit) and - annotated tags (object points at a tag object, which must be dereferenced). - """ - cmd = ["curl", "-s", "--max-time", "15", "-L", - "-H", "Accept: application/vnd.github.v3+json", - "-H", "User-Agent: Ordo-AI-Stack-Monitor/3.0", - f"https://api.github.com/repos/{repo}/git/refs/tags/{tag}"] - stdout, _, rc = run_cmd(cmd) - if rc != 0 or not stdout.strip(): - return None - try: - data = json.loads(stdout) - obj = data.get("object", {}) - sha = obj.get("sha") - if obj.get("type") == "tag" and sha: - # Annotated tag — dereference to the commit it points at. 
- cmd2 = ["curl", "-s", "--max-time", "15", "-L", - "-H", "Accept: application/vnd.github.v3+json", - "-H", "User-Agent: Ordo-AI-Stack-Monitor/3.0", - f"https://api.github.com/repos/{repo}/git/tags/{sha}"] - stdout2, _, rc2 = run_cmd(cmd2) - if rc2 == 0 and stdout2.strip(): - try: - return json.loads(stdout2).get("object", {}).get("sha") - except json.JSONDecodeError: - return None - return sha - except json.JSONDecodeError: - return None - - -def fetch_compare_ahead(repo, base_sha, head_sha): - """How many commits is `head_sha` ahead of `base_sha`? Returns int or None.""" - cmd = ["curl", "-s", "--max-time", "15", "-L", - "-H", "Accept: application/vnd.github.v3+json", - "-H", "User-Agent: Ordo-AI-Stack-Monitor/3.0", - f"https://api.github.com/repos/{repo}/compare/{base_sha}...{head_sha}"] - stdout, _, rc = run_cmd(cmd) - if rc != 0 or not stdout.strip(): - return None - try: - return json.loads(stdout).get("ahead_by") - except json.JSONDecodeError: - return None - - -def evaluate_dockerfile_pinned(repo, latest_tag, body): - """Severity logic for SHA-pinned services (Hermes). Returns dict matching the entry shape.""" - pinned_sha = read_hermes_pin() - if not pinned_sha: - return {"pinned": "?", "status": "unknown", - "message": "Could not read HERMES_PINNED_SHA from hermes/Dockerfile"} - if latest_tag is None: - return {"pinned": pinned_sha[:12], "status": "unknown", - "message": "Could not fetch latest release"} - - latest_sha = fetch_tag_sha(repo, latest_tag) - if latest_sha is None: - return {"pinned": pinned_sha[:12], "latest": latest_tag, "status": "unknown", - "message": f"Could not resolve tag {latest_tag} to SHA"} - - # CVE / security mention in release notes always wins. 
- body_lower = (body or "").lower() - has_cve = bool(re.search(r"CVE-\d{4}-\d{4,}", body or "")) - sec_kw = ["vulnerability", "exploit", "buffer overflow", "auth bypass", - "privilege escalation", "injection attack", "denial of service", - "cve-", "security advisory"] - is_security = has_cve or any(kw in body_lower for kw in sec_kw) - - if pinned_sha == latest_sha: - severity = "SAFE" - message = f"On the latest tagged release ({latest_tag})" - elif is_security: - severity = "CRITICAL" - message = f"Security fix in {latest_tag} - update recommended immediately" - else: - ahead = fetch_compare_ahead(repo, pinned_sha, latest_sha) - severity = "HIGH" # SHA-pinned with no semver - flag as worth reviewing - if ahead is not None: - message = f"{latest_tag} available - {ahead} commits ahead of pinned" - else: - message = f"{latest_tag} available - pinned is older" - - return { - "pinned": f"{pinned_sha[:12]} (Dockerfile)", - "latest": f"{latest_tag} ({latest_sha[:12]})", - "severity": severity, - "message": message, - "manual_update": True, # apply_updates can't bump Dockerfiles; user must do this by hand - } - - -def fetch_latest_release(repo): - """Fetch latest release from GitHub API or Atom feed.""" - # Try GitHub API first - cmd = ["curl", "-s", "--max-time", "20", "-L", - "-H", "Accept: application/vnd.github.v3+json", - "-H", "User-Agent: Ordo-AI-Stack-Monitor/3.0", - f"https://api.github.com/repos/{repo}/releases/latest"] - stdout, stderr, rc = run_cmd(cmd) - if rc == 0 and stdout.strip(): - try: - data = json.loads(stdout) - if "tag_name" in data: - return data["tag_name"], data.get("body", ""), data.get("html_url", "") - except json.JSONDecodeError: - pass - - # Fall back to Atom feed - cmd = ["curl", "-s", "--max-time", "20", "-L", - "-H", "User-Agent: Ordo-AI-Stack-Monitor/3.0", - f"https://github.com/{repo}/releases.atom?per_page=1"] - stdout, stderr, rc = run_cmd(cmd) - if rc == 0 and stdout.strip(): - tag_m = re.search(r'.*?tag:github\.com, 
[\d-]+.*?v?([\d.]+).*?', stdout) - url_m = re.search(r']*href="([^"]+)"', stdout) - body_m = re.search(r']*>(.*?)', stdout, re.DOTALL) - - tag = tag_m.group(1) if tag_m else None - url = url_m.group(1) if url_m else "" - body = re.sub(r'<[^>]+>', '', body_m.group(1)).strip() if body_m else "" - - if tag: - return tag, body, url - - return None, "", "" - - -def classify_severity(current, latest, body=""): - """Classify update severity: CRITICAL, HIGH, MEDIUM, LOW, SAFE.""" - if latest is None or not body: - return "LOW", "Unknown update — check manually" - - # Security check — only CRITICAL for actual CVE/vulnerability mentions - body_lower = body.lower() - has_cve = bool(re.search(r'CVE-\d{4}-\d{4,}', body)) - real_security_kw = ['vulnerability', 'exploit', 'buffer overflow', - 'auth bypass', 'privilege escalation', 'injection attack', - 'denial of service', 'cve-', 'vulnerability in', - 'security advisory'] - if has_cve or any(kw in body_lower for kw in real_security_kw): - return "CRITICAL", "Security fix — update recommended immediately" - - # Parse versions — strip v/@ prefixes - # Handle special cases: n8n@X.Y.Z, etc. - clean_current = current - clean_latest = latest - if clean_current.startswith('n8n@'): - clean_current = clean_current[4:] - if clean_latest.startswith('n8n@'): - clean_latest = clean_latest[4:] - clean_current = re.sub(r'^[v@]', '', clean_current).strip() - clean_latest = re.sub(r'^[v@]', '', clean_latest).strip() - - try: - p_parts = [int(x) for x in re.findall(r'\d+', clean_current)] - l_parts = [int(x) for x in re.findall(r'\d+', clean_latest)] - - if not p_parts or not l_parts: - # No comparable semver — the current pin is a rolling tag or a - # source-built image (e.g. llama.cpp 'server-cuda'). Don't pretend - # it's a minor update; flag it for manual review instead. 
- return "ROLLING", (f"Pinned by rolling tag/built image ('{clean_current}') — " - f"rebuild to pull latest ({clean_latest}); review release notes") - - max_len = max(len(p_parts), len(l_parts)) - p_parts.extend([0] * (max_len - len(p_parts))) - l_parts.extend([0] * (max_len - len(l_parts))) - - if l_parts == p_parts: - return "SAFE", "Already up to date" - - major_diff = l_parts[0] - p_parts[0] - minor_diff = l_parts[1] - p_parts[1] if len(l_parts) > 1 and len(p_parts) > 1 else 0 - - if major_diff > 0: - return "HIGH", f"Major version jump ({clean_current} → {clean_latest}) — review breaking changes" - elif minor_diff > 0: - return "MEDIUM", f"Minor update ({clean_current} → {clean_latest})" - else: - return "LOW", f"Patch update ({clean_current} → {clean_latest})" - - except (ValueError, IndexError): - return "LOW", "Update available" - - -def extract_highlights(body, max_items=4): - """Extract key highlights from release body.""" - if not body: - return [] - lines = [] - for line in body.split('\n'): - stripped = line.strip() - if not stripped or stripped.startswith('>') or stripped.startswith('