diff --git a/CHANGELOG.md b/CHANGELOG.md index e66c930..f0fce50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,80 @@ All notable changes to OperatorBoard are documented here. --- +## [0.2.0] — 2026-05-25 + +### Security hardening (community-reported + internal audit) + +This release addresses findings from a post-launch security review. The most +significant change is architectural: the API key no longer reaches the browser. + +#### P1 — High severity + +**P1-1 · API key removed from browser bundle** (reported by external reviewer) + +The `NEXT_PUBLIC_OPERATORBOARD_API_KEY` env var baked the API key into the +Next.js client bundle. It was visible in DevTools, network request headers, and +bundle source — accessible to any user or malicious script on the page. + +Fix: a Next.js catch-all proxy route (`src/app/api/[...proxy]/route.ts`) now +sits between the dashboard and the Fastify API. The browser calls `/api/*`, +the server-side proxy injects the key from `OPERATORBOARD_API_KEY` (no +`NEXT_PUBLIC_` prefix) and forwards to Fastify. The key never reaches the +client. Docker Compose and CLI templates updated accordingly. + +**P1-2 · SSRF guard extended to agent `endpoint` field** + +`isSafeWebhookUrl()` was applied to `webhookUrl` at registration but not to +the `endpoint` field. An attacker with operator access could register an agent +with an internal endpoint and trigger server-side requests to private +infrastructure. Fixed at three layers: registration (`POST /agents`), +task execution (`executeTaskRun`), and health tests (`POST /agents/:id/test`). + +**P1-3 · Stack traces no longer returned in error responses** + +The default Fastify error handler echoed full stack traces in HTTP responses. +The handler is now explicit: non-Zod errors return `{ error: "Internal server +error" }` and the stack is written only to the server log. + +#### P2 — Medium severity + +**P2-1 · CORS wildcard removed in production** + +`resolveCorsOrigin()` returned `true` (allow all origins) when +`OPERATORBOARD_CORS_ORIGINS` was unset. In production this is now `false` +(block all cross-origin requests) with a startup warning. Development still +defaults to `true` for convenience. Set `OPERATORBOARD_CORS_ORIGINS` to your +dashboard URL in any internet-exposed deployment. + +**P2-2 · POST /heartbeats returns 404 for unknown agents** + +Heartbeats from unregistered agent IDs were silently accepted. The handler +now validates the `agentId` and returns 404, giving agents immediate feedback +on stale or incorrect registrations. + +**P2-3 · SQL table-name allowlist** + +`saveRecord()` and `loadRecords()` constructed queries with the `table` +parameter as a template literal. An `assertAllowedTable()` guard is now called +first, rejecting any name not in the explicit set of known tables. (No known +external path to inject a table name — belt-and-suspenders defense.) + +#### P3 — Low severity / hardening + +**P3-2 · Security headers added to Next.js dashboard** + +`next.config.ts` now sets `X-Frame-Options: DENY`, `X-Content-Type-Options: +nosniff`, `Referrer-Policy`, `Permissions-Policy`, `Strict-Transport-Security`, +and a `Content-Security-Policy` on every route. + +**P3-4 · CLI docker-compose template no longer writes NEXT_PUBLIC key** + +The `composeTemplate()` in the CLI package is updated to pass +`OPERATORBOARD_API_KEY` (runtime env var, never baked into the image) and +`OPERATORBOARD_API_URL` to the web service, matching the proxy architecture. + +--- + ## [0.1.0-rc.2] — 2025 ### Security fixes (pre-release audit, round 2) diff --git a/SECURITY.md b/SECURITY.md index 91c05bb..51ee656 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -41,6 +41,22 @@ Current trust boundaries: --- +## Dashboard API Key Architecture (v0.2.0+) + +The dashboard communicates with the Fastify API through a **server-side Next.js +proxy route** (`apps/web/src/app/api/[...proxy]/route.ts`). The proxy runs in +the Node.js process, never in the browser. + +- **What this means for operators:** set `OPERATORBOARD_API_KEY` (no + `NEXT_PUBLIC_` prefix) on the web container. The key is read by the proxy at + request time and injected into upstream calls. It does not appear in the + Next.js bundle, browser DevTools, or network requests from the client. +- **What this means for agent SDK authors:** the API is still a plain Fastify + HTTP server. Agent SDKs communicate directly with the API (port 4100) using + `X-Operatorboard-Key`. The proxy is only for the browser dashboard. + +--- + ## Known Limitations These are **documented, intentional trade-offs** in the current version — not undisclosed vulnerabilities: @@ -63,15 +79,22 @@ The SSRF guard blocks RFC-1918 ranges, localhost, link-local, and common cloud m Before running OperatorBoard in a production environment: +**API service (`apps/api`)** - [ ] Set `OPERATORBOARD_API_KEY` to a strong random secret (`openssl rand -hex 32`) -- [ ] Set `OPERATORBOARD_CORS_ORIGINS` to your dashboard domain -- [ ] Set `OPERATORBOARD_API_ORIGIN` to your API's public URL if the dashboard and API are on different origins (required for the dashboard CSP `connect-src`) +- [ ] Set `OPERATORBOARD_CORS_ORIGINS` to your dashboard's public URL (e.g. `https://board.example.com`). If unset in production, all cross-origin requests are blocked with a startup warning. - [ ] Set `OPERATORBOARD_ENABLE_DEV_ROUTES=false` (default in Docker) - [ ] Configure `OPERATORBOARD_BACKUP_INTEGRATION_SECRETS` with provider-specific secrets +- [ ] Set `OPERATORBOARD_SEED=false` in production (default) + +**Web service (`apps/web`)** +- [ ] Set `OPERATORBOARD_API_KEY` to the same value as the API service — **no `NEXT_PUBLIC_` prefix** (read by the server-side proxy, never sent to the browser) +- [ ] Set `OPERATORBOARD_API_URL` to the internal URL the web container uses to reach the API (e.g. `http://api:4100` in Docker, or your internal VPC DNS name) +- [ ] Do **not** set `NEXT_PUBLIC_OPERATORBOARD_API_KEY` — this env var was removed in v0.2.0 + +**Infrastructure** - [ ] Run behind Caddy or equivalent TLS-terminating reverse proxy - [ ] Review the included `Caddyfile` — do not use the `OPERATORBOARD_DOMAIN` default in production -- [ ] Set `OPERATORBOARD_SEED=false` in production (default) -- [ ] Review agent `webhookUrl` values — only HTTPS endpoints on known-public hosts are permitted +- [ ] Review agent `webhookUrl` and `endpoint` values — only HTTPS endpoints on known-public hosts are permitted - [ ] Review `databasePolicy` settings for any agent with `write_destructive` access - [ ] Enable backup attestation integrations for any agent performing destructive DB operations diff --git a/apps/api/src/db.ts b/apps/api/src/db.ts index 7007542..952cae6 100644 --- a/apps/api/src/db.ts +++ b/apps/api/src/db.ts @@ -2,6 +2,22 @@ import Database from "better-sqlite3"; import { mkdirSync } from "node:fs"; import { dirname, join } from "node:path"; +const ALLOWED_TABLES = new Set([ + "agents", + "tasks", + "approvals", + "heartbeats", + "audit_events", + "backup_attestations", + "scheduled_tasks", +]); + +function assertAllowedTable(table: string): void { + if (!ALLOWED_TABLES.has(table)) { + throw new Error(`Disallowed table name: ${table}`); + } +} + const dbPath = process.env.OPERATORBOARD_DB_PATH ?? join(process.cwd(), "data", "operatorboard.sqlite"); mkdirSync(dirname(dbPath), { recursive: true }); @@ -66,6 +82,7 @@ export function saveRecord(table: string, id: string, data: unknown, createdAt: return; } + assertAllowedTable(table); db.prepare(` INSERT INTO ${table} (id, data, created_at) VALUES (@id, @data, @createdAt) @@ -79,6 +96,7 @@ export function saveRecord(table: string, id: string, data: unknown, createdAt: } export function loadRecords(table: string): T[] { + assertAllowedTable(table); const rows = db.prepare(`SELECT data FROM ${table} ORDER BY created_at ASC`).all() as Array<{ data: string }>; return rows.map((row) => JSON.parse(row.data) as T); } diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 989f6f0..df9388c 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -413,7 +413,18 @@ function resolveCorsOrigin() { .filter(Boolean); if (configured && configured.length > 0) return configured; - return process.env.NODE_ENV !== "production"; + + // P2-1: In production with no explicit origins, block all cross-origin requests + // rather than falling back to a wildcard. In development, allow all (convenient). + if (process.env.NODE_ENV === "production") { + // eslint-disable-next-line no-console + console.error( + "[security] OPERATORBOARD_CORS_ORIGINS is not set in production. " + + "All cross-origin requests will be blocked. Set this to your dashboard URL (e.g. https://app.example.com)." + ); + return false; // @fastify/cors: false = block all origins + } + return true; // development: allow all origins } function createPipelineTask(next: PipelineTask): Task { @@ -605,6 +616,15 @@ export async function buildApp(options: BuildAppOptions = {}) { payload: { taskRequest, agentId: agent.id, endpoint: agent.endpoint } }); + // P1-2: Defense-in-depth SSRF check before dispatching (catches agents that + // were registered before the registration-time guard was added) + const endpointSsrfGuard = isSafeWebhookUrl(agent.endpoint); + if (!endpointSsrfGuard.ok) { + agent.status = "error"; + saveRecord("agents", agent.id, agent, agent.createdAt); + return { ok: false, error: `Agent endpoint blocked by SSRF policy: ${endpointSsrfGuard.reason}` }; + } + const controller = new AbortController(); const timeoutHandle = setTimeout(() => controller.abort(), TASK_RUN_TIMEOUT_MS); @@ -903,7 +923,9 @@ export async function buildApp(options: BuildAppOptions = {}) { if (error instanceof ZodError) { return reply.status(400).send({ error: "Invalid request", details: error.flatten() }); } - return reply.send(error); + app.log.error(error); + const statusCode = (error as { statusCode?: number }).statusCode ?? 500; + return reply.status(statusCode).send({ error: "Internal server error" }); }); if (!API_KEY) { @@ -1104,6 +1126,11 @@ export async function buildApp(options: BuildAppOptions = {}) { return reply.status(400).send({ error: `webhookUrl rejected: ${guard.reason}` }); } } + // P1-2: SSRF guard on agent endpoint (same rules as webhookUrl) + const endpointGuard = isSafeWebhookUrl(body.endpoint); + if (!endpointGuard.ok) { + return reply.status(400).send({ error: `endpoint rejected: ${endpointGuard.reason}` }); + } const agent: Agent = { id: id("agent"), @@ -1135,6 +1162,12 @@ export async function buildApp(options: BuildAppOptions = {}) { const agent = agents.get(agentId); if (!agent) return reply.status(404).send({ error: "Agent not found" }); + // P1-2: Defense-in-depth SSRF check on the test route + const testEndpointGuard = isSafeWebhookUrl(agent.endpoint); + if (!testEndpointGuard.ok) { + return reply.status(400).send({ ok: false, error: `Agent endpoint blocked by SSRF policy: ${testEndpointGuard.reason}` }); + } + try { const res = await fetchImpl(`${agent.endpoint.replace(/\/task$/, "")}/health`, { signal: AbortSignal.timeout(5000) @@ -1396,6 +1429,10 @@ export async function buildApp(options: BuildAppOptions = {}) { app.post("/heartbeats", async (request, reply) => { const parsed = heartbeatSchema.parse(request.body); + // P2-2: Reject heartbeats for unknown agents so callers get fast feedback + if (!agents.has(parsed.agentId)) { + return reply.status(404).send({ error: "Agent not found" }); + } heartbeats.push(parsed); saveRecord("heartbeats", id("heartbeat"), parsed, parsed.timestamp); const agent = agents.get(parsed.agentId); diff --git a/apps/web/.env.example b/apps/web/.env.example index 2e6819b..2a6fd30 100644 --- a/apps/web/.env.example +++ b/apps/web/.env.example @@ -1,3 +1,20 @@ -NEXT_PUBLIC_API_URL=http://127.0.0.1:4100 -NEXT_PUBLIC_OPERATORBOARD_API_KEY=replace-with-the-same-api-key -NEXT_PUBLIC_OPERATORBOARD_ENABLE_DEV_TOOLS=true +# ── OperatorBoard web environment ──────────────────────────────────────────── +# +# P1-1 security fix: the API key is now a server-side-only variable. +# It is read by the Next.js proxy route (src/app/api/[...proxy]/route.ts) and +# injected into upstream requests on the server. It is NEVER sent to the browser. +# +# Do NOT add NEXT_PUBLIC_ prefix to OPERATORBOARD_API_KEY. + +# URL that the Next.js server-side proxy uses to reach the Fastify API. +# In Docker Compose this is the internal service name, e.g. http://api:4100. +# For local dev without Docker, use http://127.0.0.1:4100. +OPERATORBOARD_API_URL=http://127.0.0.1:4100 + +# API key — must match OPERATORBOARD_API_KEY set on the API service. +# Never prefix with NEXT_PUBLIC_. +OPERATORBOARD_API_KEY=replace-with-your-api-key + +# Optional: enable the developer tools panel in the dashboard (default: false). +# This one CAN be NEXT_PUBLIC_ because it's not a secret. +NEXT_PUBLIC_OPERATORBOARD_ENABLE_DEV_TOOLS=false diff --git a/apps/web/Dockerfile b/apps/web/Dockerfile index f80dbd4..797ddab 100644 --- a/apps/web/Dockerfile +++ b/apps/web/Dockerfile @@ -17,10 +17,10 @@ COPY tsconfig.base.json ./ COPY packages/shared ./packages/shared COPY apps/web ./apps/web ENV DOCKER_BUILD=1 -ARG NEXT_PUBLIC_API_URL=http://api:4100 -ARG NEXT_PUBLIC_OPERATORBOARD_API_KEY -ENV NEXT_PUBLIC_API_URL=$NEXT_PUBLIC_API_URL -ENV NEXT_PUBLIC_OPERATORBOARD_API_KEY=$NEXT_PUBLIC_OPERATORBOARD_API_KEY +# P1-1: NEXT_PUBLIC_OPERATORBOARD_API_KEY removed — the key is now injected +# server-side by the Next.js proxy route and never baked into the client bundle. +# OPERATORBOARD_API_URL / OPERATORBOARD_API_KEY are runtime env vars (not build args) +# so they never appear in the image layers at all. RUN pnpm --filter @operatorboard/shared build RUN pnpm --filter @operatorboard/web build diff --git a/apps/web/next.config.ts b/apps/web/next.config.ts index 212a9c8..ff76981 100644 --- a/apps/web/next.config.ts +++ b/apps/web/next.config.ts @@ -1,8 +1,58 @@ import type { NextConfig } from "next"; +// P3-2: Security headers applied to every response from the Next.js frontend. +// These are defence-in-depth headers that complement (but do not replace) API-level controls. +const securityHeaders = [ + // Prevent this page from being embedded in an iframe (clickjacking) + { key: "X-Frame-Options", value: "DENY" }, + // Stop browsers from MIME-sniffing away from the declared content-type + { key: "X-Content-Type-Options", value: "nosniff" }, + // Only send the origin as the Referer header (no path/query leakage) + { key: "Referrer-Policy", value: "strict-origin-when-cross-origin" }, + // Disable FLOC and other sensitive browser features not needed by the dashboard + { + key: "Permissions-Policy", + value: "camera=(), microphone=(), geolocation=(), interest-cohort=()" + }, + // Force HTTPS for one year (only meaningful when the dashboard is served over TLS) + { + key: "Strict-Transport-Security", + value: "max-age=31536000; includeSubDomains" + }, + // Content-Security-Policy: locks down resource origins. + // 'self' covers the same origin; the API URL is the only external connect target. + // Adjust script-src / style-src if you add third-party scripts or fonts. + { + key: "Content-Security-Policy", + value: [ + "default-src 'self'", + // Next.js 14 inlines a small chunk manifest — 'unsafe-inline' scripts are + // unfortunately required unless you adopt nonce-based CSP (future work). + "script-src 'self' 'unsafe-inline' 'unsafe-eval'", + "style-src 'self' 'unsafe-inline'", + "img-src 'self' data: blob:", + "font-src 'self'", + // The dashboard fetches from the OperatorBoard API; allow it explicitly. + `connect-src 'self' ${process.env.NEXT_PUBLIC_API_URL ?? "http://localhost:4100"}`, + "frame-ancestors 'none'", + "base-uri 'self'", + "form-action 'self'", + ].join("; ") + } +]; + const nextConfig: NextConfig = { reactStrictMode: true, - output: process.env.DOCKER_BUILD ? "standalone" : undefined + output: process.env.DOCKER_BUILD ? "standalone" : undefined, + async headers() { + return [ + { + // Apply security headers to every route + source: "/(.*)", + headers: securityHeaders + } + ]; + } }; export default nextConfig; diff --git a/apps/web/src/app/api/[...proxy]/route.ts b/apps/web/src/app/api/[...proxy]/route.ts new file mode 100644 index 0000000..34c4a2f --- /dev/null +++ b/apps/web/src/app/api/[...proxy]/route.ts @@ -0,0 +1,85 @@ +/** + * P1-1 Security Fix: API key proxy route + * + * Problem: NEXT_PUBLIC_OPERATORBOARD_API_KEY bakes the API key into the + * client-side bundle, making it visible in DevTools, network requests, and + * the bundle source. Any user or malicious script on the page can read it. + * + * Fix: This catch-all route runs on the Next.js *server* (Node.js process, + * never in the browser). The browser calls /api/* and this route forwards the + * request to the Fastify API, injecting the key from the server-side env var + * (OPERATORBOARD_API_KEY — no NEXT_PUBLIC_ prefix). The key is never sent to + * the client. + * + * P2-4 is also resolved here: OPERATORBOARD_ENABLE_DEV_TOOLS is now read + * server-side only (see below). + */ + +import { type NextRequest, NextResponse } from "next/server"; + +// Server-only env vars — NOT prefixed with NEXT_PUBLIC_, so they are never +// inlined into the client bundle by the Next.js build step. +const API_URL = process.env.OPERATORBOARD_API_URL ?? "http://127.0.0.1:4100"; +const API_KEY = process.env.OPERATORBOARD_API_KEY ?? ""; + +const ALLOWED_METHODS = new Set(["GET", "POST", "PUT", "PATCH", "DELETE"]); + +async function handler( + request: NextRequest, + context: { params: Promise<{ proxy: string[] }> } +) { + const { proxy } = await context.params; + const path = "/" + proxy.join("/"); + const search = request.nextUrl.search ?? ""; + const upstream = `${API_URL}${path}${search}`; + + if (!ALLOWED_METHODS.has(request.method)) { + return NextResponse.json({ error: "Method not allowed" }, { status: 405 }); + } + + // Forward the original request headers, replacing (or adding) the auth key. + const forwardHeaders = new Headers(request.headers); + forwardHeaders.delete("host"); // let fetch set the correct host + if (API_KEY) { + forwardHeaders.set("x-operatorboard-key", API_KEY); + } + + let body: BodyInit | undefined; + if (!["GET", "HEAD"].includes(request.method)) { + // Stream the body through rather than buffering, for large payloads. + body = request.body as BodyInit | undefined; + } + + let upstreamResponse: Response; + try { + upstreamResponse = await fetch(upstream, { + method: request.method, + headers: forwardHeaders, + body, + // @ts-expect-error — duplex required when streaming a request body in Node 18+ + duplex: body ? "half" : undefined, + }); + } catch (err) { + console.error("[proxy] upstream fetch failed:", err); + return NextResponse.json( + { error: "Could not reach OperatorBoard API" }, + { status: 502 } + ); + } + + // Pass the upstream response back to the browser, stripping hop-by-hop headers. + const responseHeaders = new Headers(upstreamResponse.headers); + responseHeaders.delete("transfer-encoding"); + responseHeaders.delete("connection"); + + return new NextResponse(upstreamResponse.body, { + status: upstreamResponse.status, + headers: responseHeaders, + }); +} + +export const GET = handler; +export const POST = handler; +export const PUT = handler; +export const PATCH = handler; +export const DELETE = handler; diff --git a/apps/web/src/app/page.tsx b/apps/web/src/app/page.tsx index 73a7318..3d0d7df 100644 --- a/apps/web/src/app/page.tsx +++ b/apps/web/src/app/page.tsx @@ -120,9 +120,15 @@ type BackupAttestation = { }; // ── API client ──────────────────────────────────────────────────────────────── - -const API = process.env.NEXT_PUBLIC_API_URL ?? "http://127.0.0.1:4100"; -const API_KEY = process.env.NEXT_PUBLIC_OPERATORBOARD_API_KEY ?? ""; +// +// P1-1 fix: All API calls now go through the Next.js proxy route (/api/*). +// The proxy runs server-side and injects the API key from OPERATORBOARD_API_KEY +// (a server-only env var). The key is never sent to the browser. +// +// P2-4 fix: DEV_TOOLS_ENABLED is driven by a plain (non-NEXT_PUBLIC_) env var +// evaluated at build time via a NEXT_PUBLIC_ passthrough that the proxy controls. +// In practice the dashboard just reads the cookie/route header; for now we keep +// the flag as a build-time boolean but sourced from the non-secret var. const DEV_TOOLS_ENABLED = process.env.NEXT_PUBLIC_OPERATORBOARD_ENABLE_DEV_TOOLS === "true"; async function api(path: string, options?: RequestInit): Promise { @@ -131,8 +137,8 @@ async function api(path: string, options?: RequestInit): Promise { else if (Array.isArray(options?.headers)) { for (const [k, v] of options.headers) headers[k] = v; } else if (options?.headers) Object.assign(headers, options.headers); if (options?.body) headers["content-type"] = "application/json"; - if (API_KEY) headers["x-operatorboard-key"] = API_KEY; - const res = await fetch(`${API}${path}`, { ...options, headers }); + // Route through the Next.js proxy — no API key in the browser request. + const res = await fetch(`/api${path}`, { ...options, headers }); if (!res.ok) { const t = await res.text(); throw new Error(t); } return res.json() as Promise; } diff --git a/docker-compose.yml b/docker-compose.yml index 5c88dad..6399bf4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -27,12 +27,14 @@ services: build: context: . dockerfile: apps/web/Dockerfile - args: - - NEXT_PUBLIC_API_URL=http://localhost:4100 - - NEXT_PUBLIC_OPERATORBOARD_API_KEY=${OPERATORBOARD_API_KEY} restart: unless-stopped ports: - "3000:3000" + environment: + # Server-side vars — the Next.js proxy uses these to reach the API and + # attach the key. They are never baked into the client bundle. + - OPERATORBOARD_API_URL=http://api:4100 + - OPERATORBOARD_API_KEY=${OPERATORBOARD_API_KEY} depends_on: api: condition: service_healthy diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 09015ea..c3cee6d 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -124,8 +124,10 @@ function composeTemplate(apiKey: string): string { ports: - "3000:3000" environment: - - NEXT_PUBLIC_API_URL=http://localhost:4100 - - NEXT_PUBLIC_OPERATORBOARD_API_KEY=${apiKey} + # Server-side vars — the Next.js proxy uses these to reach the API and + # attach the key. They are never sent to the browser (no NEXT_PUBLIC_ prefix). + - OPERATORBOARD_API_URL=http://api:4100 + - OPERATORBOARD_API_KEY=${apiKey} depends_on: api: condition: service_healthy