diff --git a/docs/deploy/index.html b/docs/deploy/index.html
index 398da8b..a6dd6f9 100644
--- a/docs/deploy/index.html
+++ b/docs/deploy/index.html
@@ -298,6 +298,7 @@
Docker Compose (full stack)
OPENAI_API_KEY: ${OPENAI_API_KEY}
GITHUB_TOKEN: ${GITHUB_TOKEN:-}
GITHUB_WEBHOOK_SECRET: ${GITHUB_WEBHOOK_SECRET:-}
+ MCP_JWT_SECRET: ${MCP_JWT_SECRET}
PATHFINDER_CONFIG: /app/pathfinder.yaml
WORKSPACE_DIR: /data/workspaces
PORT: ${PORT:-3001}
@@ -398,7 +399,7 @@ Environment Variables
DATABASE_URL | For search tools | - | PostgreSQL connection string (with pgvector) |
- MCP_JWT_SECRET | Required in production | - | HMAC secret for signing OAuth access/refresh tokens. Generate with openssl rand -hex 32. Rotating this invalidates all issued tokens — clients will re-authenticate transparently. In development mode (NODE_ENV=development) a random secret is generated per process and logged as a warning. |
+ MCP_JWT_SECRET | Required in production | - | HMAC secret for signing OAuth access/refresh tokens. Generate with openssl rand -hex 32. Rotating this invalidates all issued tokens — clients will re-authenticate transparently. In any non-production environment (any NODE_ENV other than production) a random secret is generated per process and logged as a warning. |
OPENAI_API_KEY | When embedding.provider is "openai" (default) | - | OpenAI API key for computing embeddings. Not needed for ollama or local providers. |
GITHUB_TOKEN | For private repos | - | GitHub PAT for cloning private repositories |
GITHUB_WEBHOOK_SECRET | For webhooks | - | Secret for validating GitHub webhook payloads |
@@ -413,7 +414,7 @@ Environment Variables
NODE_ENV | No | development | Set to production for deployed instances |
LOG_LEVEL | No | info | Logging verbosity (debug, info, warn, error) |
CLONE_DIR | No | /tmp/mcp-repos | Directory for git repo clones |
- ANALYTICS_TOKEN | When analytics enabled | - | Bearer token for authenticating /api/analytics/* endpoints |
+ ANALYTICS_TOKEN | For privileged surfaces | - | Shared admin-access bearer token for all privileged surfaces — analytics (/api/analytics/*), Atlas ratification (/api/atlas/*), and admin ops (/admin/*). See Admin control surface. |
@@ -448,6 +449,43 @@ Webhook URLs
Slack: Set your Slack app's Event Subscriptions Request URL to https://your-domain/webhooks/slack.
Discord: Set your Discord application's Interactions Endpoint URL to https://your-domain/webhooks/discord.
+ Admin control surface
+
+ Pathfinder exposes an authenticated control plane for operational tasks that would otherwise require database surgery and a redeploy — forcing a reindex, inspecting index state, and so on.
+
+ Authentication
+ All privileged surfaces — analytics (/api/analytics/*), Atlas ratification (/api/atlas/*), and admin ops (/admin/*) — share one admin-access bearer token: the ANALYTICS_TOKEN environment variable. Authenticate every request with an Authorization: Bearer $ANALYTICS_TOKEN header.
+
+ - 401 Unauthorized — the token is missing or does not match.
+ - 503 Service Unavailable — no token is configured. These surfaces fail closed: with no ANALYTICS_TOKEN set, they reject every request rather than running unauthenticated.
+
+
+ Force a reindex — POST /admin/reindex
+ Queues an indexing job and returns 202 Accepted. The body selects the scope:
+
+ { "scope": "full" } — reindex every configured source.
+ { "scope": "source", "source": "<configured-source-name>" } — reindex a single named source.
+ { "scope": "repo", "repo": "<configured-repo-url>" } — incrementally reindex a single git-backed source by repo URL.
+
+ An unknown source name or repo URL returns 400 Bad Request, so a typo fails loudly instead of silently doing nothing.
+
+ $ curl -X POST https://your-domain/admin/reindex \
+ -H "Authorization: Bearer $ANALYTICS_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{"scope":"source","source":"my-docs"}'
+
+ Inspect index state — GET /admin/index-stats
+ Returns 200 OK with current index statistics (a POST /admin/index-stats alias is also accepted):
+ $ curl https://your-domain/admin/index-stats \
+ -H "Authorization: Bearer $ANALYTICS_TOKEN"
+ The response body has the shape:
+ {
+ "total_chunks": 1280,
+ "by_source": { "my-docs": 1280 },
+ "indexed_repos": ["https://github.com/acme/docs"],
+ "sources": [ ]
+}
+
Volume Mounts
What you mount depends on your source configuration:
diff --git a/src/__tests__/atlas-db.test.ts b/src/__tests__/atlas-db.test.ts
index 0e5bd03..dda0e1a 100644
--- a/src/__tests__/atlas-db.test.ts
+++ b/src/__tests__/atlas-db.test.ts
@@ -1,4 +1,12 @@
-import { describe, it, expect, beforeAll, afterAll, beforeEach } from "vitest";
+import {
+ describe,
+ it,
+ expect,
+ beforeAll,
+ afterAll,
+ beforeEach,
+ vi,
+} from "vitest";
import { PGlite } from "@electric-sql/pglite";
import { __setPoolForTesting, __resetPoolForTesting } from "../db/client.js";
import { generatePostSchemaMigration } from "../db/schema.js";
@@ -13,6 +21,7 @@ import {
rejectAtlasSeedEntry,
upsertAtlasCachePage,
upsertAtlasSeedCandidate,
+ __testing,
} from "../db/atlas.js";
const ATLAS_DDL_MARKER = "-- Atlas durable seed knowledge.";
@@ -457,3 +466,54 @@ describe("Atlas DB helpers", () => {
]);
});
});
+
+describe("Atlas row-mapper robustness", () => {
+ it("throws a context-bearing error (not a bare SyntaxError) for a malformed JSON seed column", () => {
+ expect(() =>
+ __testing.mapSeedRow({
+ id: 42,
+ canonical_key: "runtime:why",
+ source_name: "atlas",
+ status: "approved",
+ title: "Runtime why",
+ content: "body",
+ provenance: "{not valid json",
+ evidence: "[]",
+ }),
+ ).toThrowError(/provenance of seed row id=42 key=runtime:why/);
+ });
+
+ it("attributes a malformed cache JSON column to its row identity", () => {
+ expect(() =>
+ __testing.mapCacheRow({
+ id: 7,
+ page_key: "runtime/overview",
+ source_name: "atlas",
+ title: "Runtime overview",
+ content_hash: "hash-1",
+ stale: false,
+ generated_seed_ids: "[1, 2,",
+ provenance: "{}",
+ }),
+ ).toThrowError(
+ /generated_seed_ids of cache row id=7 key=runtime\/overview/,
+ );
+ });
+
+ it("returns null and warns for an invalid timestamp instead of yielding Invalid Date", () => {
+ const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
+ const result = __testing.toDate("not-a-date", "approved_at of seed row 5");
+ expect(result).toBeNull();
+ expect(warnSpy).toHaveBeenCalledWith(
+ expect.stringContaining("invalid timestamp"),
+ );
+ warnSpy.mockRestore();
+ });
+
+ it("passes through valid timestamps unchanged", () => {
+ const iso = "2026-01-01T00:00:00.000Z";
+ const result = __testing.toDate(iso);
+ expect(result).toBeInstanceOf(Date);
+ expect(result?.toISOString()).toBe(iso);
+ });
+});
diff --git a/src/__tests__/atlas-ratification-endpoints.test.ts b/src/__tests__/atlas-ratification-endpoints.test.ts
index 9d26486..f3a2190 100644
--- a/src/__tests__/atlas-ratification-endpoints.test.ts
+++ b/src/__tests__/atlas-ratification-endpoints.test.ts
@@ -14,6 +14,7 @@ import { __setPoolForTesting, __resetPoolForTesting } from "../db/client.js";
import { generatePostSchemaMigration } from "../db/schema.js";
import {
approveAtlasSeedEntry,
+ listPendingAtlasSeedCandidates,
upsertAtlasSeedCandidate,
} from "../db/atlas.js";
import { AtlasDataProvider } from "../indexing/providers/atlas.js";
@@ -448,6 +449,60 @@ describe("Atlas ratification endpoints", () => {
expect(queueSourceReindex).toHaveBeenCalledWith("atlas");
});
+ it("reports reindexQueued:false (NOT 500) when the queue enqueue throws after a durable approval", async () => {
+ await upsertAtlasSeedCandidate({
+ canonicalKey: "runtime:approve-queue-throws",
+ sourceName: "atlas",
+ title: "Approve while queue throws",
+ content: "Candidate approved while the reindex enqueue throws",
+ provenance: {},
+ evidence: [],
+ });
+ const queueSourceReindex = vi.fn(() => {
+ throw new Error("queue is on fire");
+ });
+ __setAtlasOrchestratorForTesting({
+ queueFullReindex: vi.fn(),
+ queueSourceReindex,
+ queueIncrementalReindex: vi.fn(),
+ });
+ const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => {});
+ server = await startServer();
+
+ const approved = await request(
+ server,
+ "POST",
+ "/api/atlas/candidates/approve",
+ {
+ headers: {
+ Authorization: "Bearer secret",
+ "X-Atlas-Actor": "reviewer@example.test",
+ },
+ body: { canonicalKey: "runtime:approve-queue-throws" },
+ },
+ );
+
+ // A reindex-enqueue hiccup must NOT report a committed approval as a failure.
+ expect(approved.status).toBe(200);
+ const body = JSON.parse(approved.body);
+ expect(body.reindexQueued).toBe(false);
+ expect(body.candidate).toMatchObject({
+ canonicalKey: "runtime:approve-queue-throws",
+ status: "approved",
+ });
+ expect(queueSourceReindex).toHaveBeenCalledWith("atlas");
+ expect(consoleSpy).toHaveBeenCalled();
+ consoleSpy.mockRestore();
+
+ // The approval must be durably persisted — verified here by its absence
+ // from the pending list (the candidate no longer awaits review). The
+ // 409-on-re-approve behavior is covered by a separate test.
+ const pending = await listPendingAtlasSeedCandidates();
+ expect(pending.map((row) => row.canonicalKey)).not.toContain(
+ "runtime:approve-queue-throws",
+ );
+ });
+
it("returns 409 when approving a candidate that is missing or not pending", async () => {
server = await startServer();
diff --git a/src/db/atlas.ts b/src/db/atlas.ts
index fb64fdd..d803bfa 100644
--- a/src/db/atlas.ts
+++ b/src/db/atlas.ts
@@ -120,35 +120,59 @@ export type AtlasIndexableContent =
cachePage: AtlasCachePage;
};
-function parseJsonObject(value: unknown): Record<string, unknown> {
+// Parse a JSON string column with row-attributed context. A single malformed
+// `provenance`/`evidence`/`generated_seed_ids` blob would otherwise throw a
+// bare SyntaxError with no row identity and — because the list queries map
+// every row — poison the WHOLE list query into an opaque 500 that hides all
+// the valid rows. `ctx` names the column + offending row so the failure is
+// actionable.
+function parseJsonString<T>(value: string, ctx: string): T {
+ try {
+ return JSON.parse(value) as T;
+ } catch (err) {
+ const detail = err instanceof Error ? err.message : String(err);
+ throw new Error(`Failed to parse JSON for ${ctx}: ${detail}`);
+ }
+}
+
+function parseJsonObject(value: unknown, ctx: string): Record<string, unknown> {
if (value == null) return {};
if (typeof value === "string") {
- return JSON.parse(value) as Record<string, unknown>;
+ return parseJsonString<Record<string, unknown>>(value, ctx);
}
return value as Record<string, unknown>;
}
-function parseJsonArray(value: unknown): unknown[] {
+function parseJsonArray(value: unknown, ctx: string): unknown[] {
if (value == null) return [];
if (typeof value === "string") {
- return JSON.parse(value) as unknown[];
+ return parseJsonString(value, ctx);
}
return value as unknown[];
}
-function parseNumberArray(value: unknown): number[] {
- return parseJsonArray(value).filter(
+function parseNumberArray(value: unknown, ctx: string): number[] {
+ return parseJsonArray(value, ctx).filter(
(item): item is number => typeof item === "number",
);
}
-function toDate(value: unknown): Date | null {
+function toDate(value: unknown, ctx?: string): Date | null {
if (value == null) return null;
if (value instanceof Date) return value;
- return new Date(value as string);
+ const d = new Date(value as string);
+ if (isNaN(d.getTime())) {
+ console.warn(
+ `[atlas] Ignoring invalid timestamp${ctx ? ` for ${ctx}` : ""}: ` +
+ `${JSON.stringify(value)}`,
+ );
+ return null;
+ }
+ return d;
}
function mapSeedRow(row: Record<string, unknown>): AtlasSeedEntry {
+ const ctx = `seed row id=${row.id} key=${String(row.canonical_key)}`;
return {
id: Number(row.id),
canonicalKey: row.canonical_key as string,
@@ -159,20 +183,21 @@ function mapSeedRow(row: Record<string, unknown>): AtlasSeedEntry {
status: row.status as AtlasSeedStatus,
title: row.title as string,
content: row.content as string,
- provenance: parseJsonObject(row.provenance),
- evidence: parseJsonArray(row.evidence),
+ provenance: parseJsonObject(row.provenance, `provenance of ${ctx}`),
+ evidence: parseJsonArray(row.evidence, `evidence of ${ctx}`),
approvedBy: (row.approved_by as string | null) ?? null,
- approvedAt: toDate(row.approved_at),
+ approvedAt: toDate(row.approved_at, `approved_at of ${ctx}`),
rejectedBy: (row.rejected_by as string | null) ?? null,
- rejectedAt: toDate(row.rejected_at),
+ rejectedAt: toDate(row.rejected_at, `rejected_at of ${ctx}`),
rejectionReason: (row.rejection_reason as string | null) ?? null,
- createdAt: toDate(row.created_at) ?? new Date(0),
- updatedAt: toDate(row.updated_at) ?? new Date(0),
+ createdAt: toDate(row.created_at, `created_at of ${ctx}`) ?? new Date(0),
+ updatedAt: toDate(row.updated_at, `updated_at of ${ctx}`) ?? new Date(0),
};
}
function mapCacheRow(row: Record<string, unknown>): AtlasCachePage {
- const rawProvenance = parseJsonObject(row.provenance);
+ const ctx = `cache row id=${row.id} key=${String(row.page_key)}`;
+ const rawProvenance = parseJsonObject(row.provenance, `provenance of ${ctx}`);
const { [CACHE_CONTENT_KEY]: contentValue, ...provenance } = rawProvenance;
return {
id: Number(row.id),
@@ -183,13 +208,16 @@ function mapCacheRow(row: Record<string, unknown>): AtlasCachePage {
contentHash: row.content_hash as string,
stale: Boolean(row.stale),
staleReason: (row.stale_reason as string | null) ?? null,
- generatedSeedIds: parseNumberArray(row.generated_seed_ids),
+ generatedSeedIds: parseNumberArray(
+ row.generated_seed_ids,
+ `generated_seed_ids of ${ctx}`,
+ ),
provenance,
- generatedAt: toDate(row.generated_at),
- errorAt: toDate(row.error_at),
+ generatedAt: toDate(row.generated_at, `generated_at of ${ctx}`),
+ errorAt: toDate(row.error_at, `error_at of ${ctx}`),
errorMessage: (row.error_message as string | null) ?? null,
- createdAt: toDate(row.created_at) ?? new Date(0),
- updatedAt: toDate(row.updated_at) ?? new Date(0),
+ createdAt: toDate(row.created_at, `created_at of ${ctx}`) ?? new Date(0),
+ updatedAt: toDate(row.updated_at, `updated_at of ${ctx}`) ?? new Date(0),
};
}
@@ -772,10 +800,20 @@ export async function getAtlasStateToken(
seedResult.rows[0]?.state_token,
cacheResult.rows[0]?.state_token,
]
- .map((value) => toDate(value))
+ .map((value) => toDate(value, "atlas state token"))
.filter((value): value is Date => value !== null);
if (values.length === 0) return null;
return new Date(
Math.max(...values.map((value) => value.getTime())),
).toISOString();
}
+
+// Test-only exports of the otherwise-private row mappers and timestamp parser.
+// These are pure functions; exporting them lets us unit-test the robustness
+// paths (malformed JSON → context-bearing error, invalid timestamp → null)
+// directly without contriving a backing store that can hold malformed columns.
+export const __testing = {
+ mapSeedRow,
+ mapCacheRow,
+ toDate,
+};
diff --git a/src/server.ts b/src/server.ts
index d6aa5e2..6e93639 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -89,6 +89,7 @@ import {
rejectAtlasSeedEntry,
AtlasSeedNotPendingError,
} from "./db/atlas.js";
+import type { AtlasSeedEntry } from "./db/atlas.js";
import path from "node:path";
import { fileURLToPath } from "node:url";
@@ -455,9 +456,15 @@ app.post(
try {
await handler(req, res);
} catch (err) {
- console.error("[webhook] Handler error:", err);
+ // Emit a correlation ID in BOTH the log and the response so a failed
+ // delivery is greppable (mirrors the /analytics sendFile path).
+ const correlationId = randomUUID().replace(/-/g, "").slice(0, 12);
+ console.error(`[webhook] Handler error cid=${correlationId}:`, err);
if (!res.headersSent) {
- res.status(500).json({ error: "Internal webhook handler error" });
+ res.status(500).json({
+ error: "Internal webhook handler error",
+ correlation_id: correlationId,
+ });
}
}
},
@@ -478,9 +485,13 @@ app.post(
try {
await handler(req, res);
} catch (err) {
- console.error("[slack-webhook] Handler error:", err);
+ const correlationId = randomUUID().replace(/-/g, "").slice(0, 12);
+ console.error(`[slack-webhook] Handler error cid=${correlationId}:`, err);
if (!res.headersSent) {
- res.status(500).json({ error: "Internal webhook handler error" });
+ res.status(500).json({
+ error: "Internal webhook handler error",
+ correlation_id: correlationId,
+ });
}
}
},
@@ -501,9 +512,16 @@ app.post(
try {
await handler(req, res);
} catch (err) {
- console.error("[discord-webhook] Handler error:", err);
+ const correlationId = randomUUID().replace(/-/g, "").slice(0, 12);
+ console.error(
+ `[discord-webhook] Handler error cid=${correlationId}:`,
+ err,
+ );
if (!res.headersSent) {
- res.status(500).json({ error: "Internal webhook handler error" });
+ res.status(500).json({
+ error: "Internal webhook handler error",
+ correlation_id: correlationId,
+ });
}
}
},
@@ -3256,31 +3274,55 @@ async function approveAtlasCandidate(
return;
}
+ // The DB write and the reindex enqueue are split into two phases on purpose.
+ // Only DB-write failures (and pre-write validation) flow through
+ // handleAtlasRatificationError → 409/500. Once the approval is durably
+ // persisted, a reindex-enqueue hiccup must NEVER report the committed
+ // approval as a failure — otherwise the reviewer retries, hits
+ // AtlasSeedNotPendingError (already approved), and gets a confusing 409.
+ let candidate: AtlasSeedEntry;
try {
- const candidate = await approveAtlasSeedEntry(
- canonicalKey,
- atlasActor(req),
- );
- let reindexQueued = false;
- if (orchestratorRef) {
+ candidate = await approveAtlasSeedEntry(canonicalKey, atlasActor(req));
+ } catch (err) {
+ handleAtlasRatificationError(res, "approve", err);
+ return;
+ }
+
+ // Phase 2: best-effort reindex enqueue, AFTER the approval is committed.
+ let reindexQueued = false;
+ if (orchestratorRef) {
+ try {
+ // This synchronous try/catch only traps errors because
+ // queueSourceReindex is synchronous (returns void; the real impl
+ // swallows its own async drain rejection). If it were ever made
+ // async/Promise-returning, this would need `await` inside the try or
+ // the rejection would escape unhandled.
orchestratorRef.queueSourceReindex(candidate.sourceName);
reindexQueued = true;
- } else {
- // The ratification routes mount unconditionally, but orchestratorRef is
- // only wired when search/knowledge tools are enabled. With Atlas sources
- // but no such tools, approval persists yet nothing drives a reindex — so
- // make the gap loud and actionable rather than silently returning 200.
+ } catch (err) {
+ // Approval is already durable; a queue failure must NOT 500. Mirror the
+ // no-orchestrator branch's contract: 200 + reindexQueued:false, with a
+ // loud log so the missed reindex is greppable and actionable.
console.error(
`[atlas] Approved candidate "${canonicalKey}" (source "${candidate.sourceName}"): ` +
- `approval persisted but reindex NOT queued — no indexing orchestrator is wired ` +
- `(search/knowledge tools disabled). Approved content will NOT be indexed until a ` +
- `reindex runs for source "${candidate.sourceName}".`,
+ `approval persisted but reindex enqueue FAILED — approved content will NOT be ` +
+ `indexed until a reindex runs for source "${candidate.sourceName}".`,
+ err,
);
}
- res.json({ candidate, reindexQueued });
- } catch (err) {
- handleAtlasRatificationError(res, "approve", err);
+ } else {
+ // The ratification routes mount unconditionally, but orchestratorRef is
+ // only wired when search/knowledge tools are enabled. With Atlas sources
+ // but no such tools, approval persists yet nothing drives a reindex — so
+ // make the gap loud and actionable rather than silently returning 200.
+ console.error(
+ `[atlas] Approved candidate "${canonicalKey}" (source "${candidate.sourceName}"): ` +
+ `approval persisted but reindex NOT queued — no indexing orchestrator is wired ` +
+ `(search/knowledge tools disabled). Approved content will NOT be indexed until a ` +
+ `reindex runs for source "${candidate.sourceName}".`,
+ );
}
+ res.json({ candidate, reindexQueued });
}
async function rejectAtlasCandidate(
@@ -3464,6 +3506,17 @@ function adminOpsAuth(
*
* All three orchestrator methods are fire-and-forget (return void, dedupe
* internally), so we return 202 Accepted with `{ queued: <descriptor> }`.
+ *
+ * Non-202 outcomes:
+ * - 400 invalid_request — scope missing/unknown, or source/repo missing for
+ * a scoped reindex.
+ * - 400 unknown_source / unknown_repo — a scoped target that doesn't match
+ * any configured source/repo (a typo fails loud rather than silently
+ * no-op-ing in the orchestrator drain).
+ * - 503 orchestrator_unavailable — no indexing orchestrator is wired
+ * (search/knowledge tools disabled).
+ * - 503 config_unavailable — getServerConfig() threw on a misconfigured
+ * environment while validating a scoped target.
*/
async function adminReindexOp(
_req: Request,
@@ -3629,10 +3682,15 @@ function buildAdminOpRegistry(
if (result.status === 202) {
// notifyAdminOpToSlack swallows all its own errors and never rejects,
// so this is fire-and-forget; `void` marks the intentional non-await.
- void notifyAdminOpToSlack(
- "reindex",
- JSON.stringify((result.body as { queued: unknown }).queued),
- );
+ // Guard the `queued` extraction so a future op whose 202 body lacks it
+ // can't emit `undefined` into the Slack message.
+ const queued =
+ result.body &&
+ typeof result.body === "object" &&
+ "queued" in result.body
+ ? (result.body as { queued: unknown }).queued
+ : result.body;
+ void notifyAdminOpToSlack("reindex", JSON.stringify(queued));
}
return result;
},