diff --git a/package.json b/package.json index c65b70b..8edbb72 100644 --- a/package.json +++ b/package.json @@ -29,6 +29,7 @@ }, "main": "dist/index.js", "bin": { + "atlas": "dist/atlas-cli.js", "pathfinder": "dist/cli.js" }, "publishConfig": { @@ -36,6 +37,7 @@ }, "scripts": { "build": "tsc", + "prepublishOnly": "npm run build", "start": "node dist/index.js", "dev": "tsx watch src/index.ts", "seed-index": "tsx scripts/seed-index.ts", diff --git a/pathfinder.example.yaml b/pathfinder.example.yaml index d9398fd..efc8281 100644 --- a/pathfinder.example.yaml +++ b/pathfinder.example.yaml @@ -86,6 +86,19 @@ sources: # target_tokens: 600 # overlap_tokens: 50 +# ── Atlas source (agent-facing codebase knowledge cache) ── +# Seed knowledge is durable; generated Atlas pages are disposable Pathfinder cache. +# - name: atlas +# type: atlas +# seed_path: .pathfinder/atlas/seed +# cache_namespace: my-project +# repositories: +# - repo_url: https://github.com/your-org/your-repo.git +# refs: ["main"] +# chunk: +# target_tokens: 800 +# overlap_tokens: 80 + # ── Slack source (requires SLACK_BOT_TOKEN + OPENAI_API_KEY) ── # - name: community # type: slack @@ -121,6 +134,16 @@ tools: # description: "Browse and search community Q&A" # sources: [community] + # ── Atlas search tool ── + # - name: atlas-search + # type: search + # description: "Search Atlas codebase knowledge." 
+ # source: atlas + # default_limit: 5 + # max_limit: 20 + # result_format: raw + # search_mode: hybrid + # Required for search tools (omit for bash-only mode) embedding: provider: openai diff --git a/src/__tests__/analytics-endpoints.test.ts b/src/__tests__/analytics-endpoints.test.ts index 171007e..e84775c 100644 --- a/src/__tests__/analytics-endpoints.test.ts +++ b/src/__tests__/analytics-endpoints.test.ts @@ -113,6 +113,25 @@ describe("analyticsAuth middleware", () => { expect(next).not.toHaveBeenCalled(); }); + it("returns 503 when root config read fails before auth options are built", () => { + mockGetConfigFn.mockImplementation(() => { + throw new Error("bad root config"); + }); + const res = mockRes(); + const next = vi.fn(); + const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + + analyticsAuth({ headers: {} } as never, res as never, next); + + expect(res.status).toHaveBeenCalledWith(503); + expect(res.json).toHaveBeenCalledWith({ + error: "misconfigured", + error_description: "Analytics config read failed", + }); + expect(next).not.toHaveBeenCalled(); + consoleSpy.mockRestore(); + }); + it("auto-generates token, logs only a fingerprint, and requires auth", () => { mockGetAnalyticsConfigFn.mockReturnValue({ enabled: true, diff --git a/src/__tests__/atlas-cli.test.ts b/src/__tests__/atlas-cli.test.ts new file mode 100644 index 0000000..be4d84d --- /dev/null +++ b/src/__tests__/atlas-cli.test.ts @@ -0,0 +1,1316 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import fs from "fs"; +import path from "path"; +import { pathToFileURL } from "url"; +import { + buildFeedbackArguments, + isAtlasCliEntrypoint, + runAtlasCli, +} from "../atlas-cli.js"; + +const PROJECT_ROOT = path.resolve(__dirname, "..", ".."); + +function jsonResponse( + body: unknown, + init?: ResponseInit & { sessionId?: string }, +): Response { + const headers = new Headers(init?.headers); + headers.set("content-type", "application/json"); + 
if (init?.sessionId) { + headers.set("mcp-session-id", init.sessionId); + } + return new Response(JSON.stringify(body), { ...init, headers }); +} + +function sseResponse( + body: string, + init?: ResponseInit & { sessionId?: string }, +): Response { + const headers = new Headers(init?.headers); + headers.set("content-type", "text/event-stream"); + if (init?.sessionId) { + headers.set("mcp-session-id", init.sessionId); + } + return new Response(body, { ...init, headers }); +} + +describe("atlas CLI", () => { + const originalEnv = { ...process.env }; + let stdout = ""; + let stderr = ""; + + beforeEach(() => { + // Run against a known-clean env so the default-URL/token assertions do not + // go red on a machine/CI that exports ATLAS_MCP_URL / ATLAS_TOKEN. Tests + // that exercise the env fallback set these explicitly. + delete process.env.ATLAS_MCP_URL; + delete process.env.ATLAS_TOKEN; + }); + + afterEach(() => { + process.env = { ...originalEnv }; + vi.unstubAllGlobals(); + vi.restoreAllMocks(); + stdout = ""; + stderr = ""; + }); + + it("exposes a first-party atlas bin", () => { + const packageJson = JSON.parse( + fs.readFileSync(path.join(PROJECT_ROOT, "package.json"), "utf-8"), + ) as { bin?: Record<string, string> }; + + expect(packageJson.bin?.atlas).toBe("dist/atlas-cli.js"); + }); + + it("uses env fallbacks and calls the configured MCP search tool", async () => { + process.env.ATLAS_MCP_URL = "https://atlas.example.test/mcp"; + process.env.ATLAS_TOKEN = "secret-token"; + + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: 1, + result: { + content: [ + { type: "text", text: "Atlas says: use the provider boundary." 
}, + ], + }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(0); + expect(stderr).toBe(""); + expect(stdout).toContain("Atlas says: use the provider boundary."); + expect(fetchMock).toHaveBeenCalledTimes(4); + + const [initUrl, initRequest] = fetchMock.mock.calls[0] as [ + string, + RequestInit, + ]; + expect(initUrl).toBe("https://atlas.example.test/mcp"); + expect(initRequest.headers).toMatchObject({ + Authorization: "Bearer secret-token", + Accept: "application/json, text/event-stream", + }); + expect(JSON.parse(initRequest.body as string)).toMatchObject({ + jsonrpc: "2.0", + method: "initialize", + id: 0, + }); + + const [, notifyRequest] = fetchMock.mock.calls[1] as [string, RequestInit]; + expect(notifyRequest.headers).toMatchObject({ + "Mcp-Session-Id": "session-1", + }); + expect(JSON.parse(notifyRequest.body as string)).toMatchObject({ + jsonrpc: "2.0", + method: "notifications/initialized", + }); + + const [, callRequest] = fetchMock.mock.calls[2] as [string, RequestInit]; + expect(callRequest.headers).toMatchObject({ + "Mcp-Session-Id": "session-1", + }); + expect(JSON.parse(callRequest.body as string)).toEqual({ + jsonrpc: "2.0", + method: "tools/call", + id: 1, + params: { + name: "atlas-search", + arguments: { + query: "provider boundary", + }, + }, + }); + }); + + it("terminates the MCP session after a successful search", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: 1, + result: { + content: [{ type: "text", 
text: "result before close" }], + }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli( + ["search", "provider boundary", "--token", "secret-token"], + { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }, + ); + + expect(exitCode).toBe(0); + expect(stdout).toContain("result before close"); + expect(stderr).toBe(""); + + expect(fetchMock).toHaveBeenCalledTimes(4); + const [closeUrl, closeRequest] = fetchMock.mock.calls[3] as [ + string, + RequestInit, + ]; + expect(closeUrl).toBe("https://mcp.pathfinder.copilotkit.dev/mcp"); + expect(closeRequest.method).toBe("DELETE"); + expect(closeRequest.headers).toMatchObject({ + "Mcp-Session-Id": "session-1", + Authorization: "Bearer secret-token", + }); + expect(closeRequest.body).toBeUndefined(); + }); + + it("terminates the MCP session after a tool call error", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: 1, + error: { message: "tool failed" }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("tool failed"); + + expect(fetchMock).toHaveBeenCalledTimes(4); + const [, closeRequest] = fetchMock.mock.calls[3] as [string, RequestInit]; + expect(closeRequest.method).toBe("DELETE"); + expect(closeRequest.headers).toMatchObject({ + "Mcp-Session-Id": "session-1", + }); + }); + + it("fails 
immediately when MCP initialize returns a JSON-RPC error", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + error: { message: "initialize rejected" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("initialize rejected"); + expect(fetchMock).toHaveBeenCalledTimes(2); + + const [, closeRequest] = fetchMock.mock.calls[1] as [string, RequestInit]; + expect(closeRequest.method).toBe("DELETE"); + }); + + it("terminates the MCP session when initialize returns invalid JSON with a session header", async () => { + const headers = new Headers({ + "content-type": "application/json", + "mcp-session-id": "session-1", + }); + const fetchMock = vi + .fn() + .mockResolvedValueOnce(new Response("not json", { headers })) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("Unparseable response"); + expect(fetchMock).toHaveBeenCalledTimes(2); + + const [, closeRequest] = fetchMock.mock.calls[1] as [string, RequestInit]; + expect(closeRequest.method).toBe("DELETE"); + expect(closeRequest.headers).toMatchObject({ + "Mcp-Session-Id": "session-1", + }); + }); + + it("terminates the MCP session when initialize returns an HTTP error with a session header", async () => { + const headers = new Headers({ + "mcp-session-id": "session-1", + }); + const fetchMock = vi + .fn() + 
.mockResolvedValueOnce( + new Response("initialize failed", { status: 500, headers }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("HTTP 500: initialize failed"); + expect(fetchMock).toHaveBeenCalledTimes(2); + + const [, closeRequest] = fetchMock.mock.calls[1] as [string, RequestInit]; + expect(closeRequest.method).toBe("DELETE"); + expect(closeRequest.headers).toMatchObject({ + "Mcp-Session-Id": "session-1", + }); + }); + + it("handles a tools/call result of null without reporting No response.", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: 1, + result: null, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(0); + expect(stderr).toBe(""); + expect(stdout).not.toContain("No response."); + expect(stdout).not.toContain("no response from server"); + expect(stdout).toContain("No results."); + expect(fetchMock).toHaveBeenCalledTimes(4); + }); + + it("treats a tool result with isError as a failure on stderr with exit 1", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + 
.mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: 1, + result: { + isError: true, + content: [{ type: "text", text: "boom" }], + }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("boom"); + expect(fetchMock).toHaveBeenCalledTimes(4); + + const [, closeRequest] = fetchMock.mock.calls[3] as [string, RequestInit]; + expect(closeRequest.method).toBe("DELETE"); + }); + + it("renders gracefully when a tools/call result carries non-array content", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: 1, + result: { content: "oops" }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(0); + expect(stderr).toBe(""); + expect(stderr).not.toContain("content.map is not a function"); + expect(stdout).toContain("No results."); + expect(fetchMock).toHaveBeenCalledTimes(4); + }); + + it("selects the tools/call response by id across a multi-frame SSE stream", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + 
.mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + sseResponse( + [ + 'data:{"jsonrpc":"2.0","method":"notifications/message","params":{"data":"unrelated"}}', + "", + 'data:{"jsonrpc":"2.0","id":1,"result":{"content":[{"type":"text","text":"the real answer"}]}}', + "", + ].join("\n"), + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(0); + expect(stderr).toBe(""); + expect(stdout).toContain("the real answer"); + expect(stdout).not.toContain("No response."); + expect(fetchMock).toHaveBeenCalledTimes(4); + }); + + it("selects the tools/call response when the server echoes a string id", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: "1", + result: { + content: [{ type: "text", text: "string id answer" }], + }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(0); + expect(stderr).toBe(""); + expect(stdout).toContain("string id answer"); + expect(stdout).not.toContain("No response."); + expect(fetchMock).toHaveBeenCalledTimes(4); + }); + + it("surfaces a JSON-RPC error frame carrying a null id instead of the generic no-response error", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 
0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: null, + error: { message: "rate limited, retry later" }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("rate limited, retry later"); + expect(stderr).not.toContain("no response from server"); + expect(fetchMock).toHaveBeenCalledTimes(4); + + const [, closeRequest] = fetchMock.mock.calls[3] as [string, RequestInit]; + expect(closeRequest.method).toBe("DELETE"); + }); + + it("fails with exit 1 when no tools/call response frame carries id 1", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + sseResponse( + [ + 'data:{"jsonrpc":"2.0","method":"notifications/message","params":{"data":"only a notification, never answered id 1"}}', + "", + ].join("\n"), + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("no response from server for tools/call"); + expect(fetchMock).toHaveBeenCalledTimes(4); + + const [, closeRequest] = fetchMock.mock.calls[3] as [string, RequestInit]; + 
expect(closeRequest.method).toBe("DELETE"); + }); + + it("renders a sole tools/call result frame whose id was omitted", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + result: { + content: [{ type: "text", text: "answer with no id" }], + }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(0); + expect(stderr).toBe(""); + expect(stdout).toContain("answer with no id"); + expect(stdout).not.toContain("no response from server"); + expect(fetchMock).toHaveBeenCalledTimes(4); + }); + + it("fails with no-response when the only frame bears a different explicit id", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: 2, + result: { + content: [{ type: "text", text: "answer for a different request" }], + }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stdout).not.toContain("answer for a different request"); + expect(stderr).toContain("no response from server for 
tools/call"); + expect(fetchMock).toHaveBeenCalledTimes(4); + + const [, closeRequest] = fetchMock.mock.calls[3] as [string, RequestInit]; + expect(closeRequest.method).toBe("DELETE"); + }); + + it("requires --for when building feedback arguments", () => { + expect(() => + buildFeedbackArguments("provider boundary", { + for: undefined, + rating: "helpful", + comment: "Exactly what I needed.", + }), + ).toThrow("atlas: --for is required"); + }); + + it("honors CLI options and prints raw JSON when requested", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { jsonrpc: "2.0", id: 0, result: {} }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: 1, + result: { + content: [{ type: "text", text: "json result" }], + }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli( + [ + "search", + "ratification queue", + "--url", + "http://localhost:3001/mcp", + "--tool", + "atlas_search", + "--limit", + "4", + "--min-score", + "0.62", + "--json", + ], + { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }, + ); + + expect(exitCode).toBe(0); + expect(stderr).toBe(""); + expect(JSON.parse(stdout)).toMatchObject({ + jsonrpc: "2.0", + id: 1, + result: { + content: [{ type: "text", text: "json result" }], + }, + }); + + const [, callRequest] = fetchMock.mock.calls[2] as [string, RequestInit]; + expect(JSON.parse(callRequest.body as string)).toEqual({ + jsonrpc: "2.0", + method: "tools/call", + id: 1, + params: { + name: "atlas_search", + arguments: { + query: "ratification queue", + limit: 4, + min_score: 0.62, + }, + }, + }); + + expect(fetchMock).toHaveBeenCalledTimes(4); + const [, closeRequest] = fetchMock.mock.calls[3] as [string, RequestInit]; + 
expect(closeRequest.method).toBe("DELETE"); + }); + + it.each([ + ["--limit", "not-a-number", "limit must be a positive integer"], + ["--limit", "10abc", "limit must be a positive integer"], + ["--limit", "-1", "limit must be a positive integer"], + ["--limit", "0", "limit must be a positive integer"], + ["--limit", "NaN", "limit must be a positive integer"], + [ + "--min-score", + "not-a-score", + "min-score must be a finite number in [0, 1]", + ], + ["--min-score", "0.5abc", "min-score must be a finite number in [0, 1]"], + ["--min-score", "2", "min-score must be a finite number in [0, 1]"], + ["--min-score", "-0.1", "min-score must be a finite number in [0, 1]"], + ["--min-score", "NaN", "min-score must be a finite number in [0, 1]"], + ])( + "rejects invalid %s value %s before calling MCP", + async (option, value, expectedMessage) => { + const fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli( + ["search", "provider boundary", option, value], + { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }, + ); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain(expectedMessage); + expect(fetchMock).not.toHaveBeenCalled(); + }, + ); + + it("parses SSE events without a space after data and with multiline data", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + sseResponse('data:{"jsonrpc":"2.0","id":0,"result":{}}\n\n', { + sessionId: "session-1", + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + sseResponse( + [ + 'data:{"jsonrpc":"2.0","id":1,"result":{"content":[', + 'data:{"type":"text","text":"multiline SSE result"}', + "data:]}}", + "", + ].join("\n"), + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + 
}, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(0); + expect(stderr).toBe(""); + expect(stdout).toContain("multiline SSE result"); + expect(fetchMock).toHaveBeenCalledTimes(4); + }); + + it("skips empty SSE data frames without crashing", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + sseResponse('data:{"jsonrpc":"2.0","id":0,"result":{}}\n\n', { + sessionId: "session-1", + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + sseResponse( + [ + ": keepalive comment", + "data:", + "", + 'data:{"jsonrpc":"2.0","id":1,"result":{"content":[{"type":"text","text":"survived the keepalive"}]}}', + "", + ].join("\n"), + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(0); + expect(stderr).toBe(""); + expect(stdout).toContain("survived the keepalive"); + expect(fetchMock).toHaveBeenCalledTimes(4); + }); + + it("defaults to the Atlas search tool configured in pathfinder.example.yaml", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: 1, + result: { + content: [{ type: "text", text: "default tool result" }], + }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["search", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(0); + expect(stderr).toBe(""); + + 
const [, callRequest] = fetchMock.mock.calls[2] as [string, RequestInit]; + expect(JSON.parse(callRequest.body as string)).toMatchObject({ + method: "tools/call", + params: { + name: "atlas-search", + }, + }); + }); + + it("returns an existing-style error for missing search query", async () => { + const exitCode = await runAtlasCli(["search"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("error: missing required argument 'query'"); + }); + + it("submits feedback through the configured MCP feedback tool", async () => { + process.env.ATLAS_MCP_URL = "https://atlas.example.test/mcp"; + process.env.ATLAS_TOKEN = "secret-token"; + + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: 1, + result: { + content: [{ type: "text", text: "Feedback recorded. Thank you." }], + }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli( + [ + "feedback", + "provider boundary", + "--rating", + "helpful", + "--comment", + "Exactly what I needed.", + ], + { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }, + ); + + expect(exitCode).toBe(0); + expect(stderr).toBe(""); + expect(stdout).toContain("Feedback recorded. 
Thank you."); + expect(fetchMock).toHaveBeenCalledTimes(4); + + const [, callRequest] = fetchMock.mock.calls[2] as [string, RequestInit]; + expect(JSON.parse(callRequest.body as string)).toEqual({ + jsonrpc: "2.0", + method: "tools/call", + id: 1, + params: { + name: "submit-feedback", + arguments: { + tool_name: "atlas-search", + query: "provider boundary", + rating: "helpful", + comment: "Exactly what I needed.", + }, + }, + }); + }); + + it("maps --for to tool_name and honors --tool for the feedback tool name", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: 1, + result: { + content: [{ type: "text", text: "Feedback recorded. Thank you." }], + }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli( + [ + "feedback", + "ratification queue", + "--rating", + "not_helpful", + "--comment", + "Wrong section.", + "--for", + "atlas-deep-search", + "--tool", + "collect-feedback", + ], + { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }, + ); + + expect(exitCode).toBe(0); + expect(stderr).toBe(""); + + const [, callRequest] = fetchMock.mock.calls[2] as [string, RequestInit]; + expect(JSON.parse(callRequest.body as string)).toEqual({ + jsonrpc: "2.0", + method: "tools/call", + id: 1, + params: { + name: "collect-feedback", + arguments: { + tool_name: "atlas-deep-search", + query: "ratification queue", + rating: "not_helpful", + comment: "Wrong section.", + }, + }, + }); + }); + + it.each([["sometimes"], ["yes"], ["HELPFUL"], [""]])( + "rejects invalid feedback rating %s before calling MCP", + async (rating) => { + const fetchMock = vi.fn(); + 
vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli( + [ + "feedback", + "provider boundary", + "--rating", + rating, + "--comment", + "Some comment.", + ], + { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }, + ); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("rating must be one of: helpful, not_helpful"); + expect(fetchMock).not.toHaveBeenCalled(); + }, + ); + + it("rejects an empty feedback comment before calling MCP", async () => { + const fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli( + [ + "feedback", + "provider boundary", + "--rating", + "helpful", + "--comment", + " ", + ], + { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }, + ); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("comment must not be empty"); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("surfaces a feedback tool-call error as exit 1", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse( + { + jsonrpc: "2.0", + id: 0, + result: { protocolVersion: "2025-03-26" }, + }, + { sessionId: "session-1" }, + ), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })) + .mockResolvedValueOnce( + jsonResponse({ + jsonrpc: "2.0", + id: 1, + error: { message: "feedback rejected" }, + }), + ) + .mockResolvedValueOnce(new Response(null, { status: 202 })); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli( + [ + "feedback", + "provider boundary", + "--rating", + "helpful", + "--comment", + "Helpful answer.", + ], + { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }, + ); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("feedback rejected"); + expect(fetchMock).toHaveBeenCalledTimes(4); + + const [, closeRequest] = 
fetchMock.mock.calls[3] as [string, RequestInit]; + expect(closeRequest.method).toBe("DELETE"); + }); + + it("requires the rating and comment options for feedback", async () => { + const fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); + + const exitCode = await runAtlasCli(["feedback", "provider boundary"], { + stdout: (text) => { + stdout += text; + }, + stderr: (text) => { + stderr += text; + }, + }); + + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("required option"); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("ships a prepublishOnly build guard", () => { /* Reads the real package.json from PROJECT_ROOT; only the "scripts" map (script name to command string) is inspected. */ + const packageJson = JSON.parse( + fs.readFileSync(path.join(PROJECT_ROOT, "package.json"), "utf-8"), + ) as { scripts?: Record<string, string> }; + + expect(packageJson.scripts?.prepublishOnly).toBe("npm run build"); + }); + + it("recognizes URL-escaped CLI entrypoint paths", () => { /* The file name contains a space, so the file URL form differs textually from the argv path. */ + const entrypointPath = path.join(PROJECT_ROOT, "dist", "atlas cli.js"); + const nonNormalizedArgvPath = path.join( + PROJECT_ROOT, + "dist", + "..", + "dist", + "atlas cli.js", + ); + + expect( + isAtlasCliEntrypoint( + pathToFileURL(entrypointPath).href, + nonNormalizedArgvPath, + ), + ).toBe(true); + }); + + it("recognizes symlinked CLI entrypoint paths", () => { + const tempDir = fs.mkdtempSync(path.join(PROJECT_ROOT, ".atlas-cli-")); + + try { + const realEntrypointPath = path.join(tempDir, "dist", "atlas-cli.js"); + const symlinkPath = path.join(tempDir, "node_modules", ".bin", "atlas"); + fs.mkdirSync(path.dirname(realEntrypointPath), { recursive: true }); + fs.mkdirSync(path.dirname(symlinkPath), { recursive: true }); + fs.writeFileSync(realEntrypointPath, "", "utf-8"); + fs.symlinkSync(realEntrypointPath, symlinkPath); + + expect( + isAtlasCliEntrypoint( + pathToFileURL(realEntrypointPath).href, + symlinkPath, + ), + ).toBe(true); + } finally { /* Always remove the temp tree, even when an assertion above throws. */ + fs.rmSync(tempDir, { recursive: true, force: true }); + } + }); +}); diff --git a/src/__tests__/atlas-db.test.ts 
b/src/__tests__/atlas-db.test.ts new file mode 100644 index 0000000..0e5bd03 --- /dev/null +++ b/src/__tests__/atlas-db.test.ts @@ -0,0 +1,459 @@ +import { describe, it, expect, beforeAll, afterAll, beforeEach } from "vitest"; +import { PGlite } from "@electric-sql/pglite"; +import { __setPoolForTesting, __resetPoolForTesting } from "../db/client.js"; +import { generatePostSchemaMigration } from "../db/schema.js"; +import { + approveAtlasSeedEntry, + clearAtlasCachePageStale, + getAtlasStateToken, + listIndexableAtlasContent, + listRemovedAtlasContentIds, + listPendingAtlasSeedCandidates, + markAtlasCachePageStale, + rejectAtlasSeedEntry, + upsertAtlasCachePage, + upsertAtlasSeedCandidate, +} from "../db/atlas.js"; + +const ATLAS_DDL_MARKER = "-- Atlas durable seed knowledge."; + +/** Returns the generated post-schema migration SQL from ATLAS_DDL_MARKER through the end; throws if the marker is not present. */ function extractAtlasDdl(): string { + const sql = generatePostSchemaMigration(); + const idx = sql.indexOf(ATLAS_DDL_MARKER); + if (idx < 0) { + throw new Error(`Could not locate "${ATLAS_DDL_MARKER}" in schema SQL`); + } + return sql.slice(idx); +} + +/** Wraps one PGlite instance in an object exposing query/connect/end, all delegating to that instance; the client returned by connect() has a no-op release(). NOTE(review): presumably the subset of the pool interface that __setPoolForTesting expects; confirm against ../db/client.js. */ function poolFromPglite(db: PGlite) { + return { + query: (text: string, params?: unknown[]) => db.query(text, params), + connect: async () => ({ + query: (text: string, params?: unknown[]) => db.query(text, params), + release: () => {}, + }), + end: async () => db.close(), + }; +} + +describe("Atlas DB helpers", () => { + let db: PGlite; + + beforeAll(async () => { /* Create a new PGlite instance, apply only the Atlas DDL, and install it as the pool used by the db helpers. */ + db = new PGlite(); + await db.waitReady; + await db.exec(extractAtlasDdl()); + __setPoolForTesting(poolFromPglite(db)); + }); + + afterAll(async () => { /* Restore the pool override before closing the database. */ + __resetPoolForTesting(); + await db.close(); + }); + + beforeEach(async () => { /* Empty both Atlas tables so every test starts with no rows. */ + await db.query("DELETE FROM atlas_cache_pages"); + await db.query("DELETE FROM atlas_seed_entries"); + }); + + it("upserts seed candidates idempotently and preserves approved rows", async () => { + const first = await upsertAtlasSeedCandidate({ + canonicalKey: "repo:main:runtime", + sourceName: "atlas", + repoUrl: 
"https://github.com/CopilotKit/pathfinder", + ref: "main", + subsystem: "runtime", + title: "Runtime shape", + content: "Initial rationale", + provenance: { from: "pr" }, + evidence: [{ url: "https://example.test/pr/1" }], + }); + + const updatedPending = await upsertAtlasSeedCandidate({ + canonicalKey: "repo:main:runtime", + sourceName: "atlas", + title: "Runtime shape v2", + content: "Updated rationale", + provenance: { from: "issue" }, + evidence: [], + }); + + expect(updatedPending.id).toBe(first.id); + expect(updatedPending.title).toBe("Runtime shape v2"); + expect(updatedPending.content).toBe("Updated rationale"); + expect(updatedPending.status).toBe("pending"); + + await approveAtlasSeedEntry("repo:main:runtime", "reviewer@example.test"); + const duplicateApproved = await upsertAtlasSeedCandidate({ + canonicalKey: "repo:main:runtime", + sourceName: "atlas", + title: "Should not overwrite", + content: "Should not overwrite", + provenance: { from: "duplicate" }, + evidence: [], + }); + + expect(duplicateApproved.title).toBe("Runtime shape v2"); + expect(duplicateApproved.content).toBe("Updated rationale"); + expect(duplicateApproved.status).toBe("approved"); + expect(duplicateApproved.approvedBy).toBe("reviewer@example.test"); + }); + + it("enforces approve/reject status transitions", async () => { + await upsertAtlasSeedCandidate({ + canonicalKey: "pending:one", + sourceName: "atlas", + title: "Pending one", + content: "Candidate one", + provenance: {}, + evidence: [], + }); + await upsertAtlasSeedCandidate({ + canonicalKey: "pending:two", + sourceName: "atlas", + title: "Pending two", + content: "Candidate two", + provenance: {}, + evidence: [], + }); + + const approved = await approveAtlasSeedEntry("pending:one", "alice"); + expect(approved.status).toBe("approved"); + expect(approved.approvedBy).toBe("alice"); + expect(approved.approvedAt).toBeTruthy(); + + await expect( + rejectAtlasSeedEntry("pending:one", "bob", "stale"), + ).rejects.toThrow("Cannot 
reject atlas seed entry"); + + const rejected = await rejectAtlasSeedEntry("pending:two", "bob", "stale"); + expect(rejected.status).toBe("rejected"); + expect(rejected.rejectedBy).toBe("bob"); + expect(rejected.rejectionReason).toBe("stale"); + + await expect(approveAtlasSeedEntry("pending:two", "alice")).rejects.toThrow( + "Cannot approve atlas seed entry", + ); + }); + + it("lists pending seed candidates oldest first with source filtering", async () => { + await upsertAtlasSeedCandidate({ + canonicalKey: "one", + sourceName: "atlas-a", + title: "One", + content: "One content", + provenance: {}, + evidence: [], + }); + await upsertAtlasSeedCandidate({ + canonicalKey: "two", + sourceName: "atlas-b", + title: "Two", + content: "Two content", + provenance: {}, + evidence: [], + }); + await upsertAtlasSeedCandidate({ + canonicalKey: "three", + sourceName: "atlas-a", + title: "Three", + content: "Three content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry("one", "reviewer"); + + const allPending = await listPendingAtlasSeedCandidates(); + expect(allPending.map((row) => row.canonicalKey)).toEqual(["two", "three"]); + + const sourcePending = await listPendingAtlasSeedCandidates({ + sourceName: "atlas-a", + }); + expect(sourcePending.map((row) => row.canonicalKey)).toEqual(["three"]); + }); + + it("upserts cache pages and marks/clears stale state", async () => { + const page = await upsertAtlasCachePage({ + pageKey: "runtime/overview", + sourceName: "atlas", + title: "Runtime overview", + content: "Generated page body", + contentHash: "hash-1", + generatedSeedIds: [1, 2], + provenance: { generatedBy: "gardener" }, + generatedAt: new Date("2026-01-01T00:00:00Z"), + }); + + expect(page.stale).toBe(false); + expect(page.content).toBe("Generated page body"); + expect(page.generatedSeedIds).toEqual([1, 2]); + + const stale = await markAtlasCachePageStale( + "runtime/overview", + "seed changed", + ); + expect(stale.stale).toBe(true); + 
expect(stale.staleReason).toBe("seed changed"); + + const regenerated = await clearAtlasCachePageStale({ + pageKey: "runtime/overview", + content: "Regenerated body", + contentHash: "hash-2", + generatedSeedIds: [3], + provenance: { regeneratedBy: "gardener-v2" }, + generatedAt: new Date("2026-01-02T00:00:00Z"), + }); + expect(regenerated.stale).toBe(false); + expect(regenerated.staleReason).toBeNull(); + expect(regenerated.contentHash).toBe("hash-2"); + expect(regenerated.content).toBe("Regenerated body"); + expect(regenerated.generatedSeedIds).toEqual([3]); + expect(regenerated.provenance).toMatchObject({ + generatedBy: "gardener", + regeneratedBy: "gardener-v2", + }); + }); + + it("returns only approved seed entries and non-stale cache pages for indexing", async () => { + const approved = await upsertAtlasSeedCandidate({ + canonicalKey: "approved", + sourceName: "atlas", + title: "Approved", + content: "Approved content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry(approved.canonicalKey, "reviewer"); + await upsertAtlasSeedCandidate({ + canonicalKey: "pending", + sourceName: "atlas", + title: "Pending", + content: "Pending content", + provenance: {}, + evidence: [], + }); + await upsertAtlasCachePage({ + pageKey: "fresh", + sourceName: "atlas", + title: "Fresh page", + content: "Fresh cache content", + contentHash: "fresh-hash", + }); + await upsertAtlasCachePage({ + pageKey: "stale", + sourceName: "atlas", + title: "Stale page", + content: "Stale cache content", + contentHash: "stale-hash", + }); + await markAtlasCachePageStale("stale", "seed changed"); + + const items = await listIndexableAtlasContent("atlas"); + + expect(items.map((item) => `${item.kind}:${item.key}`)).toEqual([ + "seed:approved", + "cache:fresh", + ]); + }); + + it("filters indexable Atlas content by configured repositories", async () => { + const runtime = await upsertAtlasSeedCandidate({ + canonicalKey: "runtime", + sourceName: "atlas", + repoUrl: 
"https://github.com/CopilotKit/pathfinder", + ref: "main", + subsystem: "runtime", + title: "Runtime", + content: "Runtime content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry(runtime.canonicalKey, "reviewer"); + const docs = await upsertAtlasSeedCandidate({ + canonicalKey: "docs", + sourceName: "atlas", + repoUrl: "https://github.com/CopilotKit/pathfinder", + ref: "main", + subsystem: "docs", + title: "Docs", + content: "Docs content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry(docs.canonicalKey, "reviewer"); + const otherRepo = await upsertAtlasSeedCandidate({ + canonicalKey: "other-repo", + sourceName: "atlas", + repoUrl: "https://github.com/CopilotKit/other", + ref: "main", + subsystem: "runtime", + title: "Other repo", + content: "Other repo content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry(otherRepo.canonicalKey, "reviewer"); + await upsertAtlasCachePage({ + pageKey: "runtime/cache", + sourceName: "atlas", + title: "Runtime cache", + content: "Runtime cache content", + contentHash: "runtime-cache", + generatedSeedIds: [runtime.id], + }); + await upsertAtlasCachePage({ + pageKey: "docs/cache", + sourceName: "atlas", + title: "Docs cache", + content: "Docs cache content", + contentHash: "docs-cache", + generatedSeedIds: [docs.id], + }); + + const items = await listIndexableAtlasContent("atlas", { + repositories: [ + { + repoUrl: "https://github.com/CopilotKit/pathfinder", + refs: ["main"], + subsystems: ["runtime"], + }, + ], + }); + + expect(items.map((item) => `${item.kind}:${item.key}`)).toEqual([ + "seed:runtime", + "cache:runtime/cache", + ]); + }); + + it("bounds Atlas acquisition queries to a captured high-water token", async () => { + await upsertAtlasSeedCandidate({ + canonicalKey: "included", + sourceName: "atlas", + title: "Included", + content: "Included content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry("included", "reviewer"); + await 
upsertAtlasSeedCandidate({ + canonicalKey: "future", + sourceName: "atlas", + title: "Future", + content: "Future content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry("future", "reviewer"); + await db.query( + "UPDATE atlas_seed_entries SET updated_at = $2 WHERE canonical_key = $1", + ["included", new Date("2026-01-01T00:00:00Z")], + ); + await db.query( + "UPDATE atlas_seed_entries SET updated_at = $2 WHERE canonical_key = $1", + ["future", new Date("2026-01-02T00:00:00Z")], + ); + + const items = await listIndexableAtlasContent("atlas", { + changedOnOrBefore: new Date("2026-01-01T12:00:00Z"), + }); + + expect(items.map((item) => item.key)).toEqual(["included"]); + }); + + it("surfaces stale cache pages as removals and includes them in state tokens", async () => { + await upsertAtlasCachePage({ + pageKey: "fresh", + sourceName: "atlas", + title: "Fresh page", + content: "Fresh cache content", + contentHash: "fresh-hash", + }); + await upsertAtlasCachePage({ + pageKey: "stale", + sourceName: "atlas", + title: "Stale page", + content: "Stale cache content", + contentHash: "stale-hash", + }); + await db.query( + "UPDATE atlas_cache_pages SET updated_at = $2 WHERE page_key = $1", + ["fresh", new Date("2026-01-01T00:00:00Z")], + ); + await markAtlasCachePageStale("stale", "seed changed"); + await db.query( + "UPDATE atlas_cache_pages SET updated_at = $2 WHERE page_key = $1", + ["stale", new Date("2026-01-02T00:00:00Z")], + ); + + expect(await getAtlasStateToken("atlas")).toBe("2026-01-02T00:00:00.000Z"); + expect( + await listRemovedAtlasContentIds("atlas", { + changedAfter: new Date("2026-01-01T12:00:00Z"), + }), + ).toEqual(["atlas-cache:stale"]); + }); + + it("surfaces rejected seeds and empty cache pages as removals with repository filters", async () => { + const rejected = await upsertAtlasSeedCandidate({ + canonicalKey: "rejected", + sourceName: "atlas", + repoUrl: "https://github.com/CopilotKit/pathfinder", + ref: "main", + subsystem: 
"runtime", + title: "Rejected", + content: "Rejected content", + provenance: {}, + evidence: [], + }); + await rejectAtlasSeedEntry(rejected.canonicalKey, "reviewer", "wrong"); + const otherRepoRejected = await upsertAtlasSeedCandidate({ + canonicalKey: "other-rejected", + sourceName: "atlas", + repoUrl: "https://github.com/CopilotKit/other", + ref: "main", + subsystem: "runtime", + title: "Other rejected", + content: "Other rejected content", + provenance: {}, + evidence: [], + }); + await rejectAtlasSeedEntry( + otherRepoRejected.canonicalKey, + "reviewer", + "wrong", + ); + await upsertAtlasCachePage({ + pageKey: "runtime/empty", + sourceName: "atlas", + title: "Runtime empty", + content: "", + contentHash: "empty-hash", + generatedSeedIds: [rejected.id], + }); + await upsertAtlasCachePage({ + pageKey: "other/empty", + sourceName: "atlas", + title: "Other empty", + content: "", + contentHash: "other-empty-hash", + generatedSeedIds: [otherRepoRejected.id], + }); + + const removedIds = await listRemovedAtlasContentIds("atlas", { + repositories: [ + { + repoUrl: "https://github.com/CopilotKit/pathfinder", + refs: ["main"], + subsystems: ["runtime"], + }, + ], + }); + + expect(removedIds).toEqual([ + "atlas-seed:rejected", + "atlas-cache:runtime/empty", + ]); + }); +}); diff --git a/src/__tests__/atlas-github-webhook.test.ts b/src/__tests__/atlas-github-webhook.test.ts new file mode 100644 index 0000000..fe32d7a --- /dev/null +++ b/src/__tests__/atlas-github-webhook.test.ts @@ -0,0 +1,343 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import crypto from "node:crypto"; +import { + createWebhookHandler, + type ReindexOrchestrator, +} from "../webhooks/github.js"; + +const mockGetConfig = vi.fn(); +const mockGetServerConfig = vi.fn(); +const mockRecordWebhookDelivery = vi.fn().mockResolvedValue(undefined); +const mockUpsertAtlasSeedCandidate = vi.fn().mockResolvedValue({ + id: 1, + canonicalKey: "github-pr:atlas:org/repo:42", + status: "pending", 
+}); + +vi.mock("../config.js", () => ({ + getConfig: (...args: unknown[]) => mockGetConfig(...args), + getServerConfig: (...args: unknown[]) => mockGetServerConfig(...args), +})); + +vi.mock("../db/queries.js", () => ({ + recordWebhookDelivery: (...args: unknown[]) => + mockRecordWebhookDelivery(...args), +})); + +vi.mock("../db/atlas.js", () => ({ + upsertAtlasSeedCandidate: (...args: unknown[]) => + mockUpsertAtlasSeedCandidate(...args), +})); + +const WEBHOOK_SECRET = "test-webhook-secret-123"; + +/** Returns the "sha256="-prefixed hex HMAC-SHA256 of `body` keyed by `secret` (defaults to WEBHOOK_SECRET); used below as the x-hub-signature-256 header value. */ function sign(body: Buffer, secret: string = WEBHOOK_SECRET): string { + return ( + "sha256=" + crypto.createHmac("sha256", secret).update(body).digest("hex") + ); +} + +/** Builds a stub request/response pair. The request body is the raw Buffer of `body` (JSON-stringified unless already a string) and carries a valid signature, event "pull_request" and delivery id "delivery-1"; entries in `headers` override those defaults and may be arrays to simulate repeated headers. `res.status` and `res.json` are chainable spies. */ function mockReqRes( + body: object | string, + headers: Record<string, string | string[]> = {}, +) { + const bodyStr = typeof body === "string" ? body : JSON.stringify(body); + const rawBody = Buffer.from(bodyStr); + + const req = { + body: rawBody, + headers: { + "x-hub-signature-256": sign(rawBody), + "x-github-event": "pull_request", + "x-github-delivery": "delivery-1", + ...headers, + }, + } as any; + + const res = { + status: vi.fn().mockReturnThis(), + json: vi.fn().mockReturnThis(), + } as any; + + return { req, res }; +} + +/** Returns a closed, merged pull_request payload for PR #42 of org/repo targeting "main"; top-level keys in `overrides` replace the defaults via a shallow spread. */ function makePullRequestPayload(overrides: Record<string, unknown> = {}) { + return { + action: "closed", + repository: { + clone_url: "https://github.com/org/repo.git", + default_branch: "main", + full_name: "org/repo", + }, + pull_request: { + number: 42, + merged: true, + merge_commit_sha: "abc12345deadbeef", + title: "Explain runtime architecture", + body: "The runtime now routes requests through the agent bridge.", + html_url: "https://github.com/org/repo/pull/42", + base: { ref: "main" }, + head: { ref: "feature/runtime-architecture" }, + user: { login: "octocat" }, + merged_by: { login: "maintainer" }, + }, + ...overrides, + }; +} + +function makeServerConfig() { + return { + sources: [ + { + name: "atlas", + type: "atlas", + chunk: {}, + }, + { + name: "docs-source", + type: "markdown", + path: "docs", 
+ file_patterns: ["**/*.md"], + chunk: {}, + }, + ], + webhook: { + repo_sources: { + "org/repo": ["atlas", "docs-source"], + }, + path_triggers: {}, + }, + }; +} + +describe("GitHub webhook Atlas seed extraction", () => { + let orchestrator: ReindexOrchestrator; + let handler: ReturnType<typeof createWebhookHandler>; /* rebuilt in beforeEach around a fresh orchestrator stub */ + + beforeEach(() => { /* Clear recorded mock calls, then restore the default secret and server config so per-test overrides do not carry over. */ + vi.clearAllMocks(); + mockGetConfig.mockReturnValue({ + githubWebhookSecret: WEBHOOK_SECRET, + }); + mockGetServerConfig.mockReturnValue(makeServerConfig()); + orchestrator = { + queueIncrementalReindex: vi.fn(), + queueSourceReindex: vi.fn(), + }; + handler = createWebhookHandler(orchestrator); + }); + + it("creates a pending Atlas seed candidate for a merged pull request into the default branch", async () => { + const { req, res } = mockReqRes(makePullRequestPayload()); + + await handler(req, res); + + expect(res.status).toHaveBeenCalledWith(200); + expect(res.json).toHaveBeenCalledWith({ + queued: true, + atlas_seed_candidates: 1, + }); + expect(mockUpsertAtlasSeedCandidate).toHaveBeenCalledWith({ + canonicalKey: "github-pr:atlas:org/repo:42", + sourceName: "atlas", + repoUrl: "https://github.com/org/repo.git", + ref: "main", + subsystem: null, + title: "PR #42: Explain runtime architecture", + content: expect.stringContaining("Explain runtime architecture"), + provenance: expect.objectContaining({ + provider: "github", + event: "pull_request", + delivery_id: "delivery-1", + repo: "org/repo", + pr_number: 42, + url: "https://github.com/org/repo/pull/42", + base_branch: "main", + head_branch: "feature/runtime-architecture", + merge_commit_sha: "abc12345deadbeef", + }), + evidence: [ + expect.objectContaining({ + type: "pull_request", + url: "https://github.com/org/repo/pull/42", + }), + ], + }); + expect(orchestrator.queueIncrementalReindex).not.toHaveBeenCalled(); + }); + + it("ignores merged pull requests for repos without an Atlas source", async () => { + mockGetServerConfig.mockReturnValue({ + ...makeServerConfig(), + webhook: { + repo_sources: { 
"org/repo": ["docs-source"] }, + path_triggers: {}, + }, + }); + const { req, res } = mockReqRes(makePullRequestPayload()); + + await handler(req, res); + + expect(res.status).toHaveBeenCalledWith(200); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + ignored: true, + reason: "repo has no atlas sources", + }), + ); + expect(mockUpsertAtlasSeedCandidate).not.toHaveBeenCalled(); + }); + + it("rejects pull request seed extraction when the signature is invalid", async () => { + const { req, res } = mockReqRes(makePullRequestPayload(), { + "x-hub-signature-256": + "sha256=0000000000000000000000000000000000000000000000000000000000000000", + }); + + await handler(req, res); + + expect(res.status).toHaveBeenCalledWith(401); + expect(mockUpsertAtlasSeedCandidate).not.toHaveBeenCalled(); + }); + + it("rejects duplicate delivery headers before extracting pull request seeds", async () => { + const { req, res } = mockReqRes(makePullRequestPayload(), { + "x-github-delivery": ["delivery-1", "delivery-2"], + }); + + await handler(req, res); + + expect(res.status).toHaveBeenCalledWith(400); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: "Duplicate GitHub webhook header", + }), + ); + expect(mockUpsertAtlasSeedCandidate).not.toHaveBeenCalled(); + }); + + it("rejects runtime duplicate delivery headers before extracting pull request seeds", async () => { + const { req, res } = mockReqRes(makePullRequestPayload(), { + "x-github-delivery": "delivery-1, delivery-2", + }); + req.rawHeaders = [ + "X-Hub-Signature-256", + req.headers["x-hub-signature-256"], + "X-GitHub-Event", + "pull_request", + "X-GitHub-Delivery", + "delivery-1", + "x-github-delivery", + "delivery-2", + ]; + + await handler(req, res); + + expect(res.status).toHaveBeenCalledWith(400); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: "Duplicate GitHub webhook header", + header: "x-github-delivery", + }), + ); + 
expect(mockUpsertAtlasSeedCandidate).not.toHaveBeenCalled(); + }); + + it("ignores merged pull requests whose base branch is not the default branch", async () => { + const { req, res } = mockReqRes( + makePullRequestPayload({ + pull_request: { + ...makePullRequestPayload().pull_request, + base: { ref: "release" }, + }, + }), + ); + + await handler(req, res); + + expect(res.status).toHaveBeenCalledWith(200); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + ignored: true, + reason: "not the default branch", + }), + ); + expect(mockUpsertAtlasSeedCandidate).not.toHaveBeenCalled(); + }); + + it("uses a stable canonical key so duplicate pull request deliveries are idempotent", async () => { + const payload = makePullRequestPayload(); + const first = mockReqRes(payload, { "x-github-delivery": "delivery-1" }); + const second = mockReqRes(payload, { "x-github-delivery": "delivery-2" }); + + await handler(first.req, first.res); + await handler(second.req, second.res); + + expect(mockUpsertAtlasSeedCandidate).toHaveBeenCalledTimes(2); + expect( + mockUpsertAtlasSeedCandidate.mock.calls.map(([input]) => input), + ).toEqual([ + expect.objectContaining({ canonicalKey: "github-pr:atlas:org/repo:42" }), + expect.objectContaining({ canonicalKey: "github-pr:atlas:org/repo:42" }), + ]); + }); + + it("uses source-scoped canonical keys for multiple Atlas sources on the same pull request", async () => { + mockGetServerConfig.mockReturnValue({ + ...makeServerConfig(), + sources: [ + { name: "atlas-runtime", type: "atlas", chunk: {} }, + { name: "atlas-ui", type: "atlas", chunk: {} }, + ], + webhook: { + repo_sources: { + "org/repo": ["atlas-runtime", "atlas-ui"], + }, + path_triggers: {}, + }, + }); + const { req, res } = mockReqRes(makePullRequestPayload()); + + await handler(req, res); + + expect(res.status).toHaveBeenCalledWith(200); + expect(res.json).toHaveBeenCalledWith({ + queued: true, + atlas_seed_candidates: 2, + }); + expect( + 
mockUpsertAtlasSeedCandidate.mock.calls.map(([input]) => input), + ).toEqual([ + expect.objectContaining({ + canonicalKey: "github-pr:atlas-runtime:org/repo:42", + sourceName: "atlas-runtime", + }), + expect.objectContaining({ + canonicalKey: "github-pr:atlas-ui:org/repo:42", + sourceName: "atlas-ui", + }), + ]); + }); + + it("fails loudly for malformed configured Atlas pull request payloads", async () => { + const { req, res } = mockReqRes({ + action: "closed", + repository: { + clone_url: "https://github.com/org/repo.git", + default_branch: "main", + full_name: "org/repo", + }, + }); + + await handler(req, res); + + expect(res.status).toHaveBeenCalledWith(400); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: "Malformed Atlas pull_request payload", + }), + ); + expect(mockUpsertAtlasSeedCandidate).not.toHaveBeenCalled(); + }); +}); diff --git a/src/__tests__/atlas-provider.test.ts b/src/__tests__/atlas-provider.test.ts new file mode 100644 index 0000000..c2f1f17 --- /dev/null +++ b/src/__tests__/atlas-provider.test.ts @@ -0,0 +1,452 @@ +import { + describe, + it, + expect, + beforeAll, + afterAll, + beforeEach, + vi, +} from "vitest"; +import { PGlite } from "@electric-sql/pglite"; +import { __setPoolForTesting, __resetPoolForTesting } from "../db/client.js"; +import { generatePostSchemaMigration } from "../db/schema.js"; +import * as atlasDb from "../db/atlas.js"; +import { + approveAtlasSeedEntry, + getAtlasStateToken, + markAtlasCachePageStale, + rejectAtlasSeedEntry, + upsertAtlasCachePage, + upsertAtlasSeedCandidate, +} from "../db/atlas.js"; +import { AtlasDataProvider } from "../indexing/providers/atlas.js"; +import { getProvider } from "../indexing/providers/index.js"; +import type { AtlasSourceConfig, SourceConfig } from "../types.js"; + +const ATLAS_DDL_MARKER = "-- Atlas durable seed knowledge."; + +function extractAtlasDdl(): string { + const sql = generatePostSchemaMigration(); + const idx = sql.indexOf(ATLAS_DDL_MARKER); 
+  if (idx < 0) {
+    throw new Error(`Could not locate "${ATLAS_DDL_MARKER}" in schema SQL`);
+  }
+  return sql.slice(idx);
+}
+
+function poolFromPglite(db: PGlite) {
+  return {
+    query: (text: string, params?: unknown[]) => db.query(text, params),
+    connect: async () => ({
+      query: (text: string, params?: unknown[]) => db.query(text, params),
+      release: () => {},
+    }),
+    end: async () => db.close(),
+  };
+}
+
+const atlasConfig: AtlasSourceConfig = {
+  type: "atlas",
+  name: "atlas",
+  chunk: { target_tokens: 500 },
+  cache_namespace: "default",
+};
+
+describe("AtlasDataProvider", () => {
+  let db: PGlite;
+
+  beforeAll(async () => {
+    db = new PGlite();
+    await db.waitReady;
+    await db.exec(extractAtlasDdl());
+    __setPoolForTesting(poolFromPglite(db));
+  });
+
+  afterAll(async () => {
+    __resetPoolForTesting();
+    await db.close();
+  });
+
+  beforeEach(async () => {
+    await db.query("DELETE FROM atlas_cache_pages");
+    await db.query("DELETE FROM atlas_seed_entries");
+  });
+
+  it("requires an atlas source config", () => {
+    expect(
+      () =>
+        new AtlasDataProvider(
+          {
+            type: "markdown",
+            name: "docs",
+            chunk: { target_tokens: 500 },
+            path: ".",
+            file_patterns: ["*.md"],
+          } as SourceConfig,
+          { cloneDir: "/tmp" },
+        ),
+    ).toThrow("AtlasDataProvider requires an atlas source config");
+  });
+
+  it("acquires approved seed entries and fresh cache pages as ContentItems", async () => {
+    const approvedSeed = await upsertAtlasSeedCandidate({
+      canonicalKey: "runtime:why",
+      sourceName: "atlas",
+      repoUrl: "https://github.com/CopilotKit/pathfinder",
+      ref: "main",
+      subsystem: "runtime",
+      title: "Runtime why",
+      content: "Approved seed rationale",
+      provenance: { source: "pr" },
+      evidence: [{ url: "https://example.test/pr/1" }],
+    });
+    await approveAtlasSeedEntry("runtime:why", "reviewer");
+    await upsertAtlasSeedCandidate({
+      canonicalKey: "runtime:pending",
+      sourceName: "atlas",
+      title: "Pending",
+      content: "Pending content",
+      provenance: {},
+      evidence: [],
+    });
+    await upsertAtlasCachePage({
+      pageKey: "runtime/overview",
+      sourceName: "atlas",
+      title: "Runtime overview",
+      content: "Generated cache page",
+      contentHash: "cache-hash",
+      generatedSeedIds: [approvedSeed.id], // use the returned row id instead of assuming the first seed is id 1
+    });
+    await upsertAtlasCachePage({
+      pageKey: "runtime/stale",
+      sourceName: "atlas",
+      title: "Runtime stale",
+      content: "Stale cache page",
+      contentHash: "stale-hash",
+    });
+    await markAtlasCachePageStale("runtime/stale", "seed changed");
+
+    const provider = new AtlasDataProvider(atlasConfig, { cloneDir: "/tmp" });
+    const result = await provider.fullAcquire();
+
+    expect(result.removedIds).toEqual(["atlas-cache:runtime/stale"]);
+    expect(result.items).toHaveLength(2);
+    expect(result.items.map((item) => item.id)).toEqual([
+      "atlas-seed:runtime:why",
+      "atlas-cache:runtime/overview",
+    ]);
+    expect(result.items[0]).toMatchObject({
+      title: "Runtime why",
+      content: "Approved seed rationale",
+      sourceUrl: "atlas://seed/runtime%3Awhy",
+      metadata: {
+        atlas_kind: "seed",
+        atlas_key: "runtime:why",
+        source_name: "atlas",
+        repo_url: "https://github.com/CopilotKit/pathfinder",
+        ref: "main",
+        subsystem: "runtime",
+      },
+    });
+    expect(result.items[1]).toMatchObject({
+      title: "Runtime overview",
+      content: "Generated cache page",
+      sourceUrl: "atlas://cache/runtime%2Foverview",
+      metadata: {
+        atlas_kind: "cache",
+        atlas_page_key: "runtime/overview",
+        source_name: "atlas",
+        content_hash: "cache-hash",
+      },
+    });
+    expect(result.stateToken).toMatch(/^\d{4}-\d{2}-\d{2}T/);
+  });
+
+  it("incrementally acquires only entries changed after the state token", async () => {
+    await upsertAtlasSeedCandidate({
+      canonicalKey: "old",
+      sourceName: "atlas",
+      title: "Old",
+      content: "Old content",
+      provenance: {},
+      evidence: [],
+    });
+    await approveAtlasSeedEntry("old", "reviewer");
+    await db.query(
+      "UPDATE atlas_seed_entries SET updated_at = $2 WHERE canonical_key = $1",
+      ["old", new Date("2026-01-01T00:00:00Z")],
+    );
+
+    const provider = new
AtlasDataProvider(atlasConfig, { cloneDir: "/tmp" }); + const stateToken = await provider.getCurrentStateToken(); + expect(stateToken).toBe("2026-01-01T00:00:00.000Z"); + + await upsertAtlasSeedCandidate({ + canonicalKey: "new", + sourceName: "atlas", + title: "New", + content: "New content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry("new", "reviewer"); + await db.query( + "UPDATE atlas_seed_entries SET updated_at = $2 WHERE canonical_key = $1", + ["new", new Date("2026-01-02T00:00:00Z")], + ); + + const result = await provider.incrementalAcquire(stateToken ?? ""); + expect(result.items.map((item) => item.id)).toEqual(["atlas-seed:new"]); + }); + + it("enforces configured repository, ref, and subsystem filters", async () => { + const runtime = await upsertAtlasSeedCandidate({ + canonicalKey: "runtime", + sourceName: "atlas", + repoUrl: "https://github.com/CopilotKit/pathfinder", + ref: "main", + subsystem: "runtime", + title: "Runtime", + content: "Runtime content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry(runtime.canonicalKey, "reviewer"); + const docs = await upsertAtlasSeedCandidate({ + canonicalKey: "docs", + sourceName: "atlas", + repoUrl: "https://github.com/CopilotKit/pathfinder", + ref: "main", + subsystem: "docs", + title: "Docs", + content: "Docs content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry(docs.canonicalKey, "reviewer"); + const otherRef = await upsertAtlasSeedCandidate({ + canonicalKey: "other-ref", + sourceName: "atlas", + repoUrl: "https://github.com/CopilotKit/pathfinder", + ref: "release", + subsystem: "runtime", + title: "Other ref", + content: "Other ref content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry(otherRef.canonicalKey, "reviewer"); + await upsertAtlasCachePage({ + pageKey: "runtime/cache", + sourceName: "atlas", + title: "Runtime cache", + content: "Runtime cache content", + contentHash: "runtime-cache", + 
generatedSeedIds: [runtime.id], + }); + await upsertAtlasCachePage({ + pageKey: "docs/cache", + sourceName: "atlas", + title: "Docs cache", + content: "Docs cache content", + contentHash: "docs-cache", + generatedSeedIds: [docs.id], + }); + + const provider = new AtlasDataProvider( + { + ...atlasConfig, + repositories: [ + { + repo_url: "https://github.com/CopilotKit/pathfinder", + refs: ["main"], + subsystems: ["runtime"], + }, + ], + }, + { cloneDir: "/tmp" }, + ); + + const result = await provider.fullAcquire(); + + expect(result.items.map((item) => item.id)).toEqual([ + "atlas-seed:runtime", + "atlas-cache:runtime/cache", + ]); + }); + + it("does not persist a state token newer than its bounded acquisition snapshot", async () => { + await upsertAtlasSeedCandidate({ + canonicalKey: "included", + sourceName: "atlas", + title: "Included", + content: "Included content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry("included", "reviewer"); + await upsertAtlasSeedCandidate({ + canonicalKey: "future", + sourceName: "atlas", + title: "Future", + content: "Future content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry("future", "reviewer"); + await db.query( + "UPDATE atlas_seed_entries SET updated_at = $2 WHERE canonical_key = $1", + ["included", new Date("2026-01-01T00:00:00Z")], + ); + await db.query( + "UPDATE atlas_seed_entries SET updated_at = $2 WHERE canonical_key = $1", + ["future", new Date("2026-01-02T00:00:00Z")], + ); + + const provider = new AtlasDataProvider(atlasConfig, { cloneDir: "/tmp" }); + const result = await provider.incrementalAcquire( + "2025-12-31T00:00:00.000Z", + ); + + expect(result.items.map((item) => item.id)).toEqual([ + "atlas-seed:included", + "atlas-seed:future", + ]); + expect(result.stateToken).toBe("2026-01-02T00:00:00.000Z"); + }); + + it("bounds incremental acquisition to the token captured before listing rows", async () => { + await upsertAtlasSeedCandidate({ + canonicalKey: 
"included", + sourceName: "atlas", + title: "Included", + content: "Included content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry("included", "reviewer"); + await db.query( + "UPDATE atlas_seed_entries SET updated_at = $2 WHERE canonical_key = $1", + ["included", new Date("2026-01-01T00:00:00Z")], + ); + + const provider = new AtlasDataProvider(atlasConfig, { cloneDir: "/tmp" }); + const capturedToken = "2026-01-01T00:00:00.000Z"; + const lateToken = "2026-01-02T00:00:00.000Z"; + const stateTokenSpy = vi + .spyOn(atlasDb, "getAtlasStateToken") + .mockImplementation(async () => { + await upsertAtlasSeedCandidate({ + canonicalKey: "late", + sourceName: "atlas", + title: "Late", + content: "Late content", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry("late", "reviewer"); + await db.query( + "UPDATE atlas_seed_entries SET updated_at = $2 WHERE canonical_key = $1", + ["late", new Date(lateToken)], + ); + return capturedToken; + }); + + try { + const result = await provider.incrementalAcquire( + "2025-12-31T00:00:00.000Z", + ); + + expect(stateTokenSpy).toHaveBeenCalledTimes(1); + expect(result.items.map((item) => item.id)).toEqual([ + "atlas-seed:included", + ]); + expect(result.stateToken).toBe(capturedToken); + } finally { + stateTokenSpy.mockRestore(); + } + + expect(await getAtlasStateToken("atlas")).toBe(lateToken); + + const catchup = await provider.incrementalAcquire(capturedToken); + expect(catchup.items.map((item) => item.id)).toEqual(["atlas-seed:late"]); + expect(catchup.stateToken).toBe(lateToken); + }); + + it("incrementally removes stale cache pages and advances state token", async () => { + await upsertAtlasCachePage({ + pageKey: "runtime/overview", + sourceName: "atlas", + title: "Runtime overview", + content: "Generated cache page", + contentHash: "cache-hash", + }); + await db.query( + "UPDATE atlas_cache_pages SET updated_at = $2 WHERE page_key = $1", + ["runtime/overview", new 
Date("2026-01-01T00:00:00Z")], + ); + + const provider = new AtlasDataProvider(atlasConfig, { cloneDir: "/tmp" }); + const stateToken = await provider.getCurrentStateToken(); + expect(stateToken).toBe("2026-01-01T00:00:00.000Z"); + + await markAtlasCachePageStale("runtime/overview", "seed changed"); + await db.query( + "UPDATE atlas_cache_pages SET updated_at = $2 WHERE page_key = $1", + ["runtime/overview", new Date("2026-01-02T00:00:00Z")], + ); + + expect(await getAtlasStateToken("atlas")).toBe("2026-01-02T00:00:00.000Z"); + + const result = await provider.incrementalAcquire(stateToken ?? ""); + expect(result.items).toEqual([]); + expect(result.removedIds).toEqual(["atlas-cache:runtime/overview"]); + expect(result.stateToken).toBe("2026-01-02T00:00:00.000Z"); + }); + + it("incrementally removes rejected seeds and empty cache pages", async () => { + const seed = await upsertAtlasSeedCandidate({ + canonicalKey: "seed-to-reject", + sourceName: "atlas", + title: "Seed to reject", + content: "Seed content", + provenance: {}, + evidence: [], + }); + await rejectAtlasSeedEntry(seed.canonicalKey, "reviewer", "wrong"); + await upsertAtlasCachePage({ + pageKey: "runtime/empty", + sourceName: "atlas", + title: "Runtime empty", + content: "", + contentHash: "empty-hash", + generatedSeedIds: [seed.id], + }); + await db.query( + "UPDATE atlas_seed_entries SET updated_at = $2 WHERE canonical_key = $1", + ["seed-to-reject", new Date("2026-01-02T00:00:00Z")], + ); + await db.query( + "UPDATE atlas_cache_pages SET updated_at = $2 WHERE page_key = $1", + ["runtime/empty", new Date("2026-01-03T00:00:00Z")], + ); + + const provider = new AtlasDataProvider(atlasConfig, { cloneDir: "/tmp" }); + const result = await provider.incrementalAcquire( + "2026-01-01T00:00:00.000Z", + ); + + expect(result.items).toEqual([]); + expect(result.removedIds).toEqual([ + "atlas-seed:seed-to-reject", + "atlas-cache:runtime/empty", + ]); + expect(result.stateToken).toBe("2026-01-03T00:00:00.000Z"); + 
});
+
+  it("provider registry resolves type atlas", () => {
+    const factory = getProvider("atlas");
+    const provider = factory(atlasConfig, { cloneDir: "/tmp" });
+    expect(provider).toBeInstanceOf(AtlasDataProvider);
+  });
+});
diff --git a/src/__tests__/atlas-ratification-endpoints.test.ts b/src/__tests__/atlas-ratification-endpoints.test.ts
new file mode 100644
index 0000000..6548651
--- /dev/null
+++ b/src/__tests__/atlas-ratification-endpoints.test.ts
@@ -0,0 +1,484 @@
+import {
+  describe,
+  it,
+  expect,
+  beforeAll,
+  afterAll,
+  beforeEach,
+  vi,
+} from "vitest";
+import express from "express";
+import http from "node:http";
+import { PGlite } from "@electric-sql/pglite";
+import { __setPoolForTesting, __resetPoolForTesting } from "../db/client.js";
+import { generatePostSchemaMigration } from "../db/schema.js";
+import {
+  approveAtlasSeedEntry,
+  upsertAtlasSeedCandidate,
+} from "../db/atlas.js";
+import { AtlasDataProvider } from "../indexing/providers/atlas.js";
+import type { AtlasSourceConfig } from "../types.js";
+
+vi.mock("../config.js", async (importOriginal) => {
+  const actual = await importOriginal<typeof import("../config.js")>(); // typed so the spread below type-checks
+  return {
+    ...actual,
+    getAnalyticsConfig: vi.fn(),
+    getConfig: vi.fn(() => ({
+      port: 3001,
+      databaseUrl: "pglite:///tmp/test",
+      openaiApiKey: "",
+      githubToken: "",
+      githubWebhookSecret: "",
+      nodeEnv: "test",
+      logLevel: "info",
+      cloneDir: "/tmp/test",
+      slackBotToken: "",
+      slackSigningSecret: "",
+      discordBotToken: "",
+      discordPublicKey: "",
+      notionToken: "",
+      mcpJwtSecret: "x".repeat(32),
+      p2pTelemetryUrl: undefined,
+      p2pTelemetryDisabled: false,
+      packageVersion: "test",
+      slackWebhookUrl: "",
+    })),
+  };
+});
+
+import { getAnalyticsConfig, getConfig } from "../config.js";
+import {
+  __setAtlasOrchestratorForTesting,
+  __resetAnalyticsTokenForTesting,
+  registerAtlasRatificationRoutes,
+} from "../server.js";
+
+const mockGetAnalyticsConfig = vi.mocked(getAnalyticsConfig);
+const mockGetConfig = vi.mocked(getConfig);
+const
ATLAS_DDL_MARKER = "-- Atlas durable seed knowledge.";
+const DEFAULT_TEST_CONFIG = {
+  port: 3001,
+  databaseUrl: "pglite:///tmp/test",
+  openaiApiKey: "",
+  githubToken: "",
+  githubWebhookSecret: "",
+  nodeEnv: "test",
+  logLevel: "info",
+  cloneDir: "/tmp/test",
+  slackBotToken: "",
+  slackSigningSecret: "",
+  discordBotToken: "",
+  discordPublicKey: "",
+  notionToken: "",
+  mcpJwtSecret: "x".repeat(32),
+  p2pTelemetryUrl: undefined,
+  p2pTelemetryDisabled: false,
+  packageVersion: "test",
+  slackWebhookUrl: "",
+};
+
+function extractAtlasDdl(): string {
+  const sql = generatePostSchemaMigration();
+  const idx = sql.indexOf(ATLAS_DDL_MARKER);
+  if (idx < 0) {
+    throw new Error(`Could not locate "${ATLAS_DDL_MARKER}" in schema SQL`);
+  }
+  return sql.slice(idx);
+}
+
+function poolFromPglite(db: PGlite) {
+  return {
+    query: (text: string, params?: unknown[]) => db.query(text, params),
+    connect: async () => ({
+      query: (text: string, params?: unknown[]) => db.query(text, params),
+      release: () => {},
+    }),
+    end: async () => db.close(),
+  };
+}
+
+function request(
+  server: http.Server,
+  method: string,
+  path: string,
+  opts: { headers?: Record<string, string>; body?: unknown } = {},
+): Promise<{ status: number; body: string }> {
+  return new Promise((resolve, reject) => {
+    const address = server.address();
+    if (!address || typeof address === "string") {
+      reject(new Error("server is not listening on a TCP port"));
+      return;
+    }
+
+    const body =
+      opts.body === undefined ? undefined : JSON.stringify(opts.body);
+    const req = http.request(
+      {
+        hostname: "127.0.0.1",
+        port: address.port,
+        path,
+        method,
+        headers: {
+          ...(body ? { "Content-Type": "application/json" } : {}),
+          ...(body
+            ? { "Content-Length": Buffer.byteLength(body).toString() }
+            : {}),
+          ...opts.headers,
+        },
+      },
+      (res) => {
+        let responseBody = "";
+        res.setEncoding("utf8");
+        res.on("data", (chunk) => {
+          responseBody += chunk;
+        });
+        res.on("end", () => {
+          resolve({ status: res.statusCode ?? 0, body: responseBody });
+        });
+      },
+    );
+    req.on("error", reject);
+    if (body) req.write(body);
+    req.end();
+  });
+}
+
+async function startServer(): Promise<http.Server> {
+  const app = express();
+  app.use(express.json());
+  registerAtlasRatificationRoutes(app);
+  const server = app.listen(0); // port 0: the OS assigns a free port, read back via server.address()
+  await new Promise((resolve) => server.once("listening", resolve));
+  return server;
+}
+
+async function closeServer(
+  serverToClose: http.Server | undefined,
+): Promise<void> {
+  if (!serverToClose || !serverToClose.listening) {
+    return;
+  }
+  await new Promise<void>((resolve, reject) => {
+    serverToClose.close((error) => {
+      if (error) {
+        reject(error);
+        return;
+      }
+      resolve(); // no value: requires the Promise<void> type argument above
+    });
+  });
+}
+const atlasConfig: AtlasSourceConfig = {
+  type: "atlas",
+  name: "atlas",
+  chunk: { target_tokens: 500 },
+  cache_namespace: "default",
+};
+
+describe("Atlas ratification endpoints", () => {
+  let db: PGlite;
+  let server: http.Server | undefined;
+
+  beforeAll(async () => {
+    db = new PGlite();
+    await db.waitReady;
+    await db.exec(extractAtlasDdl());
+    __setPoolForTesting(poolFromPglite(db));
+  });
+
+  afterAll(async () => {
+    await closeServer(server);
+    server = undefined;
+    __setAtlasOrchestratorForTesting(null);
+    __resetPoolForTesting();
+    await db.close();
+  });
+
+  beforeEach(async () => {
+    await closeServer(server);
+    server = undefined;
+    __setAtlasOrchestratorForTesting(null);
+    mockGetAnalyticsConfig.mockReturnValue({
+      enabled: true,
+      log_queries: true,
+      retention_days: 90,
+      token: "secret",
+    });
+    mockGetConfig.mockReturnValue(DEFAULT_TEST_CONFIG);
+    __resetAnalyticsTokenForTesting();
+    await db.query("DELETE FROM atlas_cache_pages");
+    await db.query("DELETE FROM atlas_seed_entries");
+  });
+  it("requires auth before returning pending seed candidates", async () => {
+    await upsertAtlasSeedCandidate({
+      canonicalKey: "runtime:why",
+      sourceName: "atlas",
+      title: "Runtime why",
+      content: "Pending rationale",
+      provenance: {},
+      evidence: [],
+    });
+    server = await
startServer(); + + const unauthorized = await request(server, "GET", "/api/atlas/candidates"); + expect(unauthorized.status).toBe(401); + + const authorized = await request(server, "GET", "/api/atlas/candidates", { + headers: { Authorization: "Bearer secret" }, + }); + expect(authorized.status).toBe(200); + const body = JSON.parse(authorized.body); + expect(body.candidates).toHaveLength(1); + expect(body.candidates[0]).toMatchObject({ + canonicalKey: "runtime:why", + sourceName: "atlas", + status: "pending", + }); + }); + + it("uses bearer auth even when analytics is disabled", async () => { + mockGetAnalyticsConfig.mockReturnValue({ + enabled: false, + log_queries: false, + retention_days: 90, + token: "secret", + }); + await upsertAtlasSeedCandidate({ + canonicalKey: "runtime:analytics-disabled", + sourceName: "atlas", + title: "Analytics disabled", + content: "Pending rationale while analytics is disabled", + provenance: {}, + evidence: [], + }); + server = await startServer(); + + const unauthorized = await request(server, "GET", "/api/atlas/candidates"); + expect(unauthorized.status).toBe(401); + + const authorized = await request(server, "GET", "/api/atlas/candidates", { + headers: { Authorization: "Bearer secret" }, + }); + expect(authorized.status).toBe(200); + expect(JSON.parse(authorized.body).candidates).toHaveLength(1); + }); + + it("returns 503 when root config read fails before auth options are built", async () => { + mockGetConfig.mockImplementation(() => { + throw new Error("bad root config"); + }); + const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + server = await startServer(); + + const res = await request(server, "GET", "/api/atlas/candidates", { + headers: { Authorization: "Bearer secret" }, + }); + + expect(res.status).toBe(503); + expect(JSON.parse(res.body)).toEqual({ + error: "misconfigured", + error_description: "Atlas ratification config read failed", + }); + consoleSpy.mockRestore(); + }); + + it("accepts 
opaque slash-bearing canonical keys in the request body", async () => { + const canonicalKey = "github-pr:atlas:org/repo:42"; + await upsertAtlasSeedCandidate({ + canonicalKey, + sourceName: "atlas", + title: "Slash key", + content: "Candidate with a slash-bearing key", + provenance: {}, + evidence: [], + }); + server = await startServer(); + + const approved = await request( + server, + "POST", + "/api/atlas/candidates/approve", + { + headers: { + Authorization: "Bearer secret", + "X-Atlas-Actor": "reviewer@example.test", + }, + body: { canonicalKey }, + }, + ); + + expect(approved.status).toBe(200); + expect(JSON.parse(approved.body).candidate).toMatchObject({ + canonicalKey, + status: "approved", + approvedBy: "reviewer@example.test", + }); + }); + + it("approves without an orchestrator: logs loudly and reports reindexQueued:false", async () => { + await upsertAtlasSeedCandidate({ + canonicalKey: "runtime:approve-no-orchestrator", + sourceName: "atlas", + title: "Approve without orchestrator", + content: "Candidate approved while no orchestrator is wired", + provenance: {}, + evidence: [], + }); + __setAtlasOrchestratorForTesting(null); + const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + server = await startServer(); + + const approved = await request( + server, + "POST", + "/api/atlas/candidates/approve", + { + headers: { + Authorization: "Bearer secret", + "X-Atlas-Actor": "reviewer@example.test", + }, + body: { canonicalKey: "runtime:approve-no-orchestrator" }, + }, + ); + + expect(approved.status).toBe(200); + const body = JSON.parse(approved.body); + expect(body.reindexQueued).toBe(false); + expect(body.candidate).toMatchObject({ + canonicalKey: "runtime:approve-no-orchestrator", + status: "approved", + }); + expect(consoleSpy).toHaveBeenCalledWith( + expect.stringContaining("reindex NOT queued"), + ); + consoleSpy.mockRestore(); + }); + + it("approves and rejects candidates with the authenticated actor", async () => { + await 
upsertAtlasSeedCandidate({ + canonicalKey: "runtime:approve", + sourceName: "atlas", + title: "Approve me", + content: "Candidate to approve", + provenance: {}, + evidence: [], + }); + await upsertAtlasSeedCandidate({ + canonicalKey: "runtime:reject", + sourceName: "atlas", + title: "Reject me", + content: "Candidate to reject", + provenance: {}, + evidence: [], + }); + server = await startServer(); + + const approved = await request( + server, + "POST", + "/api/atlas/candidates/approve", + { + headers: { + Authorization: "Bearer secret", + "X-Atlas-Actor": "reviewer@example.test", + }, + body: { canonicalKey: "runtime:approve" }, + }, + ); + expect(approved.status).toBe(200); + expect(JSON.parse(approved.body).candidate).toMatchObject({ + canonicalKey: "runtime:approve", + status: "approved", + approvedBy: "reviewer@example.test", + }); + + const rejected = await request( + server, + "POST", + "/api/atlas/candidates/reject", + { + headers: { + Authorization: "Bearer secret", + "X-Atlas-Actor": "reviewer@example.test", + }, + body: { canonicalKey: "runtime:reject", reason: "incorrect inference" }, + }, + ); + expect(rejected.status).toBe(200); + expect(JSON.parse(rejected.body).candidate).toMatchObject({ + canonicalKey: "runtime:reject", + status: "rejected", + rejectedBy: "reviewer@example.test", + rejectionReason: "incorrect inference", + }); + }); + + it("queues the approved candidate source for reindexing", async () => { + await upsertAtlasSeedCandidate({ + canonicalKey: "runtime:approve-reindex", + sourceName: "atlas", + title: "Approve and index", + content: "Candidate to approve and index", + provenance: {}, + evidence: [], + }); + const queueSourceReindex = vi.fn(); + __setAtlasOrchestratorForTesting({ + queueSourceReindex, + }); + server = await startServer(); + + const approved = await request( + server, + "POST", + "/api/atlas/candidates/approve", + { + headers: { + Authorization: "Bearer secret", + "X-Atlas-Actor": "reviewer@example.test", + }, + body: 
{ canonicalKey: "runtime:approve-reindex" }, + }, + ); + + expect(approved.status).toBe(200); + expect(queueSourceReindex).toHaveBeenCalledWith("atlas"); + }); + + it("keeps rejected candidates out of provider acquisition", async () => { + await upsertAtlasSeedCandidate({ + canonicalKey: "runtime:approved", + sourceName: "atlas", + title: "Approved", + content: "Approved rationale", + provenance: {}, + evidence: [], + }); + await upsertAtlasSeedCandidate({ + canonicalKey: "runtime:rejected", + sourceName: "atlas", + title: "Rejected", + content: "Rejected rationale", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry("runtime:approved", "reviewer"); + server = await startServer(); + + await request(server, "POST", "/api/atlas/candidates/reject", { + headers: { + Authorization: "Bearer secret", + "X-Atlas-Actor": "reviewer", + }, + body: { canonicalKey: "runtime:rejected", reason: "bad evidence" }, + }); + + const provider = new AtlasDataProvider(atlasConfig, { cloneDir: "/tmp" }); + const result = await provider.fullAcquire(); + + expect(result.items.map((item) => item.id)).toEqual([ + "atlas-seed:runtime:approved", + ]); + }); +}); diff --git a/src/__tests__/atlas-retrieval-metrics.test.ts b/src/__tests__/atlas-retrieval-metrics.test.ts new file mode 100644 index 0000000..f70adac --- /dev/null +++ b/src/__tests__/atlas-retrieval-metrics.test.ts @@ -0,0 +1,247 @@ +import { describe, it, expect, beforeAll, afterAll, beforeEach } from "vitest"; +import { PGlite } from "@electric-sql/pglite"; +import { __setPoolForTesting, __resetPoolForTesting } from "../db/client.js"; +import { + getAnalyticsSummary, + getAtlasRetrievalMetrics, + getTopQueries, + getToolCounts, + logQuery, +} from "../db/analytics.js"; +import { generatePostSchemaMigration } from "../db/schema.js"; + +const QUERY_LOG_DDL_MARKER = + "-- Analytics: query_log table for tracking tool usage"; + +function extractAnalyticsDdl(): string { + const full = generatePostSchemaMigration(); + 
const idx = full.indexOf(QUERY_LOG_DDL_MARKER); + if (idx < 0) { + throw new Error(`Could not locate "${QUERY_LOG_DDL_MARKER}" in schema`); + } + return full.slice(idx); +} + +function poolFromPglite(db: PGlite) { + return { + query: (text: string, params?: unknown[]) => db.query(text, params), + connect: async () => ({ + query: (text: string, params?: unknown[]) => db.query(text, params), + release: () => {}, + }), + end: async () => db.close(), + }; +} + +async function insertQuery( + db: PGlite, + row: { + toolName: string; + queryText: string; + resultCount: number; + sourceName: string | null; + sessionId?: string | null; + }, +) { + await db.query( + `INSERT INTO query_log + (tool_name, query_text, result_count, top_score, latency_ms, source_name, session_id) + VALUES ($1, $2, $3, $4, $5, $6, $7)`, + [ + row.toolName, + row.queryText, + row.resultCount, + row.resultCount > 0 ? 0.91 : null, + 25, + row.sourceName, + row.sessionId ?? null, + ], + ); +} + +describe("Atlas retrieval analytics", () => { + let db: PGlite; + + beforeAll(async () => { + db = new PGlite(); + await db.waitReady; + await db.exec(extractAnalyticsDdl()); + __setPoolForTesting(poolFromPglite(db)); + }); + + afterAll(async () => { + __resetPoolForTesting(); + await db.close(); + }); + + beforeEach(async () => { + await db.query("DELETE FROM query_log"); + }); + + it("logs Atlas query metadata using the existing source and session columns", async () => { + await logQuery({ + tool_name: "atlas-search", + query_text: "how does runtime auth work?", + result_count: 3, + top_score: 0.88, + latency_ms: 18, + source_name: "atlas", + session_id: "service:atlas-gardener", + }); + + const rows = await db.query( + `SELECT tool_name, query_text, source_name, session_id + FROM query_log`, + ); + + expect(rows.rows).toEqual([ + { + tool_name: "atlas-search", + query_text: "how does runtime auth work?", + source_name: "atlas", + session_id: "service:atlas-gardener", + }, + ]); + }); + + it("excludes 
service-originated Atlas traffic from ordinary analytics aggregates by default", async () => { + await insertQuery(db, { + toolName: "search-docs", + queryText: "ordinary docs query", + resultCount: 2, + sourceName: "docs", + }); + await insertQuery(db, { + toolName: "atlas-search", + queryText: "user atlas query", + resultCount: 1, + sourceName: "atlas", + }); + await insertQuery(db, { + toolName: "atlas-search", + queryText: "gardening probe", + resultCount: 1, + sourceName: "atlas", + sessionId: "service:atlas-gardener", + }); + + const summary = await getAnalyticsSummary({}, 7); + const toolCounts = await getToolCounts(7); + const topQueries = await getTopQueries(7, 10); + + expect(summary.total_queries_window).toBe(2); + expect(summary.queries_by_source).toEqual( + expect.arrayContaining([ + { source_name: "atlas", count: 1 }, + { source_name: "docs", count: 1 }, + ]), + ); + const sortedToolCounts = [...toolCounts].sort((a, b) => + a.tool_type.localeCompare(b.tool_type), + ); + expect(sortedToolCounts).toEqual([ + { tool_type: "atlas", count: 1 }, + { tool_type: "search", count: 1 }, + ]); + expect(topQueries.map((q) => q.query_text)).not.toContain( + "gardening probe", + ); + }); + + it("can include service-originated traffic when explicitly requested", async () => { + await insertQuery(db, { + toolName: "atlas-search", + queryText: "user atlas query", + resultCount: 1, + sourceName: "atlas", + }); + await insertQuery(db, { + toolName: "atlas-search", + queryText: "gardening probe", + resultCount: 1, + sourceName: "atlas", + sessionId: "service:atlas-gardener", + }); + + const summary = await getAnalyticsSummary( + { include_service_traffic: true }, + 7, + ); + + expect(summary.total_queries_window).toBe(2); + expect(summary.queries_by_source).toEqual([ + { source_name: "atlas", count: 2 }, + ]); + }); + + it("includes exact Atlas tool names when filtering analytics by Atlas tool type", async () => { + await insertQuery(db, { + toolName: "atlas", + 
queryText: "exact atlas tool query", + resultCount: 1, + sourceName: "atlas", + }); + await insertQuery(db, { + toolName: "atlas-search", + queryText: "prefixed atlas tool query", + resultCount: 1, + sourceName: "atlas", + }); + await insertQuery(db, { + toolName: "search-docs", + queryText: "ordinary docs query", + resultCount: 1, + sourceName: "docs", + }); + + const summary = await getAnalyticsSummary({ tool_type: "atlas" }, 7); + const topQueries = await getTopQueries(7, 10, { tool_type: "atlas" }); + + expect(summary.total_queries_window).toBe(2); + expect(summary.queries_by_source).toEqual([ + { source_name: "atlas", count: 2 }, + ]); + expect(topQueries.map((query) => query.tool_name).sort()).toEqual([ + "atlas", + "atlas-search", + ]); + }); + + it("computes Atlas retrieval rate without ordinary search traffic in the denominator", async () => { + await insertQuery(db, { + toolName: "search-docs", + queryText: "ordinary docs query", + resultCount: 2, + sourceName: "docs", + }); + await insertQuery(db, { + toolName: "atlas-search", + queryText: "atlas hit", + resultCount: 2, + sourceName: "atlas", + }); + await insertQuery(db, { + toolName: "search-atlas", + queryText: "atlas miss", + resultCount: 0, + sourceName: "atlas", + }); + await insertQuery(db, { + toolName: "atlas-search", + queryText: "service hit", + resultCount: 1, + sourceName: "atlas", + sessionId: "service:atlas-gardener", + }); + + const metrics = await getAtlasRetrievalMetrics(7); + + expect(metrics).toEqual({ + atlas_queries_window: 2, + atlas_successful_queries_window: 1, + atlas_empty_queries_window: 1, + atlas_retrieval_rate_window: 0.5, + total_user_queries_window: 3, + }); + }); +}); diff --git a/src/__tests__/atlas-schema.test.ts b/src/__tests__/atlas-schema.test.ts new file mode 100644 index 0000000..674146e --- /dev/null +++ b/src/__tests__/atlas-schema.test.ts @@ -0,0 +1,82 @@ +import { describe, expect, it } from "vitest"; +import { generatePostSchemaMigration } from 
"../db/schema.js"; + +describe("Atlas schema foundation", () => { + const sql = generatePostSchemaMigration(); + + it("creates durable atlas seed entries with provenance and ratification fields", () => { + expect(sql).toContain("CREATE TABLE IF NOT EXISTS atlas_seed_entries"); + for (const column of [ + "canonical_key", + "source_name", + "repo_url", + "ref", + "subsystem", + "status", + "provenance", + "evidence", + "approved_by", + "approved_at", + "rejected_by", + "rejected_at", + "created_at", + "updated_at", + ]) { + expect(sql).toContain(column); + } + expect(sql).toContain("atlas_seed_entries_canonical_key_uniq"); + expect(sql).toContain( + "CHECK (status IN ('pending', 'approved', 'rejected'))", + ); + expect(sql).toContain("provenance JSONB NOT NULL DEFAULT '{}'"); + expect(sql).toContain("evidence JSONB NOT NULL DEFAULT '[]'"); + }); + + it("creates useful atlas seed indexes", () => { + expect(sql).toContain( + "CREATE INDEX IF NOT EXISTS idx_atlas_seed_entries_status", + ); + expect(sql).toContain( + "CREATE INDEX IF NOT EXISTS idx_atlas_seed_entries_source_name", + ); + expect(sql).toContain( + "CREATE INDEX IF NOT EXISTS idx_atlas_seed_entries_repo_ref_subsystem", + ); + }); + + it("creates disposable atlas cache pages with stale and generation metadata", () => { + expect(sql).toContain("CREATE TABLE IF NOT EXISTS atlas_cache_pages"); + for (const column of [ + "page_key", + "source_name", + "title", + "content_hash", + "stale", + "stale_reason", + "generated_seed_ids", + "provenance", + "generated_at", + "error_at", + "error_message", + "created_at", + "updated_at", + ]) { + expect(sql).toContain(column); + } + expect(sql).toContain("atlas_cache_pages_page_key_uniq"); + expect(sql).toContain("stale BOOLEAN NOT NULL DEFAULT FALSE"); + expect(sql).toContain("generated_seed_ids JSONB NOT NULL DEFAULT '[]'"); + }); + + it("creates useful atlas cache indexes", () => { + expect(sql).toContain( + "CREATE INDEX IF NOT EXISTS 
idx_atlas_cache_pages_source_name", + ); + expect(sql).toContain( + "CREATE INDEX IF NOT EXISTS idx_atlas_cache_pages_stale", + ); + expect(sql).toContain( + "CREATE INDEX IF NOT EXISTS idx_atlas_cache_pages_generated_at", + ); + }); +}); diff --git a/src/__tests__/atlas-staleness.test.ts b/src/__tests__/atlas-staleness.test.ts new file mode 100644 index 0000000..28b9101 --- /dev/null +++ b/src/__tests__/atlas-staleness.test.ts @@ -0,0 +1,218 @@ +import { + describe, + it, + expect, + beforeAll, + afterAll, + beforeEach, + afterEach, + vi, +} from "vitest"; +import { PGlite } from "@electric-sql/pglite"; +import { __setPoolForTesting, __resetPoolForTesting } from "../db/client.js"; +import { generatePostSchemaMigration } from "../db/schema.js"; +import { + approveAtlasSeedEntry, + listIndexableAtlasContent, + markAtlasCachePagesStaleForSources, + upsertAtlasCachePage, + upsertAtlasSeedCandidate, +} from "../db/atlas.js"; +import * as atlasDb from "../db/atlas.js"; +import { gardenAtlasCachePages } from "../indexing/atlas-gardener.js"; + +const ATLAS_DDL_MARKER = "-- Atlas durable seed knowledge."; + +function extractAtlasDdl(): string { + const sql = generatePostSchemaMigration(); + const idx = sql.indexOf(ATLAS_DDL_MARKER); + if (idx < 0) { + throw new Error(`Could not locate "${ATLAS_DDL_MARKER}" in schema SQL`); + } + return sql.slice(idx); +} + +function poolFromPglite(db: PGlite) { + return { + query: (text: string, params?: unknown[]) => db.query(text, params), + connect: async () => ({ + query: (text: string, params?: unknown[]) => db.query(text, params), + release: () => {}, + }), + end: async () => db.close(), + }; +} + +describe("Atlas cache staleness and gardening", () => { + let db: PGlite; + + beforeAll(async () => { + db = new PGlite(); + await db.waitReady; + await db.exec(extractAtlasDdl()); + __setPoolForTesting(poolFromPglite(db)); + }); + + afterAll(async () => { + __resetPoolForTesting(); + await db.close(); + }); + + beforeEach(async () => { + 
await db.query("DELETE FROM atlas_cache_pages"); + await db.query("DELETE FROM atlas_seed_entries"); + }); + + it("marks cache pages stale for affected seed sources without deleting seed", async () => { + const seed = await upsertAtlasSeedCandidate({ + canonicalKey: "docs:runtime", + sourceName: "docs", + title: "Runtime rationale", + content: "Runtime rationale seed", + provenance: {}, + evidence: [], + }); + await approveAtlasSeedEntry(seed.canonicalKey, "reviewer"); + await upsertAtlasCachePage({ + pageKey: "runtime/overview", + sourceName: "atlas", + title: "Runtime overview", + content: "Generated runtime overview", + contentHash: "hash-1", + generatedSeedIds: [seed.id], + }); + + const marked = await markAtlasCachePagesStaleForSources( + ["docs"], + "source docs reindexed", + ); + + expect(marked).toBe(1); + const indexable = await listIndexableAtlasContent("docs"); + expect(indexable.map((item) => `${item.kind}:${item.key}`)).toEqual([ + "seed:docs:runtime", + ]); + const cacheProjection = await listIndexableAtlasContent("atlas"); + expect(cacheProjection).toEqual([]); + + const { rows } = await db.query<{ status: string }>( + "SELECT status FROM atlas_seed_entries WHERE canonical_key = $1", + [seed.canonicalKey], + ); + expect(rows[0]?.status).toBe("approved"); + }); + + it("regenerates stale cache pages and clears stale state on success", async () => { + await upsertAtlasCachePage({ + pageKey: "runtime/overview", + sourceName: "atlas", + title: "Runtime overview", + content: "Old generated content", + contentHash: "hash-old", + }); + await markAtlasCachePagesStaleForSources(["atlas"], "manual refresh"); + + const summary = await gardenAtlasCachePages({ + sourceName: "atlas", + generatePage: async (page) => ({ + content: `Regenerated: ${page.title}`, + generatedSeedIds: [42, 43], + provenance: { generatedBy: "test-gardener" }, + }), + }); + + expect(summary).toEqual({ regenerated: 1, failed: 0 }); + const items = await listIndexableAtlasContent("atlas"); + 
expect(items).toHaveLength(1); + expect(items[0]).toMatchObject({ + kind: "cache", + key: "runtime/overview", + content: "Regenerated: Runtime overview", + cachePage: { + generatedSeedIds: [42, 43], + provenance: { generatedBy: "test-gardener" }, + }, + }); + }); + + it("keeps failed cache pages stale and records the error reason", async () => { + await upsertAtlasCachePage({ + pageKey: "runtime/overview", + sourceName: "atlas", + title: "Runtime overview", + content: "Old generated content", + contentHash: "hash-old", + }); + await markAtlasCachePagesStaleForSources(["atlas"], "manual refresh"); + + const summary = await gardenAtlasCachePages({ + sourceName: "atlas", + generatePage: async () => { + throw new Error("generator unavailable"); + }, + }); + + expect(summary).toEqual({ regenerated: 0, failed: 1 }); + const indexable = await listIndexableAtlasContent("atlas"); + expect(indexable).toEqual([]); + const { rows } = await db.query<{ + stale: boolean; + error_message: string | null; + }>( + "SELECT stale, error_message FROM atlas_cache_pages WHERE page_key = $1", + ["runtime/overview"], + ); + expect(rows[0]?.stale).toBe(true); + expect(rows[0]?.error_message).toBe("generator unavailable"); + }); + + describe("when error bookkeeping fails for one page", () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + it("continues the batch and still returns a summary for the other pages", async () => { + await upsertAtlasCachePage({ + pageKey: "runtime/a", + sourceName: "atlas", + title: "Page A", + content: "Old A", + contentHash: "hash-a", + }); + await upsertAtlasCachePage({ + pageKey: "runtime/b", + sourceName: "atlas", + title: "Page B", + content: "Old B", + contentHash: "hash-b", + }); + await markAtlasCachePagesStaleForSources(["atlas"], "manual refresh"); + + // Simulate the bookkeeping write throwing for the first page only — + // e.g. the row was deleted/re-keyed concurrently + // (recordAtlasCachePageGenerationError throws "not found"). 
+ const spy = vi + .spyOn(atlasDb, "recordAtlasCachePageGenerationError") + .mockImplementationOnce(async () => { + throw new Error('Atlas cache page "runtime/a" not found'); + }); + + // Both pages fail to generate, so both hit the catch block; the first + // page's bookkeeping throws. The batch must not abort. + const summary = await gardenAtlasCachePages({ + sourceName: "atlas", + generatePage: async () => { + throw new Error("generator unavailable"); + }, + }); + + expect(summary).toEqual({ regenerated: 0, failed: 2 }); + // The second page's bookkeeping should still have run. + expect(spy).toHaveBeenCalledTimes(2); + expect(spy).toHaveBeenLastCalledWith( + "runtime/b", + "generator unavailable", + ); + }); + }); +}); diff --git a/src/__tests__/config.test.ts b/src/__tests__/config.test.ts index fabad06..1233bc7 100644 --- a/src/__tests__/config.test.ts +++ b/src/__tests__/config.test.ts @@ -687,6 +687,107 @@ describe("config.ts", () => { const cfg = getServerConfig(); expect(cfg.sources[0].type).toBe("slack"); }); + + it("accepts an atlas source referenced by a search tool", async () => { + const { stringify } = require("yaml"); + mockedExistsSync.mockImplementation((p: string) => { + if (p.includes("test.yaml")) return true; + return false; + }); + mockedReadFileSync.mockReturnValue( + stringify({ + server: { name: "test", version: "1.0" }, + sources: [ + { + name: "atlas", + type: "atlas", + seed_path: ".pathfinder/atlas/seed", + cache_namespace: "copilotkit", + chunk: { target_tokens: 800, overlap_tokens: 80 }, + }, + ], + tools: [ + { + name: "atlas-search", + type: "search", + source: "atlas", + description: "Search Atlas knowledge.", + default_limit: 5, + max_limit: 20, + result_format: "raw", + search_mode: "hybrid", + }, + ], + embedding: { + provider: "openai", + model: "text-embedding-3-small", + dimensions: 1536, + }, + indexing: { + auto_reindex: true, + reindex_hour_utc: 4, + stale_threshold_hours: 24, + }, + }), + ); + + const { getServerConfig } 
= await freshImport(); + const cfg = getServerConfig(); + expect(cfg.sources[0]).toMatchObject({ + name: "atlas", + type: "atlas", + seed_path: ".pathfinder/atlas/seed", + }); + expect(cfg.tools[0]).toMatchObject({ + name: "atlas-search", + source: "atlas", + }); + }); + + it("does not validate atlas seed_path as a local file source path", async () => { + const { stringify } = require("yaml"); + mockedExistsSync.mockImplementation((p: string) => { + if (p.includes("test.yaml")) return true; + return false; + }); + mockedReadFileSync.mockReturnValue( + stringify({ + server: { name: "test", version: "1.0" }, + sources: [ + { + name: "atlas", + type: "atlas", + seed_path: "./missing-atlas-seed", + chunk: {}, + }, + ], + tools: [ + { + name: "atlas-search", + type: "search", + source: "atlas", + description: "Search Atlas knowledge.", + default_limit: 5, + max_limit: 20, + result_format: "raw", + }, + ], + embedding: { + provider: "openai", + model: "text-embedding-3-small", + dimensions: 1536, + }, + indexing: { + auto_reindex: true, + reindex_hour_utc: 4, + stale_threshold_hours: 24, + }, + }), + ); + + const { getServerConfig } = await freshImport(); + expect(() => getServerConfig()).not.toThrow(); + }); }); // ── Helper functions ───────────────────────────────────────────────────── diff --git a/src/__tests__/document-integration.test.ts b/src/__tests__/document-integration.test.ts index c4b7ffd..1c3e22d 100644 --- a/src/__tests__/document-integration.test.ts +++ b/src/__tests__/document-integration.test.ts @@ -27,6 +27,13 @@ describe("document type registration", () => { const chunker = getChunker("document"); expect(chunker).toBeDefined(); }); + + it("atlas type is registered in chunker registry", async () => { + const { getChunker } = await import("../indexing/chunking/index.js"); + const { chunkMarkdown } = await import("../indexing/chunking/markdown.js"); + + expect(getChunker("atlas")).toBe(chunkMarkdown); + }); }); describe("backwards compatibility", () => { 
diff --git a/src/__tests__/github-webhook.test.ts b/src/__tests__/github-webhook.test.ts index 29d570d..bb79040 100644 --- a/src/__tests__/github-webhook.test.ts +++ b/src/__tests__/github-webhook.test.ts @@ -56,7 +56,7 @@ function makePushPayload(overrides: Record = {}) { function mockReqRes( body: object | string, - headers: Record = {}, + headers: Record = {}, asBuffer = true, ) { const bodyStr = typeof body === "string" ? body : JSON.stringify(body); @@ -106,7 +106,10 @@ describe("GitHub webhook handler", () => { }, }); - orchestrator = { queueIncrementalReindex: vi.fn() }; + orchestrator = { + queueIncrementalReindex: vi.fn(), + queueSourceReindex: vi.fn(), + }; handler = createWebhookHandler(orchestrator); }); @@ -178,12 +181,150 @@ describe("GitHub webhook handler", () => { expect(res.status).toHaveBeenCalledWith(401); }); + it("rejects malformed non-ASCII signatures without throwing", async () => { + const payload = makePushPayload(); + const { req, res } = mockReqRes(payload, { + "x-hub-signature-256": `sha256=${"é".repeat(64)}`, + }); + + await expect(handler(req, res)).resolves.toEqual( + expect.objectContaining({ queuedReindex: false }), + ); + + expect(res.status).toHaveBeenCalledWith(401); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: "Invalid or missing webhook signature", + }), + ); + expect(orchestrator.queueIncrementalReindex).not.toHaveBeenCalled(); + }); + it("accepts a valid signature", async () => { const { req, res } = mockReqRes(makePushPayload()); + const result = await handler(req, res); + + expect(res.status).toHaveBeenCalledWith(200); + expect(res.json).toHaveBeenCalledWith({ queued: true }); + expect(result).toEqual({ + queuedReindex: true, + affectedSourceNames: ["docs-source"], + }); + expect(orchestrator.queueIncrementalReindex).toHaveBeenCalledTimes(1); + expect(orchestrator.queueIncrementalReindex).toHaveBeenCalledWith( + "https://github.com/org/repo.git", + ); + 
expect(orchestrator.queueSourceReindex).not.toHaveBeenCalled(); + }); + + it("logs failed non-blocking delivery audit writes", async () => { + const auditError = new Error("audit db unavailable"); + mockRecordWebhookDelivery.mockRejectedValueOnce(auditError); + const consoleErrorSpy = vi + .spyOn(console, "error") + .mockImplementation(() => {}); + const { req, res } = mockReqRes(makePushPayload()); + + try { + await handler(req, res); + await Promise.resolve(); + + expect(res.status).toHaveBeenCalledWith(200); + expect(res.json).toHaveBeenCalledWith({ queued: true }); + expect(consoleErrorSpy).toHaveBeenCalledWith( + "[webhook] Failed to record GitHub delivery:", + auditError, + ); + } finally { + consoleErrorSpy.mockRestore(); + } + }); + + it("rejects duplicate signature headers before verification", async () => { + const payload = makePushPayload(); + const buf = Buffer.from(JSON.stringify(payload)); + const { req, res } = mockReqRes(payload, { + "x-hub-signature-256": [sign(buf), sign(buf)], + }); + + await handler(req, res); + + expect(res.status).toHaveBeenCalledWith(400); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: "Duplicate GitHub webhook header", + }), + ); + expect(orchestrator.queueIncrementalReindex).not.toHaveBeenCalled(); + }); + + it("rejects runtime duplicate signature headers before verification", async () => { + const payload = makePushPayload(); + const buf = Buffer.from(JSON.stringify(payload)); + const signature = sign(buf); + const { req, res } = mockReqRes(payload, { + "x-hub-signature-256": `${signature}, ${signature}`, + }); + req.rawHeaders = [ + "X-Hub-Signature-256", + signature, + "x-hub-signature-256", + signature, + "X-GitHub-Event", + "push", + ]; + await handler(req, res); - // Should not be 401 or 403 - expect(res.status).not.toHaveBeenCalledWith(401); - expect(res.status).not.toHaveBeenCalledWith(403); + + expect(res.status).toHaveBeenCalledWith(400); + expect(res.json).toHaveBeenCalledWith( + 
expect.objectContaining({ + error: "Duplicate GitHub webhook header", + header: "x-hub-signature-256", + }), + ); + expect(orchestrator.queueIncrementalReindex).not.toHaveBeenCalled(); + }); + + it("rejects duplicate event headers before routing", async () => { + const { req, res } = mockReqRes(makePushPayload(), { + "x-github-event": ["push", "pull_request"], + }); + + await handler(req, res); + + expect(res.status).toHaveBeenCalledWith(400); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: "Duplicate GitHub webhook header", + }), + ); + expect(orchestrator.queueIncrementalReindex).not.toHaveBeenCalled(); + }); + + it("rejects runtime duplicate event headers before routing", async () => { + const { req, res } = mockReqRes(makePushPayload(), { + "x-github-event": "push, pull_request", + }); + req.rawHeaders = [ + "X-Hub-Signature-256", + req.headers["x-hub-signature-256"], + "X-GitHub-Event", + "push", + "x-github-event", + "pull_request", + ]; + + await handler(req, res); + + expect(res.status).toHaveBeenCalledWith(400); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: "Duplicate GitHub webhook header", + header: "x-github-event", + }), + ); + expect(orchestrator.queueIncrementalReindex).not.toHaveBeenCalled(); }); }); @@ -237,6 +378,57 @@ describe("GitHub webhook handler", () => { expect.objectContaining({ error: "Malformed JSON payload" }), ); }); + + it("rejects signed push payloads missing required repository fields", async () => { + const { req, res } = mockReqRes({ + ref: "refs/heads/main", + after: "abc12345deadbeef", + repository: { + default_branch: "main", + full_name: "org/repo", + }, + commits: [], + }); + + const result = await handler(req, res); + + expect(result).toEqual(expect.objectContaining({ queuedReindex: false })); + expect(res.status).toHaveBeenCalledWith(400); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ error: "Malformed push payload" }), + ); + 
expect(mockRecordWebhookDelivery).toHaveBeenCalledWith( + expect.objectContaining({ + source: "github", + event_type: "push", + decision: "error", + reason: "malformed push payload", + }), + ); + expect(orchestrator.queueIncrementalReindex).not.toHaveBeenCalled(); + }); + + it("rejects signed push payloads with malformed commits", async () => { + const { req, res } = mockReqRes({ + ref: "refs/heads/main", + after: "abc12345deadbeef", + repository: { + clone_url: "https://github.com/org/repo.git", + default_branch: "main", + full_name: "org/repo", + }, + commits: [{ added: "docs/guide.md", modified: [], removed: [] }], + }); + + const result = await handler(req, res); + + expect(result).toEqual(expect.objectContaining({ queuedReindex: false })); + expect(res.status).toHaveBeenCalledWith(400); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ error: "Malformed push payload" }), + ); + expect(orchestrator.queueIncrementalReindex).not.toHaveBeenCalled(); + }); }); // -- Branch filtering ------------------------------------------------- @@ -320,9 +512,13 @@ describe("GitHub webhook handler", () => { describe("path_triggers filtering", () => { it("queues reindex when committed files match path triggers", async () => { const { req, res } = mockReqRes(makePushPayload()); - await handler(req, res); + const result = await handler(req, res); expect(res.status).toHaveBeenCalledWith(200); expect(res.json).toHaveBeenCalledWith({ queued: true }); + expect(result).toEqual({ + queuedReindex: true, + affectedSourceNames: ["docs-source"], + }); expect(orchestrator.queueIncrementalReindex).toHaveBeenCalledWith( "https://github.com/org/repo.git", ); @@ -339,7 +535,7 @@ describe("GitHub webhook handler", () => { ], }); const { req, res } = mockReqRes(payload); - await handler(req, res); + const result = await handler(req, res); expect(res.status).toHaveBeenCalledWith(200); expect(res.json).toHaveBeenCalledWith( expect.objectContaining({ @@ -347,7 +543,47 @@ 
describe("GitHub webhook handler", () => { reason: "no path triggers matched", }), ); + expect(result).toEqual({ + queuedReindex: false, + affectedSourceNames: [], + }); + expect(orchestrator.queueIncrementalReindex).not.toHaveBeenCalled(); + }); + + it("does not match path trigger prefixes across path segments", async () => { + mockGetServerConfig.mockReturnValue({ + webhook: { + repo_sources: { "org/repo": ["docs-source", "api-source"] }, + path_triggers: { + "docs-source": ["docs"], + "api-source": ["src/api"], + }, + }, + }); + const payload = makePushPayload({ + commits: [ + { + added: ["docs-old/file.md"], + modified: ["src/apiary.ts"], + removed: [], + }, + ], + }); + const { req, res } = mockReqRes(payload); + const result = await handler(req, res); + + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + ignored: true, + reason: "no path triggers matched", + }), + ); + expect(result).toEqual({ + queuedReindex: false, + affectedSourceNames: [], + }); expect(orchestrator.queueIncrementalReindex).not.toHaveBeenCalled(); + expect(orchestrator.queueSourceReindex).not.toHaveBeenCalled(); }); it("queues reindex when source has no path triggers (match all)", async () => { @@ -389,7 +625,7 @@ describe("GitHub webhook handler", () => { expect(res.json).toHaveBeenCalledWith({ queued: true }); }); - it("handles multiple sources for same repo, only one matching", async () => { + it("queues only the matching source when multiple path-filtered sources map to a repo", async () => { mockGetServerConfig.mockReturnValue({ webhook: { repo_sources: { "org/repo": ["src-source", "docs-source"] }, @@ -403,10 +639,17 @@ describe("GitHub webhook handler", () => { commits: [{ added: ["docs/new.md"], modified: [], removed: [] }], }); const { req, res } = mockReqRes(payload); - await handler(req, res); + const result = await handler(req, res); expect(res.json).toHaveBeenCalledWith({ queued: true }); - // Should only call once even though there are two sources - 
expect(orchestrator.queueIncrementalReindex).toHaveBeenCalledTimes(1); + expect(result).toEqual({ + queuedReindex: true, + affectedSourceNames: ["docs-source"], + }); + expect(orchestrator.queueIncrementalReindex).not.toHaveBeenCalled(); + expect(orchestrator.queueSourceReindex).toHaveBeenCalledTimes(1); + expect(orchestrator.queueSourceReindex).toHaveBeenCalledWith( + "docs-source", + ); }); }); diff --git a/src/__tests__/orchestrator-source-reindex.test.ts b/src/__tests__/orchestrator-source-reindex.test.ts index 44d1824..d17e5bb 100644 --- a/src/__tests__/orchestrator-source-reindex.test.ts +++ b/src/__tests__/orchestrator-source-reindex.test.ts @@ -33,6 +33,13 @@ vi.mock("../config.js", () => ({ file_patterns: ["**/*.md"], chunk: {}, }, + { + name: "atlas", + type: "atlas", + seed_path: ".pathfinder/atlas/seed", + cache_namespace: "company-knowledge", + chunk: {}, + }, ], tools: [ { @@ -66,6 +73,10 @@ vi.mock("../db/queries.js", () => ({ upsertIndexState: vi.fn().mockResolvedValue(undefined), })); +vi.mock("../db/atlas.js", () => ({ + markAtlasCachePagesStaleForSources: vi.fn().mockResolvedValue(0), +})); + vi.mock("../indexing/embeddings.js", () => { class MockEmbeddingProvider { embed = vi.fn().mockResolvedValue([0.1, 0.2]); @@ -103,6 +114,8 @@ vi.mock("../indexing/providers/index.js", () => ({ })); import { IndexingOrchestrator } from "../indexing/orchestrator.js"; +import { markAtlasCachePagesStaleForSources } from "../db/atlas.js"; +import { getProvider } from "../indexing/providers/index.js"; describe("IndexingOrchestrator.queueSourceReindex", () => { let orchestrator: IndexingOrchestrator; @@ -127,6 +140,76 @@ describe("IndexingOrchestrator.queueSourceReindex", () => { expect(completeSpy).toHaveBeenCalledWith(["slack-support"]); }); + it("marks related Atlas cache pages stale after a source reindex", async () => { + orchestrator.queueSourceReindex("slack-support"); + + for (let i = 0; i < 50; i++) { + await new Promise((resolve) => setTimeout(resolve, 
100)); + if (vi.mocked(markAtlasCachePagesStaleForSources).mock.calls.length > 0) { + break; + } + } + + expect(markAtlasCachePagesStaleForSources).toHaveBeenCalledWith( + ["slack-support"], + "source reindexed: slack-support", + ); + }); + + it("does not mark Atlas cache pages stale after an Atlas self-reindex", async () => { + const completeSpy = vi.fn(); + orchestrator.onReindexComplete = completeSpy; + + orchestrator.queueSourceReindex("atlas"); + + for (let i = 0; i < 50; i++) { + await new Promise((resolve) => setTimeout(resolve, 100)); + if (completeSpy.mock.calls.length > 0) break; + } + + expect(completeSpy).toHaveBeenCalledWith(["atlas"]); + expect(markAtlasCachePagesStaleForSources).not.toHaveBeenCalled(); + }); + + it("does not mark Atlas cache stale or complete reindex after failed indexing", async () => { + const completeSpy = vi.fn(); + orchestrator.onReindexComplete = completeSpy; + vi.mocked(getProvider).mockReturnValueOnce(() => ({ + fullAcquire: vi.fn().mockRejectedValue(new Error("provider failed")), + incrementalAcquire: vi + .fn() + .mockRejectedValue(new Error("provider failed")), + getCurrentStateToken: vi.fn().mockResolvedValue("test-token"), + })); + + orchestrator.queueSourceReindex("slack-support"); + + await new Promise((resolve) => setTimeout(resolve, 500)); + + expect(markAtlasCachePagesStaleForSources).not.toHaveBeenCalled(); + expect(completeSpy).not.toHaveBeenCalled(); + }); + + it("still completes the reindex when Atlas cache invalidation throws", async () => { + const completeSpy = vi.fn(); + orchestrator.onReindexComplete = completeSpy; + vi.mocked(markAtlasCachePagesStaleForSources).mockRejectedValueOnce( + new Error("transient DB error"), + ); + + orchestrator.queueSourceReindex("slack-support"); + + for (let i = 0; i < 50; i++) { + await new Promise((resolve) => setTimeout(resolve, 100)); + if (completeSpy.mock.calls.length > 0) break; + } + + // The Atlas cache invalidation failed, but the reindex itself succeeded, + // so 
onReindexComplete must still fire for the affected source. + expect(markAtlasCachePagesStaleForSources).toHaveBeenCalled(); + expect(completeSpy).toHaveBeenCalledWith(["slack-support"]); + }); + it("skips unknown source names gracefully", async () => { const completeSpy = vi.fn(); orchestrator.onReindexComplete = completeSpy; diff --git a/src/__tests__/tool-config.test.ts b/src/__tests__/tool-config.test.ts index c4404fd..b96824f 100644 --- a/src/__tests__/tool-config.test.ts +++ b/src/__tests__/tool-config.test.ts @@ -317,6 +317,28 @@ describe("ServerConfigSchema", () => { expect(result.success).toBe(true); }); + it("rejects search tool referencing undefined source", () => { + const config = { + ...minimalConfig, + tools: [ + { + name: "search-docs", + type: "search", + description: "Search", + source: "missing-docs", + default_limit: 5, + max_limit: 20, + result_format: "docs", + }, + ], + }; + const result = ServerConfigSchema.safeParse(config); + expect(result.success).toBe(false); + expect(result.error?.issues[0]?.message).toContain( + 'Tool "search-docs" references source "missing-docs"', + ); + }); + it("rejects bash tool referencing undefined source", () => { const config = { ...minimalConfig, diff --git a/src/__tests__/webhook-404-vs-503.test.ts b/src/__tests__/webhook-404-vs-503.test.ts index 5cfde85..29f9e23 100644 --- a/src/__tests__/webhook-404-vs-503.test.ts +++ b/src/__tests__/webhook-404-vs-503.test.ts @@ -66,7 +66,10 @@ describe("classifyWebhookUnavailable (R3 #2)", () => { const { classifyWebhookUnavailable } = await import("../server.js"); mockGetServerConfig.mockReturnValue({ server: { name: "t", version: "0.0.0" }, - sources: [{ name: "repo", type: "github" }], + sources: [{ name: "repo", type: "markdown" }], + webhook: { + repo_sources: { "org/repo": ["repo"] }, + }, tools: [], }); const r = classifyWebhookUnavailable({ sourceType: "github" }); diff --git a/src/atlas-cli.ts b/src/atlas-cli.ts new file mode 100644 index 0000000..0a0b902 --- 
/dev/null +++ b/src/atlas-cli.ts @@ -0,0 +1,522 @@ +#!/usr/bin/env node +import { Command, CommanderError } from "commander"; +import fs from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +const DEFAULT_TOOL = "atlas-search"; +const DEFAULT_FEEDBACK_TOOL = "submit-feedback"; +const FEEDBACK_RATINGS = ["helpful", "not_helpful"] as const; +const DEFAULT_MCP_URL = "https://mcp.pathfinder.copilotkit.dev/mcp"; +const INTEGER_PATTERN = /^[1-9]\d*$/; +const NUMBER_PATTERN = /^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$/; + +type WriteFn = (text: string) => void; + +interface AtlasCliIo { + stdout?: WriteFn; + stderr?: WriteFn; +} + +interface SearchOptions { + json?: boolean; + limit?: string; + minScore?: string; + token?: string; + tool?: string; + url?: string; +} + +interface FeedbackOptions { + comment?: string; + for?: string; + json?: boolean; + rating?: string; + token?: string; + tool?: string; + url?: string; +} + +interface JsonRpcMessage { + jsonrpc?: string; + // JSON-RPC permits numeric, string, or null ids; a proxy may echo "1" for 1, + // and error frames may legitimately carry a null id. Match by coerced id and + // never let a strict numeric type discard a real response frame. + id?: number | string | null; + result?: unknown; + error?: { + message?: string; + }; +} + +interface McpPostResult { + messages: JsonRpcMessage[]; + sessionId: string | null; +} + +function parseSseMessages(text: string): JsonRpcMessage[] { + const messages: JsonRpcMessage[] = []; + let dataLines: string[] = []; + + const flushEvent = () => { + if (dataLines.length === 0) { + return; + } + + const data = dataLines.join("\n"); + dataLines = []; + + // Skip keepalive / empty `data:` frames: an empty value yields `""`, + // which would otherwise crash on JSON.parse(""). 
+ if (data.trim() === "") { + return; + } + + try { + messages.push(JSON.parse(data) as JsonRpcMessage); + } catch { + // Skip unparseable keepalive frames rather than crashing the search. + } + }; + + for (const rawLine of text.split(/\r?\n/)) { + if (rawLine === "") { + flushEvent(); + continue; + } + + if (!rawLine.startsWith("data:")) { + continue; + } + + const data = rawLine.slice(5); + dataLines.push(data.startsWith(" ") ? data.slice(1) : data); + } + + flushEvent(); + + return messages; +} + +async function mcpPost( + url: string, + body: unknown, + options: { + onSessionId?: (sessionId: string) => void; + sessionId?: string; + token?: string; + } = {}, +): Promise { + const headers: Record = { + "Content-Type": "application/json", + Accept: "application/json, text/event-stream", + }; + + if (options.sessionId) { + headers["Mcp-Session-Id"] = options.sessionId; + } + + if (options.token) { + headers.Authorization = `Bearer ${options.token}`; + } + + const response = await fetch(url, { + method: "POST", + headers, + body: JSON.stringify(body), + }); + + const nextSessionId = + response.headers.get("mcp-session-id") ?? options.sessionId ?? 
null; + if (nextSessionId) { + options.onSessionId?.(nextSessionId); + } + + if (response.status === 202) { + return { messages: [], sessionId: nextSessionId }; + } + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${await response.text()}`); + } + + const text = await response.text(); + const messages = parseSseMessages(text); + + if (messages.length > 0) { + return { messages, sessionId: nextSessionId }; + } + + if (!text.trim()) { + return { messages: [], sessionId: nextSessionId }; + } + + try { + return { + messages: [JSON.parse(text) as JsonRpcMessage], + sessionId: nextSessionId, + }; + } catch { + throw new Error(`Unparseable response: ${text.slice(0, 200)}`); + } +} + +async function closeMcpSession( + url: string, + options: { + sessionId?: string; + token?: string; + }, +): Promise { + if (!options.sessionId) { + return; + } + + const headers: Record = { + "Mcp-Session-Id": options.sessionId, + }; + + if (options.token) { + headers.Authorization = `Bearer ${options.token}`; + } + + try { + await fetch(url, { + method: "DELETE", + headers, + }); + } catch { + // Best-effort cleanup must not mask the search result or original failure. 
+ } +} + +function buildToolArguments( + query: string, + options: SearchOptions, +): Record { + const args: Record = { query }; + + if (options.limit !== undefined) { + if (!INTEGER_PATTERN.test(options.limit)) { + throw new Error("limit must be a positive integer"); + } + + const limit = Number(options.limit); + if (!Number.isSafeInteger(limit)) { + throw new Error("limit must be a positive integer"); + } + args.limit = limit; + } + + if (options.minScore !== undefined) { + if (!NUMBER_PATTERN.test(options.minScore)) { + throw new Error("min-score must be a finite number in [0, 1]"); + } + + const minScore = Number(options.minScore); + if (!Number.isFinite(minScore) || minScore < 0 || minScore > 1) { + throw new Error("min-score must be a finite number in [0, 1]"); + } + args.min_score = minScore; + } + + return args; +} + +export function buildFeedbackArguments( + query: string, + options: FeedbackOptions, +): Record { + const rating = options.rating; + if ( + rating === undefined || + !(FEEDBACK_RATINGS as readonly string[]).includes(rating) + ) { + throw new Error(`rating must be one of: ${FEEDBACK_RATINGS.join(", ")}`); + } + + const comment = options.comment; + if (comment === undefined || comment.trim() === "") { + throw new Error("comment must not be empty"); + } + + // --for always resolves via its Commander default, so a missing value here + // means the default was dropped — fail loud rather than sending an undefined + // tool_name (symmetric with the --tool guard in feedback()/search()). + if (options.for === undefined) { + throw new Error("atlas: --for is required"); + } + + return { + tool_name: options.for, + query, + rating, + comment, + }; +} + +function printToolText(message: JsonRpcMessage, write: WriteFn): void { + const result = message.result as + | { content?: Array<{ type?: string; text?: string }> } + | undefined; + const content = Array.isArray(result?.content) ? 
result.content : []; + const textItems = content + .map((item) => item.text) + .filter((text): text is string => typeof text === "string"); + + if (textItems.length === 0) { + write("No results.\n"); + return; + } + + for (const text of textItems) { + write(`${text}\n`); + } +} + +interface CallToolOptions { + json?: boolean; + token?: string; + url?: string; +} + +async function callTool( + toolName: string, + toolArguments: Record, + options: CallToolOptions, + write: WriteFn, +): Promise { + const url = options.url ?? process.env.ATLAS_MCP_URL ?? DEFAULT_MCP_URL; + const token = options.token ?? process.env.ATLAS_TOKEN; + let sessionId: string | undefined; + const recordSessionId = (nextSessionId: string) => { + sessionId = nextSessionId; + }; + + try { + const init = await mcpPost( + url, + { + jsonrpc: "2.0", + method: "initialize", + id: 0, + params: { + protocolVersion: "2025-03-26", + capabilities: {}, + clientInfo: { name: "atlas", version: "1.0.0" }, + }, + }, + { onSessionId: recordSessionId, token }, + ); + + sessionId = init.sessionId ?? undefined; + const initError = init.messages.find((item) => item.error)?.error; + if (initError) { + throw new Error(initError.message ?? "MCP initialize failed"); + } + + await mcpPost( + url, + { jsonrpc: "2.0", method: "notifications/initialized" }, + { onSessionId: recordSessionId, sessionId, token }, + ); + + const toolsCallRequestId = 1; + const response = await mcpPost( + url, + { + jsonrpc: "2.0", + method: "tools/call", + id: toolsCallRequestId, + params: { + name: toolName, + arguments: toolArguments, + }, + }, + { onSessionId: recordSessionId, sessionId, token }, + ); + + // Select the response frame with an exhaustive three-tier fallback: + // 1. the frame whose coerced id matches the request (a proxy echoing "1" + // for 1 still matches) and which actually carries a result or error; + // 2. 
else an error frame whose id is null/omitted, surfacing a real + // server error that legitimately dropped its id; + // 3. else a result frame whose id is null/omitted, tolerating an omitted + // id on a sole result frame. + // Tiers 2 and 3 only consider id-less frames so a frame bearing a clearly + // different explicit id is never surfaced for this request. + // Only when none of these exist do we declare "no response". + const message = + response.messages.find( + (item) => + String(item.id) === String(toolsCallRequestId) && + ("result" in item || "error" in item), + ) ?? + response.messages.find((item) => "error" in item && item.id == null) ?? + response.messages.find((item) => "result" in item && item.id == null); + if (!message) { + throw new Error("atlas: no response from server for tools/call"); + } + + if (message.error) { + throw new Error(message.error.message ?? "MCP tool call failed"); + } + + const toolResult = message.result as + | { isError?: boolean; content?: Array<{ type?: string; text?: string }> } + | null + | undefined; + if (toolResult?.isError === true) { + const errorContent = Array.isArray(toolResult.content) + ? toolResult.content + : []; + const errorText = errorContent + .map((item) => item.text) + .filter((text): text is string => typeof text === "string") + .join("\n"); + throw new Error(errorText || "MCP tool call reported an error"); + } + + if (options.json) { + write(`${JSON.stringify(message, null, 2)}\n`); + return; + } + + printToolText(message, write); + } finally { + await closeMcpSession(url, { sessionId, token }); + } +} + +async function search( + query: string, + options: SearchOptions, + write: WriteFn, +): Promise { + // --tool always resolves via its Commander default, so a missing value here + // means the default was dropped — fail loud rather than silently re-default. 
+ if (options.tool === undefined) { + throw new Error("atlas: --tool is required"); + } + const toolArguments = buildToolArguments(query, options); + + await callTool(options.tool, toolArguments, options, write); +} + +async function feedback( + query: string, + options: FeedbackOptions, + write: WriteFn, +): Promise { + // --tool always resolves via its Commander default, so a missing value here + // means the default was dropped — fail loud rather than silently re-default. + if (options.tool === undefined) { + throw new Error("atlas: --tool is required"); + } + const toolArguments = buildFeedbackArguments(query, options); + + await callTool(options.tool, toolArguments, options, write); +} + +export async function runAtlasCli( + argv: string[] = process.argv.slice(2), + io: AtlasCliIo = {}, +): Promise { + const writeOut = io.stdout ?? ((text: string) => process.stdout.write(text)); + const writeErr = io.stderr ?? ((text: string) => process.stderr.write(text)); + + const program = new Command(); + program + .name("atlas") + .description("Agent-facing Atlas search over Pathfinder MCP") + .exitOverride() + .configureOutput({ + writeOut, + writeErr, + outputError: (text, write) => write(text), + }); + + program + .command("search") + .description("Search Atlas knowledge through a Pathfinder MCP endpoint") + .argument("", "Search query") + .option("--url ", "Pathfinder MCP URL") + .option("--token ", "Bearer token for the MCP endpoint") + .option("--tool ", "MCP tool name", DEFAULT_TOOL) + .option("--limit ", "Maximum number of results") + .option("--min-score ", "Minimum search score") + .option("--json", "Print the raw MCP JSON-RPC response") + .action(async (query: string, options: SearchOptions) => { + await search(query, options, writeOut); + }); + + program + .command("feedback") + .description( + "Submit Atlas retrieval feedback through a Pathfinder MCP endpoint", + ) + .argument("", "The query the feedback is about") + .requiredOption( + "--rating ", + 
"Feedback rating (helpful or not_helpful)", + ) + .requiredOption("--comment ", "Free-form feedback comment") + .option("--for ", "Tool the feedback is about", DEFAULT_TOOL) + .option("--url ", "Pathfinder MCP URL") + .option("--token ", "Bearer token for the MCP endpoint") + .option("--tool ", "MCP tool name", DEFAULT_FEEDBACK_TOOL) + .option("--json", "Print the raw MCP JSON-RPC response") + .action(async (query: string, options: FeedbackOptions) => { + await feedback(query, options, writeOut); + }); + + try { + await program.parseAsync(argv, { from: "user" }); + return 0; + } catch (error) { + if (error instanceof CommanderError) { + return error.exitCode; + } + + const message = error instanceof Error ? error.message : String(error); + writeErr(`error: ${message}\n`); + return 1; + } +} + +export function isAtlasCliEntrypoint( + moduleUrl: string, + argvPath: string | undefined, +): boolean { + if (!argvPath) { + return false; + } + + return ( + resolveEntrypointPath(fileURLToPath(moduleUrl)) === + resolveEntrypointPath(argvPath) + ); +} + +function resolveEntrypointPath(candidatePath: string): string { + const normalizedPath = path.resolve(candidatePath); + + try { + return fs.realpathSync(normalizedPath); + } catch { + return normalizedPath; + } +} + +if (isAtlasCliEntrypoint(import.meta.url, process.argv[1])) { + runAtlasCli() + .then((exitCode) => { + process.exitCode = exitCode; + }) + .catch((error) => { + process.stderr.write( + `error: ${error instanceof Error ? 
error.message : String(error)}\n`, + ); + process.exitCode = 1; + }); +} diff --git a/src/db/analytics.ts b/src/db/analytics.ts index 852760f..26560ad 100644 --- a/src/db/analytics.ts +++ b/src/db/analytics.ts @@ -71,6 +71,14 @@ export interface AnalyticsSummary { earliest_query_day: string | null; } +export interface AtlasRetrievalMetrics { + atlas_queries_window: number; + atlas_successful_queries_window: number; + atlas_empty_queries_window: number; + atlas_retrieval_rate_window: number; + total_user_queries_window: number; +} + export interface TopQuery { query_text: string; tool_name: string; @@ -95,6 +103,12 @@ export interface ToolCount { export interface AnalyticsFilter { tool_type?: string; source?: string; + /** + * Service-originated rows use the existing session_id column with a + * `service:` prefix. Analytics views exclude them by default so Atlas + * gardening/probe traffic does not inflate human/agent usage metrics. + */ + include_service_traffic?: boolean; /** * Optional inclusive date range. When both `from` and `to` are set the * underlying queries filter on `created_at >= from AND created_at <= to` @@ -189,16 +203,23 @@ function buildFilterClauses( if (filter.tool_type) { // Escape LIKE metacharacters in user input; declare the escape character // explicitly so `%` and `_` in the input match literally rather than as - // wildcards. - clauses.push(`tool_name LIKE $${idx} || '-%' ESCAPE '|'`); - params.push(escapeLikePattern(filter.tool_type)); - idx++; + // wildcards. Exact tool names also count as their own tool type: + // `tool_name = 'atlas'` and `tool_name = 'atlas-search'` should both + // match `tool_type=atlas`, mirroring getToolCounts' split_part grouping. 
+ clauses.push( + `(tool_name = $${idx} OR tool_name LIKE $${idx + 1} || '-%' ESCAPE '|')`, + ); + params.push(filter.tool_type, escapeLikePattern(filter.tool_type)); + idx += 2; } if (filter.source) { clauses.push(`source_name = $${idx}`); params.push(filter.source); idx++; } + if (!filter.include_service_traffic) { + clauses.push(`(session_id IS NULL OR session_id NOT LIKE 'service:%')`); + } return { clauses, params, nextIdx: idx }; } @@ -764,6 +785,79 @@ export async function getToolCounts( })); } +/** + * Get Atlas-specific retrieval health for the current window. + * + * Atlas traffic is identified through the existing analytics surface: + * an Atlas source is named `atlas` or `atlas-*`/`atlas:*`, and Atlas tools + * use `atlas`/`atlas-*`/`*-atlas` names. Ordinary search traffic contributes + * only to `total_user_queries_window`, never to the Atlas retrieval-rate + * denominator. + */ +export async function getAtlasRetrievalMetrics( + days: number = 7, + filter: AnalyticsFilter = {}, +): Promise { + const pool = getPool(); + + const { clauses: fc, params: fp, nextIdx } = buildFilterClauses(filter); + const dw = buildDateWindow(filter, days, nextIdx); + const redactedIdx = dw.nextIdx; + const baseClauses = [ + ...dw.clauses, + "latency_ms >= 0", + `query_text != $${redactedIdx}`, + ]; + const params = [...fp, ...dw.params, REDACTED_QUERY_TEXT]; + const userWhere = whereAnd(baseClauses, fc); + const atlasWhere = whereAnd( + [ + ...baseClauses, + `( + source_name = 'atlas' + OR source_name LIKE 'atlas-%' + OR source_name LIKE 'atlas:%' + OR tool_name = 'atlas' + OR tool_name LIKE 'atlas-%' + OR tool_name LIKE '%-atlas' + )`, + ], + fc, + ); + + const [atlasRes, totalUserRes] = await Promise.all([ + pool.query( + `SELECT + count(*)::int AS total, + count(*) FILTER (WHERE result_count > 0)::int AS successful, + count(*) FILTER (WHERE result_count = 0)::int AS empty + FROM query_log + ${atlasWhere}`, + params, + ), + pool.query( + `SELECT count(*)::int AS total + 
FROM query_log + ${userWhere}`, + params, + ), + ]); + + const atlas = atlasRes.rows[0] ?? {}; + const atlasTotal = (atlas.total as number | undefined) ?? 0; + const successful = (atlas.successful as number | undefined) ?? 0; + const empty = (atlas.empty as number | undefined) ?? 0; + const totalUser = (totalUserRes.rows[0]?.total as number | undefined) ?? 0; + + return { + atlas_queries_window: atlasTotal, + atlas_successful_queries_window: successful, + atlas_empty_queries_window: empty, + atlas_retrieval_rate_window: atlasTotal > 0 ? successful / atlasTotal : 0, + total_user_queries_window: totalUser, + }; +} + // --------------------------------------------------------------------------- // Cleanup // --------------------------------------------------------------------------- diff --git a/src/db/atlas.ts b/src/db/atlas.ts new file mode 100644 index 0000000..55ee219 --- /dev/null +++ b/src/db/atlas.ts @@ -0,0 +1,772 @@ +import { getPool } from "./client.js"; + +const CACHE_CONTENT_KEY = "__atlas_content"; + +export type AtlasSeedStatus = "pending" | "approved" | "rejected"; + +export interface AtlasSeedEntry { + id: number; + canonicalKey: string; + sourceName: string; + repoUrl: string | null; + ref: string | null; + subsystem: string | null; + status: AtlasSeedStatus; + title: string; + content: string; + provenance: Record; + evidence: unknown[]; + approvedBy: string | null; + approvedAt: Date | null; + rejectedBy: string | null; + rejectedAt: Date | null; + rejectionReason: string | null; + createdAt: Date; + updatedAt: Date; +} + +export interface UpsertAtlasSeedCandidateInput { + canonicalKey: string; + sourceName: string; + repoUrl?: string | null; + ref?: string | null; + subsystem?: string | null; + title: string; + content: string; + provenance: Record; + evidence: unknown[]; +} + +export interface AtlasCachePage { + id: number; + pageKey: string; + sourceName: string; + title: string; + content: string; + contentHash: string; + stale: boolean; + 
staleReason: string | null; + generatedSeedIds: number[]; + provenance: Record; + generatedAt: Date | null; + errorAt: Date | null; + errorMessage: string | null; + createdAt: Date; + updatedAt: Date; +} + +export interface UpsertAtlasCachePageInput { + pageKey: string; + sourceName: string; + title: string; + content: string; + contentHash: string; + generatedSeedIds?: number[]; + provenance?: Record; + generatedAt?: Date | null; +} + +export interface ClearAtlasCachePageStaleInput { + pageKey: string; + content: string; + contentHash: string; + generatedSeedIds?: number[]; + provenance?: Record; + generatedAt?: Date | null; +} + +export interface AtlasRepositoryFilter { + repoUrl: string; + refs?: string[]; + subsystems?: string[]; +} + +export interface AtlasContentQuery { + changedAfter?: Date; + changedOnOrBefore?: Date; + repositories?: AtlasRepositoryFilter[]; +} + +export type AtlasIndexableContent = + | { + kind: "seed"; + key: string; + sourceName: string; + title: string; + content: string; + updatedAt: Date; + seed: AtlasSeedEntry; + } + | { + kind: "cache"; + key: string; + sourceName: string; + title: string; + content: string; + updatedAt: Date; + cachePage: AtlasCachePage; + }; + +function parseJsonObject(value: unknown): Record { + if (value == null) return {}; + if (typeof value === "string") { + return JSON.parse(value) as Record; + } + return value as Record; +} + +function parseJsonArray(value: unknown): unknown[] { + if (value == null) return []; + if (typeof value === "string") { + return JSON.parse(value) as unknown[]; + } + return value as unknown[]; +} + +function parseNumberArray(value: unknown): number[] { + return parseJsonArray(value).filter( + (item): item is number => typeof item === "number", + ); +} + +function toDate(value: unknown): Date | null { + if (value == null) return null; + if (value instanceof Date) return value; + return new Date(value as string); +} + +function mapSeedRow(row: Record): AtlasSeedEntry { + return { + id: 
Number(row.id), + canonicalKey: row.canonical_key as string, + sourceName: row.source_name as string, + repoUrl: (row.repo_url as string | null) ?? null, + ref: (row.ref as string | null) ?? null, + subsystem: (row.subsystem as string | null) ?? null, + status: row.status as AtlasSeedStatus, + title: row.title as string, + content: row.content as string, + provenance: parseJsonObject(row.provenance), + evidence: parseJsonArray(row.evidence), + approvedBy: (row.approved_by as string | null) ?? null, + approvedAt: toDate(row.approved_at), + rejectedBy: (row.rejected_by as string | null) ?? null, + rejectedAt: toDate(row.rejected_at), + rejectionReason: (row.rejection_reason as string | null) ?? null, + createdAt: toDate(row.created_at) ?? new Date(0), + updatedAt: toDate(row.updated_at) ?? new Date(0), + }; +} + +function mapCacheRow(row: Record): AtlasCachePage { + const rawProvenance = parseJsonObject(row.provenance); + const { [CACHE_CONTENT_KEY]: contentValue, ...provenance } = rawProvenance; + return { + id: Number(row.id), + pageKey: row.page_key as string, + sourceName: row.source_name as string, + title: row.title as string, + content: typeof contentValue === "string" ? contentValue : "", + contentHash: row.content_hash as string, + stale: Boolean(row.stale), + staleReason: (row.stale_reason as string | null) ?? null, + generatedSeedIds: parseNumberArray(row.generated_seed_ids), + provenance, + generatedAt: toDate(row.generated_at), + errorAt: toDate(row.error_at), + errorMessage: (row.error_message as string | null) ?? null, + createdAt: toDate(row.created_at) ?? new Date(0), + updatedAt: toDate(row.updated_at) ?? 
new Date(0), + }; +} + +function cacheProvenance( + content: string, + provenance: Record = {}, +): Record { + return { + ...provenance, + [CACHE_CONTENT_KEY]: content, + }; +} + +function addUpdatedAtClauses( + alias: string, + query: Pick, + params: unknown[], +): string[] { + const clauses: string[] = []; + if (query.changedAfter) { + params.push(query.changedAfter); + clauses.push(`${alias}.updated_at > $${params.length}`); + } + if (query.changedOnOrBefore) { + params.push(query.changedOnOrBefore); + clauses.push(`${alias}.updated_at <= $${params.length}`); + } + return clauses; +} + +function addSeedRepositoryClause( + alias: string, + repositories: AtlasRepositoryFilter[] | undefined, + params: unknown[], +): string | null { + if (!repositories || repositories.length === 0) return null; + + const repositoryClauses = repositories.map((repository) => { + params.push(repository.repoUrl); + const clauses = [`${alias}.repo_url = $${params.length}`]; + if (repository.refs && repository.refs.length > 0) { + params.push(repository.refs); + clauses.push(`${alias}.ref = ANY($${params.length}::text[])`); + } + if (repository.subsystems && repository.subsystems.length > 0) { + params.push(repository.subsystems); + clauses.push(`${alias}.subsystem = ANY($${params.length}::text[])`); + } + return `(${clauses.join(" AND ")})`; + }); + + return `(${repositoryClauses.join(" OR ")})`; +} + +function addCacheRepositoryClause( + repositories: AtlasRepositoryFilter[] | undefined, + params: unknown[], +): string | null { + const seedRepositoryClause = addSeedRepositoryClause( + "seed", + repositories, + params, + ); + if (!seedRepositoryClause) return null; + return ` + EXISTS ( + SELECT 1 + FROM atlas_seed_entries seed + JOIN jsonb_array_elements_text(cache.generated_seed_ids) generated(seed_id) + ON generated.seed_id::integer = seed.id + WHERE ${seedRepositoryClause} + ) + `; +} + +export async function upsertAtlasSeedCandidate( + input: UpsertAtlasSeedCandidateInput, +): 
Promise { + const pool = getPool(); + const { rows } = await pool.query( + ` + INSERT INTO atlas_seed_entries ( + canonical_key, + source_name, + repo_url, + ref, + subsystem, + title, + content, + provenance, + evidence + ) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8::jsonb, $9::jsonb) + ON CONFLICT (canonical_key) DO UPDATE SET + source_name = CASE + WHEN atlas_seed_entries.status = 'pending' THEN EXCLUDED.source_name + ELSE atlas_seed_entries.source_name + END, + repo_url = CASE + WHEN atlas_seed_entries.status = 'pending' THEN EXCLUDED.repo_url + ELSE atlas_seed_entries.repo_url + END, + ref = CASE + WHEN atlas_seed_entries.status = 'pending' THEN EXCLUDED.ref + ELSE atlas_seed_entries.ref + END, + subsystem = CASE + WHEN atlas_seed_entries.status = 'pending' THEN EXCLUDED.subsystem + ELSE atlas_seed_entries.subsystem + END, + title = CASE + WHEN atlas_seed_entries.status = 'pending' THEN EXCLUDED.title + ELSE atlas_seed_entries.title + END, + content = CASE + WHEN atlas_seed_entries.status = 'pending' THEN EXCLUDED.content + ELSE atlas_seed_entries.content + END, + provenance = CASE + WHEN atlas_seed_entries.status = 'pending' THEN EXCLUDED.provenance + ELSE atlas_seed_entries.provenance + END, + evidence = CASE + WHEN atlas_seed_entries.status = 'pending' THEN EXCLUDED.evidence + ELSE atlas_seed_entries.evidence + END, + updated_at = CASE + WHEN atlas_seed_entries.status = 'pending' THEN NOW() + ELSE atlas_seed_entries.updated_at + END + RETURNING * + `, + [ + input.canonicalKey, + input.sourceName, + input.repoUrl ?? null, + input.ref ?? null, + input.subsystem ?? 
null, + input.title, + input.content, + JSON.stringify(input.provenance), + JSON.stringify(input.evidence), + ], + ); + return mapSeedRow(rows[0] as Record); +} + +export async function approveAtlasSeedEntry( + canonicalKey: string, + actor: string, +): Promise { + const pool = getPool(); + const { rows } = await pool.query( + ` + UPDATE atlas_seed_entries + SET + status = 'approved', + approved_by = $2, + approved_at = NOW(), + rejected_by = NULL, + rejected_at = NULL, + rejection_reason = NULL, + updated_at = NOW() + WHERE canonical_key = $1 AND status = 'pending' + RETURNING * + `, + [canonicalKey, actor], + ); + if (rows[0]) return mapSeedRow(rows[0] as Record); + throw new Error( + `Cannot approve atlas seed entry "${canonicalKey}" because it is missing or not pending`, + ); +} + +export async function rejectAtlasSeedEntry( + canonicalKey: string, + actor: string, + reason: string, +): Promise { + const pool = getPool(); + const { rows } = await pool.query( + ` + UPDATE atlas_seed_entries + SET + status = 'rejected', + rejected_by = $2, + rejected_at = NOW(), + rejection_reason = $3, + updated_at = NOW() + WHERE canonical_key = $1 AND status = 'pending' + RETURNING * + `, + [canonicalKey, actor, reason], + ); + if (rows[0]) return mapSeedRow(rows[0] as Record); + throw new Error( + `Cannot reject atlas seed entry "${canonicalKey}" because it is missing or not pending`, + ); +} + +export async function listPendingAtlasSeedCandidates(filter?: { + sourceName?: string; +}): Promise { + const pool = getPool(); + const params: unknown[] = []; + const clauses = ["status = 'pending'"]; + if (filter?.sourceName) { + params.push(filter.sourceName); + clauses.push(`source_name = $${params.length}`); + } + const { rows } = await pool.query( + ` + SELECT * + FROM atlas_seed_entries + WHERE ${clauses.join(" AND ")} + ORDER BY created_at ASC, id ASC + `, + params, + ); + return rows.map((row) => mapSeedRow(row as Record)); +} + +export async function upsertAtlasCachePage( + 
input: UpsertAtlasCachePageInput, +): Promise { + const pool = getPool(); + const { rows } = await pool.query( + ` + INSERT INTO atlas_cache_pages ( + page_key, + source_name, + title, + content_hash, + stale, + stale_reason, + generated_seed_ids, + provenance, + generated_at, + error_at, + error_message + ) + VALUES ($1, $2, $3, $4, FALSE, NULL, $5::jsonb, $6::jsonb, $7, NULL, NULL) + ON CONFLICT (page_key) DO UPDATE SET + source_name = EXCLUDED.source_name, + title = EXCLUDED.title, + content_hash = EXCLUDED.content_hash, + stale = FALSE, + stale_reason = NULL, + generated_seed_ids = EXCLUDED.generated_seed_ids, + provenance = EXCLUDED.provenance, + generated_at = EXCLUDED.generated_at, + error_at = NULL, + error_message = NULL, + updated_at = NOW() + RETURNING * + `, + [ + input.pageKey, + input.sourceName, + input.title, + input.contentHash, + JSON.stringify(input.generatedSeedIds ?? []), + JSON.stringify(cacheProvenance(input.content, input.provenance)), + input.generatedAt ?? new Date(), + ], + ); + return mapCacheRow(rows[0] as Record); +} + +export async function markAtlasCachePageStale( + pageKey: string, + reason: string, +): Promise { + const pool = getPool(); + const { rows } = await pool.query( + ` + UPDATE atlas_cache_pages + SET stale = TRUE, stale_reason = $2, updated_at = NOW() + WHERE page_key = $1 + RETURNING * + `, + [pageKey, reason], + ); + if (rows[0]) return mapCacheRow(rows[0] as Record); + throw new Error(`Atlas cache page "${pageKey}" not found`); +} + +export async function markAtlasCachePagesStaleForSources( + sourceNames: string[], + reason: string, +): Promise { + if (sourceNames.length === 0) return 0; + const pool = getPool(); + const { rows } = await pool.query( + ` + UPDATE atlas_cache_pages cache + SET stale = TRUE, stale_reason = $2, updated_at = NOW() + WHERE + cache.source_name = ANY($1::text[]) + OR EXISTS ( + SELECT 1 + FROM atlas_seed_entries seed + JOIN jsonb_array_elements_text(cache.generated_seed_ids) generated(seed_id) 
+ ON generated.seed_id::integer = seed.id + WHERE seed.source_name = ANY($1::text[]) + ) + RETURNING id + `, + [sourceNames, reason], + ); + return rows.length; +} + +export async function clearAtlasCachePageStale( + input: ClearAtlasCachePageStaleInput, +): Promise { + const pool = getPool(); + const { rows } = await pool.query( + ` + UPDATE atlas_cache_pages + SET + content_hash = $2, + stale = FALSE, + stale_reason = NULL, + generated_seed_ids = COALESCE($3::jsonb, generated_seed_ids), + provenance = provenance || $4::jsonb, + generated_at = $5, + error_at = NULL, + error_message = NULL, + updated_at = NOW() + WHERE page_key = $1 + RETURNING * + `, + [ + input.pageKey, + input.contentHash, + input.generatedSeedIds ? JSON.stringify(input.generatedSeedIds) : null, + JSON.stringify(cacheProvenance(input.content, input.provenance)), + input.generatedAt ?? new Date(), + ], + ); + if (rows[0]) return mapCacheRow(rows[0] as Record); + throw new Error(`Atlas cache page "${input.pageKey}" not found`); +} + +export async function recordAtlasCachePageGenerationError( + pageKey: string, + errorMessage: string, +): Promise { + const pool = getPool(); + const { rows } = await pool.query( + ` + UPDATE atlas_cache_pages + SET + stale = TRUE, + stale_reason = COALESCE(stale_reason, 'generation failed'), + error_at = NOW(), + error_message = $2, + updated_at = NOW() + WHERE page_key = $1 + RETURNING * + `, + [pageKey, errorMessage], + ); + if (rows[0]) return mapCacheRow(rows[0] as Record); + throw new Error(`Atlas cache page "${pageKey}" not found`); +} + +export async function listStaleAtlasCachePages(filter?: { + sourceName?: string; +}): Promise { + const pool = getPool(); + const params: unknown[] = []; + const clauses = ["stale = TRUE"]; + if (filter?.sourceName) { + params.push(filter.sourceName); + clauses.push(`source_name = $${params.length}`); + } + const { rows } = await pool.query( + ` + SELECT * + FROM atlas_cache_pages + WHERE ${clauses.join(" AND ")} + ORDER BY 
updated_at ASC, id ASC + `, + params, + ); + return rows.map((row) => mapCacheRow(row as Record)); +} + +export async function listIndexableAtlasContent( + sourceName: string, + query: AtlasContentQuery = {}, +): Promise { + const pool = getPool(); + const seedParams: unknown[] = [sourceName]; + const seedClauses = [ + "seed.source_name = $1", + "seed.status = 'approved'", + ...addUpdatedAtClauses("seed", query, seedParams), + ]; + const seedRepositoryClause = addSeedRepositoryClause( + "seed", + query.repositories, + seedParams, + ); + if (seedRepositoryClause) seedClauses.push(seedRepositoryClause); + + const cacheParams: unknown[] = [sourceName]; + const cacheClauses = [ + "cache.source_name = $1", + "cache.stale = FALSE", + ...addUpdatedAtClauses("cache", query, cacheParams), + ]; + const cacheRepositoryClause = addCacheRepositoryClause( + query.repositories, + cacheParams, + ); + if (cacheRepositoryClause) cacheClauses.push(cacheRepositoryClause); + + const [seedResult, cacheResult] = await Promise.all([ + pool.query( + ` + SELECT seed.* + FROM atlas_seed_entries seed + WHERE ${seedClauses.join(" AND ")} + ORDER BY seed.updated_at ASC, seed.id ASC + `, + seedParams, + ), + pool.query( + ` + SELECT cache.* + FROM atlas_cache_pages cache + WHERE ${cacheClauses.join(" AND ")} + ORDER BY cache.updated_at ASC, cache.id ASC + `, + cacheParams, + ), + ]); + + const seeds = seedResult.rows.map((row) => { + const seed = mapSeedRow(row as Record); + return { + kind: "seed" as const, + key: seed.canonicalKey, + sourceName: seed.sourceName, + title: seed.title, + content: seed.content, + updatedAt: seed.updatedAt, + seed, + }; + }); + + const cachePages = cacheResult.rows + .map((row) => mapCacheRow(row as Record)) + .filter((cachePage) => cachePage.content.length > 0) + .map((cachePage) => ({ + kind: "cache" as const, + key: cachePage.pageKey, + sourceName: cachePage.sourceName, + title: cachePage.title, + content: cachePage.content, + updatedAt: cachePage.updatedAt, + 
cachePage, + })); + + return [...seeds, ...cachePages].sort((a, b) => { + const byTime = a.updatedAt.getTime() - b.updatedAt.getTime(); + if (byTime !== 0) return byTime; + if (a.kind !== b.kind) return a.kind === "seed" ? -1 : 1; + return a.key.localeCompare(b.key); + }); +} + +export async function listRemovedAtlasContentIds( + sourceName: string, + query: AtlasContentQuery = {}, +): Promise { + const pool = getPool(); + const seedParams: unknown[] = [sourceName]; + const seedClauses = [ + "seed.source_name = $1", + "seed.status = 'rejected'", + ...addUpdatedAtClauses("seed", query, seedParams), + ]; + const seedRepositoryClause = addSeedRepositoryClause( + "seed", + query.repositories, + seedParams, + ); + if (seedRepositoryClause) seedClauses.push(seedRepositoryClause); + + const cacheParams: unknown[] = [sourceName]; + const cacheClauses = [ + "cache.source_name = $1", + `(cache.stale = TRUE OR COALESCE(cache.provenance ->> '${CACHE_CONTENT_KEY}', '') = '')`, + ...addUpdatedAtClauses("cache", query, cacheParams), + ]; + const cacheRepositoryClause = addCacheRepositoryClause( + query.repositories, + cacheParams, + ); + if (cacheRepositoryClause) cacheClauses.push(cacheRepositoryClause); + + const [seedResult, cacheResult] = await Promise.all([ + pool.query( + ` + SELECT seed.canonical_key + FROM atlas_seed_entries seed + WHERE ${seedClauses.join(" AND ")} + ORDER BY seed.updated_at ASC, seed.id ASC + `, + seedParams, + ), + pool.query( + ` + SELECT cache.page_key + FROM atlas_cache_pages cache + WHERE ${cacheClauses.join(" AND ")} + ORDER BY cache.updated_at ASC, cache.id ASC + `, + cacheParams, + ), + ]); + + return [ + ...seedResult.rows.map( + (row) => + `atlas-seed:${(row as Record).canonical_key as string}`, + ), + ...cacheResult.rows.map( + (row) => + `atlas-cache:${(row as Record).page_key as string}`, + ), + ]; +} + +export async function getAtlasStateToken( + sourceName: string, + query: Pick = {}, +): Promise { + const pool = getPool(); + const 
seedParams: unknown[] = [sourceName]; + const seedClauses = [ + "seed.source_name = $1", + "seed.status IN ('approved', 'rejected')", + ]; + const seedRepositoryClause = addSeedRepositoryClause( + "seed", + query.repositories, + seedParams, + ); + if (seedRepositoryClause) seedClauses.push(seedRepositoryClause); + + const cacheParams: unknown[] = [sourceName]; + const cacheClauses = ["cache.source_name = $1"]; + const cacheRepositoryClause = addCacheRepositoryClause( + query.repositories, + cacheParams, + ); + if (cacheRepositoryClause) cacheClauses.push(cacheRepositoryClause); + + const [seedResult, cacheResult] = await Promise.all([ + pool.query( + ` + SELECT MAX(seed.updated_at) AS state_token + FROM atlas_seed_entries seed + WHERE ${seedClauses.join(" AND ")} + `, + seedParams, + ), + pool.query( + ` + SELECT MAX(cache.updated_at) AS state_token + FROM atlas_cache_pages cache + WHERE ${cacheClauses.join(" AND ")} + `, + cacheParams, + ), + ]); + + const values = [ + seedResult.rows[0]?.state_token, + cacheResult.rows[0]?.state_token, + ] + .map((value) => toDate(value)) + .filter((value): value is Date => value !== null); + if (values.length === 0) return null; + return new Date( + Math.max(...values.map((value) => value.getTime())), + ).toISOString(); +} diff --git a/src/db/schema.ts b/src/db/schema.ts index 605ab6f..2127232 100644 --- a/src/db/schema.ts +++ b/src/db/schema.ts @@ -116,6 +116,59 @@ CREATE TABLE IF NOT EXISTS webhook_deliveries ( ); CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_source ON webhook_deliveries (source); CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_delivered_at ON webhook_deliveries (delivered_at); + +-- Atlas durable seed knowledge. Seed rows are the reviewed source of truth for +-- non-reconstructable architecture and rationale; derived pages remain cache. 
+CREATE TABLE IF NOT EXISTS atlas_seed_entries ( + id SERIAL PRIMARY KEY, + canonical_key TEXT NOT NULL, + source_name TEXT NOT NULL, + repo_url TEXT, + ref TEXT, + subsystem TEXT, + status TEXT NOT NULL DEFAULT 'pending', + title TEXT NOT NULL, + content TEXT NOT NULL, + provenance JSONB NOT NULL DEFAULT '{}', + evidence JSONB NOT NULL DEFAULT '[]', + approved_by TEXT, + approved_at TIMESTAMPTZ, + rejected_by TEXT, + rejected_at TIMESTAMPTZ, + rejection_reason TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT atlas_seed_entries_canonical_key_uniq UNIQUE (canonical_key), + CONSTRAINT atlas_seed_entries_status_check CHECK (status IN ('pending', 'approved', 'rejected')) +); + +CREATE INDEX IF NOT EXISTS idx_atlas_seed_entries_status ON atlas_seed_entries (status); +CREATE INDEX IF NOT EXISTS idx_atlas_seed_entries_source_name ON atlas_seed_entries (source_name); +CREATE INDEX IF NOT EXISTS idx_atlas_seed_entries_repo_ref_subsystem ON atlas_seed_entries (repo_url, ref, subsystem); + +-- Atlas derived pages. These rows describe disposable generated pages whose +-- retrieval projection is stored in chunks. 
+CREATE TABLE IF NOT EXISTS atlas_cache_pages ( + id SERIAL PRIMARY KEY, + page_key TEXT NOT NULL, + source_name TEXT NOT NULL, + title TEXT NOT NULL, + content_hash TEXT NOT NULL, + stale BOOLEAN NOT NULL DEFAULT FALSE, + stale_reason TEXT, + generated_seed_ids JSONB NOT NULL DEFAULT '[]', + provenance JSONB NOT NULL DEFAULT '{}', + generated_at TIMESTAMPTZ, + error_at TIMESTAMPTZ, + error_message TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT atlas_cache_pages_page_key_uniq UNIQUE (page_key) +); + +CREATE INDEX IF NOT EXISTS idx_atlas_cache_pages_source_name ON atlas_cache_pages (source_name); +CREATE INDEX IF NOT EXISTS idx_atlas_cache_pages_stale ON atlas_cache_pages (stale); +CREATE INDEX IF NOT EXISTS idx_atlas_cache_pages_generated_at ON atlas_cache_pages (generated_at); `; return coreSql; diff --git a/src/indexing/atlas-gardener.ts b/src/indexing/atlas-gardener.ts new file mode 100644 index 0000000..bd47f9b --- /dev/null +++ b/src/indexing/atlas-gardener.ts @@ -0,0 +1,93 @@ +import { createHash } from "node:crypto"; + +import { + clearAtlasCachePageStale, + listStaleAtlasCachePages, + recordAtlasCachePageGenerationError, +} from "../db/atlas.js"; +import type { AtlasCachePage } from "../db/atlas.js"; + +export interface AtlasGardenerGeneratedPage { + content: string; + contentHash?: string; + generatedSeedIds?: number[]; + provenance?: Record; +} + +export interface GardenAtlasCachePagesOptions { + sourceName?: string; + generatePage?: ( + page: AtlasCachePage, + ) => Promise | AtlasGardenerGeneratedPage; +} + +export interface GardenAtlasCachePagesSummary { + regenerated: number; + failed: number; +} + +function stableContentHash(content: string): string { + return createHash("sha256").update(content).digest("hex"); +} + +function defaultGeneratePage(page: AtlasCachePage): AtlasGardenerGeneratedPage { + const content = [ + `# ${page.title}`, + "", + `Atlas cache page placeholder for 
${page.pageKey}.`, + "", + "A domain-specific generator can replace this deterministic content.", + ].join("\n"); + return { content }; +} + +function errorMessageFromUnknown(error: unknown): string { + if (error instanceof Error) return error.message; + if (typeof error === "string") return error; + return "Atlas cache page generation failed"; +} + +export async function gardenAtlasCachePages( + options: GardenAtlasCachePagesOptions = {}, +): Promise { + const pages = await listStaleAtlasCachePages({ + sourceName: options.sourceName, + }); + const generatePage = options.generatePage ?? defaultGeneratePage; + let regenerated = 0; + let failed = 0; + + for (const page of pages) { + try { + const generated = await generatePage(page); + await clearAtlasCachePageStale({ + pageKey: page.pageKey, + content: generated.content, + contentHash: + generated.contentHash ?? stableContentHash(generated.content), + generatedSeedIds: generated.generatedSeedIds, + provenance: generated.provenance, + }); + regenerated++; + } catch (error) { + failed++; + console.error( + `[atlas-gardener] Failed to regenerate cache page "${page.pageKey}":`, + error, + ); + try { + await recordAtlasCachePageGenerationError( + page.pageKey, + errorMessageFromUnknown(error), + ); + } catch (recordError) { + console.error( + `[atlas-gardener] Failed to record generation error for cache page "${page.pageKey}":`, + recordError, + ); + } + } + } + + return { regenerated, failed }; +} diff --git a/src/indexing/chunking/index.ts b/src/indexing/chunking/index.ts index 1422296..ec56801 100644 --- a/src/indexing/chunking/index.ts +++ b/src/indexing/chunking/index.ts @@ -43,3 +43,4 @@ import { chunkQa } from "./qa.js"; registerChunker("slack", chunkQa); registerChunker("discord", chunkQa); registerChunker("notion", chunkMarkdown); +registerChunker("atlas", chunkMarkdown); diff --git a/src/indexing/orchestrator.ts b/src/indexing/orchestrator.ts index 5f0c349..06116e9 100644 --- a/src/indexing/orchestrator.ts +++ 
b/src/indexing/orchestrator.ts @@ -20,7 +20,8 @@ import { cleanupOldWebhookDeliveries, } from "../db/queries.js"; import { cleanupOldQueryLogs } from "../db/analytics.js"; -import { isFileSourceConfig } from "../types.js"; +import { markAtlasCachePagesStaleForSources } from "../db/atlas.js"; +import { isAtlasSourceConfig, isFileSourceConfig } from "../types.js"; import type { IndexState, IndexStatus, SourceConfig } from "../types.js"; import type { ProviderOptions } from "./providers/types.js"; @@ -570,12 +571,11 @@ export class IndexingOrchestrator { let affectedSourceNames: string[] = []; if (job.type === "full-reindex") { - await this.runFullReindex( + affectedSourceNames = await this.runFullReindex( embeddingProvider, config.cloneDir, config.githubToken, ); - affectedSourceNames = serverCfg2.sources.map((s) => s.name); } else if (job.type === "full-reindex-local") { if (!job.sources || job.sources.length === 0) { console.warn( @@ -584,13 +584,13 @@ export class IndexingOrchestrator { return; } for (const sourceConfig of job.sources) { - await this.indexSourceWithState( + const succeeded = await this.indexSourceWithState( sourceConfig, embeddingProvider, config.cloneDir, ); + if (succeeded) affectedSourceNames.push(sourceConfig.name); } - affectedSourceNames = job.sources.map((s) => s.name); } else if (job.type === "incremental-reindex") { if (!job.repoUrl) { console.warn( @@ -598,13 +598,12 @@ export class IndexingOrchestrator { ); return; } - await this.runIncrementalReindex( + affectedSourceNames = await this.runIncrementalReindex( embeddingProvider, config.cloneDir, config.githubToken, job.repoUrl, ); - affectedSourceNames = getSourcesByRepo(job.repoUrl).map((s) => s.name); } else if (job.type === "source-reindex") { if (!job.sourceName) { console.warn( @@ -621,12 +620,40 @@ export class IndexingOrchestrator { ); return; } - await this.indexSourceWithState( + const succeeded = await this.indexSourceWithState( sourceConfig, embeddingProvider, config.cloneDir, 
); - affectedSourceNames = [job.sourceName]; + if (succeeded) affectedSourceNames = [job.sourceName]; + } + + const atlasSourceNames = new Set( + serverCfg2.sources + .filter(isAtlasSourceConfig) + .map((source) => source.name), + ); + const atlasCacheInvalidationSourceNames = affectedSourceNames.filter( + (sourceName) => !atlasSourceNames.has(sourceName), + ); + + if ( + atlasCacheInvalidationSourceNames.length > 0 && + atlasSourceNames.size > 0 + ) { + // Isolate Atlas cache invalidation: a transient failure here must not + // suppress onReindexComplete for a reindex that actually succeeded. + try { + await markAtlasCachePagesStaleForSources( + atlasCacheInvalidationSourceNames, + `source reindexed: ${atlasCacheInvalidationSourceNames.join(", ")}`, + ); + } catch (err) { + console.error( + `[orchestrator] Failed to mark Atlas cache pages stale for ${atlasCacheInvalidationSourceNames.join(", ")}:`, + err, + ); + } } if (affectedSourceNames.length > 0 && this.onReindexComplete) { @@ -645,23 +672,26 @@ export class IndexingOrchestrator { embeddingProvider: EmbeddingProvider, cloneDir: string, githubToken?: string, - ): Promise { + ): Promise { console.log("[orchestrator] Starting full re-index"); const serverCfg = getServerConfig(); const indexableNames = getIndexableSourceNames(); + const succeededSourceNames: string[] = []; for (const sourceConfig of serverCfg.sources.filter((s) => indexableNames.has(s.name), )) { - await this.indexSourceWithState( + const succeeded = await this.indexSourceWithState( sourceConfig, embeddingProvider, cloneDir, githubToken, ); + if (succeeded) succeededSourceNames.push(sourceConfig.name); } console.log("[orchestrator] Full re-index complete"); + return succeededSourceNames; } /** @@ -672,23 +702,26 @@ export class IndexingOrchestrator { cloneDir: string, githubToken: string | undefined, repoUrl: string, - ): Promise { + ): Promise { console.log(`[orchestrator] Starting incremental re-index for ${repoUrl}`); const indexableNames = 
getIndexableSourceNames(); const sources = getSourcesByRepo(repoUrl).filter((s) => indexableNames.has(s.name), ); + const succeededSourceNames: string[] = []; for (const sourceConfig of sources) { - await this.indexSourceWithState( + const succeeded = await this.indexSourceWithState( sourceConfig, embeddingProvider, cloneDir, githubToken, ); + if (succeeded) succeededSourceNames.push(sourceConfig.name); } console.log(`[orchestrator] Incremental re-index complete for ${repoUrl}`); + return succeededSourceNames; } /** @@ -699,93 +732,103 @@ export class IndexingOrchestrator { embeddingProvider: EmbeddingProvider, cloneDir: string, githubToken?: string, - ): Promise { + ): Promise { const lockKey = `${sourceConfig.type}:${sourceConfig.name}`; - await this.withSourceLock(lockKey, async () => { - const providerOptions: ProviderOptions = { - cloneDir, - githubToken, - slackBotToken: getConfig().slackBotToken, - discordBotToken: getConfig().discordBotToken, - notionToken: getConfig().notionToken, - }; - const provider = getProvider(sourceConfig.type)( - sourceConfig, - providerOptions, - ); - const pipeline = new IndexingPipeline(embeddingProvider, sourceConfig); - - await this.setIndexStatus( - sourceConfig.type, - sourceConfig.name, - "indexing", - ); - - try { - const state = await getIndexState(sourceConfig.type, sourceConfig.name); - let result; - if (state?.last_commit_sha) { - result = await provider.incrementalAcquire(state.last_commit_sha); - } else { - result = await provider.fullAcquire(); - } - - if (result.removedIds.length > 0) { - await pipeline.removeItems(result.removedIds); - } - if (result.items.length > 0) { - await pipeline.indexItems(result.items, result.stateToken); - } - - await upsertIndexState({ - source_type: sourceConfig.type, - source_key: sourceConfig.name, - last_commit_sha: result.stateToken, - last_indexed_at: new Date(), - status: "idle", - }); - console.log( - `[orchestrator] Indexing complete for ${sourceConfig.name}`, + return 
this.withSourceLock( + lockKey, + async () => { + const providerOptions: ProviderOptions = { + cloneDir, + githubToken, + slackBotToken: getConfig().slackBotToken, + discordBotToken: getConfig().discordBotToken, + notionToken: getConfig().notionToken, + }; + const provider = getProvider(sourceConfig.type)( + sourceConfig, + providerOptions, ); - } catch (err) { - console.error( - `[orchestrator] Indexing failed for ${sourceConfig.name}:`, - err, + const pipeline = new IndexingPipeline(embeddingProvider, sourceConfig); + + await this.setIndexStatus( + sourceConfig.type, + sourceConfig.name, + "indexing", ); + try { - await this.setIndexStatus( + const state = await getIndexState( sourceConfig.type, sourceConfig.name, - "error", - err instanceof Error ? err.message : String(err), ); - } catch (statusErr) { + let result; + if (state?.last_commit_sha) { + result = await provider.incrementalAcquire(state.last_commit_sha); + } else { + result = await provider.fullAcquire(); + } + + if (result.removedIds.length > 0) { + await pipeline.removeItems(result.removedIds); + } + if (result.items.length > 0) { + await pipeline.indexItems(result.items, result.stateToken); + } + + await upsertIndexState({ + source_type: sourceConfig.type, + source_key: sourceConfig.name, + last_commit_sha: result.stateToken, + last_indexed_at: new Date(), + status: "idle", + }); + console.log( + `[orchestrator] Indexing complete for ${sourceConfig.name}`, + ); + return true; + } catch (err) { console.error( - "[orchestrator] Failed to update index status:", - statusErr, + `[orchestrator] Indexing failed for ${sourceConfig.name}:`, + err, ); + try { + await this.setIndexStatus( + sourceConfig.type, + sourceConfig.name, + "error", + err instanceof Error ? err.message : String(err), + ); + } catch (statusErr) { + console.error( + "[orchestrator] Failed to update index status:", + statusErr, + ); + } + return false; } - } - }); + }, + false, + ); } /** * Simple per-source mutex. 
If the source is already being indexed, skip. */ - private async withSourceLock( + private async withSourceLock( sourceKey: string, - fn: () => Promise, - ): Promise { + fn: () => Promise, + skippedValue: T, + ): Promise { if (this.activeSources.has(sourceKey)) { console.log( `[orchestrator] Skipping ${sourceKey} — already being indexed`, ); - return; + return skippedValue; } this.activeSources.add(sourceKey); try { - await fn(); + return await fn(); } finally { this.activeSources.delete(sourceKey); } diff --git a/src/indexing/providers/atlas.ts b/src/indexing/providers/atlas.ts new file mode 100644 index 0000000..ff5f838 --- /dev/null +++ b/src/indexing/providers/atlas.ts @@ -0,0 +1,122 @@ +import { + type AtlasRepositoryFilter, + getAtlasStateToken, + listIndexableAtlasContent, + listRemovedAtlasContentIds, +} from "../../db/atlas.js"; +import { isAtlasSourceConfig } from "../../types.js"; +import type { AtlasSourceConfig, SourceConfig } from "../../types.js"; +import type { + AcquisitionResult, + ContentItem, + DataProvider, + ProviderOptions, +} from "./types.js"; + +export class AtlasDataProvider implements DataProvider { + private config: AtlasSourceConfig; + + constructor(config: SourceConfig, _options: ProviderOptions) { + if (!isAtlasSourceConfig(config)) { + throw new Error("AtlasDataProvider requires an atlas source config"); + } + this.config = config; + } + + async fullAcquire(): Promise { + const stateToken = + (await this.getCurrentStateToken()) ?? new Date(0).toISOString(); + const query = { + changedOnOrBefore: new Date(stateToken), + repositories: this.repositoryFilters(), + }; + const [items, removedIds] = await Promise.all([ + this.acquireItems(query), + listRemovedAtlasContentIds(this.config.name, query), + ]); + return { + items, + removedIds, + stateToken, + }; + } + + async incrementalAcquire(lastStateToken: string): Promise { + const stateToken = (await this.getCurrentStateToken()) ?? 
lastStateToken; + const query = { + changedAfter: lastStateToken ? new Date(lastStateToken) : undefined, + changedOnOrBefore: stateToken ? new Date(stateToken) : undefined, + repositories: this.repositoryFilters(), + }; + const [items, removedIds] = await Promise.all([ + this.acquireItems(query), + listRemovedAtlasContentIds(this.config.name, query), + ]); + return { + items, + removedIds, + stateToken, + }; + } + + async getCurrentStateToken(): Promise { + return getAtlasStateToken(this.config.name, { + repositories: this.repositoryFilters(), + }); + } + + private async acquireItems(query: { + changedAfter?: Date; + changedOnOrBefore?: Date; + repositories?: AtlasRepositoryFilter[]; + }): Promise { + const entries = await listIndexableAtlasContent(this.config.name, query); + return entries.map((entry) => { + if (entry.kind === "seed") { + return { + id: `atlas-seed:${entry.key}`, + title: entry.title, + content: entry.content, + sourceUrl: `atlas://seed/${encodeURIComponent(entry.key)}`, + metadata: { + atlas_kind: "seed", + atlas_key: entry.key, + source_name: entry.sourceName, + repo_url: entry.seed.repoUrl, + ref: entry.seed.ref, + subsystem: entry.seed.subsystem, + provenance: entry.seed.provenance, + evidence: entry.seed.evidence, + }, + }; + } + + return { + id: `atlas-cache:${entry.key}`, + title: entry.title, + content: entry.content, + sourceUrl: `atlas://cache/${encodeURIComponent(entry.key)}`, + metadata: { + atlas_kind: "cache", + atlas_page_key: entry.key, + source_name: entry.sourceName, + content_hash: entry.cachePage.contentHash, + generated_seed_ids: entry.cachePage.generatedSeedIds, + provenance: entry.cachePage.provenance, + generated_at: entry.cachePage.generatedAt?.toISOString() ?? 
null, + }, + }; + }); + } + + private repositoryFilters(): AtlasRepositoryFilter[] | undefined { + if (!this.config.repositories || this.config.repositories.length === 0) { + return undefined; + } + return this.config.repositories.map((repository) => ({ + repoUrl: repository.repo_url, + refs: repository.refs, + subsystems: repository.subsystems, + })); + } +} diff --git a/src/indexing/providers/index.ts b/src/indexing/providers/index.ts index 9feaed6..1825995 100644 --- a/src/indexing/providers/index.ts +++ b/src/indexing/providers/index.ts @@ -59,3 +59,10 @@ registerProvider( "notion", (config, options) => new NotionDataProvider(config, options), ); + +import { AtlasDataProvider } from "./atlas.js"; + +registerProvider( + "atlas", + (config, options) => new AtlasDataProvider(config, options), +); diff --git a/src/server.ts b/src/server.ts index a4a35a6..5c3149b 100644 --- a/src/server.ts +++ b/src/server.ts @@ -39,7 +39,10 @@ import { import { IndexingOrchestrator } from "./indexing/orchestrator.js"; import { runReindexAudit } from "./indexing/reindex-audit.js"; -import { createWebhookHandler } from "./webhooks/github.js"; +import { + createWebhookHandler, + type GitHubWebhookResult, +} from "./webhooks/github.js"; import { createSlackWebhookHandler } from "./webhooks/slack.js"; import { createDiscordWebhookHandler } from "./webhooks/discord.js"; import { SessionStateManager } from "./mcp/tools/bash-session.js"; @@ -75,6 +78,11 @@ import { getToolCounts, } from "./db/analytics.js"; import type { AnalyticsFilter } from "./db/analytics.js"; +import { + approveAtlasSeedEntry, + listPendingAtlasSeedCandidates, + rejectAtlasSeedEntry, +} from "./db/atlas.js"; import path from "node:path"; import { fileURLToPath } from "node:url"; @@ -187,8 +195,9 @@ export function assertWebhookRawBodyOrder(routeName: string): RequestHandler { }; } -let webhookHandler: ((req: Request, res: Response) => Promise) | null = - null; +let webhookHandler: + | ((req: Request, res: Response) => 
Promise) + | null = null; let slackWebhookHandler: | ((req: Request, res: Response) => Promise) | null = null; @@ -206,14 +215,6 @@ let telemetryFlushInterval: ReturnType | undefined; // SSE handlers via getter; passed directly into handleSessionInitAccept. let p2pTelemetry: P2PTelemetry | undefined; -// Pending webhook-triggered bash-refresh timers. Each webhook delivery -// schedules a setTimeout to refresh bash instances after a brief delay -// (post-reindex). Without tracking these handles, a shutdown() racing -// with an in-flight webhook would leave the timers armed and keep the -// Node event loop alive, delaying process exit. We add handles on -// scheduling, remove them on fire, and clear the entire set on shutdown. -const pendingBashRefreshTimers = new Set>(); - /** * Rebuild bash instances for tools affected by the given sources. * @@ -325,6 +326,12 @@ export function __clearBashInstancesForTesting(): void { bashInstances.clear(); } +export function __setAtlasOrchestratorForTesting( + orchestrator: Pick | null, +): void { + orchestratorRef = orchestrator as IndexingOrchestrator | null; +} + /** * Test-only accessor to the rejected-sid marker Set so * `rollbackSessionAfterConnectFailure` regression tests can assert the @@ -399,14 +406,16 @@ export function runStartupIndexAndBashRefresh( export function classifyWebhookUnavailable(opts: { sourceType: "github" | "slack" | "discord"; }): { status: 404 | 503; body: { error: string; sourceType: string } } { - const configured = getServerConfig().sources.some( - (s) => s.type === opts.sourceType, - ); + const serverCfg = getServerConfig(); + const configured = + opts.sourceType === "github" + ? Object.keys(serverCfg.webhook?.repo_sources ?? 
{}).length > 0 + : serverCfg.sources.some((s) => s.type === opts.sourceType); if (!configured) { return { status: 404, body: { - error: `${opts.sourceType} webhook not configured — no sources of type '${opts.sourceType}' in config`, + error: `${opts.sourceType} webhook not configured`, sourceType: opts.sourceType, }, }; @@ -436,27 +445,6 @@ app.post( } try { await handler(req, res); - // Schedule bash refresh after webhook-triggered reindex. This path - // uses a delay heuristic rather than orchestrator.onReindexComplete - // because that callback only fires on scheduled/nightly reindex, not - // per-webhook — the webhook handler reindexes inline via the - // orchestrator's handler path without going through the completion - // callback. Unifying the two notification paths is a larger refactor. - const serverCfg = getServerConfig(); - const bashTools = serverCfg.tools.filter((t) => t.type === "bash"); - if (bashTools.length > 0) { - const REFRESH_DELAY_MS = 30_000; - // Track the timer handle so shutdown() can cancel any refresh - // still pending. The self-delete in the callback keeps the Set - // from accumulating stale handles after the timer fires. - const handle: ReturnType = setTimeout(() => { - pendingBashRefreshTimers.delete(handle); - refreshBashInstances(serverCfg.sources.map((s) => s.name)).catch( - (err) => console.error("[webhook] Bash refresh failed:", err), - ); - }, REFRESH_DELAY_MS); - pendingBashRefreshTimers.add(handle); - } } catch (err) { console.error("[webhook] Handler error:", err); if (!res.headersSent) { @@ -2451,13 +2439,22 @@ function getAnalyticsToken(): string | undefined { } /** - * Analytics auth middleware — exported so tests can import and exercise the - * real code instead of reimplementing the logic in test doubles. + * Shared bearer-token check used by analytics and Atlas admin endpoints. + * Atlas intentionally reuses the same configured token source without tying + * its availability to analytics.enabled. 
*/ -export function analyticsAuth( +function bearerTokenAuth( req: Request, res: Response, next: express.NextFunction, + opts: { + logPrefix: "analytics" | "atlas"; + configReadFailureDescription: string; + requireAnalyticsEnabled: boolean; + disabledResponse?: { status: number; body: Record }; + tokenDescription: string; + invalidTokenDescription: string; + }, ): void { // Mirror the getAnalyticsToken() pattern: a throw from the config read // (e.g. corrupt YAML on hot reload, env parse failure) would otherwise @@ -2465,22 +2462,27 @@ export function analyticsAuth( // a 503 so callers see a stable error shape and operators get a // diagnostic log line. let analyticsCfg: ReturnType; + let config: ReturnType; try { analyticsCfg = getAnalyticsConfig(); + config = getConfig(); } catch (err) { console.error( - `[analytics] auth misconfigured: config read failed: ${formatErrorForLog(err)}`, + `[${opts.logPrefix}] auth misconfigured: config read failed: ${formatErrorForLog(err)}`, ); res.status(503).json({ error: "misconfigured", - error_description: "Analytics config read failed", + error_description: opts.configReadFailureDescription, }); return; } - const config = getConfig(); - if (!analyticsCfg?.enabled) { - res.status(404).json({ error: "Analytics not enabled" }); + if (opts.requireAnalyticsEnabled && !analyticsCfg?.enabled) { + const disabled = opts.disabledResponse ?? { + status: 404, + body: { error: "Feature not enabled" }, + }; + res.status(disabled.status).json(disabled.body); return; } @@ -2492,25 +2494,16 @@ export function analyticsAuth( return; } - // Message is conditional on nodeEnv so non-prod operators don't get a - // misleading "requires ANALYTICS_TOKEN in production" hint when the root - // cause is something else (e.g. a downstream config-read failure). The - // production copy still surfaces the concrete remediation step. 
- const prodTokenMsg = - "Analytics requires ANALYTICS_TOKEN in production (env var or analytics.token in config)."; - const nonProdTokenMsg = - "Analytics token unavailable — check analytics config / logs."; - const tokenDescription = - config.nodeEnv === "production" ? prodTokenMsg : nonProdTokenMsg; - let token: string | undefined; try { token = getAnalyticsToken(); } catch (err) { - console.error(`[analytics] auth misconfigured: ${formatErrorForLog(err)}`); + console.error( + `[${opts.logPrefix}] auth misconfigured: ${formatErrorForLog(err)}`, + ); res.status(503).json({ error: "misconfigured", - error_description: tokenDescription, + error_description: opts.tokenDescription, }); return; } @@ -2518,10 +2511,10 @@ export function analyticsAuth( if (!token) { // Should not happen — getAnalyticsToken auto-generates or throws. // Fail closed rather than silently bypassing auth. - console.error("[analytics] auth misconfigured: no token available"); + console.error(`[${opts.logPrefix}] auth misconfigured: no token available`); res.status(503).json({ error: "misconfigured", - error_description: tokenDescription, + error_description: opts.tokenDescription, }); return; } @@ -2558,14 +2551,14 @@ export function analyticsAuth( if (providedBuf.length !== tokenBuf.length) { res.status(403).json({ error: "forbidden", - error_description: "Invalid analytics token", + error_description: opts.invalidTokenDescription, }); return; } if (!timingSafeEqual(providedBuf, tokenBuf)) { res.status(403).json({ error: "forbidden", - error_description: "Invalid analytics token", + error_description: opts.invalidTokenDescription, }); return; } @@ -2573,6 +2566,84 @@ export function analyticsAuth( next(); } +/** + * Analytics auth middleware — exported so tests can import and exercise the + * real code instead of reimplementing the logic in test doubles. 
+ */ +export function analyticsAuth( + req: Request, + res: Response, + next: express.NextFunction, +): void { + // Message is conditional on nodeEnv so non-prod operators don't get a + // misleading "requires ANALYTICS_TOKEN in production" hint when the root + // cause is something else (e.g. a downstream config-read failure). The + // production copy still surfaces the concrete remediation step. + const prodTokenMsg = + "Analytics requires ANALYTICS_TOKEN in production (env var or analytics.token in config)."; + const nonProdTokenMsg = + "Analytics token unavailable — check analytics config / logs."; + let tokenDescription: string; + try { + tokenDescription = + getConfig().nodeEnv === "production" ? prodTokenMsg : nonProdTokenMsg; + } catch (err) { + console.error( + `[analytics] auth misconfigured: config read failed: ${formatErrorForLog(err)}`, + ); + res.status(503).json({ + error: "misconfigured", + error_description: "Analytics config read failed", + }); + return; + } + + bearerTokenAuth(req, res, next, { + logPrefix: "analytics", + configReadFailureDescription: "Analytics config read failed", + requireAnalyticsEnabled: true, + disabledResponse: { + status: 404, + body: { error: "Analytics not enabled" }, + }, + tokenDescription, + invalidTokenDescription: "Invalid analytics token", + }); +} + +function atlasRatificationAuth( + req: Request, + res: Response, + next: express.NextFunction, +): void { + const prodTokenMsg = + "Atlas ratification requires ANALYTICS_TOKEN in production (env var or analytics.token in config)."; + const nonProdTokenMsg = + "Atlas ratification token unavailable — check analytics token config / logs."; + let tokenDescription: string; + try { + tokenDescription = + getConfig().nodeEnv === "production" ? 
prodTokenMsg : nonProdTokenMsg; + } catch (err) { + console.error( + `[atlas] auth misconfigured: config read failed: ${formatErrorForLog(err)}`, + ); + res.status(503).json({ + error: "misconfigured", + error_description: "Atlas ratification config read failed", + }); + return; + } + + bearerTokenAuth(req, res, next, { + logPrefix: "atlas", + configReadFailureDescription: "Atlas ratification config read failed", + requireAnalyticsEnabled: false, + tokenDescription, + invalidTokenDescription: "Invalid atlas ratification token", + }); +} + const ISO_DATE_RE = /^\d{4}-\d{2}-\d{2}$/; /** @@ -3071,6 +3142,138 @@ export function registerAnalyticsRoutes( // (b) coupled mere module import (e.g. an unrelated re-export in a test // fixture) to mutating the module-level app. +function atlasActor(req: Request): string { + const header = req.header("X-Atlas-Actor"); + return header?.trim() || "atlas-admin"; +} + +function atlasCanonicalKeyFromBody(req: Request): string { + const value = req.body?.canonicalKey; + return typeof value === "string" ? value.trim() : ""; +} + +function handleAtlasRatificationError( + res: Response, + action: "approve" | "reject", + err: unknown, +): void { + const message = err instanceof Error ? 
err.message : String(err); + if (message.includes("missing or not pending")) { + res.status(409).json({ + error: `atlas_candidate_not_${action}able`, + error_description: message, + }); + return; + } + console.error(`[atlas] Failed to ${action} seed candidate:`, err); + res.status(500).json({ error: `Failed to ${action} atlas candidate` }); +} + +async function approveAtlasCandidate( + canonicalKey: string, + req: Request, + res: Response, +): Promise { + if (!canonicalKey) { + res.status(400).json({ + error: "atlas_candidate_key_required", + error_description: "canonicalKey is required", + }); + return; + } + + try { + const candidate = await approveAtlasSeedEntry( + canonicalKey, + atlasActor(req), + ); + let reindexQueued = false; + if (orchestratorRef) { + orchestratorRef.queueSourceReindex(candidate.sourceName); + reindexQueued = true; + } else { + // The ratification routes mount unconditionally, but orchestratorRef is + // only wired when search/knowledge tools are enabled. With Atlas sources + // but no such tools, approval persists yet nothing drives a reindex — so + // make the gap loud and actionable rather than silently returning 200. + console.error( + `[atlas] Approved candidate "${canonicalKey}" (source "${candidate.sourceName}"): ` + + `approval persisted but reindex NOT queued — no indexing orchestrator is wired ` + + `(search/knowledge tools disabled). Approved content will NOT be indexed until a ` + + `reindex runs for source "${candidate.sourceName}".`, + ); + } + res.json({ candidate, reindexQueued }); + } catch (err) { + handleAtlasRatificationError(res, "approve", err); + } +} + +async function rejectAtlasCandidate( + canonicalKey: string, + req: Request, + res: Response, +): Promise { + if (!canonicalKey) { + res.status(400).json({ + error: "atlas_candidate_key_required", + error_description: "canonicalKey is required", + }); + return; + } + + const reason = + typeof req.body?.reason === "string" && req.body.reason.trim() + ? 
req.body.reason.trim() + : "rejected by reviewer"; + try { + const candidate = await rejectAtlasSeedEntry( + canonicalKey, + atlasActor(req), + reason, + ); + res.json({ candidate }); + } catch (err) { + handleAtlasRatificationError(res, "reject", err); + } +} + +export function registerAtlasRatificationRoutes(app: express.Express): void { + app.get( + "/api/atlas/candidates", + atlasRatificationAuth, + async (req: Request, res: Response) => { + const sourceName = + typeof req.query.source === "string" && req.query.source.trim() + ? req.query.source.trim() + : undefined; + try { + const candidates = await listPendingAtlasSeedCandidates({ sourceName }); + res.json({ candidates }); + } catch (err) { + console.error("[atlas] Failed to list seed candidates:", err); + res.status(500).json({ error: "Failed to list atlas candidates" }); + } + }, + ); + + app.post( + "/api/atlas/candidates/approve", + atlasRatificationAuth, + async (req: Request, res: Response) => { + await approveAtlasCandidate(atlasCanonicalKeyFromBody(req), req, res); + }, + ); + + app.post( + "/api/atlas/candidates/reject", + atlasRatificationAuth, + async (req: Request, res: Response) => { + await rejectAtlasCandidate(atlasCanonicalKeyFromBody(req), req, res); + }, + ); +} + // --------------------------------------------------------------------------- // Startup // --------------------------------------------------------------------------- @@ -3352,6 +3555,7 @@ async function startServerInner(options?: ServerOptions): Promise { // R4-19: explicit mount — the module-load side-effect was removed so this // is now the single call site for the production app. 
+ registerAtlasRatificationRoutes(app); registerAnalyticsRoutes(app); const serverName = serverCfg.server.name; @@ -3395,12 +3599,6 @@ async function startServerInner(options?: ServerOptions): Promise { clearInterval(sessionReaperInterval); sessionReaperInterval = undefined; } - // Cancel any webhook-triggered bash-refresh timers that haven't fired - // yet — otherwise they can hold the event loop open past shutdown. - if (pendingBashRefreshTimers.size > 0) { - for (const handle of pendingBashRefreshTimers) clearTimeout(handle); - pendingBashRefreshTimers.clear(); - } try { await bashTelemetry?.flush(); } catch (e) { diff --git a/src/types.ts b/src/types.ts index 07ec189..91826a4 100644 --- a/src/types.ts +++ b/src/types.ts @@ -152,12 +152,31 @@ export const NotionSourceConfigSchema = z.object({ include_properties: z.boolean().optional().default(true), }); +export const AtlasSourceConfigSchema = z.object({ + ...BaseSourceFields, + type: z.literal("atlas"), + seed_path: z.string().min(1).optional(), + cache_namespace: z.string().min(1).optional(), + path: z.undefined().optional(), + file_patterns: z.undefined().optional(), + repositories: z + .array( + z.object({ + repo_url: z.string().url(), + refs: z.array(z.string().min(1)).optional(), + subsystems: z.array(z.string().min(1)).optional(), + }), + ) + .optional(), +}); + // Union: TypeScript infers the right shape based on `type` export const SourceConfigSchema = z.discriminatedUnion("type", [ FileSourceConfigSchema, SlackSourceConfigSchema, DiscordSourceConfigSchema, NotionSourceConfigSchema, + AtlasSourceConfigSchema, ]); // ── Tool configuration schemas ──────────────────────────────────────────────── @@ -385,6 +404,13 @@ export const ServerConfigSchema = z path: ["tools"], }); } + if (tool.type === "search" && !sourceNames.has(tool.source)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `Tool "${tool.name}" references source "${tool.source}" which is not defined in sources.`, + path: ["tools"], + 
}); + } if (tool.type === "bash") { for (const src of tool.sources) { if (!sourceNames.has(src)) { @@ -439,6 +465,7 @@ export type SlackSourceConfig = z.infer; export type DiscordChannelConfig = z.infer; export type DiscordSourceConfig = z.infer; export type NotionSourceConfig = z.infer; +export type AtlasSourceConfig = z.infer; export type SearchToolConfig = z.infer; export type BashToolConfig = z.infer; export type CollectToolConfig = z.infer; @@ -487,6 +514,12 @@ export function isNotionSourceConfig( return config.type === "notion"; } +export function isAtlasSourceConfig( + config: SourceConfig, +): config is AtlasSourceConfig { + return config.type === "atlas"; +} + // ── Data types: unified chunk ───────────────────────────────────────────────── export interface Chunk { diff --git a/src/webhooks/atlas.ts b/src/webhooks/atlas.ts new file mode 100644 index 0000000..8ca3bbb --- /dev/null +++ b/src/webhooks/atlas.ts @@ -0,0 +1,150 @@ +import type { UpsertAtlasSeedCandidateInput } from "../db/atlas.js"; +import type { AtlasSourceConfig } from "../types.js"; + +interface PullRequestUser { + login?: unknown; +} + +export interface AtlasPullRequestPayload { + action?: unknown; + repository?: { + clone_url?: unknown; + default_branch?: unknown; + full_name?: unknown; + }; + pull_request?: { + number?: unknown; + merged?: unknown; + merge_commit_sha?: unknown; + title?: unknown; + body?: unknown; + html_url?: unknown; + base?: { ref?: unknown }; + head?: { ref?: unknown }; + user?: PullRequestUser; + merged_by?: PullRequestUser | null; + }; +} + +export interface AtlasPullRequestSeedExtraction { + repoFullName: string; + repoUrl: string; + defaultBranch: string; + baseBranch: string; + isMergedPullRequest: boolean; + candidates: UpsertAtlasSeedCandidateInput[]; +} + +function requireString(value: unknown, field: string): string { + if (typeof value !== "string" || value.length === 0) { + throw new Error(`Missing ${field}`); + } + return value; +} + +function 
optionalString(value: unknown): string | null { + return typeof value === "string" && value.length > 0 ? value : null; +} + +export function extractAtlasPullRequestSeedCandidates( + payload: AtlasPullRequestPayload, + atlasSources: AtlasSourceConfig[], + deliveryId: string | undefined, +): AtlasPullRequestSeedExtraction { + const repoFullName = requireString( + payload.repository?.full_name, + "repository.full_name", + ); + const repoUrl = requireString( + payload.repository?.clone_url, + "repository.clone_url", + ); + const defaultBranch = requireString( + payload.repository?.default_branch, + "repository.default_branch", + ); + const pr = payload.pull_request; + if (!pr || typeof pr !== "object") { + throw new Error("Missing pull_request"); + } + + const baseBranch = requireString(pr.base?.ref, "pull_request.base.ref"); + const isMergedPullRequest = payload.action === "closed" && pr.merged === true; + if (!isMergedPullRequest) { + return { + repoFullName, + repoUrl, + defaultBranch, + baseBranch, + isMergedPullRequest, + candidates: [], + }; + } + + const prNumber = + typeof pr.number === "number" && Number.isInteger(pr.number) + ? pr.number + : null; + if (prNumber == null) { + throw new Error("Missing pull_request.number"); + } + const title = requireString(pr.title, "pull_request.title"); + const url = requireString(pr.html_url, "pull_request.html_url"); + const mergeSha = optionalString(pr.merge_commit_sha); + const body = optionalString(pr.body); + const author = optionalString(pr.user?.login); + const mergedBy = optionalString(pr.merged_by?.login); + const headBranch = optionalString(pr.head?.ref); + const ref = baseBranch; + const content = [ + `# PR #${prNumber}: ${title}`, + "", + `Repository: ${repoFullName}`, + `Base branch: ${baseBranch}`, + headBranch ? `Head branch: ${headBranch}` : null, + mergeSha ? `Merge commit: ${mergeSha}` : null, + author ? `Author: ${author}` : null, + mergedBy ? 
`Merged by: ${mergedBy}` : null, + `URL: ${url}`, + "", + body ?? "(No pull request body provided.)", + ] + .filter((line): line is string => line != null) + .join("\n"); + + return { + repoFullName, + repoUrl, + defaultBranch, + baseBranch, + isMergedPullRequest, + candidates: atlasSources.map((source) => ({ + canonicalKey: `github-pr:${source.name}:${repoFullName}:${prNumber}`, + sourceName: source.name, + repoUrl, + ref, + subsystem: null, + title: `PR #${prNumber}: ${title}`, + content, + provenance: { + provider: "github", + event: "pull_request", + delivery_id: deliveryId ?? null, + repo: repoFullName, + pr_number: prNumber, + url, + base_branch: baseBranch, + head_branch: headBranch, + merge_commit_sha: mergeSha, + }, + evidence: [ + { + type: "pull_request", + url, + title, + body, + }, + ], + })), + }; +} diff --git a/src/webhooks/github.ts b/src/webhooks/github.ts index 536f725..f29e236 100644 --- a/src/webhooks/github.ts +++ b/src/webhooks/github.ts @@ -5,7 +5,13 @@ import crypto from "node:crypto"; import type { Request, Response } from "express"; import { getConfig, getServerConfig } from "../config.js"; +import { upsertAtlasSeedCandidate } from "../db/atlas.js"; import { recordWebhookDelivery } from "../db/queries.js"; +import { isAtlasSourceConfig } from "../types.js"; +import { + extractAtlasPullRequestSeedCandidates, + type AtlasPullRequestPayload, +} from "./atlas.js"; // --------------------------------------------------------------------------- // Types @@ -29,6 +35,24 @@ interface PushPayload { commits: PushCommit[]; } +export interface GitHubWebhookResult { + queuedReindex: boolean; + affectedSourceNames: string[]; +} + +const NO_REINDEX: GitHubWebhookResult = { + queuedReindex: false, + affectedSourceNames: [], +}; + +type HeaderValue = string | string[] | undefined; + +type NormalizedHeader = + | { ok: true; value: string | undefined } + | { ok: false; reason: string }; + +type DuplicateHeader = Extract; + /** * Minimal interface for the 
orchestrator dependency. The full * IndexingOrchestrator lives in ../indexing/orchestrator.ts — we only @@ -37,6 +61,7 @@ interface PushPayload { */ export interface ReindexOrchestrator { queueIncrementalReindex(repoUrl: string): void; + queueSourceReindex(sourceName: string): void; } // --------------------------------------------------------------------------- @@ -54,13 +79,49 @@ function verifySignature( "sha256=" + crypto.createHmac("sha256", secret).update(rawBody).digest("hex"); - // Both strings must be the same length for timingSafeEqual - if (signatureHeader.length !== expected.length) return false; + const signatureBuffer = Buffer.from(signatureHeader, "utf-8"); + const expectedBuffer = Buffer.from(expected, "utf-8"); - return crypto.timingSafeEqual( - Buffer.from(signatureHeader, "utf-8"), - Buffer.from(expected, "utf-8"), - ); + // timingSafeEqual requires equal byte lengths, not equal JS string lengths. + if (signatureBuffer.length !== expectedBuffer.length) return false; + + return crypto.timingSafeEqual(signatureBuffer, expectedBuffer); +} + +function normalizeSingleHeader( + value: HeaderValue, + headerName: string, + rawHeaders: readonly string[] | undefined, +): NormalizedHeader { + if (Array.isArray(value)) { + return { + ok: false, + reason: `duplicate ${headerName} header`, + }; + } + if (countRawHeaders(rawHeaders, headerName) > 1) { + return { + ok: false, + reason: `duplicate ${headerName} header`, + }; + } + return { ok: true, value }; +} + +function countRawHeaders( + rawHeaders: readonly string[] | undefined, + headerName: string, +): number { + if (!Array.isArray(rawHeaders)) return 0; + + let count = 0; + const normalizedHeaderName = headerName.toLowerCase(); + for (let i = 0; i < rawHeaders.length; i += 2) { + if (rawHeaders[i]?.toLowerCase() === normalizedHeaderName) { + count += 1; + } + } + return count; } // --------------------------------------------------------------------------- @@ -72,6 +133,21 @@ function 
isDefaultBranchPush(payload: PushPayload): boolean { return branch === payload.repository.default_branch; } +function normalizePathTrigger(trigger: string): string { + return trigger.replace(/^\.?\//, "").replace(/\/+$/, ""); +} + +function matchesPathTrigger(filePath: string, trigger: string): boolean { + const normalizedTrigger = normalizePathTrigger(trigger); + if (normalizedTrigger.length === 0) return true; + + const normalizedPath = filePath.replace(/^\.?\//, ""); + return ( + normalizedPath === normalizedTrigger || + normalizedPath.startsWith(`${normalizedTrigger}/`) + ); +} + /** * Check if any committed files match any of the given path prefixes. * An empty prefixes array means "match everything" (no path filtering). @@ -81,13 +157,74 @@ function touchesPaths(payload: PushPayload, prefixes: string[]): boolean { for (const commit of payload.commits) { const allPaths = [...commit.added, ...commit.modified, ...commit.removed]; - if (allPaths.some((p) => prefixes.some((prefix) => p.startsWith(prefix)))) { + if ( + allPaths.some((p) => + prefixes.some((prefix) => matchesPathTrigger(p, prefix)), + ) + ) { return true; } } return false; } +function hasPathTriggers( + pathTriggers: Record | undefined, + sourceName: string, +): boolean { + return (pathTriggers?.[sourceName] ?? 
[]).length > 0; +} + +function isStringArray(value: unknown): value is string[] { + return ( + Array.isArray(value) && value.every((item) => typeof item === "string") + ); +} + +function isPushCommit(value: unknown): value is PushCommit { + if (typeof value !== "object" || value == null) return false; + const commit = value as Record; + return ( + isStringArray(commit.added) && + isStringArray(commit.modified) && + isStringArray(commit.removed) + ); +} + +function isPushPayload(value: unknown): value is PushPayload { + if (typeof value !== "object" || value == null) return false; + const payload = value as Record; + const repository = payload.repository; + if (typeof repository !== "object" || repository == null) return false; + const repo = repository as Record; + return ( + typeof payload.ref === "string" && + typeof payload.after === "string" && + typeof payload.before === "string" && + typeof repo.clone_url === "string" && + typeof repo.default_branch === "string" && + typeof repo.full_name === "string" && + Array.isArray(payload.commits) && + payload.commits.every(isPushCommit) + ); +} + +function recordPullRequestDelivery( + delivery: Parameters[0], +): void { + recordGithubWebhookDelivery(delivery); +} + +function recordGithubWebhookDelivery( + delivery: Parameters[0], +): void { + // Delivery tracking is non-blocking audit telemetry; webhook correctness + // depends on signature validation and seed writes, not analytics persistence. 
+ recordWebhookDelivery(delivery).catch((err) => { + console.error("[webhook] Failed to record GitHub delivery:", err); + }); +} + // --------------------------------------------------------------------------- // Factory: create a handler wired to a specific orchestrator instance // --------------------------------------------------------------------------- @@ -96,7 +233,7 @@ export function createWebhookHandler(orchestrator: ReindexOrchestrator) { return async function handleGithubWebhook( req: Request, res: Response, - ): Promise { + ): Promise { const cfg = getConfig(); // -- Signature verification ---------------------------------------- @@ -108,56 +245,231 @@ export function createWebhookHandler(orchestrator: ReindexOrchestrator) { console.error( "[webhook] req.body is not a Buffer — ensure the route uses express.raw()", ); - recordWebhookDelivery({ + recordGithubWebhookDelivery({ source: "github", decision: "error", reason: "req.body not a Buffer", payload_size: payloadSize, - }).catch(() => {}); + }); res .status(500) .json({ error: "Server misconfiguration: raw body not available" }); - return; + return NO_REINDEX; } if (!cfg.githubWebhookSecret?.trim()) { console.log( "[webhook] Rejecting request — webhook secret not configured", ); - recordWebhookDelivery({ + recordGithubWebhookDelivery({ source: "github", decision: "error", reason: "webhook secret not configured", payload_size: payloadSize, - }).catch(() => {}); + }); res.status(403).json({ error: "Forbidden" }); - return; + return NO_REINDEX; + } + + const signatureHeader = normalizeSingleHeader( + req.headers["x-hub-signature-256"], + "x-hub-signature-256", + req.rawHeaders, + ); + const eventHeader = normalizeSingleHeader( + req.headers["x-github-event"], + "x-github-event", + req.rawHeaders, + ); + const deliveryHeader = normalizeSingleHeader( + req.headers["x-github-delivery"], + "x-github-delivery", + req.rawHeaders, + ); + + const duplicateHeader = [signatureHeader, eventHeader, 
deliveryHeader].find( + (header): header is DuplicateHeader => !header.ok, + ); + if (duplicateHeader) { + recordGithubWebhookDelivery({ + source: "github", + decision: "error", + reason: duplicateHeader.reason, + payload_size: payloadSize, + }); + res.status(400).json({ + error: "Duplicate GitHub webhook header", + header: duplicateHeader.reason + .replace(/^duplicate /, "") + .replace(/ header$/, ""), + }); + return NO_REINDEX; } - const signature = req.headers["x-hub-signature-256"] as string | undefined; + const signature = signatureHeader.ok ? signatureHeader.value : undefined; if (!verifySignature(rawBody, signature, cfg.githubWebhookSecret)) { - recordWebhookDelivery({ + recordGithubWebhookDelivery({ source: "github", decision: "error", reason: "invalid signature", payload_size: payloadSize, - }).catch(() => {}); + }); res.status(401).json({ error: "Invalid or missing webhook signature" }); - return; + return NO_REINDEX; } // -- Event routing ------------------------------------------------- - const event = req.headers["x-github-event"] as string | undefined; + const event = eventHeader.ok ? 
eventHeader.value : undefined; + if (event === "pull_request") { + let payload: AtlasPullRequestPayload; + try { + payload = JSON.parse( + rawBody.toString("utf-8"), + ) as AtlasPullRequestPayload; + } catch { + recordPullRequestDelivery({ + source: "github", + event_type: "pull_request", + decision: "error", + reason: "malformed JSON", + payload_size: payloadSize, + }); + res.status(400).json({ error: "Malformed JSON payload" }); + return NO_REINDEX; + } + + const repoFullName = payload.repository?.full_name; + if (typeof repoFullName !== "string" || repoFullName.length === 0) { + recordPullRequestDelivery({ + source: "github", + event_type: "pull_request", + decision: "error", + reason: "missing repository.full_name", + payload_size: payloadSize, + }); + res.status(400).json({ error: "Malformed Atlas pull_request payload" }); + return NO_REINDEX; + } + + const serverCfg = getServerConfig(); + const webhookCfg = serverCfg.webhook; + const sourceNames = webhookCfg?.repo_sources?.[repoFullName] ?? 
[]; + if (sourceNames.length === 0) { + recordPullRequestDelivery({ + source: "github", + event_type: "pull_request", + repo: repoFullName, + decision: "ignored", + reason: "repo not in webhook config", + payload_size: payloadSize, + }); + res + .status(200) + .json({ ignored: true, reason: "repo not in webhook config" }); + return NO_REINDEX; + } + + const configuredSourceNames = new Set(sourceNames); + const atlasSources = serverCfg.sources + .filter(isAtlasSourceConfig) + .filter((source) => configuredSourceNames.has(source.name)); + + if (atlasSources.length === 0) { + recordPullRequestDelivery({ + source: "github", + event_type: "pull_request", + repo: repoFullName, + decision: "ignored", + reason: "repo has no atlas sources", + payload_size: payloadSize, + }); + res + .status(200) + .json({ ignored: true, reason: "repo has no atlas sources" }); + return NO_REINDEX; + } + + let extraction; + try { + extraction = extractAtlasPullRequestSeedCandidates( + payload, + atlasSources, + deliveryHeader.ok ? deliveryHeader.value : undefined, + ); + } catch (error) { + recordPullRequestDelivery({ + source: "github", + event_type: "pull_request", + repo: repoFullName, + decision: "error", + reason: + error instanceof Error + ? 
`malformed Atlas pull_request payload: ${error.message}` + : "malformed Atlas pull_request payload", + payload_size: payloadSize, + }); + res.status(400).json({ error: "Malformed Atlas pull_request payload" }); + return NO_REINDEX; + } + + if (!extraction.isMergedPullRequest) { + recordPullRequestDelivery({ + source: "github", + event_type: "pull_request", + repo: repoFullName, + decision: "ignored", + reason: "not a merged pull request", + payload_size: payloadSize, + }); + res + .status(200) + .json({ ignored: true, reason: "not a merged pull request" }); + return NO_REINDEX; + } + + if (extraction.baseBranch !== extraction.defaultBranch) { + recordPullRequestDelivery({ + source: "github", + event_type: "pull_request", + repo: repoFullName, + decision: "ignored", + reason: "not the default branch", + payload_size: payloadSize, + }); + res + .status(200) + .json({ ignored: true, reason: "not the default branch" }); + return NO_REINDEX; + } + + for (const candidate of extraction.candidates) { + await upsertAtlasSeedCandidate(candidate); + } + + recordPullRequestDelivery({ + source: "github", + event_type: "pull_request", + repo: repoFullName, + decision: "queued", + payload_size: payloadSize, + }); + res.status(200).json({ + queued: true, + atlas_seed_candidates: extraction.candidates.length, + }); + return NO_REINDEX; + } + if (event !== "push") { - recordWebhookDelivery({ + recordGithubWebhookDelivery({ source: "github", event_type: event ?? 
"unknown", decision: "ignored", reason: "not a push event", payload_size: payloadSize, - }).catch(() => {}); + }); res.status(200).json({ ignored: true, reason: "not a push event" }); - return; + return NO_REINDEX; } // -- Parse payload ------------------------------------------------- @@ -165,28 +477,40 @@ export function createWebhookHandler(orchestrator: ReindexOrchestrator) { try { payload = JSON.parse(rawBody.toString("utf-8")) as PushPayload; } catch { - recordWebhookDelivery({ + recordGithubWebhookDelivery({ source: "github", event_type: "push", decision: "error", reason: "malformed JSON", payload_size: payloadSize, - }).catch(() => {}); + }); res.status(400).json({ error: "Malformed JSON payload" }); - return; + return NO_REINDEX; + } + + if (!isPushPayload(payload)) { + recordGithubWebhookDelivery({ + source: "github", + event_type: "push", + decision: "error", + reason: "malformed push payload", + payload_size: payloadSize, + }); + res.status(400).json({ error: "Malformed push payload" }); + return NO_REINDEX; } if (!isDefaultBranchPush(payload)) { - recordWebhookDelivery({ + recordGithubWebhookDelivery({ source: "github", event_type: "push", repo: payload.repository.full_name, decision: "ignored", reason: "not the default branch", payload_size: payloadSize, - }).catch(() => {}); + }); res.status(200).json({ ignored: true, reason: "not the default branch" }); - return; + return NO_REINDEX; } const repoFullName = payload.repository.full_name; @@ -201,48 +525,47 @@ export function createWebhookHandler(orchestrator: ReindexOrchestrator) { console.log( `[webhook] Push to ${repoFullName} at ${sha.slice(0, 8)} — repo not in webhook config, ignoring`, ); - recordWebhookDelivery({ + recordGithubWebhookDelivery({ source: "github", event_type: "push", repo: repoFullName, decision: "ignored", reason: "repo not in webhook config", payload_size: payloadSize, - }).catch(() => {}); + }); res .status(200) .json({ ignored: true, reason: "repo not in webhook config" }); - 
return; + return NO_REINDEX; } // Check path triggers for each source. If any source's triggers match // (or it has no triggers, meaning "match all"), queue a reindex. - let shouldReindex = false; + const affectedSourceNames: string[] = []; for (const sourceName of sourceNames) { const triggers = webhookCfg?.path_triggers?.[sourceName] ?? []; if (touchesPaths(payload, triggers)) { - shouldReindex = true; - break; // one reindex covers all sources from the same repo + affectedSourceNames.push(sourceName); } } - if (!shouldReindex) { + if (affectedSourceNames.length === 0) { console.log( `[webhook] Push to ${repoFullName} at ${sha.slice(0, 8)} — ` + `no path triggers matched, ignoring`, ); - recordWebhookDelivery({ + recordGithubWebhookDelivery({ source: "github", event_type: "push", repo: repoFullName, decision: "ignored", reason: "no path triggers matched", payload_size: payloadSize, - }).catch(() => {}); + }); res .status(200) .json({ ignored: true, reason: "no path triggers matched" }); - return; + return NO_REINDEX; } console.log( @@ -250,14 +573,29 @@ export function createWebhookHandler(orchestrator: ReindexOrchestrator) { `(${payload.repository.default_branch}) at ${sha.slice(0, 8)} — queuing reindex`, ); - recordWebhookDelivery({ + recordGithubWebhookDelivery({ source: "github", event_type: "push", repo: repoFullName, decision: "queued", payload_size: payloadSize, - }).catch(() => {}); - orchestrator.queueIncrementalReindex(repoUrl); + }); + const shouldReindexWholeRepo = + affectedSourceNames.length === sourceNames.length || + affectedSourceNames.some( + (sourceName) => !hasPathTriggers(webhookCfg?.path_triggers, sourceName), + ); + if (shouldReindexWholeRepo) { + orchestrator.queueIncrementalReindex(repoUrl); + } else { + for (const sourceName of affectedSourceNames) { + orchestrator.queueSourceReindex(sourceName); + } + } res.status(200).json({ queued: true }); + return { + queuedReindex: true, + affectedSourceNames, + }; }; }