From dccb5af4b0c8bc043daeb61bfe4f7992e8083b8d Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:25:09 +0800 Subject: [PATCH 001/110] feat(web): assistant SSE event union + message types (#970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the type layer for the Ornn Assistant chatbot's data path. The assistant reuses the Playground SSE transport but speaks its own event contract, so the strings are deliberately distinct (`chat_text_delta` vs the Playground's `text-delta`) to keep the two streams from ever cross-wiring. The event union is a Zod discriminated union over chat_start / chat_text_delta / chat_error / chat_finish / keepalive — no `as any`. `parseAssistantEvent` validates each parsed SSE payload and returns null for anything malformed / off-contract, so a drifting backend surfaces as a dropped event rather than an untyped object reaching React. `usage` on chat_finish is forward-compatible: known fields optional, unknown keys stripped. `AssistantMessage` is a structural subset of `PlaygroundMessage` (id/role/content) so the existing ChatMessage renderer can paint assistant turns without adaptation. --- ornn-web/src/types/assistant.test.ts | 94 +++++++++++++++++++++ ornn-web/src/types/assistant.ts | 117 +++++++++++++++++++++++++++ 2 files changed, 211 insertions(+) create mode 100644 ornn-web/src/types/assistant.test.ts create mode 100644 ornn-web/src/types/assistant.ts diff --git a/ornn-web/src/types/assistant.test.ts b/ornn-web/src/types/assistant.test.ts new file mode 100644 index 00000000..4684973a --- /dev/null +++ b/ornn-web/src/types/assistant.test.ts @@ -0,0 +1,94 @@ +/** + * UT-WEB-ASSISTANT-EVENT-001 — assistant SSE event union (#970). + * + * The Zod discriminated union is the only gate between raw SSE payloads + * and the chat UI. 
It must accept every contracted event, tolerate + * forward-compatible extra `usage` keys, and reject anything malformed or + * off-contract so a drifting backend surfaces as a dropped event rather + * than an untyped object reaching React. + * + * @module types/assistant.test + */ + +import { describe, it, expect } from "vitest"; +import { + assistantChatEventSchema, + parseAssistantEvent, + type AssistantChatEvent, +} from "./assistant"; + +describe("assistant SSE event union", () => { + it("accepts chat_start", () => { + const r = assistantChatEventSchema.safeParse({ type: "chat_start", model: "gpt-5" }); + expect(r.success).toBe(true); + }); + + it("accepts chat_text_delta", () => { + const r = assistantChatEventSchema.safeParse({ type: "chat_text_delta", delta: "Hi" }); + expect(r.success).toBe(true); + }); + + it("accepts chat_error with code + message", () => { + const r = assistantChatEventSchema.safeParse({ + type: "chat_error", + code: "rate_limited", + message: "Slow down", + }); + expect(r.success).toBe(true); + }); + + it("accepts chat_finish with no usage", () => { + const r = assistantChatEventSchema.safeParse({ type: "chat_finish" }); + expect(r.success).toBe(true); + }); + + it("accepts chat_finish with usage and tolerates extra usage keys", () => { + const r = assistantChatEventSchema.safeParse({ + type: "chat_finish", + usage: { promptTokens: 10, completionTokens: 5, totalTokens: 15, costUsd: 0.01 }, + }); + expect(r.success).toBe(true); + if (r.success && r.data.type === "chat_finish") { + expect(r.data.usage?.totalTokens).toBe(15); + // Forward-compatible extra key is stripped, not retained. 
+ expect(r.data.usage).not.toHaveProperty("costUsd"); + } + }); + + it("accepts keepalive", () => { + const r = assistantChatEventSchema.safeParse({ type: "keepalive" }); + expect(r.success).toBe(true); + }); + + it("rejects an unknown event type", () => { + const r = assistantChatEventSchema.safeParse({ type: "text-delta", delta: "x" }); + expect(r.success).toBe(false); + }); + + it("rejects chat_text_delta missing its delta", () => { + const r = assistantChatEventSchema.safeParse({ type: "chat_text_delta" }); + expect(r.success).toBe(false); + }); + + it("rejects chat_error missing code", () => { + const r = assistantChatEventSchema.safeParse({ type: "chat_error", message: "boom" }); + expect(r.success).toBe(false); + }); +}); + +describe("parseAssistantEvent", () => { + it("returns the typed event for valid input", () => { + const event = parseAssistantEvent({ type: "chat_text_delta", delta: "yo" }); + expect(event).toEqual({ type: "chat_text_delta", delta: "yo" }); + // Type-narrowing sanity (compile-time guarantee, asserted at runtime). + const narrowed: AssistantChatEvent | null = event; + expect(narrowed?.type).toBe("chat_text_delta"); + }); + + it("returns null for malformed input", () => { + expect(parseAssistantEvent({ type: "nope" })).toBeNull(); + expect(parseAssistantEvent("not an object")).toBeNull(); + expect(parseAssistantEvent(null)).toBeNull(); + expect(parseAssistantEvent({})).toBeNull(); + }); +}); diff --git a/ornn-web/src/types/assistant.ts b/ornn-web/src/types/assistant.ts new file mode 100644 index 00000000..3fd2cc4e --- /dev/null +++ b/ornn-web/src/types/assistant.ts @@ -0,0 +1,117 @@ +/** + * Frontend type definitions for the Ornn Assistant feature (#970). + * + * The Assistant is a repo-aware Q&A chatbot. It reuses the Playground SSE + * transport stack but speaks its OWN event contract — the event `type` + * strings are distinct from the Playground's (`chat_text_delta` vs + * `text-delta`, etc.) so the two streams never get cross-wired. 
+ * + * The event union is Zod-typed: every `data:` payload coming off the SSE + * stream is validated by `assistantChatEventSchema` before the hook acts + * on it. No `as any` / unchecked casts — a drifting backend surfaces as a + * dropped (failed-parse) event rather than a runtime crash deep in the UI. + * + * @module types/assistant + */ + +import { z } from "zod"; + +// --------------------------------------------------------------------------- +// Conversation messages +// --------------------------------------------------------------------------- + +/** + * A single turn in the assistant conversation. Structurally a subset of + * `PlaygroundMessage` (id/role/content) so the existing `ChatMessage` + * renderer can paint these without adaptation — the assistant has no tool + * calls, so the optional tool fields are simply absent. + */ +export interface AssistantMessage { + /** Stable unique identifier for React reconciliation. */ + id: string; + role: "user" | "assistant"; + content: string; +} + +/** Wire shape sent up in the request body — role + content only. */ +export interface AssistantWireMessage { + role: "user" | "assistant"; + content: string; +} + +// --------------------------------------------------------------------------- +// SSE event contract (mirrors the backend POST /api/v1/assistant/chat) +// --------------------------------------------------------------------------- + +/** Stream opened; carries the resolved model id the backend chose. */ +export const assistantChatStartSchema = z.object({ + type: z.literal("chat_start"), + model: z.string(), +}); + +/** Incremental assistant text. Appended to the live answer buffer. */ +export const assistantChatTextDeltaSchema = z.object({ + type: z.literal("chat_text_delta"), + delta: z.string(), +}); + +/** Terminal error. `code` is a stable machine token; `message` is human copy. 
*/ +export const assistantChatErrorSchema = z.object({ + type: z.literal("chat_error"), + code: z.string(), + message: z.string(), +}); + +/** + * Usage accounting attached to `chat_finish`. Shape is advisory — the + * backend may add fields, so unknown keys are tolerated (Zod strips them) + * and every known field is optional. + */ +export const assistantUsageSchema = z + .object({ + promptTokens: z.number().optional(), + completionTokens: z.number().optional(), + totalTokens: z.number().optional(), + }) + .optional(); + +/** Terminal success; optional `usage` accounting. */ +export const assistantChatFinishSchema = z.object({ + type: z.literal("chat_finish"), + usage: assistantUsageSchema, +}); + +/** + * Heartbeat to keep the connection warm through proxies. Carries no + * payload and is ignored by the consumer — present in the union so it + * parses cleanly instead of being dropped as "unknown". + */ +export const assistantKeepaliveSchema = z.object({ + type: z.literal("keepalive"), +}); + +/** Discriminated union over every assistant SSE event. */ +export const assistantChatEventSchema = z.discriminatedUnion("type", [ + assistantChatStartSchema, + assistantChatTextDeltaSchema, + assistantChatErrorSchema, + assistantChatFinishSchema, + assistantKeepaliveSchema, +]); + +export type AssistantChatStartEvent = z.infer<typeof assistantChatStartSchema>; +export type AssistantChatTextDeltaEvent = z.infer<typeof assistantChatTextDeltaSchema>; +export type AssistantChatErrorEvent = z.infer<typeof assistantChatErrorSchema>; +export type AssistantChatFinishEvent = z.infer<typeof assistantChatFinishSchema>; +export type AssistantChatEvent = z.infer<typeof assistantChatEventSchema>; +export type AssistantUsage = z.infer<typeof assistantUsageSchema>; + +/** + * Validate one parsed SSE payload against the event union. + * Returns the typed event on success, or `null` for anything that doesn't + * match (malformed / unknown / drifted) so callers can simply skip it. + */ +export function parseAssistantEvent(payload: unknown): AssistantChatEvent | null { + const result = assistantChatEventSchema.safeParse(payload); + return result.success ? 
result.data : null; +} From 92938b61690101ccb6b8632fd69d9d491b7c0b3f Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:25:19 +0800 Subject: [PATCH 002/110] feat(web): session-scoped assistant chat store (#970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zustand store for the assistant conversation, streaming state, the live answer buffer, and the slide-in panel's open/closed state. Session-only (NO persist) exactly like playgroundStore — the assistant is a transient helper, not a saved thread, so conversation + panel state reset on reload. clearMessages resets the conversation but intentionally leaves panel visibility untouched (clearing a chat shouldn't close the panel the user is looking at). finalizeAssistantMessage is a no-op on an empty buffer so an aborted-before-first-token turn doesn't push a blank bubble. --- ornn-web/src/stores/assistantStore.test.ts | 92 ++++++++++++++++++++ ornn-web/src/stores/assistantStore.ts | 98 ++++++++++++++++++++++ 2 files changed, 190 insertions(+) create mode 100644 ornn-web/src/stores/assistantStore.test.ts create mode 100644 ornn-web/src/stores/assistantStore.ts diff --git a/ornn-web/src/stores/assistantStore.test.ts b/ornn-web/src/stores/assistantStore.test.ts new file mode 100644 index 00000000..6b2d58c7 --- /dev/null +++ b/ornn-web/src/stores/assistantStore.test.ts @@ -0,0 +1,92 @@ +/** + * UT-WEB-ASSISTANT-STORE-001 — assistant store actions (#970). + * + * Verifies the session-scoped conversation/streaming/panel state machine: + * user + assistant turns accumulate in order, the live buffer finalizes + * into a message, and clearing/streaming/panel toggles behave. 
+ * + * @module stores/assistantStore.test + */ + +import { describe, it, expect, beforeEach } from "vitest"; +import { useAssistantStore } from "./assistantStore"; + +function reset() { + useAssistantStore.setState({ + isOpen: false, + messages: [], + isStreaming: false, + error: null, + currentAssistantContent: "", + }); +} + +describe("assistantStore", () => { + beforeEach(reset); + + it("adds a user message and clears any prior error", () => { + useAssistantStore.getState().setError("old"); + useAssistantStore.getState().addUserMessage("What is Ornn?"); + const s = useAssistantStore.getState(); + expect(s.messages).toHaveLength(1); + expect(s.messages[0]).toMatchObject({ role: "user", content: "What is Ornn?" }); + expect(s.error).toBeNull(); + }); + + it("assigns unique ids to successive messages", () => { + const s = useAssistantStore.getState(); + s.addUserMessage("a"); + s.addUserMessage("b"); + const ids = useAssistantStore.getState().messages.map((m) => m.id); + expect(new Set(ids).size).toBe(2); + }); + + it("buffers streamed deltas and finalizes into an assistant message", () => { + const s = useAssistantStore.getState(); + s.startAssistantMessage(); + s.appendAssistantDelta("Hel"); + s.appendAssistantDelta("lo"); + expect(useAssistantStore.getState().currentAssistantContent).toBe("Hello"); + + s.finalizeAssistantMessage(); + const after = useAssistantStore.getState(); + expect(after.currentAssistantContent).toBe(""); + expect(after.messages.at(-1)).toMatchObject({ role: "assistant", content: "Hello" }); + }); + + it("does not push an empty assistant message on finalize", () => { + const s = useAssistantStore.getState(); + s.startAssistantMessage(); + s.finalizeAssistantMessage(); + expect(useAssistantStore.getState().messages).toHaveLength(0); + }); + + it("toggles panel open/closed", () => { + const s = useAssistantStore.getState(); + expect(useAssistantStore.getState().isOpen).toBe(false); + s.openPanel(); + 
expect(useAssistantStore.getState().isOpen).toBe(true); + s.togglePanel(); + expect(useAssistantStore.getState().isOpen).toBe(false); + s.togglePanel(); + expect(useAssistantStore.getState().isOpen).toBe(true); + s.closePanel(); + expect(useAssistantStore.getState().isOpen).toBe(false); + }); + + it("clearMessages resets conversation but leaves panel state untouched", () => { + const s = useAssistantStore.getState(); + s.openPanel(); + s.addUserMessage("hi"); + s.setStreaming(true); + s.setError("boom"); + s.clearMessages(); + const after = useAssistantStore.getState(); + expect(after.messages).toHaveLength(0); + expect(after.isStreaming).toBe(false); + expect(after.error).toBeNull(); + expect(after.currentAssistantContent).toBe(""); + // Panel visibility is independent of conversation reset. + expect(after.isOpen).toBe(true); + }); +}); diff --git a/ornn-web/src/stores/assistantStore.ts b/ornn-web/src/stores/assistantStore.ts new file mode 100644 index 00000000..29fb948c --- /dev/null +++ b/ornn-web/src/stores/assistantStore.ts @@ -0,0 +1,98 @@ +/** + * Zustand store for the Ornn Assistant chat (#970). + * + * Manages the conversation, streaming state, the live answer buffer, and + * the widget's open/closed state. Session-only (NO persist) — the + * conversation and panel state reset on page reload, exactly like + * `playgroundStore`. The assistant is a transient helper, not a saved + * thread. + * + * @module stores/assistantStore + */ + +import { create } from "zustand"; +import type { AssistantMessage } from "@/types/assistant"; + +/** Monotonically increasing counter for stable message IDs. */ +let messageIdCounter = 0; +function nextMessageId(): string { + messageIdCounter += 1; + return `assistant-msg-${messageIdCounter}`; +} + +interface AssistantState { + /** Whether the slide-in chat panel is open. */ + isOpen: boolean; + /** Full conversation history (user + assistant turns). 
*/ + messages: AssistantMessage[]; + /** Whether the assistant is currently streaming a reply. */ + isStreaming: boolean; + /** Current error message, or null. */ + error: string | null; + /** Buffer for the assistant turn currently being streamed. */ + currentAssistantContent: string; + + // Actions — panel + openPanel: () => void; + closePanel: () => void; + togglePanel: () => void; + + // Actions — conversation + addUserMessage: (content: string) => void; + startAssistantMessage: () => void; + appendAssistantDelta: (delta: string) => void; + finalizeAssistantMessage: () => void; + setStreaming: (isStreaming: boolean) => void; + setError: (error: string | null) => void; + clearMessages: () => void; +} + +export const useAssistantStore = create<AssistantState>((set, get) => ({ + isOpen: false, + messages: [], + isStreaming: false, + error: null, + currentAssistantContent: "", + + openPanel: () => set({ isOpen: true }), + closePanel: () => set({ isOpen: false }), + togglePanel: () => set((state) => ({ isOpen: !state.isOpen })), + + addUserMessage: (content) => { + set((state) => ({ + messages: [...state.messages, { id: nextMessageId(), role: "user", content }], + error: null, + })); + }, + + startAssistantMessage: () => { + set({ currentAssistantContent: "" }); + }, + + appendAssistantDelta: (delta) => { + set((state) => ({ + currentAssistantContent: state.currentAssistantContent + delta, + })); + }, + + finalizeAssistantMessage: () => { + const content = get().currentAssistantContent; + if (content) { + set((state) => ({ + messages: [...state.messages, { id: nextMessageId(), role: "assistant", content }], + currentAssistantContent: "", + })); + } + }, + + setStreaming: (isStreaming) => set({ isStreaming }), + setError: (error) => set({ error }), + + clearMessages: () => + set({ + messages: [], + isStreaming: false, + error: null, + currentAssistantContent: "", + }), +})); From eb983d3d5b27572256bf53e8b3c16b3f2e118a24 Mon Sep 17 00:00:00 2001 From: Shining 
<250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:25:19 +0800 Subject: [PATCH 003/110] feat(web): assistant SSE stream client (#970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit POST /api/v1/assistant/chat streaming client, mirroring playgroundStreamApi: canonical Bearer auth via the auth store's ensureFreshToken, AbortController cancel, shared parseSseChunk. Every parsed data: payload is validated through parseAssistantEvent before dispatch; unrecognized events are debug-logged and skipped rather than forwarded untyped. Non-OK responses are decoded as RFC 7807 problem+json (per docs/CONVENTIONS.md) — detail/code are lifted into a synthetic chat_error event so the UI has one uniform failure channel regardless of whether the failure was transport-level or a streamed error. --- ornn-web/src/services/assistantStreamApi.ts | 153 ++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 ornn-web/src/services/assistantStreamApi.ts diff --git a/ornn-web/src/services/assistantStreamApi.ts b/ornn-web/src/services/assistantStreamApi.ts new file mode 100644 index 00000000..0d87284a --- /dev/null +++ b/ornn-web/src/services/assistantStreamApi.ts @@ -0,0 +1,153 @@ +/** + * SSE streaming client for the Ornn Assistant chat endpoint (#970). + * + * POST /api/v1/assistant/chat — AUTH REQUIRED. Sends the conversation + * history (+ optional model override) as a JSON body and consumes the + * Server-Sent Events response. Mirrors `playgroundStreamApi` (canonical + * auth header, AbortController cancel, shared `parseSseChunk`) but speaks + * the assistant's own Zod-validated event contract. 
+ * + * @module services/assistantStreamApi + */ + +import { parseSseChunk } from "@/utils/sseParser"; +import { useAuthStore } from "@/stores/authStore"; +import { + parseAssistantEvent, + type AssistantChatEvent, + type AssistantWireMessage, +} from "@/types/assistant"; +import { config } from "@/config"; +import { createLogger } from "@/lib/logger"; + +const logger = createLogger("assistantStreamApi"); + +const API_BASE = config.apiBaseUrl; + +export interface AssistantStreamParams { + messages: AssistantWireMessage[]; + // exactOptionalPropertyTypes (#657) + modelId?: string | undefined; +} + +export interface StreamHandle { + abort: () => void; +} + +/** Ensure a fresh token and retrieve the Bearer header from the auth store. */ +async function getAuthHeaders(): Promise<Record<string, string>> { + await useAuthStore.getState().ensureFreshToken(); + const token = useAuthStore.getState().accessToken; + if (!token) return {}; + return { Authorization: `Bearer ${token}` }; +} + +/** + * Stream assistant replies from the SSE endpoint. + * + * Every parsed `data:` payload is validated against the assistant event + * union; anything that fails (malformed / unknown / drifted) is skipped + * with a debug log rather than forwarded as an untyped object. 
+ * + * @param params conversation history + optional model override + * @param onEvent invoked once per validated SSE event, in arrival order + * @returns a handle whose `abort()` cancels the in-flight request + */ +export function streamAssistantChat( + params: AssistantStreamParams, + onEvent: (event: AssistantChatEvent) => void, +): StreamHandle { + const controller = new AbortController(); + const url = new URL(`${API_BASE}/api/v1/assistant/chat`, window.location.origin); + + (async () => { + try { + const authHeaders = await getAuthHeaders(); + logger.info("assistant chat stream opening", { messageCount: params.messages.length }); + + const response = await fetch(url.toString(), { + method: "POST", + headers: { + "Content-Type": "application/json", + Accept: "text/event-stream", + ...authHeaders, + }, + body: JSON.stringify({ + messages: params.messages, + modelId: params.modelId, + }), + signal: controller.signal, + }); + + if (!response.ok) { + const text = await response.text().catch(() => ""); + let message = `HTTP ${response.status}: ${response.statusText}`; + let code = `http_${response.status}`; + try { + const json = JSON.parse(text); + // RFC 7807 problem+json (docs/CONVENTIONS.md) — prefer detail/title. 
+ if (typeof json.detail === "string") message = json.detail; + else if (json.error?.message) message = json.error.message; + if (typeof json.code === "string") code = json.code; + } catch { + /* use default message */ + } + logger.error("assistant chat stream failed", { status: response.status, code }); + onEvent({ type: "chat_error", code, message }); + return; + } + + const reader = response.body?.getReader(); + if (!reader) { + onEvent({ + type: "chat_error", + code: "stream_unsupported", + message: "ReadableStream not supported", + }); + return; + } + + const decoder = new TextDecoder(); + let buffer = ""; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const { events, remainder } = parseSseChunk(buffer); + buffer = remainder; + + for (const raw of events) { + const event = parseAssistantEvent(raw); + if (event) onEvent(event); + else logger.debug("dropped unrecognized assistant event"); + } + } + + // Flush any trailing buffered event. + if (buffer.trim()) { + const { events } = parseSseChunk(buffer + "\n\n"); + for (const raw of events) { + const event = parseAssistantEvent(raw); + if (event) onEvent(event); + } + } + + logger.info("assistant chat stream closed"); + } catch (err) { + if ((err as Error).name === "AbortError") { + logger.debug("assistant chat stream aborted"); + return; + } + logger.error("assistant chat stream error", { message: (err as Error).message }); + onEvent({ + type: "chat_error", + code: "stream_failed", + message: (err as Error).message ?? 
"Stream connection failed", + }); + } + })(); + + return { abort: () => controller.abort() }; +} From e0b925519f09b21506047c6b8461ed888ba96d5b Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:25:31 +0800 Subject: [PATCH 004/110] feat(web): useAssistantChat send/stream hook (#970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drives the send → stream → display loop. Reuses the playground typewriter pacer: chat_text_delta events fill a pending buffer that a fixed-cadence pacer drains char-by-char, with adaptive catch-up so the visible text stays within ~1s of what's been received. Terminal events (chat_finish / chat_error / abort) flush the buffer immediately. Errors surface on BOTH the store (inline panel display) and a toast, so a user who scrolled away still sees the failure. keepalive heartbeats are ignored. The in-flight stream and pacer interval are torn down on unmount. --- ornn-web/src/hooks/useAssistantChat.test.tsx | 151 +++++++++++++++ ornn-web/src/hooks/useAssistantChat.ts | 192 +++++++++++++++++++ 2 files changed, 343 insertions(+) create mode 100644 ornn-web/src/hooks/useAssistantChat.test.tsx create mode 100644 ornn-web/src/hooks/useAssistantChat.ts diff --git a/ornn-web/src/hooks/useAssistantChat.test.tsx b/ornn-web/src/hooks/useAssistantChat.test.tsx new file mode 100644 index 00000000..465aa624 --- /dev/null +++ b/ornn-web/src/hooks/useAssistantChat.test.tsx @@ -0,0 +1,151 @@ +/** + * UT-WEB-ASSISTANT-HOOK-001 — useAssistantChat send/stream loop (#970). + * + * Drives the hook through a full turn against a stubbed stream client: + * a sent message lands as a user turn, streamed deltas flush into the + * finalized assistant message on `chat_finish`, `chat_error` toasts + + * records the error, and abort stops streaming. 
The stream module is + * mocked so the test owns the `onEvent` callback and never opens a real + * connection; the toast store is mocked to a spy. + * + * @module hooks/useAssistantChat.test + */ + +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { act, renderHook } from "@testing-library/react"; +import type { AssistantChatEvent } from "@/types/assistant"; +import type { AssistantStreamParams, StreamHandle } from "@/services/assistantStreamApi"; + +// --- Stubbed stream client ------------------------------------------------- +// Captures the latest (params, onEvent) so the test can replay SSE events, +// and exposes an `abort` spy on the returned handle. +let lastParams: AssistantStreamParams | null = null; +let lastOnEvent: ((e: AssistantChatEvent) => void) | null = null; +const abortSpy = vi.fn(); + +vi.mock("@/services/assistantStreamApi", () => ({ + streamAssistantChat: ( + params: AssistantStreamParams, + onEvent: (e: AssistantChatEvent) => void, + ): StreamHandle => { + lastParams = params; + lastOnEvent = onEvent; + return { abort: abortSpy }; + }, +})); + +// --- Stubbed toast store --------------------------------------------------- +const addToast = vi.fn(); +vi.mock("@/stores/toastStore", () => ({ + useToastStore: <T,>(selector: (s: { addToast: typeof addToast }) => T) => + selector({ addToast }), +})); + +import { useAssistantChat } from "./useAssistantChat"; +import { useAssistantStore } from "@/stores/assistantStore"; + +/** Replay one SSE event through the captured handler, inside act(). 
*/ +function emit(event: AssistantChatEvent) { + act(() => { + lastOnEvent?.(event); + }); +} + +beforeEach(() => { + lastParams = null; + lastOnEvent = null; + abortSpy.mockReset(); + addToast.mockReset(); + useAssistantStore.setState({ + isOpen: false, + messages: [], + isStreaming: false, + error: null, + currentAssistantContent: "", + }); +}); + +describe("useAssistantChat", () => { + it("sends the trimmed user turn and forwards conversation + model", () => { + const { result } = renderHook(() => useAssistantChat()); + + act(() => result.current.sendMessage(" What is Ornn? ", "gpt-5")); + + expect(result.current.messages).toHaveLength(1); + expect(result.current.messages[0]).toMatchObject({ + role: "user", + content: "What is Ornn?", + }); + expect(result.current.isStreaming).toBe(true); + expect(lastParams?.modelId).toBe("gpt-5"); + expect(lastParams?.messages).toEqual([{ role: "user", content: "What is Ornn?" }]); + }); + + it("ignores empty / whitespace-only sends", () => { + const { result } = renderHook(() => useAssistantChat()); + act(() => result.current.sendMessage(" ")); + expect(result.current.messages).toHaveLength(0); + expect(lastOnEvent).toBeNull(); + }); + + it("flushes streamed deltas into a finalized assistant message on finish", () => { + const { result } = renderHook(() => useAssistantChat()); + + act(() => result.current.sendMessage("hi")); + emit({ type: "chat_start", model: "gpt-5" }); + emit({ type: "chat_text_delta", delta: "Ornn " }); + emit({ type: "chat_text_delta", delta: "is a registry." 
}); + emit({ type: "chat_finish" }); + + expect(result.current.isStreaming).toBe(false); + expect(result.current.currentAssistantContent).toBe(""); + expect(result.current.messages.at(-1)).toMatchObject({ + role: "assistant", + content: "Ornn is a registry.", + }); + }); + + it("toasts and records the error on chat_error", () => { + const { result } = renderHook(() => useAssistantChat()); + + act(() => result.current.sendMessage("hi")); + emit({ type: "chat_error", code: "rate_limited", message: "Slow down" }); + + expect(result.current.isStreaming).toBe(false); + expect(result.current.error).toBe("Slow down"); + expect(addToast).toHaveBeenCalledWith({ type: "error", message: "Slow down" }); + }); + + it("ignores keepalive heartbeats", () => { + const { result } = renderHook(() => useAssistantChat()); + act(() => result.current.sendMessage("hi")); + emit({ type: "keepalive" }); + expect(result.current.isStreaming).toBe(true); + expect(result.current.error).toBeNull(); + }); + + it("abort stops streaming and finalizes whatever was received", () => { + const { result } = renderHook(() => useAssistantChat()); + + act(() => result.current.sendMessage("hi")); + emit({ type: "chat_text_delta", delta: "Partial" }); + act(() => result.current.abort()); + + expect(abortSpy).toHaveBeenCalled(); + expect(result.current.isStreaming).toBe(false); + expect(result.current.messages.at(-1)).toMatchObject({ + role: "assistant", + content: "Partial", + }); + }); + + it("clearChat empties the conversation and aborts any stream", () => { + const { result } = renderHook(() => useAssistantChat()); + + act(() => result.current.sendMessage("hi")); + act(() => result.current.clearChat()); + + expect(abortSpy).toHaveBeenCalled(); + expect(result.current.messages).toHaveLength(0); + }); +}); diff --git a/ornn-web/src/hooks/useAssistantChat.ts b/ornn-web/src/hooks/useAssistantChat.ts new file mode 100644 index 00000000..e11d635f --- /dev/null +++ b/ornn-web/src/hooks/useAssistantChat.ts @@ 
-0,0 +1,192 @@ +/** + * Hook driving the Ornn Assistant send → stream → display loop (#970). + * + * Streaming/typewriter model is identical to `usePlaygroundChat`: + * - SSE `chat_text_delta` events arrive at upstream-LLM rate and are + * pushed into a `pendingTokensRef` buffer. + * - A pacer drains that buffer one (or more) characters per tick onto + * the visible answer at a steady cadence — the typewriter effect is + * decoupled from network jitter. When the model races ahead, the + * pacer takes bigger bites so the visible text stays within ~1s of + * what's been received. + * - On any terminal event (`chat_finish` / `chat_error` / abort) we + * drain whatever's left immediately — the typewriter is a nicety, + * not a contract. + * + * Errors surface BOTH on the store (for inline display in the panel) and + * as a toast, so a user who has scrolled away still sees the failure. + * + * @module hooks/useAssistantChat + */ + +import { useCallback, useRef, useEffect } from "react"; +import { useAssistantStore } from "@/stores/assistantStore"; +import { streamAssistantChat, type StreamHandle } from "@/services/assistantStreamApi"; +import { useToastStore } from "@/stores/toastStore"; +import type { AssistantChatEvent } from "@/types/assistant"; +import { createLogger } from "@/lib/logger"; + +const logger = createLogger("useAssistantChat"); + +/** Pacer tick interval. 22 ms ≈ 45 chars/sec when the buffer is small. */ +const PACE_TICK_MS = 22; + +/** + * Adaptive drain rate — keep visible text within ~1s of what's received. 
+ * - < 60 chars buffered: 1 char/tick → calm typewriter + * - < 200 chars buffered: 3 chars/tick + * - >= 200 chars buffered: ceil(buffer / 60) chars/tick → catch up + */ +function charsPerTick(bufferLength: number): number { + if (bufferLength < 60) return 1; + if (bufferLength < 200) return 3; + return Math.ceil(bufferLength / 60); +} + +export function useAssistantChat() { + const store = useAssistantStore(); + const addToast = useToastStore((s) => s.addToast); + const streamRef = useRef<StreamHandle | null>(null); + + // Pacer state — chars received from SSE that haven't been painted yet. + const pendingTokensRef = useRef(""); + const paceTimerRef = useRef<ReturnType<typeof setInterval> | null>(null); + + /** Pop one tick's worth of chars from the buffer and append. Iterate + * via `Array.from` so a 4-byte emoji counts as one character. */ + const drainOneTick = useCallback(() => { + const buf = pendingTokensRef.current; + if (!buf) { + if (paceTimerRef.current !== null) { + clearInterval(paceTimerRef.current); + paceTimerRef.current = null; + } + return; + } + const chars = Array.from(buf); + const take = Math.min(charsPerTick(chars.length), chars.length); + const head = chars.slice(0, take).join(""); + const tail = chars.slice(take).join(""); + pendingTokensRef.current = tail; + useAssistantStore.getState().appendAssistantDelta(head); + }, []); + + const ensurePacer = useCallback(() => { + if (paceTimerRef.current !== null) return; + paceTimerRef.current = setInterval(drainOneTick, PACE_TICK_MS); + }, [drainOneTick]); + + /** Drain everything to the display immediately (terminal events). 
*/ + const drainAll = useCallback(() => { + if (paceTimerRef.current !== null) { + clearInterval(paceTimerRef.current); + paceTimerRef.current = null; + } + const buf = pendingTokensRef.current; + if (buf) { + pendingTokensRef.current = ""; + useAssistantStore.getState().appendAssistantDelta(buf); + } + }, []); + + const handleEvent = useCallback( + (event: AssistantChatEvent) => { + const s = useAssistantStore.getState(); + + switch (event.type) { + case "chat_start": + logger.debug("assistant stream started", { model: event.model }); + break; + + case "chat_text_delta": + pendingTokensRef.current += event.delta; + ensurePacer(); + break; + + case "chat_error": + drainAll(); + s.setError(event.message); + s.setStreaming(false); + logger.error("assistant stream error", { code: event.code }); + addToast({ type: "error", message: event.message }); + break; + + case "chat_finish": + drainAll(); + s.finalizeAssistantMessage(); + s.setStreaming(false); + logger.info("assistant stream finished"); + break; + + case "keepalive": + // Heartbeat — nothing to do. 
+ break; + } + }, + [addToast, drainAll, ensurePacer], + ); + + const sendMessage = useCallback( + (content: string, modelId?: string) => { + const trimmed = content.trim(); + if (!trimmed) return; + + streamRef.current?.abort(); + pendingTokensRef.current = ""; + if (paceTimerRef.current !== null) { + clearInterval(paceTimerRef.current); + paceTimerRef.current = null; + } + + const s = useAssistantStore.getState(); + s.addUserMessage(trimmed); + s.setStreaming(true); + s.setError(null); + s.startAssistantMessage(); + + const msgs = useAssistantStore + .getState() + .messages.map((m) => ({ role: m.role, content: m.content })); + streamRef.current = streamAssistantChat({ messages: msgs, modelId }, handleEvent); + }, + [handleEvent], + ); + + const abort = useCallback(() => { + streamRef.current?.abort(); + streamRef.current = null; + drainAll(); + const s = useAssistantStore.getState(); + s.finalizeAssistantMessage(); + s.setStreaming(false); + }, [drainAll]); + + const clearChat = useCallback(() => { + streamRef.current?.abort(); + streamRef.current = null; + if (paceTimerRef.current !== null) { + clearInterval(paceTimerRef.current); + paceTimerRef.current = null; + } + pendingTokensRef.current = ""; + useAssistantStore.getState().clearMessages(); + }, []); + + // Abort any in-flight stream + stop the pacer on unmount. 
+ useEffect(() => { + return () => { + streamRef.current?.abort(); + if (paceTimerRef.current !== null) clearInterval(paceTimerRef.current); + }; + }, []); + + return { + messages: store.messages, + isStreaming: store.isStreaming, + error: store.error, + currentAssistantContent: store.currentAssistantContent, + sendMessage, + abort, + clearChat, + }; +} From b74a97ebe84069817227519975a2aad4c02d3ae2 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:28:43 +0800 Subject: [PATCH 005/110] feat(web): add assistant surface flags to provider model types (#970) Mirror the backend LlmProviderModel contract (ornn-api settings llmProviders/types.ts) on the web client: add enabledForAssistant + defaultForAssistant to LlmProviderModel, and the matching optional fields to ModelFlagsPatchInput so the admin drawer can PATCH the assistant surface. The backend PATCH schema already accepts both as optional booleans and the repository defaults them to false, so existing provider docs deserialize unchanged. --- ornn-web/src/services/settingsApi.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ornn-web/src/services/settingsApi.ts b/ornn-web/src/services/settingsApi.ts index 162e6579..2d368171 100644 --- a/ornn-web/src/services/settingsApi.ts +++ b/ornn-web/src/services/settingsApi.ts @@ -196,8 +196,12 @@ export interface LlmProviderModel { * `defaultFor` invariant across every provider. */ enabledForPlayground: boolean; enabledForSkillGen: boolean; + /** #970 — Ornn Assistant surface (repo-aware Q&A chatbot). */ + enabledForAssistant: boolean; defaultForPlayground: boolean; defaultForSkillGen: boolean; + /** #970 — Ornn Assistant surface default. 
*/ + defaultForAssistant: boolean; removed: boolean; firstSeenAt?: string; lastSyncedAt?: string; @@ -231,8 +235,10 @@ export interface LlmProviderInput { export interface ModelFlagsPatchInput { enabledForPlayground?: boolean; enabledForSkillGen?: boolean; + enabledForAssistant?: boolean; defaultForPlayground?: boolean; defaultForSkillGen?: boolean; + defaultForAssistant?: boolean; } export async function listLlmProviders(): Promise { From c109f26ebe5b2f05334f984c9d8d6cd48cc19ac2 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:30:19 +0800 Subject: [PATCH 006/110] feat(web): assistant surface column in ProviderModelsDrawer (#970) Add a fourth surface column (Assistant) to the per-provider model manager, mirroring Playground and Skill-Gen: an enable Toggle plus a default Radio per model row, each firing the existing optimistic PATCH with enabledForAssistant / defaultForAssistant. Switched the table off table-fixed so four columns share width gracefully, and bumped the archived-section colSpan to 4. Column header copy stays inline English to match its hardcoded Playground / Skill-Gen siblings in this admin-only drawer; the user-facing assistant strings are fully i18n'd in the widget. --- .../settings/ProviderModelsDrawer.test.tsx | 135 ++++++++++++++++++ .../admin/settings/ProviderModelsDrawer.tsx | 27 +++- 2 files changed, 160 insertions(+), 2 deletions(-) create mode 100644 ornn-web/src/components/admin/settings/ProviderModelsDrawer.test.tsx diff --git a/ornn-web/src/components/admin/settings/ProviderModelsDrawer.test.tsx b/ornn-web/src/components/admin/settings/ProviderModelsDrawer.test.tsx new file mode 100644 index 00000000..bc4204fe --- /dev/null +++ b/ornn-web/src/components/admin/settings/ProviderModelsDrawer.test.tsx @@ -0,0 +1,135 @@ +/** + * ProviderModelsDrawer — assistant surface column (#970). 
+ * + * The drawer gained a fourth surface column (Assistant) alongside + * Playground and Skill-Gen. This guards that the assistant Toggle/Radio + * render and PATCH the right flag, so the admin can target the Ornn + * Assistant surface exactly like the other two. + * + * framer-motion is stubbed pass-through; the toast store + settings API + * are mocked so no network / localStorage init chain runs. The drawer's + * patch mutation is built inline with useMutation, so the component is + * wrapped in a QueryClientProvider. + * + * @module components/admin/settings/ProviderModelsDrawer.test + */ + +import { describe, expect, it, vi, beforeEach, afterEach } from "vitest"; +import { cleanup, fireEvent, render, screen, waitFor } from "@testing-library/react"; +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import type { ReactNode } from "react"; +import type { LlmProvider } from "@/services/settingsApi"; + +const addToast = vi.fn(); +const patchProviderModelFlags = vi.fn(); + +vi.mock("framer-motion", () => ({ + AnimatePresence: ({ children }: { children: React.ReactNode }) => <>{children}, + motion: new Proxy( + {}, + { + get: + (_t, tag: string) => + ({ + children, + initial: _i, + animate: _a, + exit: _e, + transition: _tr, + ...rest + }: Record & { children?: React.ReactNode }) => { + void _i; + void _a; + void _e; + void _tr; + const Tag = tag as keyof React.JSX.IntrinsicElements; + return {children}; + }, + }, + ), +})); + +vi.mock("@/stores/toastStore", () => ({ + useToastStore: (selector: (s: { addToast: typeof addToast }) => T) => + selector({ addToast }), +})); + +vi.mock("@/services/settingsApi", () => ({ + patchProviderModelFlags: (...args: unknown[]) => patchProviderModelFlags(...args), +})); + +import { ProviderModelsDrawer } from "./ProviderModelsDrawer"; + +const PROVIDER: LlmProvider = { + _id: "prov-1", + name: "Alpha Gateway", + gatewayUrl: "https://alpha.example.com/v1", + modelListUrl: 
"https://alpha.example.com/v1/models", + apiFormat: "chat-completion", + auth: { kind: "apiKey", apiKey: "k" }, + maxOutputTokens: 4096, + defaultTemperature: 0.7, + models: [ + { + id: "gpt-5", + displayName: "GPT-5", + enabledForPlayground: true, + enabledForSkillGen: false, + enabledForAssistant: false, + defaultForPlayground: false, + defaultForSkillGen: false, + defaultForAssistant: false, + removed: false, + }, + ], +}; + +function renderDrawer() { + const qc = new QueryClient({ defaultOptions: { queries: { retry: false } } }); + return render( + + {}} provider={PROVIDER} /> + , + ); +} + +beforeEach(() => { + addToast.mockReset(); + patchProviderModelFlags.mockReset(); + patchProviderModelFlags.mockResolvedValue(PROVIDER); +}); + +afterEach(cleanup); + +describe("ProviderModelsDrawer — assistant column", () => { + it("renders the Assistant column header", () => { + renderDrawer(); + expect(screen.getByText("Assistant")).toBeInTheDocument(); + }); + + it("renders the assistant enable toggle + default radio for a model", () => { + renderDrawer(); + expect(screen.getByLabelText("Enabled for assistant")).toBeInTheDocument(); + expect(screen.getByLabelText("Default for assistant")).toBeInTheDocument(); + }); + + it("PATCHes enabledForAssistant when the assistant toggle is flipped", async () => { + renderDrawer(); + fireEvent.click(screen.getByLabelText("Enabled for assistant")); + await waitFor(() => + expect(patchProviderModelFlags).toHaveBeenCalledWith("prov-1", "gpt-5", { + enabledForAssistant: true, + }), + ); + }); + + it("PATCHes defaultForAssistant when the assistant default radio is selected", async () => { + renderDrawer(); + fireEvent.click(screen.getByLabelText("Default for assistant")); + await waitFor(() => + expect(patchProviderModelFlags).toHaveBeenCalledWith("prov-1", "gpt-5", { + defaultForAssistant: true, + }), + ); + }); +}); diff --git a/ornn-web/src/components/admin/settings/ProviderModelsDrawer.tsx 
b/ornn-web/src/components/admin/settings/ProviderModelsDrawer.tsx index a6c21db1..10186759 100644 --- a/ornn-web/src/components/admin/settings/ProviderModelsDrawer.tsx +++ b/ornn-web/src/components/admin/settings/ProviderModelsDrawer.tsx @@ -7,10 +7,12 @@ * operator can: * - toggle "Enabled for Playground" * - toggle "Enabled for SkillGen" + * - toggle "Enabled for Assistant" (#970) * - radio-pick "Default for Playground" (server enforces at-most-one * across **all** providers, so flipping one default unselects every * other provider's default for that surface in the same write) * - radio-pick "Default for SkillGen" (same) + * - radio-pick "Default for Assistant" (same — #970) * * Removed-from-upstream rows (`removed: true`) are segregated below the * active rows with an "archived" badge and a disabled toggle column — @@ -156,7 +158,7 @@ export function ProviderModelsDrawer({ to pull the upstream catalog.

) : ( - +
+ @@ -182,7 +187,7 @@ export function ProviderModelsDrawer({ {archived.length > 0 && ( <> - @@ -270,6 +275,24 @@ function ModelRow({ model, disabled, onPatch }: ModelRowProps) { /> + ); } From 8ba6290dd1704d9de92b1e34ff89389bc852b9d3 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:31:40 +0800 Subject: [PATCH 007/110] feat(web): show assistant-enabled model count in providers list (#970) The per-provider row summary counted playground- and skillGen-enabled models; add the assistant-enabled count so admins can see at a glance how many models back the Ornn Assistant surface. Extends the modelCounts i18n string (en + zh) with the new {{assistant}} slot. --- ornn-web/src/i18n/en.json | 2 +- ornn-web/src/i18n/zh.json | 2 +- .../src/pages/admin/settings/sections/LlmProvidersSection.tsx | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ornn-web/src/i18n/en.json b/ornn-web/src/i18n/en.json index 5c233e4d..66a973e0 100644 --- a/ornn-web/src/i18n/en.json +++ b/ornn-web/src/i18n/en.json @@ -1547,7 +1547,7 @@ "models": "Models", "actions": "Actions" }, - "modelCounts": "{{playground}} playground · {{skillGen}} skillGen · {{total}} total", + "modelCounts": "{{playground}} playground · {{skillGen}} skillGen · {{assistant}} assistant · {{total}} total", "archivedCount": "{{count}} archived", "toast": { "synced": "Synced {{name}}: +{{added}} added, {{updated}} updated, {{removed}} removed.", diff --git a/ornn-web/src/i18n/zh.json b/ornn-web/src/i18n/zh.json index bfd64b0c..63c78cc0 100644 --- a/ornn-web/src/i18n/zh.json +++ b/ornn-web/src/i18n/zh.json @@ -1547,7 +1547,7 @@ "models": "模型", "actions": "操作" }, - "modelCounts": "{{playground}} playground · {{skillGen}} skillGen · 共 {{total}}", + "modelCounts": "{{playground}} playground · {{skillGen}} skillGen · {{assistant}} assistant · 共 {{total}}", "archivedCount": "已归档 {{count}}", "toast": { "synced": "已同步 {{name}}:新增 +{{added}},更新 
{{updated}},移除 {{removed}}。", diff --git a/ornn-web/src/pages/admin/settings/sections/LlmProvidersSection.tsx b/ornn-web/src/pages/admin/settings/sections/LlmProvidersSection.tsx index 08b5adb5..b63ba194 100644 --- a/ornn-web/src/pages/admin/settings/sections/LlmProvidersSection.tsx +++ b/ornn-web/src/pages/admin/settings/sections/LlmProvidersSection.tsx @@ -210,6 +210,7 @@ function ProviderRow({ const active = provider.models.filter((m) => !m.removed); const playground = active.filter((m) => m.enabledForPlayground).length; const skillGen = active.filter((m) => m.enabledForSkillGen).length; + const assistant = active.filter((m) => m.enabledForAssistant).length; const removedCount = provider.models.length - active.length; return ( @@ -225,6 +226,7 @@ function ProviderRow({ {t("adminSettings.sections.llmProviders.modelCounts", { playground, skillGen, + assistant, total: active.length, })} {removedCount > 0 && ( From aea964245048dea5ae696deaebb2255950e70f25 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:34:20 +0800 Subject: [PATCH 008/110] =?UTF-8?q?feat(api):=20introduce=20'assistant'=20?= =?UTF-8?q?LLM=20surface=20=E2=80=94=20model=20flags=20+=20settings=20sect?= =?UTF-8?q?ion=20(#970)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the third LLM surface (after playground/skillGen) that backs the Ornn Assistant repo-aware Q&A chatbot. - LlmProviderModel gains per-surface flags enabledForAssistant / defaultForAssistant; SurfaceKey gains 'Assistant'. Reads of pre-#970 provider docs default both to false so an existing model never auto-routes to the assistant until an admin opts it in. - Surface union widened to 'playground'|'skillGen'|'assistant'. The surface-field helpers and the at-most-one-default-per-surface invariant are now table-driven (SURFACE_KEY + ALL_SURFACES) so a future surface is a one-line addition, not scattered ternaries. 
- New 'assistant' settings section (defaultProviderId, defaultModelId, sseKeepAliveMs, defaultMonthlyQuota) mirroring playground/skillGen, registered in the section registry and exposed via SettingsService.getAssistant(). Surfaces stay independent: flipping the assistant default leaves the playground/skillGen defaults untouched (covered by new unit tests for resolver precedence + the cross-provider single-default invariant). --- .../settings/exportImport/exporter.test.ts | 3 + .../settings/exportImport/importer.test.ts | 1 + .../settings/exportImport/routes.test.ts | 1 + .../settings/llmProviders/repository.test.ts | 6 + .../settings/llmProviders/repository.ts | 7 +- .../settings/llmProviders/service.test.ts | 145 +++++++++++++++++- .../domains/settings/llmProviders/service.ts | 41 ++++- .../domains/settings/llmProviders/types.ts | 4 + .../domains/settings/sections/assistant.ts | 49 ++++++ .../src/domains/settings/sections/index.ts | 5 + .../settings/sections/sections.test.ts | 55 +++++++ ornn-api/src/domains/settings/service.ts | 4 + ornn-api/src/domains/settings/types.ts | 2 + 13 files changed, 312 insertions(+), 11 deletions(-) create mode 100644 ornn-api/src/domains/settings/sections/assistant.ts diff --git a/ornn-api/src/domains/settings/exportImport/exporter.test.ts b/ornn-api/src/domains/settings/exportImport/exporter.test.ts index 9cafb7da..8db03b1c 100644 --- a/ornn-api/src/domains/settings/exportImport/exporter.test.ts +++ b/ornn-api/src/domains/settings/exportImport/exporter.test.ts @@ -58,8 +58,10 @@ function fakeSettingsService(): SettingsService { displayName: "GPT-4o", enabledForPlayground: true, enabledForSkillGen: true, + enabledForAssistant: false, defaultForPlayground: true, defaultForSkillGen: false, + defaultForAssistant: false, removed: false, firstSeenAt: new Date("2026-01-01"), lastSyncedAt: new Date("2026-04-01"), @@ -79,6 +81,7 @@ function fakeSettingsService(): SettingsService { return { getPlayground: () => make("playground"), 
getSkillGen: () => make("skillGen"), + getAssistant: () => make("assistant"), getMirror: () => make("mirror"), getNyxid: () => make("nyxid"), getSkillAudit: () => make("skillAudit"), diff --git a/ornn-api/src/domains/settings/exportImport/importer.test.ts b/ornn-api/src/domains/settings/exportImport/importer.test.ts index 5d592111..40af9765 100644 --- a/ornn-api/src/domains/settings/exportImport/importer.test.ts +++ b/ornn-api/src/domains/settings/exportImport/importer.test.ts @@ -32,6 +32,7 @@ function fakeSettingsService(initial?: Partial store.get("playground") as never, getSkillGen: async () => store.get("skillGen") as never, + getAssistant: async () => store.get("assistant") as never, getMirror: async () => store.get("mirror") as never, getNyxid: async () => store.get("nyxid") as never, getSkillAudit: async () => store.get("skillAudit") as never, diff --git a/ornn-api/src/domains/settings/exportImport/routes.test.ts b/ornn-api/src/domains/settings/exportImport/routes.test.ts index b643eede..58a96f1e 100644 --- a/ornn-api/src/domains/settings/exportImport/routes.test.ts +++ b/ornn-api/src/domains/settings/exportImport/routes.test.ts @@ -27,6 +27,7 @@ function fakeSettingsService(): SettingsService { return { getPlayground: async () => store.get("playground") as never, getSkillGen: async () => store.get("skillGen") as never, + getAssistant: async () => store.get("assistant") as never, getMirror: async () => store.get("mirror") as never, getNyxid: async () => store.get("nyxid") as never, getSkillAudit: async () => store.get("skillAudit") as never, diff --git a/ornn-api/src/domains/settings/llmProviders/repository.test.ts b/ornn-api/src/domains/settings/llmProviders/repository.test.ts index d63c7ce0..2e1189e3 100644 --- a/ornn-api/src/domains/settings/llmProviders/repository.test.ts +++ b/ornn-api/src/domains/settings/llmProviders/repository.test.ts @@ -61,8 +61,10 @@ function model( displayName: id, enabledForPlayground: false, enabledForSkillGen: false, + 
enabledForAssistant: false, defaultForPlayground: false, defaultForSkillGen: false, + defaultForAssistant: false, removed: false, firstSeenAt: NOW, lastSyncedAt: NOW, @@ -245,6 +247,10 @@ describe("LlmProvidersRepository.normalizeModel (read shim)", () => { expect(m.enabledForSkillGen).toBe(true); expect(m.defaultForPlayground).toBe(false); expect(m.defaultForSkillGen).toBe(false); + // #970 — a legacy doc predating the assistant surface reads back + // with both assistant flags defaulted to false (never auto-routes). + expect(m.enabledForAssistant).toBe(false); + expect(m.defaultForAssistant).toBe(false); expect(m.removed).toBe(false); }); diff --git a/ornn-api/src/domains/settings/llmProviders/repository.ts b/ornn-api/src/domains/settings/llmProviders/repository.ts index d516d6fc..48e16e01 100644 --- a/ornn-api/src/domains/settings/llmProviders/repository.ts +++ b/ornn-api/src/domains/settings/llmProviders/repository.ts @@ -45,7 +45,7 @@ export interface StoredProvider { } /** Surface key — must match the in-store field naming convention. */ -export type SurfaceKey = "Playground" | "SkillGen"; +export type SurfaceKey = "Playground" | "SkillGen" | "Assistant"; export class LlmProvidersRepository { private readonly collection: Collection; @@ -172,8 +172,13 @@ function normalizeModel(raw: LlmProviderModel & { enabled?: boolean }): LlmProvi typeof raw.enabledForSkillGen === "boolean" ? raw.enabledForSkillGen : raw.enabled === true, + // #970 — assistant is a net-new surface; pre-#970 docs lack the + // flag entirely. Default `false` so an existing model never + // auto-routes to the assistant until an admin opts it in. + enabledForAssistant: raw.enabledForAssistant === true, defaultForPlayground: raw.defaultForPlayground === true, defaultForSkillGen: raw.defaultForSkillGen === true, + defaultForAssistant: raw.defaultForAssistant === true, removed: raw.removed === true, firstSeenAt: raw.firstSeenAt instanceof Date ? 
raw.firstSeenAt : new Date(raw.firstSeenAt), lastSyncedAt: raw.lastSyncedAt instanceof Date ? raw.lastSyncedAt : new Date(raw.lastSyncedAt), diff --git a/ornn-api/src/domains/settings/llmProviders/service.test.ts b/ornn-api/src/domains/settings/llmProviders/service.test.ts index e1c4e81f..fee3b999 100644 --- a/ornn-api/src/domains/settings/llmProviders/service.test.ts +++ b/ornn-api/src/domains/settings/llmProviders/service.test.ts @@ -38,11 +38,10 @@ class FakeRepo { return this.rows.delete(id); } async clearDefaultsForSurfaceExcept( - surface: "Playground" | "SkillGen", + surface: "Playground" | "SkillGen" | "Assistant", keep: { providerId: string; modelId: string } | null, ): Promise { - const defKey = - surface === "Playground" ? "defaultForPlayground" : "defaultForSkillGen"; + const defKey = `defaultFor${surface}` as const; for (const [id, doc] of this.rows) { const isKeeper = keep && id === keep.providerId; const nextModels = doc.models.map((m) => { @@ -416,6 +415,146 @@ describe("LlmProvidersService", () => { expect(isMidMaskSentinel(masked.auth.apiKey)).toBe(true); }); + // ──────────────── #970 — assistant surface ──────────────── + + it("UT-LLM-ASST-001: resolveModel(assistant) → no-models-enabled until a model opts in", async () => { + // baseInput enables gpt-4o for playground + skillGen but NOT for the + // assistant — a brand-new surface must start with zero routable + // models so it never silently borrows another surface's default. 
+ const { svc } = makeService(); + await svc.create( + { ...baseInput, auth: { kind: "apiKey", apiKey: "k" } }, + ACTOR, + ); + const resolution = await svc.resolveModel({ surface: "assistant" }); + expect(resolution.kind).toBe("no-models-enabled"); + }); + + it("UT-LLM-ASST-002: patchModel(defaultForAssistant) auto-enables + resolveModel picks it", async () => { + const { svc } = makeService(); + const created = await svc.create( + { ...baseInput, auth: { kind: "apiKey", apiKey: "k" } }, + ACTOR, + ); + const after = await svc.patchModel( + created._id, + "gpt-4o", + { defaultForAssistant: true }, + ACTOR, + ); + const gpt4o = after.models.find((m) => m.id === "gpt-4o")!; + expect(gpt4o.defaultForAssistant).toBe(true); + expect(gpt4o.enabledForAssistant).toBe(true); + + const resolution = await svc.resolveModel({ surface: "assistant" }); + expect(resolution.kind).toBe("ok"); + if (resolution.kind === "ok") expect(resolution.modelId).toBe("gpt-4o"); + }); + + it("UT-LLM-ASST-003: resolveModel(assistant) prefers default over first-enabled", async () => { + const { svc } = makeService(); + await svc.create( + { + ...baseInput, + name: "asst", + auth: { kind: "apiKey", apiKey: "k" }, + models: [ + { + id: "alpha", + displayName: "Alpha", + enabledForAssistant: true, + }, + { + id: "bravo", + displayName: "Bravo", + enabledForAssistant: true, + defaultForAssistant: true, + }, + ], + }, + ACTOR, + ); + const resolution = await svc.resolveModel({ surface: "assistant" }); + expect(resolution.kind).toBe("ok"); + // bravo is the surface default even though alpha sorts first by name. + if (resolution.kind === "ok") expect(resolution.modelId).toBe("bravo"); + }); + + it("UT-LLM-ASST-004: requested model not enabled for assistant → not-enabled", async () => { + const { svc } = makeService(); + await svc.create( + { ...baseInput, auth: { kind: "apiKey", apiKey: "k" } }, + ACTOR, + ); + // gpt-4o is enabled for playground/skillGen but not assistant. 
+ const resolution = await svc.resolveModel({ + surface: "assistant", + requested: "gpt-4o", + }); + expect(resolution.kind).toBe("not-enabled"); + }); + + it("UT-LLM-ASST-005: assistant default flip is surface-isolated (#970)", async () => { + // baseInput marks gpt-4o as the playground AND skillGen default. + // Setting gpt-3.5 as the assistant default must NOT disturb either + // of gpt-4o's other-surface defaults — surfaces are independent. + const { svc } = makeService(); + const created = await svc.create( + { ...baseInput, auth: { kind: "apiKey", apiKey: "k" } }, + ACTOR, + ); + const after = await svc.patchModel( + created._id, + "gpt-3.5", + { defaultForAssistant: true }, + ACTOR, + ); + const gpt4o = after.models.find((m) => m.id === "gpt-4o")!; + expect(gpt4o.defaultForPlayground).toBe(true); + expect(gpt4o.defaultForSkillGen).toBe(true); + expect(gpt4o.defaultForAssistant).toBe(false); + const gpt35 = after.models.find((m) => m.id === "gpt-3.5")!; + expect(gpt35.defaultForAssistant).toBe(true); + expect(gpt35.enabledForAssistant).toBe(true); + }); + + it("UT-LLM-ASST-006: assistant default is at-most-one across providers (#970)", async () => { + const { svc } = makeService(); + const a = await svc.create( + { + ...baseInput, + name: "alpha", + auth: { kind: "apiKey", apiKey: "k1" }, + models: [ + { + id: "m-a", + displayName: "M-A", + enabledForAssistant: true, + defaultForAssistant: true, + }, + ], + }, + ACTOR, + ); + const b = await svc.create( + { + ...baseInput, + name: "beta", + auth: { kind: "apiKey", apiKey: "k2" }, + models: [{ id: "m-b", displayName: "M-B", enabledForAssistant: true }], + }, + ACTOR, + ); + // Promote m-b on provider beta → m-a's default must clear. 
+ await svc.patchModel(b._id, "m-b", { defaultForAssistant: true }, ACTOR); + const alpha = await svc.get(a._id); + const mA = alpha!.models.find((m) => m.id === "m-a")!; + expect(mA.defaultForAssistant).toBe(false); + const beta = await svc.get(b._id); + const mB = beta!.models.find((m) => m.id === "m-b")!; + expect(mB.defaultForAssistant).toBe(true); + }); + it("UT-LLM-013: sentinel apiKey on update preserves DB value", async () => { const { svc } = makeService(); const created = await svc.create( diff --git a/ornn-api/src/domains/settings/llmProviders/service.ts b/ornn-api/src/domains/settings/llmProviders/service.ts index ac26bf5f..3b149da1 100644 --- a/ornn-api/src/domains/settings/llmProviders/service.ts +++ b/ornn-api/src/domains/settings/llmProviders/service.ts @@ -62,13 +62,23 @@ import type { const logger = createLogger("llmProvidersService"); /** Surfaces the picker / resolver care about. Mirror of `quota/types.ts:Surface`. */ -export type Surface = "playground" | "skillGen"; +export type Surface = "playground" | "skillGen" | "assistant"; const SURFACE_KEY: Record = { playground: "Playground", skillGen: "SkillGen", + assistant: "Assistant", }; +/** + * Canonical surface list. Loops that must touch every surface (model + * coherence rules, default-clearing) iterate this so adding a surface + * is a single-line change to `SURFACE_KEY` + this array. 
+ */ +export const ALL_SURFACES: ReadonlyArray<Surface> = Object.keys( + SURFACE_KEY, +) as Surface[]; + // --------------------------------------------------------------------------- // Input schemas // --------------------------------------------------------------------------- @@ -109,8 +119,10 @@ const modelInputSchema = z.object({ displayName: z.string().min(1), enabledForPlayground: z.boolean().optional(), enabledForSkillGen: z.boolean().optional(), + enabledForAssistant: z.boolean().optional(), defaultForPlayground: z.boolean().optional(), defaultForSkillGen: z.boolean().optional(), + defaultForAssistant: z.boolean().optional(), removed: z.boolean().optional(), }); @@ -150,8 +162,10 @@ export const modelFlagsPatchSchema = z .object({ enabledForPlayground: z.boolean().optional(), enabledForSkillGen: z.boolean().optional(), + enabledForAssistant: z.boolean().optional(), defaultForPlayground: z.boolean().optional(), defaultForSkillGen: z.boolean().optional(), + defaultForAssistant: z.boolean().optional(), }) .refine((v) => Object.keys(v).length > 0, { message: "At least one flag must be provided", @@ -399,8 +413,10 @@ export class LlmProvidersService { displayName: m.displayName, enabledForPlayground: m.enabledForPlayground === true, enabledForSkillGen: m.enabledForSkillGen === true, + enabledForAssistant: m.enabledForAssistant === true, defaultForPlayground: m.defaultForPlayground === true, defaultForSkillGen: m.defaultForSkillGen === true, + defaultForAssistant: m.defaultForAssistant === true, removed: m.removed === true, firstSeenAt: now, lastSyncedAt: now, @@ -449,10 +465,14 @@ export class LlmProvidersService { m.enabledForPlayground ?? prev?.enabledForPlayground ?? false, enabledForSkillGen: m.enabledForSkillGen ?? prev?.enabledForSkillGen ?? false, + enabledForAssistant: + m.enabledForAssistant ?? prev?.enabledForAssistant ?? false, defaultForPlayground: m.defaultForPlayground ?? prev?.defaultForPlayground ?? false, defaultForSkillGen: m.defaultForSkillGen ?? 
prev?.defaultForSkillGen ?? false, + defaultForAssistant: + m.defaultForAssistant ?? prev?.defaultForAssistant ?? false, removed: m.removed ?? prev?.removed ?? false, firstSeenAt: prev?.firstSeenAt ?? now, lastSyncedAt: prev?.lastSyncedAt ?? now, @@ -532,7 +552,7 @@ export class LlmProvidersService { // Compute the new flags, applying coherence rules. let next: LlmProviderModel = { ...current }; - for (const surface of ["playground", "skillGen"] as const) { + for (const surface of ALL_SURFACES) { const enKey = enabledFieldFor(surface); const defKey = defaultFieldFor(surface); if (flags[enKey] !== undefined) { @@ -554,7 +574,7 @@ export class LlmProvidersService { // Cross-provider clears: for each surface where this row is now // the default, blow away the flag on every other model first. - for (const surface of ["playground", "skillGen"] as const) { + for (const surface of ALL_SURFACES) { const defKey = defaultFieldFor(surface); if (next[defKey] === true) { await this.repo.clearDefaultsForSurfaceExcept(SURFACE_KEY[surface], { @@ -574,8 +594,10 @@ export class LlmProvidersService { ...m, enabledForPlayground: next.enabledForPlayground, enabledForSkillGen: next.enabledForSkillGen, + enabledForAssistant: next.enabledForAssistant, defaultForPlayground: next.defaultForPlayground, defaultForSkillGen: next.defaultForSkillGen, + defaultForAssistant: next.defaultForAssistant, } : m, ); @@ -640,8 +662,10 @@ export class LlmProvidersService { displayName: u.displayName, enabledForPlayground: false, enabledForSkillGen: false, + enabledForAssistant: false, defaultForPlayground: false, defaultForSkillGen: false, + defaultForAssistant: false, removed: false, firstSeenAt: now, lastSyncedAt: now, @@ -657,8 +681,10 @@ export class LlmProvidersService { displayName: u.displayName, enabledForPlayground: prev.enabledForPlayground, enabledForSkillGen: prev.enabledForSkillGen, + enabledForAssistant: prev.enabledForAssistant, defaultForPlayground: prev.defaultForPlayground, 
defaultForSkillGen: prev.defaultForSkillGen, + defaultForAssistant: prev.defaultForAssistant, removed: false, firstSeenAt: prev.firstSeenAt, lastSyncedAt: now, @@ -675,6 +701,7 @@ export class LlmProvidersService { removed: true, defaultForPlayground: false, defaultForSkillGen: false, + defaultForAssistant: false, lastSyncedAt: now, }); if (!wasRemoved) removed += 1; @@ -804,12 +831,12 @@ export class LlmProvidersService { // Helpers // --------------------------------------------------------------------------- -export function enabledFieldFor(surface: Surface): "enabledForPlayground" | "enabledForSkillGen" { - return surface === "playground" ? "enabledForPlayground" : "enabledForSkillGen"; +export function enabledFieldFor(surface: Surface): `enabledFor${SurfaceKey}` { + return `enabledFor${SURFACE_KEY[surface]}`; } -export function defaultFieldFor(surface: Surface): "defaultForPlayground" | "defaultForSkillGen" { - return surface === "playground" ? "defaultForPlayground" : "defaultForSkillGen"; +export function defaultFieldFor(surface: Surface): `defaultFor${SurfaceKey}` { + return `defaultFor${SURFACE_KEY[surface]}`; } function safeDecrypt(blob: string, key: string): string { diff --git a/ornn-api/src/domains/settings/llmProviders/types.ts b/ornn-api/src/domains/settings/llmProviders/types.ts index faa5698b..5d22379c 100644 --- a/ornn-api/src/domains/settings/llmProviders/types.ts +++ b/ornn-api/src/domains/settings/llmProviders/types.ts @@ -26,6 +26,8 @@ export interface LlmProviderModel { */ readonly enabledForPlayground: boolean; readonly enabledForSkillGen: boolean; + /** #970 — Ornn Assistant surface (repo-aware Q&A chatbot). */ + readonly enabledForAssistant: boolean; /** * Per-surface default flags. 
Server enforces at-most-one-true * across **all providers** — setting a default on one model clears @@ -36,6 +38,8 @@ export interface LlmProviderModel { */ readonly defaultForPlayground: boolean; readonly defaultForSkillGen: boolean; + /** #970 — Ornn Assistant surface default. */ + readonly defaultForAssistant: boolean; /** * `removed` flips to true when a previously-known model disappears * from the upstream catalog. Kept for history / lifetime breakdowns; diff --git a/ornn-api/src/domains/settings/sections/assistant.ts b/ornn-api/src/domains/settings/sections/assistant.ts new file mode 100644 index 00000000..ed03dd7c --- /dev/null +++ b/ornn-api/src/domains/settings/sections/assistant.ts @@ -0,0 +1,49 @@ +/** + * Ornn Assistant section schema (#970). + * + * The Assistant is the third LLM surface (after `playground` and + * `skillGen`). It powers the repo-aware Q&A chatbot — a pure, + * non-agentic completion grounded in a curated knowledge base plus a + * visibility-scoped skill retrieval. This section owns the same knobs + * every LLM surface owns so the resolver / quota / SSE machinery can + * treat it uniformly: + * + * - Default LLM provider + model (picker seed + execute-path fallback) + * - SSE keep-alive cadence for the streaming chat + * - Default monthly quota for non-admin users + * + * Mirrors `playground.ts` / `skillGen.ts` field-for-field so a new + * surface is purely additive: one section schema + one `getXxx()` + * accessor + the per-model surface flags. No other surface's behaviour + * changes. 
+ * + * @module domains/settings/sections/assistant + */ +import { z } from "zod"; +import type { SectionMeta } from "./index"; + +export const assistantSchema = z.object({ + defaultProviderId: z.string().nullable(), + defaultModelId: z.string().nullable(), + sseKeepAliveMs: z.number().int().min(1000).max(600_000), + defaultMonthlyQuota: z.number().int().min(0).max(1_000_000), +}); + +export type AssistantSection = z.infer; + +export const assistantDefaults: AssistantSection = { + defaultProviderId: null, + defaultModelId: null, + sseKeepAliveMs: 15_000, + // Q&A turns are cheaper + more frequent than skill generation but the + // surface is still LLM-billed; seed a middle-ground monthly allotment. + defaultMonthlyQuota: 100, +}; + +export const assistantSection: SectionMeta = { + id: "assistant", + publicPath: "assistant", + schema: assistantSchema, + secretFields: [], + defaults: assistantDefaults, +}; diff --git a/ornn-api/src/domains/settings/sections/index.ts b/ornn-api/src/domains/settings/sections/index.ts index 3909a3a7..a4d268df 100644 --- a/ornn-api/src/domains/settings/sections/index.ts +++ b/ornn-api/src/domains/settings/sections/index.ts @@ -9,6 +9,7 @@ * @module domains/settings/sections */ +import { assistantSection, type AssistantSection } from "./assistant"; import { mirrorSection, type MirrorSection } from "./mirror"; import { nyxidSection, type NyxidSection } from "./nyxid"; import { playgroundSection, type PlaygroundSection } from "./playground"; @@ -18,6 +19,7 @@ import { telemetrySection, type TelemetrySection } from "./telemetry"; import { extrasSection, type ExtrasSection } from "./extras"; export { + assistantSection, mirrorSection, nyxidSection, playgroundSection, @@ -28,6 +30,7 @@ export { }; export type { + AssistantSection, MirrorSection, NyxidSection, PlaygroundSection, @@ -40,6 +43,7 @@ export type { export type SectionId = | "playground" | "skillGen" + | "assistant" | "mirror" | "nyxid" | "skillAudit" @@ -62,6 +66,7 @@ export 
interface SectionMeta { export const sections = { playground: playgroundSection, skillGen: skillGenSection, + assistant: assistantSection, mirror: mirrorSection, nyxid: nyxidSection, skillAudit: skillAuditSection, diff --git a/ornn-api/src/domains/settings/sections/sections.test.ts b/ornn-api/src/domains/settings/sections/sections.test.ts index b97eb35f..259564f3 100644 --- a/ornn-api/src/domains/settings/sections/sections.test.ts +++ b/ornn-api/src/domains/settings/sections/sections.test.ts @@ -6,6 +6,7 @@ import { describe, expect, it } from "bun:test"; import { + assistantSection, extrasSection, mirrorSection, nyxidSection, @@ -105,6 +106,60 @@ describe("section schemas", () => { ).toBe(false); }); + // -------- assistant (#970) -------- + it("UT-SCHEMA-ASST-001: assistant defaults are valid + nullable provider/model", () => { + expect( + assistantSection.schema.safeParse(assistantSection.defaults).success, + ).toBe(true); + expect(assistantSection.defaults.defaultProviderId).toBeNull(); + expect(assistantSection.defaults.defaultModelId).toBeNull(); + expect(assistantSection.id).toBe("assistant"); + expect(assistantSection.publicPath).toBe("assistant"); + expect(assistantSection.secretFields).toEqual([]); + }); + + it("UT-SCHEMA-ASST-002: assistant sseKeepAliveMs bounds", () => { + expect( + assistantSection.schema.safeParse({ + ...assistantSection.defaults, + sseKeepAliveMs: 15_000, + }).success, + ).toBe(true); + expect( + assistantSection.schema.safeParse({ + ...assistantSection.defaults, + sseKeepAliveMs: 999, + }).success, + ).toBe(false); + expect( + assistantSection.schema.safeParse({ + ...assistantSection.defaults, + sseKeepAliveMs: 600_001, + }).success, + ).toBe(false); + }); + + it("UT-SCHEMA-ASST-003: assistant defaultMonthlyQuota bounds", () => { + expect( + assistantSection.schema.safeParse({ + ...assistantSection.defaults, + defaultMonthlyQuota: 0, + }).success, + ).toBe(true); + expect( + assistantSection.schema.safeParse({ + 
...assistantSection.defaults, + defaultMonthlyQuota: -1, + }).success, + ).toBe(false); + expect( + assistantSection.schema.safeParse({ + ...assistantSection.defaults, + defaultMonthlyQuota: 1_000_001, + }).success, + ).toBe(false); + }); + // -------- nyxid -------- it("UT-SCHEMA-NYX-001: tokenUrl must be http(s) or empty", () => { expect( diff --git a/ornn-api/src/domains/settings/service.ts b/ornn-api/src/domains/settings/service.ts index d7ac959c..d3677c70 100644 --- a/ornn-api/src/domains/settings/service.ts +++ b/ornn-api/src/domains/settings/service.ts @@ -31,6 +31,7 @@ import type { LlmProvider } from "./llmProviders/types"; import type { SettingsRepository } from "./repository"; import { sections, + type AssistantSection, type ExtrasSection, type MirrorSection, type NyxidSection, @@ -98,6 +99,9 @@ export class SettingsServiceImpl implements SettingsService { async getSkillGen(): Promise { return this.getSection("skillGen"); } + async getAssistant(): Promise { + return this.getSection("assistant"); + } async getMirror(): Promise { return this.getSection("mirror"); } diff --git a/ornn-api/src/domains/settings/types.ts b/ornn-api/src/domains/settings/types.ts index dd08eb31..a135c47d 100644 --- a/ornn-api/src/domains/settings/types.ts +++ b/ornn-api/src/domains/settings/types.ts @@ -13,6 +13,7 @@ */ import type { + AssistantSection, ExtrasSection, MirrorSection, NyxidSection, @@ -51,6 +52,7 @@ export interface SettingsService { // ---- Per-section typed accessors ---- getPlayground(): Promise; getSkillGen(): Promise; + getAssistant(): Promise; getMirror(): Promise; getNyxid(): Promise; getSkillAudit(): Promise; From 9c51e29e6e8a37fb8233829cc6ececfea501e1af Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:34:40 +0800 Subject: [PATCH 009/110] feat(api): wire assistant surface into model picker + execute resolvers (#970) - GET /me/models accepts surface=assistant; resolution-error labels are 
table-driven so the assistant surface emits a coherent MODEL_UNAVAILABLE message instead of borrowing skill-generation's. - bootstrap resolveLlmProviderForSurface / resolveSurfaceDefaults now resolve the 'assistant' surface from the new settings section. The shared NyxLlmClient is reused unchanged. --- ornn-api/src/bootstrap.ts | 12 ++++++++---- .../domains/settings/llmProviders/routes.test.ts | 5 ++--- .../src/domains/settings/llmProviders/routes.ts | 14 ++++++++++---- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/ornn-api/src/bootstrap.ts b/ornn-api/src/bootstrap.ts index 73983bf1..79e3f5f5 100644 --- a/ornn-api/src/bootstrap.ts +++ b/ornn-api/src/bootstrap.ts @@ -332,12 +332,14 @@ export async function bootstrap( // shape stays narrow — the empty `gatewayUrl` is what triggers the // fail-closed branch downstream. const resolveLlmProviderForSurface = async ( - surface: "playground" | "skillGen", + surface: "playground" | "skillGen" | "assistant", ): Promise<{ gatewayUrl: string; apiKey: string; apiFormat: ApiFormat }> => { const sec = surface === "playground" ? await settingsService.getPlayground() - : await settingsService.getSkillGen(); + : surface === "skillGen" + ? await settingsService.getSkillGen() + : await settingsService.getAssistant(); if (!sec.defaultProviderId) { return { gatewayUrl: "", apiKey: "", apiFormat: "responses" }; } @@ -362,12 +364,14 @@ export async function bootstrap( // override for callers that want to pin a specific model regardless // of the cross-provider default. const resolveSurfaceDefaults = async ( - surface: "playground" | "skillGen", + surface: "playground" | "skillGen" | "assistant", ): Promise<{ model: string; maxOutputTokens: number; temperature: number }> => { const sec = surface === "playground" ? await settingsService.getPlayground() - : await settingsService.getSkillGen(); + : surface === "skillGen" + ? 
await settingsService.getSkillGen() + : await settingsService.getAssistant(); let model = sec.defaultModelId ?? ""; if (!model) { const resolution = await llmProvidersService.resolveModel({ surface }); diff --git a/ornn-api/src/domains/settings/llmProviders/routes.test.ts b/ornn-api/src/domains/settings/llmProviders/routes.test.ts index 3046107f..df53230c 100644 --- a/ornn-api/src/domains/settings/llmProviders/routes.test.ts +++ b/ornn-api/src/domains/settings/llmProviders/routes.test.ts @@ -55,11 +55,10 @@ class FakeRepo { // patchModel needs this when a default flag is flipped on (matches the // in-memory implementation used by service.test.ts). async clearDefaultsForSurfaceExcept( - surface: "Playground" | "SkillGen", + surface: "Playground" | "SkillGen" | "Assistant", keep: { providerId: string; modelId: string } | null, ): Promise { - const defKey = - surface === "Playground" ? "defaultForPlayground" : "defaultForSkillGen"; + const defKey = `defaultFor${surface}` as const; for (const [id, doc] of this.rows) { const isKeeper = keep && id === keep.providerId; const nextModels = doc.models.map((m) => { diff --git a/ornn-api/src/domains/settings/llmProviders/routes.ts b/ornn-api/src/domains/settings/llmProviders/routes.ts index 3c740c11..a0191b11 100644 --- a/ornn-api/src/domains/settings/llmProviders/routes.ts +++ b/ornn-api/src/domains/settings/llmProviders/routes.ts @@ -37,7 +37,14 @@ import { validateBody, getValidatedBody } from "../../../middleware/validate"; import type { SettingsActor } from "../types"; import type { LlmProvidersService, ModelResolution, Surface } from "./service"; -const surfaceSchema = z.enum(["playground", "skillGen"]); +const surfaceSchema = z.enum(["playground", "skillGen", "assistant"]); + +/** Human-facing surface labels for resolution-error messages. 
*/ +const SURFACE_LABEL: Record = { + playground: "playground", + skillGen: "skill-generation", + assistant: "assistant", +}; /** * Translate a `ModelResolution` failure into an HTTP error. Shared @@ -49,8 +56,7 @@ export function throwModelResolutionError(resolution: ModelResolution): never { throw new Error("throwModelResolutionError called on ok resolution"); } if (resolution.kind === "no-models-enabled") { - const surfaceLabel = - resolution.surface === "playground" ? "playground" : "skill-generation"; + const surfaceLabel = SURFACE_LABEL[resolution.surface]; throw AppError.serviceUnavailable( "MODEL_UNAVAILABLE", `${surfaceLabel} is temporarily unavailable — contact admin to enable a model.`, @@ -201,7 +207,7 @@ export function createLlmPickerRoutes( if (!parsed.success) { throw AppError.badRequest( "invalid_surface", - "Query param 'surface' must be 'playground' or 'skillGen'", + "Query param 'surface' must be 'playground', 'skillGen', or 'assistant'", ); } const surface: Surface = parsed.data; From b6c33aa538eeffff4eff25f8ad112bfc6e3c1459 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:38:11 +0800 Subject: [PATCH 010/110] chore(web): drop unused ReactNode import in ProviderModelsDrawer test (#970) Carried over from the ProviderEditDrawer test template; the models-drawer test never references ReactNode. Removing it clears the @typescript-eslint/no-unused-vars lint error. 
--- .../src/components/admin/settings/ProviderModelsDrawer.test.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/ornn-web/src/components/admin/settings/ProviderModelsDrawer.test.tsx b/ornn-web/src/components/admin/settings/ProviderModelsDrawer.test.tsx index bc4204fe..cc7d543e 100644 --- a/ornn-web/src/components/admin/settings/ProviderModelsDrawer.test.tsx +++ b/ornn-web/src/components/admin/settings/ProviderModelsDrawer.test.tsx @@ -17,7 +17,6 @@ import { describe, expect, it, vi, beforeEach, afterEach } from "vitest"; import { cleanup, fireEvent, render, screen, waitFor } from "@testing-library/react"; import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; -import type { ReactNode } from "react"; import type { LlmProvider } from "@/services/settingsApi"; const addToast = vi.fn(); From b7260fbe5fe0d1e5bc4071b586de24b6ccaf8e73 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:38:11 +0800 Subject: [PATCH 011/110] =?UTF-8?q?feat(web):=20Ornn=20Assistant=20widget?= =?UTF-8?q?=20=E2=80=94=20launcher=20+=20slide-in=20chat=20panel=20(#970)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A floating bottom-right launcher opens a corner chat panel that streams repo-aware answers about Ornn. Reuses the Playground chat primitives (ChatMessage / ChatInput) and the assistant data layer (useAssistantChat + useAssistantStore). Forge Workshop language (docs/DESIGN.md): semantic tokens only, letterpress impression shadows (cta-letterpress / card-impression), press-DOWN launcher hover, Framer Motion panel reveal that collapses under prefers-reduced-motion. a11y: ESC + backdrop close, focus moves into the composer on open and returns to the launcher on close, focus-visible ember rings, labelled controls, a status-role thinking indicator. 
Empty state suggests the three example questions ("What is Ornn?", "How is Ornn different?", "Find a skill that does X"); clicking one fills the composer for editing before send. All strings via react-i18next (en + zh). Session-scoped — no persistence. --- .../assistant/AssistantWidget.test.tsx | 137 ++++++ .../components/assistant/AssistantWidget.tsx | 429 ++++++++++++++++++ ornn-web/src/i18n/en.json | 19 + ornn-web/src/i18n/zh.json | 19 + 4 files changed, 604 insertions(+) create mode 100644 ornn-web/src/components/assistant/AssistantWidget.test.tsx create mode 100644 ornn-web/src/components/assistant/AssistantWidget.tsx diff --git a/ornn-web/src/components/assistant/AssistantWidget.test.tsx b/ornn-web/src/components/assistant/AssistantWidget.test.tsx new file mode 100644 index 00000000..0db52227 --- /dev/null +++ b/ornn-web/src/components/assistant/AssistantWidget.test.tsx @@ -0,0 +1,137 @@ +/** + * AssistantWidget — launcher + panel behavior (#970). + * + * Covers the user-facing contract: signed-out visitors get nothing; the + * launcher opens a dialog with the three example questions; a suggestion + * click fills the composer; sending forwards to the chat hook; close + * dismisses the panel. + * + * framer-motion is stubbed pass-through (incl. useReducedMotion); the + * auth store + chat hook are mocked; the assistant store (open/close) is + * the real session store, reset per test. react-i18next is stubbed + * globally in src/test/setup.ts, resolving the real en.json copy. 
+ * + * @module components/assistant/AssistantWidget.test + */ + +import { describe, expect, it, vi, beforeEach, afterEach } from "vitest"; +import { cleanup, fireEvent, render, screen } from "@testing-library/react"; + +let isAuthed = true; +const sendMessage = vi.fn(); +const abort = vi.fn(); +const clearChat = vi.fn(); + +vi.mock("framer-motion", () => ({ + AnimatePresence: ({ children }: { children: React.ReactNode }) => <>{children}, + useReducedMotion: () => false, + motion: new Proxy( + {}, + { + get: + (_t, tag: string) => + ({ + children, + initial: _i, + animate: _a, + exit: _e, + transition: _tr, + ...rest + }: Record & { children?: React.ReactNode }) => { + void _i; + void _a; + void _e; + void _tr; + const Tag = tag as keyof React.JSX.IntrinsicElements; + return {children}; + }, + }, + ), +})); + +vi.mock("@/stores/authStore", () => ({ + useIsAuthenticated: () => isAuthed, +})); + +vi.mock("@/hooks/useAssistantChat", () => ({ + useAssistantChat: () => ({ + messages: [], + isStreaming: false, + error: null, + currentAssistantContent: "", + sendMessage, + abort, + clearChat, + }), +})); + +import { AssistantWidget } from "./AssistantWidget"; +import { useAssistantStore } from "@/stores/assistantStore"; + +beforeEach(() => { + isAuthed = true; + sendMessage.mockReset(); + abort.mockReset(); + clearChat.mockReset(); + useAssistantStore.setState({ + isOpen: false, + messages: [], + isStreaming: false, + error: null, + currentAssistantContent: "", + }); +}); + +afterEach(cleanup); + +function openPanel() { + fireEvent.click(screen.getByLabelText("Ask Ornn")); +} + +describe("AssistantWidget", () => { + it("renders nothing when signed out", () => { + isAuthed = false; + render(); + expect(screen.queryByLabelText("Ask Ornn")).not.toBeInTheDocument(); + }); + + it("shows the launcher when signed in", () => { + render(); + expect(screen.getByLabelText("Ask Ornn")).toBeInTheDocument(); + // Panel is closed initially. 
+ expect(screen.queryByRole("dialog")).not.toBeInTheDocument(); + }); + + it("opens the panel with the three example questions", () => { + render(); + openPanel(); + expect(screen.getByRole("dialog", { name: "Ornn Assistant" })).toBeInTheDocument(); + expect(screen.getByText("What is Ornn?")).toBeInTheDocument(); + expect(screen.getByText("How is Ornn different?")).toBeInTheDocument(); + expect(screen.getByText("Find a skill that does X")).toBeInTheDocument(); + }); + + it("fills the composer when a suggestion is clicked", () => { + render(); + openPanel(); + fireEvent.click(screen.getByText("What is Ornn?")); + const textarea = screen.getByLabelText("Chat message input") as HTMLTextAreaElement; + expect(textarea.value).toBe("What is Ornn?"); + }); + + it("forwards a sent message to the chat hook", () => { + render(); + openPanel(); + fireEvent.click(screen.getByText("How is Ornn different?")); + fireEvent.click(screen.getByLabelText("Send message")); + expect(sendMessage).toHaveBeenCalledWith("How is Ornn different?"); + }); + + it("closes the panel via the close button", () => { + render(); + openPanel(); + expect(screen.getByRole("dialog")).toBeInTheDocument(); + fireEvent.click(screen.getByLabelText("Close assistant")); + expect(screen.queryByRole("dialog")).not.toBeInTheDocument(); + }); +}); diff --git a/ornn-web/src/components/assistant/AssistantWidget.tsx b/ornn-web/src/components/assistant/AssistantWidget.tsx new file mode 100644 index 00000000..e47674b6 --- /dev/null +++ b/ornn-web/src/components/assistant/AssistantWidget.tsx @@ -0,0 +1,429 @@ +/** + * AssistantWidget — the Ornn Assistant launcher + slide-in chat panel (#970). + * + * A floating launcher (bottom-right) opens a corner chat panel that + * streams repo-aware answers about Ornn. Reuses the Playground chat + * primitives (`ChatMessage`, `ChatInput`) and the assistant data layer + * (`useAssistantChat` + `useAssistantStore`). 
+ * + * Forge Workshop language (docs/DESIGN.md): + * - semantic Tailwind tokens only (bg-page / bg-card / text-strong / + * text-accent / border-subtle …) + * - letterpress impression shadows via `cta-letterpress` / + * `card-impression` utilities — no soft drop shadows + * - press-DOWN hover on the launcher (never lift) + * - Framer Motion panel reveal at motion-medium cadence; respects + * prefers-reduced-motion (transforms collapse, content still appears) + * + * a11y: launcher + panel are keyboard operable, ESC and backdrop close, + * focus moves into the composer on open and returns to the launcher on + * close, every control carries a focus-visible ember ring + label. + * + * Mounted once in the authed app shell (RootLayout); renders nothing for + * signed-out visitors. + * + * @module components/assistant/AssistantWidget + */ + +import { useEffect, useRef, type ReactNode } from "react"; +import { createPortal } from "react-dom"; +import { AnimatePresence, motion, useReducedMotion } from "framer-motion"; +import { useTranslation } from "react-i18next"; +import { ChatMessage } from "@/components/playground/ChatMessage"; +import { ChatInput, type ChatInputHandle } from "@/components/playground/ChatInput"; +import { useAssistantChat } from "@/hooks/useAssistantChat"; +import { useAssistantStore } from "@/stores/assistantStore"; +import { useIsAuthenticated } from "@/stores/authStore"; +import type { AssistantMessage } from "@/types/assistant"; + +/** Example questions shown in the empty state (i18n keys). 
*/ +const SUGGESTION_KEYS = [ + "assistant.suggestions.what", + "assistant.suggestions.different", + "assistant.suggestions.findSkill", +] as const; + +export function AssistantWidget() { + const isAuthenticated = useIsAuthenticated(); + const isOpen = useAssistantStore((s) => s.isOpen); + const openPanel = useAssistantStore((s) => s.openPanel); + const closePanel = useAssistantStore((s) => s.closePanel); + + // Authed-only surface — never mount for signed-out visitors. + if (!isAuthenticated) return null; + + return createPortal( + <> + + + , + document.body, + ); +} + +// --------------------------------------------------------------------------- +// Launcher +// --------------------------------------------------------------------------- + +function AssistantLauncher({ isOpen, onOpen }: { isOpen: boolean; onOpen: () => void }) { + const { t } = useTranslation(); + const reduceMotion = useReducedMotion(); + + return ( + + {!isOpen && ( + + + + {t("assistant.launch")} + + + )} + + ); +} + +// --------------------------------------------------------------------------- +// Panel +// --------------------------------------------------------------------------- + +function AssistantPanel({ isOpen, onClose }: { isOpen: boolean; onClose: () => void }) { + const { t } = useTranslation(); + const reduceMotion = useReducedMotion(); + const { + messages, + isStreaming, + currentAssistantContent, + sendMessage, + abort, + clearChat, + } = useAssistantChat(); + + const inputRef = useRef(null); + const scrollRef = useRef(null); + // Remember what had focus before opening so we can restore it on close. + const restoreFocusRef = useRef(null); + + // ESC closes; capture the previously-focused element on open. 
+ useEffect(() => { + if (!isOpen) return; + restoreFocusRef.current = document.activeElement as HTMLElement | null; + const onKey = (e: KeyboardEvent) => { + if (e.key === "Escape") onClose(); + }; + document.addEventListener("keydown", onKey); + return () => { + document.removeEventListener("keydown", onKey); + // Return focus to the launcher (or whatever opened the panel). + restoreFocusRef.current?.focus?.(); + }; + }, [isOpen, onClose]); + + // Focus the composer once the panel is open. + useEffect(() => { + if (isOpen) { + const id = requestAnimationFrame(() => inputRef.current?.focus()); + return () => cancelAnimationFrame(id); + } + return undefined; + }, [isOpen]); + + // Keep the transcript pinned to the latest turn / streamed token. + useEffect(() => { + const el = scrollRef.current; + if (el) el.scrollTop = el.scrollHeight; + }, [messages, currentAssistantContent, isStreaming]); + + const hasConversation = messages.length > 0 || currentAssistantContent.length > 0; + // A streaming turn before its first token → show the thinking indicator. + const showThinking = isStreaming && currentAssistantContent.length === 0; + + const handleSuggestion = (text: string) => { + inputRef.current?.setValue(text); + }; + + return createPortal( + + {isOpen && ( +
+ {/* Backdrop — light scrim, click closes. Keeps the page faintly + visible (corner widget, not a full modal takeover). */} +
+ )} +
, + document.body, + ); +} + +function PanelHeader({ + onClose, + onClear, + canClear, +}: { + onClose: () => void; + onClear: () => void; + canClear: boolean; +}) { + const { t } = useTranslation(); + return ( +
+
+ + + +
+

+ {t("assistant.title")} +

+

+ {t("assistant.subtitle")} +

+
+
+
+ {canClear && ( + + + + )} + + + +
+
+ ); +} + +function AssistantEmptyState({ onSuggestion }: { onSuggestion: (text: string) => void }) { + const { t } = useTranslation(); + return ( +
+
+
+ + + +

+ {t("assistant.empty.title")} +

+

+ {t("assistant.empty.subtitle")} +

+
+ +
+

+ {t("assistant.empty.hint")} +

+
+ {SUGGESTION_KEYS.map((key) => { + const label = t(key); + return ( + + ); + })} +
+
+
+
+ ); +} + +function ThinkingIndicator({ label }: { label: string }) { + return ( +
+
+ + + + {label} +
+
+ ); +} + +function Dot({ delay }: { delay: string }) { + // motion-safe pulse; reduced-motion users get a static dot (still legible). + return ( + + ); +} + +function IconButton({ + label, + onClick, + children, +}: { + label: string; + onClick: () => void; + children: ReactNode; +}) { + return ( + + ); +} + +// --------------------------------------------------------------------------- +// Icons (inline — no external icon dependency) +// --------------------------------------------------------------------------- + +function SparkIcon({ className }: { className?: string }) { + return ( + + ); +} + +function CloseIcon({ className }: { className?: string }) { + return ( + + ); +} + +function TrashIcon({ className }: { className?: string }) { + return ( + + ); +} + +function ArrowIcon({ className }: { className?: string }) { + return ( + + ); +} diff --git a/ornn-web/src/i18n/en.json b/ornn-web/src/i18n/en.json index 66a973e0..b13b93de 100644 --- a/ornn-web/src/i18n/en.json +++ b/ornn-web/src/i18n/en.json @@ -687,6 +687,25 @@ "nowPublic": "Skill is now public. 
All users can view it.", "failed": "Failed to update skill visibility" }, + "assistant": { + "launch": "Ask Ornn", + "title": "Ornn Assistant", + "subtitle": "Repo-aware answers", + "placeholder": "Ask about Ornn…", + "close": "Close assistant", + "clear": "Clear conversation", + "thinking": "Thinking…", + "empty": { + "title": "Ask Ornn anything", + "subtitle": "Repo-aware answers about what Ornn is, how it works, and which skills fit your agent.", + "hint": "Try asking" + }, + "suggestions": { + "what": "What is Ornn?", + "different": "How is Ornn different?", + "findSkill": "Find a skill that does X" + } + }, "chatInput": { "model": "Model", "generating": "Generating response...", diff --git a/ornn-web/src/i18n/zh.json b/ornn-web/src/i18n/zh.json index 63c78cc0..cffddeac 100644 --- a/ornn-web/src/i18n/zh.json +++ b/ornn-web/src/i18n/zh.json @@ -687,6 +687,25 @@ "nowPublic": "技能已设为公开,所有用户都能看到。", "failed": "更新可见范围失败" }, + "assistant": { + "launch": "问问 Ornn", + "title": "Ornn 助手", + "subtitle": "基于代码库的回答", + "placeholder": "向 Ornn 提问……", + "close": "关闭助手", + "clear": "清空对话", + "thinking": "思考中……", + "empty": { + "title": "向 Ornn 提问", + "subtitle": "基于代码库回答关于 Ornn 是什么、如何运作,以及哪些技能适合你的智能体。", + "hint": "试着问问" + }, + "suggestions": { + "what": "Ornn 是什么?", + "different": "Ornn 有何不同?", + "findSkill": "找一个能做某事的技能" + } + }, "chatInput": { "model": "模型", "generating": "正在生成回复…", From 581360422a26014d1aae738f9907cd0201c87c2d Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:38:11 +0800 Subject: [PATCH 012/110] feat(web): mount Ornn Assistant in the authed app shell (#970) Render <AssistantWidget /> from RootLayout, gated on useIsAuthenticated so it only exists for signed-in users. The widget portals its own launcher + panel to document.body, so mounting it here (rather than inside <main>
, which is overflow-hidden) keeps it clear of the layout clip and above page content while staying below toasts. --- ornn-web/src/components/layout/RootLayout.tsx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ornn-web/src/components/layout/RootLayout.tsx b/ornn-web/src/components/layout/RootLayout.tsx index a9c0078a..dbe58f3f 100644 --- a/ornn-web/src/components/layout/RootLayout.tsx +++ b/ornn-web/src/components/layout/RootLayout.tsx @@ -3,6 +3,7 @@ import { useTranslation } from "react-i18next"; import { Navbar } from "./Navbar"; import { ToastContainer } from "@/components/ui/Toast"; import { QuotaChip } from "@/components/quota/QuotaChip"; +import { AssistantWidget } from "@/components/assistant/AssistantWidget"; import { useIsAuthenticated } from "@/stores/authStore"; import { useSkill } from "@/hooks/useSkills"; @@ -173,6 +174,9 @@ export function RootLayout() {
+ {/* Ornn Assistant — authed-only floating chatbot (#970). Renders its + own launcher + slide-in panel via a portal; self-guards on auth. */} + {isAuthenticated && } ); } From 3de9095dc6ff59afe5d24b1f11e265bdb64c5646 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:46:08 +0800 Subject: [PATCH 013/110] =?UTF-8?q?feat(api):=20add=20assistant=20knowledg?= =?UTF-8?q?e-base=20module=20=E2=80=94=20budget,=20distiller,=20loader=20(?= =?UTF-8?q?#970)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces the reusable KB primitives that ground the Ornn Assistant's answers in the repo's own knowledge: - tokens.ts: deterministic chars/4 token estimate + budget clamp + ASSISTANT_KB_TOKEN_BUDGET env resolution (default 18k). Model-agnostic on purpose — an exact tokenizer for one model is meaningless for another; determinism beats precision here. - distiller.ts: the KbDistiller contract + DeterministicKbDistiller v1 (markdown section extraction → per-source cap → titled concat → global budget clamp). The interface is the documented hook for a future LlmKbDistiller ("big model reads the repo at build time") — same contract, model-driven summarization swapped in underneath. - loader.ts: cached, fail-soft runtime loader that reads the committed digest artifact, strips its provenance header, and defensively clamps to budget. A failed read degrades to empty grounding, never a crash. - sources.ts: priority-ordered, capped source manifest (docs only, never code/secrets) consumed at build time. Pure + deterministic by construction (no clocks/RNG/network). Unit tests cover budget enforcement, section extraction, caching, and fail-soft. 
--- .../domains/assistant/kb/distiller.test.ts | 165 ++++++++++++++ .../src/domains/assistant/kb/distiller.ts | 208 ++++++++++++++++++ .../src/domains/assistant/kb/loader.test.ts | 122 ++++++++++ ornn-api/src/domains/assistant/kb/loader.ts | 124 +++++++++++ ornn-api/src/domains/assistant/kb/sources.ts | 89 ++++++++ ornn-api/src/domains/assistant/kb/tokens.ts | 78 +++++++ 6 files changed, 786 insertions(+) create mode 100644 ornn-api/src/domains/assistant/kb/distiller.test.ts create mode 100644 ornn-api/src/domains/assistant/kb/distiller.ts create mode 100644 ornn-api/src/domains/assistant/kb/loader.test.ts create mode 100644 ornn-api/src/domains/assistant/kb/loader.ts create mode 100644 ornn-api/src/domains/assistant/kb/sources.ts create mode 100644 ornn-api/src/domains/assistant/kb/tokens.ts diff --git a/ornn-api/src/domains/assistant/kb/distiller.test.ts b/ornn-api/src/domains/assistant/kb/distiller.test.ts new file mode 100644 index 00000000..c8816715 --- /dev/null +++ b/ornn-api/src/domains/assistant/kb/distiller.test.ts @@ -0,0 +1,165 @@ +/** + * UT-KB-DISTILL-* — DeterministicKbDistiller + extractSections (#970). 
+ * + * @module domains/assistant/kb/distiller.test + */ + +import { describe, expect, it } from "bun:test"; +import { + DeterministicKbDistiller, + extractSections, + type KbSourceDoc, +} from "./distiller"; +import { CHARS_PER_TOKEN, estimateTokens } from "./tokens"; + +const distiller = new DeterministicKbDistiller(); + +function repeat(token: string, times: number): string { + return Array.from({ length: times }, () => token).join(" "); +} + +describe("DeterministicKbDistiller", () => { + it("UT-KB-DISTILL-001: concatenates sources in manifest order under titles", () => { + const sources: KbSourceDoc[] = [ + { id: "a", title: "Alpha", text: "alpha body" }, + { id: "b", title: "Bravo", text: "bravo body" }, + ]; + const digest = distiller.distill(sources, { budgetTokens: 1_000 }); + expect(digest.text.indexOf("## Alpha")).toBeLessThan( + digest.text.indexOf("## Bravo"), + ); + expect(digest.text).toContain("alpha body"); + expect(digest.text).toContain("bravo body"); + expect(digest.sources.map((s) => s.id)).toEqual(["a", "b"]); + }); + + it("UT-KB-DISTILL-002: per-source cap clips an oversized doc", () => { + // ~400 chars ≈ 100 tokens, cap to 10 tokens (~40 chars). + const big = repeat("word", 80); + const digest = distiller.distill( + [{ id: "big", title: "Big", text: big, maxTokens: 10 }], + { budgetTokens: 10_000 }, + ); + const stat = digest.sources[0]!; + expect(stat.truncated).toBe(true); + expect(stat.estimatedTokens).toBeLessThanOrEqual(10); + }); + + it("UT-KB-DISTILL-003: global budget clamps the whole digest", () => { + const sources: KbSourceDoc[] = [ + { id: "a", title: "Alpha", text: repeat("aaaa", 200) }, + { id: "b", title: "Bravo", text: repeat("bbbb", 200) }, + { id: "c", title: "Charlie", text: repeat("cccc", 200) }, + ]; + const budgetTokens = 50; + const digest = distiller.distill(sources, { budgetTokens }); + // Hard invariant: the produced grounding never exceeds the budget. 
+ expect(digest.estimatedTokens).toBeLessThanOrEqual(budgetTokens); + expect(digest.text.length).toBeLessThanOrEqual(budgetTokens * CHARS_PER_TOKEN); + expect(digest.budgetTokens).toBe(budgetTokens); + }); + + it("UT-KB-DISTILL-004: tail source dropped by global clamp is marked truncated", () => { + const sources: KbSourceDoc[] = [ + { id: "a", title: "Alpha", text: repeat("aaaa", 100) }, + { id: "z", title: "Zulu", text: repeat("zzzz", 100) }, + ]; + // Budget only fits the first block — Zulu's content shouldn't survive. + const digest = distiller.distill(sources, { budgetTokens: 30 }); + expect(digest.text).not.toContain("## Zulu"); + const zulu = digest.sources.find((s) => s.id === "z")!; + expect(zulu.truncated).toBe(true); + }); + + it("UT-KB-DISTILL-005: deterministic — identical inputs yield identical output", () => { + const sources: KbSourceDoc[] = [ + { id: "a", title: "Alpha", text: "one two three", maxTokens: 100 }, + { id: "b", title: "Bravo", text: "four five six" }, + ]; + const first = distiller.distill(sources, { budgetTokens: 500 }); + const second = distiller.distill(sources, { budgetTokens: 500 }); + expect(first.text).toBe(second.text); + expect(first.estimatedTokens).toBe(second.estimatedTokens); + }); + + it("UT-KB-DISTILL-006: empty / whitespace source contributes nothing", () => { + const digest = distiller.distill( + [ + { id: "empty", title: "Empty", text: " \n " }, + { id: "real", title: "Real", text: "real content" }, + ], + { budgetTokens: 1_000 }, + ); + expect(digest.text).not.toContain("## Empty"); + expect(digest.text).toContain("## Real"); + const empty = digest.sources.find((s) => s.id === "empty")!; + expect(empty.chars).toBe(0); + }); + + it("UT-KB-DISTILL-007: estimatedTokens matches estimateTokens(text)", () => { + const digest = distiller.distill( + [{ id: "a", title: "A", text: "some grounding text here" }], + { budgetTokens: 1_000 }, + ); + expect(digest.estimatedTokens).toBe(estimateTokens(digest.text)); + }); + + 
it("UT-KB-DISTILL-008: generatedFrom defaults + honours override", () => { + const def = distiller.distill([{ id: "a", title: "A", text: "x" }], { + budgetTokens: 100, + }); + expect(def.generatedFrom).toBe("DeterministicKbDistiller"); + const overridden = distiller.distill([{ id: "a", title: "A", text: "x" }], { + budgetTokens: 100, + generatedFrom: "custom-note", + }); + expect(overridden.generatedFrom).toBe("custom-note"); + }); +}); + +describe("extractSections", () => { + const doc = [ + "# Title", + "intro line", + "", + "## Keep Me", + "kept body 1", + "kept body 2", + "", + "### Nested Under Keep", + "still kept (deeper heading)", + "", + "## Drop Me", + "dropped body", + "", + "## Also Keep", + "second kept body", + ].join("\n"); + + it("UT-KB-EXTRACT-001: keeps only named sections, in document order", () => { + const out = extractSections(doc, ["Keep Me", "Also Keep"]); + expect(out).toContain("## Keep Me"); + expect(out).toContain("kept body 1"); + expect(out).toContain("## Also Keep"); + expect(out).toContain("second kept body"); + expect(out).not.toContain("## Drop Me"); + expect(out).not.toContain("dropped body"); + }); + + it("UT-KB-EXTRACT-002: a kept section includes its deeper subsections", () => { + const out = extractSections(doc, ["Keep Me"]); + expect(out).toContain("### Nested Under Keep"); + expect(out).toContain("still kept (deeper heading)"); + // …but stops at the next same-level heading. 
+ expect(out).not.toContain("## Drop Me"); + }); + + it("UT-KB-EXTRACT-003: heading match is case-insensitive + trimmed", () => { + const out = extractSections(doc, [" keep me "]); + expect(out).toContain("kept body 1"); + }); + + it("UT-KB-EXTRACT-004: unmatched headings degrade to empty, never throw", () => { + expect(extractSections(doc, ["Does Not Exist"])).toBe(""); + }); +}); diff --git a/ornn-api/src/domains/assistant/kb/distiller.ts b/ornn-api/src/domains/assistant/kb/distiller.ts new file mode 100644 index 00000000..36f0e71c --- /dev/null +++ b/ornn-api/src/domains/assistant/kb/distiller.ts @@ -0,0 +1,208 @@ +/** + * Knowledge-base distillation (#970). + * + * A *distiller* turns a set of raw repo documents into a single, + * size-budgeted grounding digest for the Ornn Assistant. v1 ships + * {@link DeterministicKbDistiller} — pure, repeatable curation: + * + * 1. optionally extract only the relevant markdown sections of a doc + * (so e.g. CLAUDE.md contributes its "Product Positioning" section, + * not its release-process boilerplate), + * 2. clip each doc to its per-source token cap, + * 3. render each as a titled block and concatenate in manifest order, + * 4. clamp the whole thing to the global token budget. + * + * The {@link KbDistiller} interface is the documented extension point for + * the "big model reads the repo at build time" idea: an `LlmKbDistiller` + * would implement the same contract but replace steps 1–2 with a + * model-driven summarization pass, then reuse the same budget clamp. The + * build script depends on the interface, not the implementation, so + * swapping distillers is a one-line change with no downstream churn. + * + * Distillation is deterministic by construction — no clocks, no RNG, no + * network. The same inputs always produce the same digest, which is what + * lets the committed artifact be diff-reviewable and the loader cache be + * trusted. 
+ *
+ * @module domains/assistant/kb/distiller
+ */
+
+import { clampToTokenBudget, estimateTokens } from "./tokens";
+
+/** Raw input document for distillation. */
+export interface KbSourceDoc {
+  /** Stable id (used in stats + provenance). */
+  readonly id: string;
+  /** Human-facing section title rendered into the digest. */
+  readonly title: string;
+  /** Full document text (already read from disk by the caller). */
+  readonly text: string;
+  /**
+   * Optional per-source token cap. When omitted the source is bounded
+   * only by the global budget.
+   */
+  readonly maxTokens?: number;
+  /**
+   * Optional list of markdown headings (heading text without leading `#`s;
+   * compared case-insensitively after trimming) to extract from the
+   * source. When set, only those sections survive — everything else in the
+   * doc is dropped before budgeting. When omitted or empty the whole
+   * document is used.
+   */
+  readonly headings?: ReadonlyArray<string>;
+}
+
+/** Per-source accounting in the produced digest. */
+export interface KbSourceStat {
+  /** Id of the source doc this entry accounts for. */
+  readonly id: string;
+  /** Title of that source doc. */
+  readonly title: string;
+  /**
+   * Length of the source's body text after section extraction and
+   * per-source clipping (0 if it contributed nothing).
+   */
+  readonly chars: number;
+  /** Token estimate for those characters. */
+  readonly estimatedTokens: number;
+  /** True if this source was clipped by its per-source cap or the global budget. */
+  readonly truncated: boolean;
+}
+
+/** The distilled grounding digest. */
+export interface KbDigest {
+  /** The grounding text fed to the model as system context. */
+  readonly text: string;
+  /** Token estimate of `text`. */
+  readonly estimatedTokens: number;
+  /** Global token budget the digest was clamped to. */
+  readonly budgetTokens: number;
+  /** One accounting entry per input source. */
+  readonly sources: ReadonlyArray<KbSourceStat>;
+  /** Provenance note (e.g. which builder + when), for the artifact header. */
+  readonly generatedFrom: string;
+}
+
+export interface KbDistillOptions {
+  /** Global token budget for the whole digest. */
+  readonly budgetTokens: number;
+  /** Free-text provenance note copied into {@link KbDigest.generatedFrom}. */
+  readonly generatedFrom?: string;
+}
+
+/**
+ * Contract every distiller honours. Implementations MUST be deterministic
+ * for a given input + options.
+ */
+export interface KbDistiller {
+  /**
+   * Distill `sources` (in the given order) into one digest bounded by
+   * `opts.budgetTokens`.
+   */
+  distill(
+    sources: ReadonlyArray<KbSourceDoc>,
+    opts: KbDistillOptions,
+  ): KbDigest;
+}
+
+/** Separator rendered between source blocks (a markdown horizontal rule). */
+const BLOCK_SEPARATOR = "\n\n---\n\n";
+
+/**
+ * Deterministic, dependency-free distiller (v1). See module doc for the
+ * pipeline. No LLM calls — this is the baseline grounding everyone gets.
+ */
+export class DeterministicKbDistiller implements KbDistiller {
+  /**
+   * Run the extract → clip → concatenate → clamp pipeline.
+   *
+   * `opts.budgetTokens` is floored and clamped at 0. Every input source
+   * gets a stat entry, in input order, including sources that contributed
+   * no text. A source is reported `truncated` when its per-source cap
+   * clipped it, when section extraction left nothing of a non-empty doc,
+   * or when the global clamp removed any part of its rendered block.
+   */
+  distill(
+    sources: ReadonlyArray<KbSourceDoc>,
+    opts: KbDistillOptions,
+  ): KbDigest {
+    const budgetTokens = Math.max(0, Math.floor(opts.budgetTokens));
+    const blocks: string[] = [];
+    const stats: KbSourceStat[] = [];
+    // Exclusive end offset of each source's block inside the joined text,
+    // parallel to `stats` (0 for sources that rendered no block).
+    const blockEnds: number[] = [];
+    let cursor = 0;
+
+    for (const src of sources) {
+      // 1. section-extract (optional) → 2. per-source clip.
+      const selected =
+        src.headings && src.headings.length > 0
+          ? extractSections(src.text, src.headings)
+          : src.text;
+      const normalized = selected.trim();
+      if (normalized.length === 0) {
+        stats.push({
+          id: src.id,
+          title: src.title,
+          chars: 0,
+          estimatedTokens: 0,
+          truncated: src.text.trim().length > 0,
+        });
+        blockEnds.push(0);
+        continue;
+      }
+      const capped =
+        src.maxTokens !== undefined
+          ? clampToTokenBudget(normalized, src.maxTokens)
+          : { text: normalized, truncated: false };
+      const block = `## ${src.title}\n\n${capped.text}`;
+      if (blocks.length > 0) cursor += BLOCK_SEPARATOR.length;
+      cursor += block.length;
+      blocks.push(block);
+      blockEnds.push(cursor);
+      stats.push({
+        id: src.id,
+        title: src.title,
+        chars: capped.text.length,
+        estimatedTokens: estimateTokens(capped.text),
+        truncated: capped.truncated,
+      });
+    }
+
+    // 3. concatenate → 4. global budget clamp.
+    const joined = blocks.join(BLOCK_SEPARATOR);
+    const clamped = clampToTokenBudget(joined, budgetTokens);
+
+    return {
+      text: clamped.text,
+      estimatedTokens: estimateTokens(clamped.text),
+      budgetTokens,
+      // The clamp keeps a prefix of `joined`, so any block ending past the
+      // surviving length lost content (partly or wholly) — flag it.
+      sources: clamped.truncated
+        ? markTailTruncated(stats, blockEnds, clamped.text.length)
+        : stats,
+      generatedFrom: opts.generatedFrom ??
"DeterministicKbDistiller",
+    };
+  }
+}
+
+/**
+ * Extract the named markdown sections from `markdown`, in document order.
+ * A "section" is a heading line (`#`..`######`) whose trimmed text matches
+ * one of `headings` (case-insensitively), plus every line up to (but
+ * excluding) the next heading at the same or shallower level. Unmatched
+ * headings are skipped silently — a renamed doc heading degrades to less
+ * grounding, never a crash.
+ *
+ * Both LF and CRLF input are accepted; the result always uses LF. Lines
+ * inside fenced code blocks (``` or ~~~) are never treated as headings, so
+ * a `# shell comment` in a snippet cannot open or close a section.
+ *
+ * @param markdown - Full markdown document text.
+ * @param headings - Heading texts to keep, without leading `#`s.
+ * @returns The kept sections joined by newlines and trimmed; "" if none match.
+ */
+export function extractSections(
+  markdown: string,
+  headings: ReadonlyArray<string>,
+): string {
+  const wanted = new Set(headings.map((h) => h.trim().toLowerCase()));
+  const headingRe = /^(#{1,6})\s+(.*)$/;
+  const fenceRe = /^ {0,3}(`{3,}|~{3,})(.*)$/;
+  const out: string[] = [];
+  let capturing = false;
+  let captureLevel = 0;
+  // Marker that opened the code fence we are inside, or null outside one.
+  let openFence: string | null = null;
+
+  // Split on LF or CRLF: a trailing "\r" would otherwise defeat `headingRe`
+  // (`.` never matches "\r" and `$` only matches at end of input).
+  for (const line of markdown.split(/\r?\n/)) {
+    const fence = fenceRe.exec(line);
+    if (fence) {
+      const marker = fence[1]!;
+      const rest = fence[2]!;
+      if (openFence === null) {
+        // A backtick fence's info string may not contain backticks — that
+        // would be inline code, not a fence opener.
+        if (marker[0] !== "`" || !rest.includes("`")) openFence = marker;
+      } else if (
+        marker[0] === openFence[0] &&
+        marker.length >= openFence.length &&
+        rest.trim() === ""
+      ) {
+        openFence = null;
+      }
+    }
+    // Inside a fence, `# …` is code (e.g. a shell comment), not a heading.
+    const m = openFence === null ? headingRe.exec(line) : null;
+    if (m) {
+      const level = m[1]!.length;
+      const title = m[2]!.trim().toLowerCase();
+      if (capturing && level <= captureLevel) {
+        // A heading at the same or shallower level closes the section.
+        capturing = false;
+      }
+      if (!capturing && wanted.has(title)) {
+        capturing = true;
+        captureLevel = level;
+        out.push(line);
+        continue;
+      }
+    }
+    if (capturing) out.push(line);
+  }
+
+  return out.join("\n").trim();
+}
+
+/**
+ * After a global-budget clip, mark every source whose rendered block does
+ * not survive in full as truncated, so the stats don't claim content the
+ * digest no longer carries. Offsets are compared instead of searching for
+ * the title, so a partially clipped block is caught and a title that also
+ * occurs inside an earlier source's body cannot mask a dropped source.
+ *
+ * @param stats - Per-source stats in input order.
+ * @param blockEnds - Exclusive end offset of each source's block in the
+ *   unclamped text, parallel to `stats`.
+ * @param survivingLength - Length of the text that survived the clamp.
+ */
+function markTailTruncated(
+  stats: ReadonlyArray<KbSourceStat>,
+  blockEnds: ReadonlyArray<number>,
+  survivingLength: number,
+): KbSourceStat[] {
+  return stats.map((s, i) => {
+    if (s.truncated || s.chars === 0) return { ...s };
+    const present = (blockEnds[i] ?? 0) <= survivingLength;
+    return present ?
{ ...s } : { ...s, truncated: true }; + }); +} diff --git a/ornn-api/src/domains/assistant/kb/loader.test.ts b/ornn-api/src/domains/assistant/kb/loader.test.ts new file mode 100644 index 00000000..ac12679f --- /dev/null +++ b/ornn-api/src/domains/assistant/kb/loader.test.ts @@ -0,0 +1,122 @@ +/** + * UT-KB-LOAD-* — AssistantKbLoader, token helpers, and a guard test on + * the committed digest artifact (#970). + * + * @module domains/assistant/kb/loader.test + */ + +import { describe, expect, it } from "bun:test"; +import { AssistantKbLoader, stripMetadataBlock } from "./loader"; +import { + CHARS_PER_TOKEN, + DEFAULT_KB_TOKEN_BUDGET, + clampToTokenBudget, + estimateTokens, + resolveKbTokenBudget, +} from "./tokens"; + +describe("token helpers", () => { + it("UT-KB-TOKEN-001: estimateTokens ~ chars/4", () => { + expect(estimateTokens("")).toBe(0); + expect(estimateTokens("a".repeat(4))).toBe(1); + expect(estimateTokens("a".repeat(5))).toBe(2); + }); + + it("UT-KB-TOKEN-002: clampToTokenBudget never exceeds budget", () => { + const text = "word ".repeat(1_000); + const { text: clipped, truncated } = clampToTokenBudget(text, 20); + expect(truncated).toBe(true); + expect(clipped.length).toBeLessThanOrEqual(20 * CHARS_PER_TOKEN); + }); + + it("UT-KB-TOKEN-003: under-budget text is returned untouched", () => { + const { text, truncated } = clampToTokenBudget("short", 1_000); + expect(text).toBe("short"); + expect(truncated).toBe(false); + }); + + it("UT-KB-TOKEN-004: resolveKbTokenBudget honours env, falls back on garbage", () => { + expect(resolveKbTokenBudget({})).toBe(DEFAULT_KB_TOKEN_BUDGET); + expect(resolveKbTokenBudget({ ASSISTANT_KB_TOKEN_BUDGET: "5000" })).toBe(5_000); + expect(resolveKbTokenBudget({ ASSISTANT_KB_TOKEN_BUDGET: "nope" })).toBe( + DEFAULT_KB_TOKEN_BUDGET, + ); + expect(resolveKbTokenBudget({ ASSISTANT_KB_TOKEN_BUDGET: "-5" })).toBe( + DEFAULT_KB_TOKEN_BUDGET, + ); + }); +}); + +describe("stripMetadataBlock", () => { + it("UT-KB-LOAD-001: removes a 
single leading HTML comment block", () => { + const raw = "\n\n## Body\n\ncontent"; + const out = stripMetadataBlock(raw); + expect(out.startsWith("## Body")).toBe(true); + expect(out).not.toContain("meta: here"); + }); + + it("UT-KB-LOAD-002: leaves a digest without a header untouched", () => { + const raw = "## Body\n\ncontent"; + expect(stripMetadataBlock(raw)).toBe(raw); + }); +}); + +describe("AssistantKbLoader", () => { + const HEADER = "\n\n"; + + it("UT-KB-LOAD-003: loads, strips header, and caches (reads once)", () => { + let reads = 0; + const loader = new AssistantKbLoader({ + budgetTokens: 10_000, + readDigest: () => { + reads += 1; + return `${HEADER}## Ornn\n\nOrnn is a skill-lifecycle API.`; + }, + }); + const first = loader.load(); + const second = loader.load(); + expect(reads).toBe(1); // cached + expect(first).toBe(second); + expect(first.text.startsWith("## Ornn")).toBe(true); + expect(first.text).not.toContain("meta"); + expect(first.estimatedTokens).toBe(estimateTokens(first.text)); + expect(first.truncated).toBe(false); + }); + + it("UT-KB-LOAD-004: budget enforcement — oversized artifact is clamped on load", () => { + const body = "word ".repeat(5_000); // ~6250 tokens + const loader = new AssistantKbLoader({ + budgetTokens: 100, + readDigest: () => `${HEADER}${body}`, + }); + const kb = loader.load(); + expect(kb.truncated).toBe(true); + expect(kb.estimatedTokens).toBeLessThanOrEqual(100); + expect(kb.text.length).toBeLessThanOrEqual(100 * CHARS_PER_TOKEN); + }); + + it("UT-KB-LOAD-005: read failure degrades to empty grounding (no throw)", () => { + const loader = new AssistantKbLoader({ + readDigest: () => { + throw new Error("ENOENT"); + }, + }); + const kb = loader.load(); + expect(kb.text).toBe(""); + expect(kb.estimatedTokens).toBe(0); + }); + + it("UT-KB-LOAD-006: invalidate() forces a re-read", () => { + let reads = 0; + const loader = new AssistantKbLoader({ + readDigest: () => { + reads += 1; + return `${HEADER}content`; + }, + }); 
+ loader.load(); + loader.invalidate(); + loader.load(); + expect(reads).toBe(2); + }); +}); diff --git a/ornn-api/src/domains/assistant/kb/loader.ts b/ornn-api/src/domains/assistant/kb/loader.ts new file mode 100644 index 00000000..d93ba4ed --- /dev/null +++ b/ornn-api/src/domains/assistant/kb/loader.ts @@ -0,0 +1,124 @@ +/** + * Runtime loader for the Ornn Assistant knowledge base (#970). + * + * The KB digest is a *committed build artifact* (`digest.generated.md`) + * produced by `scripts/build-assistant-kb.ts`. The loader's only job is to + * read that single file, strip its provenance header, defensively clamp it + * to the token budget, and cache the result in-process. It deliberately + * does NOT re-read the repo's source docs at runtime — those don't ship in + * the container, and re-distilling on every boot would be non-deterministic + * and slow. Build-time produces; runtime consumes. + * + * Deterministic + cached: the first `load()` reads + parses the artifact; + * every subsequent call returns the cached value. A failed read degrades to + * empty grounding (logged) rather than crashing the assistant — the skill + * retrieval + the user's question still produce a useful answer. + * + * @module domains/assistant/kb/loader + */ + +import { join } from "node:path"; +import { readFileSync } from "node:fs"; +import { createLogger } from "../../../shared/logger"; +import { + clampToTokenBudget, + estimateTokens, + resolveKbTokenBudget, +} from "./tokens"; + +const logger = createLogger("assistantKb"); + +/** Loaded grounding, ready to drop into the LLM system context. */ +export interface AssistantKb { + /** Grounding text (provenance header stripped, budget-clamped). */ + readonly text: string; + readonly estimatedTokens: number; + readonly budgetTokens: number; + /** True if the artifact exceeded the budget and was clamped on load. */ + readonly truncated: boolean; +} + +/** Filename of the committed digest artifact, colocated with this module. 
*/ +export const DIGEST_ARTIFACT_FILENAME = "digest.generated.md"; + +/** Default reader — reads the colocated committed artifact. */ +export function defaultDigestReader(): string { + return readFileSync(join(import.meta.dir, DIGEST_ARTIFACT_FILENAME), "utf-8"); +} + +export interface AssistantKbLoaderDeps { + /** Token budget; defaults to the env-resolved value. */ + readonly budgetTokens?: number; + /** Digest source; injectable for tests. Defaults to the artifact file. */ + readonly readDigest?: () => string; +} + +/** + * Reads + caches the assistant KB digest. One instance per process is the + * intended usage (constructed in bootstrap); the cache lives for the + * process lifetime since the artifact is immutable at runtime. + */ +export class AssistantKbLoader { + private readonly budgetTokens: number; + private readonly readDigest: () => string; + private cached: AssistantKb | null = null; + + constructor(deps: AssistantKbLoaderDeps = {}) { + this.budgetTokens = deps.budgetTokens ?? resolveKbTokenBudget(); + this.readDigest = deps.readDigest ?? 
defaultDigestReader;
+  }
+
+  /**
+   * Return the grounding digest, reading the artifact on the first call and
+   * serving the cached value afterwards. Never throws on a read failure:
+   * the error is logged and empty grounding is returned (and cached).
+   */
+  load(): AssistantKb {
+    if (this.cached) return this.cached;
+
+    let raw = "";
+    try {
+      raw = this.readDigest();
+    } catch (err) {
+      logger.error(
+        // A thrown value is not guaranteed to be an Error instance.
+        { err: err instanceof Error ? err.message : String(err) },
+        "assistant KB digest read failed — grounding degrades to empty",
+      );
+    }
+
+    const body = stripMetadataBlock(raw).trim();
+    const { text, truncated } = clampToTokenBudget(body, this.budgetTokens);
+    if (truncated) {
+      logger.warn(
+        { budgetTokens: this.budgetTokens },
+        "assistant KB digest exceeded token budget on load — clamped defensively",
+      );
+    }
+
+    const kb: AssistantKb = {
+      text,
+      estimatedTokens: estimateTokens(text),
+      budgetTokens: this.budgetTokens,
+      truncated,
+    };
+    this.cached = kb;
+    logger.info(
+      {
+        estimatedTokens: kb.estimatedTokens,
+        budgetTokens: kb.budgetTokens,
+        truncated,
+      },
+      "assistant KB digest loaded",
+    );
+    return kb;
+  }
+
+  /** Ops/test hook: drop the cache so the next `load()` re-reads. */
+  invalidate(): void {
+    this.cached = null;
+  }
+}
+
+/**
+ * Strip a single leading HTML comment block — the generated-artifact
+ * provenance header — so build metadata never reaches the model context.
+ * A BOM, if present, is tolerated before the comment. Input that does not
+ * start with a comment is returned unchanged.
+ */
+export function stripMetadataBlock(raw: string): string {
+  // Lazy `[\s\S]*?` stops at the FIRST `-->`, so only the leading block is
+  // removed; comments later in the body are preserved.
+  return raw.replace(/^\uFEFF?\s*<!--[\s\S]*?-->\s*/, "");
+}
diff --git a/ornn-api/src/domains/assistant/kb/sources.ts b/ornn-api/src/domains/assistant/kb/sources.ts
new file mode 100644
index 00000000..a4a92b1b
--- /dev/null
+++ b/ornn-api/src/domains/assistant/kb/sources.ts
@@ -0,0 +1,89 @@
+/**
+ * Build-time source manifest for the Ornn Assistant knowledge base (#970).
+ *
+ * Declares WHICH repo docs feed the grounding digest, in priority order,
+ * with per-source token caps and (where a doc is mostly irrelevant to
+ * Q&A) a heading allow-list so only the useful sections survive.
+ *
+ * This manifest is consumed ONLY by `scripts/build-assistant-kb.ts` at
+ * build time — paths are relative to the repo root and the files don't
+ * ship in the runtime container. The runtime loads the produced artifact,
+ * not these sources. Curation policy lives here so adding/retuning a
+ * source is a one-line data change, not code.
+ *
+ * Ground rules:
+ *   - Docs only. Never list source code, configs, or anything that could
+ *     carry secrets — the digest is fed verbatim to a model and streamed
+ *     to users.
+ *   - Order = priority. Earlier sources win the budget if the global cap
+ *     bites; the assistant's identity ("what is Ornn") leads.
+ *
+ * @module domains/assistant/kb/sources
+ */
+
+/** A planned source: where to read it and how much of it to keep. */
+export interface KbSourceSpec {
+  /** Stable id for this source. */
+  readonly id: string;
+  /** Human-facing title for this source. */
+  readonly title: string;
+  /** Path relative to the repo root. */
+  readonly repoRelPath: string;
+  /** Per-source token cap applied after section extraction. */
+  readonly maxTokens: number;
+  /** Optional markdown heading allow-list (heading text without leading `#`s). */
+  readonly headings?: ReadonlyArray<string>;
+}
+
+/**
+ * Curated, priority-ordered manifest. Tuned so the sum of caps lands a
+ * little under the default 18k budget, leaving headroom for the global
+ * clamp — see `scripts/build-assistant-kb.ts`.
+ */
+export const KB_SOURCE_MANIFEST: ReadonlyArray<KbSourceSpec> = [
+  {
+    // The single best "what is Ornn / why / how it works" doc.
+    id: "readme",
+    title: "Ornn — Overview (README)",
+    repoRelPath: "README.md",
+    maxTokens: 2_800,
+  },
+  {
+    // Positioning only — skip the release-process / deploy boilerplate.
+    id: "claude-positioning",
+    title: "Product Positioning",
+    repoRelPath: "CLAUDE.md",
+    maxTokens: 1_500,
+    headings: ["Product Positioning"],
+  },
+  {
+    // External services + skill format + observability pipeline.
+ id: "architecture", + title: "Architecture", + repoRelPath: "docs/ARCHITECTURE.md", + maxTokens: 2_400, + }, + { + // The authoritative agent contract: search → pull → execute → build → + // upload → share over HTTP. The most-asked "how do I …" answers live + // here. HTTP manual is the live path (no CLI shipped yet). + id: "agent-manual-http", + title: "Using Ornn from an AI Agent (HTTP API)", + repoRelPath: "skills/ornn-agent-manual-http/SKILL.md", + maxTokens: 5_500, + }, + { + // Normative /api/v1 contract — envelope, errors, paths, auth. + id: "conventions", + title: "API Conventions", + repoRelPath: "docs/CONVENTIONS.md", + maxTokens: 2_600, + }, + { + // Visual spec is mostly irrelevant to Q&A; keep only the opening + // philosophy/overview so "what does Ornn look/feel like" has an anchor. + id: "design-overview", + title: "Design System (Overview)", + repoRelPath: "docs/DESIGN.md", + maxTokens: 700, + headings: ["Product Context", "Design Thesis"], + }, +]; diff --git a/ornn-api/src/domains/assistant/kb/tokens.ts b/ornn-api/src/domains/assistant/kb/tokens.ts new file mode 100644 index 00000000..3312cc07 --- /dev/null +++ b/ornn-api/src/domains/assistant/kb/tokens.ts @@ -0,0 +1,78 @@ +/** + * Token-budget arithmetic for the Ornn Assistant knowledge base (#970). + * + * The assistant grounds every answer in a curated, size-budgeted digest + * of the repo's knowledge-bearing docs. We never want that grounding to + * blow the model's context window, so the digest is bounded by a *token + * budget* both at build time (the curation pass) and at load time (a + * defensive clamp). + * + * Token counts here are deliberately a cheap, deterministic heuristic + * (chars ÷ 4) rather than a real tokenizer: the digest is model-agnostic + * (Claude / GPT / Gemini all differ), so an exact count for one model is + * meaningless for another. ~4 chars/token is the well-known English + * average and is conservative enough for budgeting headroom. 
Determinism
+ * matters more than precision — the same input must always yield the same
+ * digest so the loader cache and CI artifact stay stable.
+ *
+ * @module domains/assistant/kb/tokens
+ */
+
+/** Conservative average characters-per-token for English prose. */
+export const CHARS_PER_TOKEN = 4;
+
+/**
+ * Default grounding budget in tokens (~15–20k target per #970). Kept well
+ * under any modern model's context window so the retrieved-skills block +
+ * the conversation still fit comfortably alongside it.
+ */
+export const DEFAULT_KB_TOKEN_BUDGET = 18_000;
+
+/** Env var name for overriding the digest token budget (build + load). */
+export const KB_TOKEN_BUDGET_ENV = "ASSISTANT_KB_TOKEN_BUDGET";
+
+/**
+ * Resolve the active token budget from the environment, falling back to
+ * {@link DEFAULT_KB_TOKEN_BUDGET}. Invalid / non-positive values are
+ * ignored (fall back to the default) rather than throwing — a misconfig
+ * must never take the assistant offline. Fractional values are floored
+ * BEFORE validation, so anything that floors to zero (e.g. "0.5") also
+ * falls back instead of resolving to a budget of 0.
+ *
+ * @param env - Environment map to read {@link KB_TOKEN_BUDGET_ENV} from;
+ *   defaults to `process.env`.
+ * @returns A positive integer token budget.
+ */
+export function resolveKbTokenBudget(
+  env: Record<string, string | undefined> = process.env,
+): number {
+  const raw = env[KB_TOKEN_BUDGET_ENV];
+  if (raw === undefined || raw.trim() === "") return DEFAULT_KB_TOKEN_BUDGET;
+  // Floor first: Math.floor keeps NaN/±Infinity non-finite and maps (0, 1)
+  // to 0, so the single guard below rejects every unusable value.
+  const parsed = Math.floor(Number(raw));
+  if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_KB_TOKEN_BUDGET;
+  return parsed;
+}
+
+/**
+ * Estimate the token count of `text` using the chars-per-token heuristic
+ * (length ÷ {@link CHARS_PER_TOKEN}, rounded up; 0 for the empty string).
+ */
+export function estimateTokens(text: string): number {
+  if (text.length === 0) return 0;
+  return Math.ceil(text.length / CHARS_PER_TOKEN);
+}
+
+/**
+ * Clip `text` so its estimated token count does not exceed `budgetTokens`.
+ * Clips on a whitespace boundary near the limit when possible so the digest
+ * doesn't end mid-word. Returns the (possibly clipped) text and whether a
+ * clip happened.
+ */ +export function clampToTokenBudget( + text: string, + budgetTokens: number, +): { readonly text: string; readonly truncated: boolean } { + if (budgetTokens <= 0) return { text: "", truncated: text.length > 0 }; + const maxChars = budgetTokens * CHARS_PER_TOKEN; + if (text.length <= maxChars) return { text, truncated: false }; + const hardCut = text.slice(0, maxChars); + // Prefer the last newline, then the last space, to avoid cutting a word. + const lastBreak = Math.max(hardCut.lastIndexOf("\n"), hardCut.lastIndexOf(" ")); + // Only honour the soft break if it's reasonably close to the limit + // (within the last 20%) — otherwise a doc with no whitespace near the + // cut would throw away too much content. + const softCut = + lastBreak >= maxChars * 0.8 ? hardCut.slice(0, lastBreak) : hardCut; + return { text: softCut.trimEnd(), truncated: true }; +} From 318a720e746b1d1b2f022de588c78ae943fa9e27 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:46:17 +0800 Subject: [PATCH 014/110] feat(api): build-assistant-kb script + committed grounding digest (#970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the build pipeline that turns the curated source manifest into the runtime grounding artifact: - scripts/build-assistant-kb.ts reads the manifest docs from the repo root, runs the DeterministicKbDistiller, and writes the committed digest.generated.md. Output carries an HTML-comment provenance header (stripped by the loader, never fed to the model) and NO timestamp, so re-running on unchanged inputs is byte-identical (clean diffs, stable CI). Missing source docs are skipped with a warning, not a failure. - digest.generated.md: the committed artifact (~12.4k tokens, 6 sources) — README overview, CLAUDE positioning, ARCHITECTURE, the HTTP agent manual, API conventions, and a design overview slice. 
- digest.artifact.test.ts guards the shipped artifact: non-empty, within budget, header hidden, Ornn-grounded, no secret-shaped content. - package.json: `bun run build:assistant-kb` to regenerate. Regenerate the artifact whenever the manifest or source docs change. --- ornn-api/package.json | 3 +- ornn-api/scripts/build-assistant-kb.ts | 154 +++ .../assistant/kb/digest.artifact.test.ts | 42 + .../domains/assistant/kb/digest.generated.md | 1019 +++++++++++++++++ 4 files changed, 1217 insertions(+), 1 deletion(-) create mode 100644 ornn-api/scripts/build-assistant-kb.ts create mode 100644 ornn-api/src/domains/assistant/kb/digest.artifact.test.ts create mode 100644 ornn-api/src/domains/assistant/kb/digest.generated.md diff --git a/ornn-api/package.json b/ornn-api/package.json index 5580f918..e335f851 100644 --- a/ornn-api/package.json +++ b/ornn-api/package.json @@ -10,7 +10,8 @@ "migrate:versions": "bun run scripts/migrate-skill-versions.ts", "migrate:ownership": "bun run scripts/migrate-skill-ownership.ts", "migrate:drop-topics": "bun run scripts/drop-topics.ts", - "audit:reserved-verbs": "bun run scripts/audit-reserved-verbs.ts" + "audit:reserved-verbs": "bun run scripts/audit-reserved-verbs.ts", + "build:assistant-kb": "bun run scripts/build-assistant-kb.ts" }, "dependencies": { "@agendajs/mongo-backend": "^4.0.2", diff --git a/ornn-api/scripts/build-assistant-kb.ts b/ornn-api/scripts/build-assistant-kb.ts new file mode 100644 index 00000000..cad6551e --- /dev/null +++ b/ornn-api/scripts/build-assistant-kb.ts @@ -0,0 +1,154 @@ +/** + * Build the Ornn Assistant knowledge-base digest (#970). + * + * bun run scripts/build-assistant-kb.ts + * + * Reads the curated repo docs declared in + * `src/domains/assistant/kb/sources.ts`, distills them into a single + * size-budgeted grounding digest, and writes the committed artifact at + * `src/domains/assistant/kb/digest.generated.md`. The runtime loader reads + * that artifact — it never re-reads these source docs. 
+ * + * v1 uses the deterministic distiller (priority-ordered curation + per- + * source caps + global budget clamp). The "big model reads the repo at + * build time" idea slots in here: swap `DeterministicKbDistiller` for an + * `LlmKbDistiller` that implements the same `KbDistiller` contract — the + * rest of this script (sourcing, writing, provenance) is unchanged. + * + * Budget is overridable via the `ASSISTANT_KB_TOKEN_BUDGET` env var. + * + * Determinism note: the artifact header intentionally carries NO timestamp + * so re-running on unchanged inputs yields a byte-identical file (clean + * diffs, stable CI). Provenance is the input list + budget, not a clock. + * + * @module scripts/build-assistant-kb + */ + +import { readFileSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { createLogger } from "../src/shared/logger"; +import { + DeterministicKbDistiller, + type KbDistiller, + type KbSourceDoc, +} from "../src/domains/assistant/kb/distiller"; +import { KB_SOURCE_MANIFEST } from "../src/domains/assistant/kb/sources"; +import { DIGEST_ARTIFACT_FILENAME } from "../src/domains/assistant/kb/loader"; +import { resolveKbTokenBudget } from "../src/domains/assistant/kb/tokens"; + +const logger = createLogger("buildAssistantKb"); + +// scripts/ → ornn-api/ → repo root +const REPO_ROOT = join(import.meta.dir, "..", ".."); +const ARTIFACT_PATH = join( + import.meta.dir, + "..", + "src", + "domains", + "assistant", + "kb", + DIGEST_ARTIFACT_FILENAME, +); + +/** + * Read each manifest source from the repo root. Missing files are skipped + * with a warning — a doc rename must not break the build, it just yields + * less grounding until the manifest is retuned. 
+ */ +function readSources(): KbSourceDoc[] { + const docs: KbSourceDoc[] = []; + for (const spec of KB_SOURCE_MANIFEST) { + const abs = join(REPO_ROOT, spec.repoRelPath); + let text: string; + try { + text = readFileSync(abs, "utf-8"); + } catch (err) { + logger.warn( + { id: spec.id, path: spec.repoRelPath, err: (err as Error).message }, + "KB source missing — skipping", + ); + continue; + } + docs.push({ + id: spec.id, + title: spec.title, + text, + maxTokens: spec.maxTokens, + ...(spec.headings ? { headings: spec.headings } : {}), + }); + } + return docs; +} + +function renderArtifact( + digestText: string, + header: { + budgetTokens: number; + estimatedTokens: number; + sources: ReadonlyArray<{ id: string; estimatedTokens: number; truncated: boolean }>; + }, +): string { + const sourceLines = header.sources + .map( + (s) => + ` - ${s.id}: ~${s.estimatedTokens} tok${s.truncated ? " (clipped)" : ""}`, + ) + .join("\n"); + // HTML-comment provenance block — stripped by the loader, never fed to + // the model. No timestamp (see module doc: deterministic output). 
+ const meta = [ + "", + "", + ].join("\n"); + return `${meta}\n${digestText}\n`; +} + +function main(): void { + const budgetTokens = resolveKbTokenBudget(); + const distiller: KbDistiller = new DeterministicKbDistiller(); + + const sources = readSources(); + if (sources.length === 0) { + logger.error("No KB sources could be read — aborting without writing artifact"); + process.exitCode = 1; + return; + } + + const digest = distiller.distill(sources, { + budgetTokens, + generatedFrom: "scripts/build-assistant-kb.ts (DeterministicKbDistiller)", + }); + + const artifact = renderArtifact(digest.text, { + budgetTokens: digest.budgetTokens, + estimatedTokens: digest.estimatedTokens, + sources: digest.sources, + }); + + writeFileSync(ARTIFACT_PATH, artifact, "utf-8"); + + logger.info( + { + artifact: ARTIFACT_PATH, + budgetTokens: digest.budgetTokens, + estimatedTokens: digest.estimatedTokens, + sourceCount: digest.sources.length, + sources: digest.sources.map((s) => ({ + id: s.id, + tokens: s.estimatedTokens, + truncated: s.truncated, + })), + }, + "Assistant KB digest written", + ); +} + +main(); diff --git a/ornn-api/src/domains/assistant/kb/digest.artifact.test.ts b/ornn-api/src/domains/assistant/kb/digest.artifact.test.ts new file mode 100644 index 00000000..3ddb2148 --- /dev/null +++ b/ornn-api/src/domains/assistant/kb/digest.artifact.test.ts @@ -0,0 +1,42 @@ +/** + * UT-KB-ARTIFACT-* — guard tests on the COMMITTED digest artifact (#970). + * + * These assert the build output that actually ships: it must exist, be + * non-empty, stay within budget, hide its provenance header from the + * model, carry Ornn-identity grounding, and contain no secret-shaped + * content. Regenerate via `bun run build:assistant-kb` if these fail + * after editing the source manifest or docs. 
+ * + * @module domains/assistant/kb/digest.artifact.test + */ + +import { describe, expect, it } from "bun:test"; +import { AssistantKbLoader, defaultDigestReader } from "./loader"; +import { DEFAULT_KB_TOKEN_BUDGET } from "./tokens"; + +describe("committed digest artifact", () => { + it("UT-KB-ARTIFACT-001: ships, is non-empty, within budget, and grounded", () => { + const loader = new AssistantKbLoader({ readDigest: defaultDigestReader }); + const kb = loader.load(); + expect(kb.text.length).toBeGreaterThan(2_000); + expect(kb.estimatedTokens).toBeLessThanOrEqual(DEFAULT_KB_TOKEN_BUDGET); + // Provenance header must not leak into the grounding. + expect(kb.text).not.toContain("GENERATED FILE"); + // Sanity: the digest actually carries Ornn-identity grounding. + expect(kb.text.toLowerCase()).toContain("ornn"); + expect(kb.text.toLowerCase()).toContain("skill"); + }); + + it("UT-KB-ARTIFACT-002: carries no obvious secret-shaped content", () => { + const loader = new AssistantKbLoader({ readDigest: defaultDigestReader }); + const text = loader.load().text; + // Docs-only digest: assert none of the secret-ish markers leaked in. + for (const needle of [ + "BEGIN PRIVATE KEY", + "BEGIN RSA", + "clientSecret", + ]) { + expect(text.includes(needle)).toBe(false); + } + }); +}); diff --git a/ornn-api/src/domains/assistant/kb/digest.generated.md b/ornn-api/src/domains/assistant/kb/digest.generated.md new file mode 100644 index 00000000..dc073472 --- /dev/null +++ b/ornn-api/src/domains/assistant/kb/digest.generated.md @@ -0,0 +1,1019 @@ + + +## Ornn — Overview (README) + +

+ + Ornn — agent-facing skill-lifecycle API + +

+ +

+ CI + Latest release + License +  The skill lifecycle API for AI agents, not another marketplace. +

+ +--- + +## What is Ornn + +Ornn is an **agent-facing skill-lifecycle API**. AI agents call Ornn directly — over HTTPS — to manage the full lifecycle of their skills: + +``` +search → pull → install → execute → audit → build → upload → share +``` + +Closest analog: **npm registry + npm CLI, fused, model-agnostic.** It works for Claude, GPT, Gemini, or any custom agent runtime. Not locked to a single model. + +### Why we built it + +Modern AI agents do real work by composing **skills** — packaged prompts, scripts, and tools the agent invokes on demand. As soon as you build more than one agent, the same gaps show up: + +- **No shared registry.** Skills live in private repos, gists, and one-off config files. There's no way for an agent to discover one it doesn't already know about. +- **Model-locked alternatives.** Anthropic Skills, OpenAI custom GPTs, and Gemini Gems each ship a registry — but only for their own runtime. Skills don't cross. +- **No lifecycle.** Versioning, sandboxed execution, security audit, publish — every team rebuilds these from scratch. + +Ornn closes the gaps. One model-agnostic registry, one API surface, and a CLI (`nyxid`) every agent can drive end-to-end. The web UI at [ornn.chrono-ai.fun](https://ornn.chrono-ai.fun) is a thin admin layer for skill owners; the API is the product. 
+ +## How it works + +```mermaid +%%{init: { + "theme": "base", + "themeVariables": { + "background": "#0B0907", + "primaryColor": "#1A1610", + "primaryTextColor": "#F1ECDE", + "primaryBorderColor": "#3A3328", + "lineColor": "#7E776B", + "secondaryColor": "#221E16", + "tertiaryColor": "#14110B", + "edgeLabelBackground": "#0B0907", + "clusterBkg": "#14110B", + "clusterBorder": "#3A3328", + "fontFamily": "JetBrains Mono, ui-monospace, SFMono-Regular, Menlo, monospace", + "fontSize": "13px" + } +}}%% +flowchart LR + subgraph local["[ § YOUR MACHINE ]"] + direction TB + Agent["AI agent"] + CLI["nyxid CLI"] + Skill["Pulled skills"] + end + subgraph cloud["[ § ORNN CLOUD ]"] + direction TB + API["ornn-api"] + Auth["NyxID"] + Store[("Skill registry")] + Sandbox["Sandbox"] + end + + Agent -->|invoke| CLI + CLI ==>|HTTPS| API + API -->|verify| Auth + API -->|r/w| Store + API -->|exec| Sandbox + API -.->|artifact| Agent + Agent -->|run| Skill + + classDef ember fill:#FF7322,stroke:#C9460D,color:#14130E,stroke-width:2px,font-weight:bold + classDef arc fill:#5BC8E8,stroke:#3A8FB8,color:#14130E,stroke-width:2px,font-weight:bold + classDef forged fill:#1A1610,stroke:#3A3328,color:#F1ECDE,stroke-width:1.5px + classDef storage fill:#221E16,stroke:#3A3328,color:#C9BFAD,stroke-width:1.5px + + class Agent forged + class CLI forged + class Skill forged + class Sandbox forged + class API ember + class Auth arc + class Store storage + + style local fill:#221E16,stroke:#3A3328,color:#F1ECDE,stroke-width:1.5px + style cloud fill:#14110B,stroke:#3A3328,color:#F1ECDE,stroke-width:1.5px + + linkStyle 1 stroke:#FF7322,stroke-width:2.5px +``` + +Every API call is mediated by [`nyxid`](https://github.com/ChronoAIProject/NyxID) — the shared identity + brokering layer ChronoAI uses across products. The agent never holds a long-lived token: `nyxid` refreshes credentials transparently and brokers per-service access for each request. + +## Quickstart + +> **Status:** alpha. 
The API surface can still change before v1 — pin a release tag if you ship to production. + +### 1. Create a NyxID account + +Sign up at [**nyx.chrono-ai.fun**](https://nyx.chrono-ai.fun) with invite code **`NYX-2XXJI08A`**. Sign in with **GitHub**, **Google**, or **Apple** — NyxID is the identity layer that authenticates every Ornn API call. One account covers every ChronoAI service. + +### 2. Install the Ornn agent manual into your AI agent + +Open [**`ornn-agent-manual-cli`**](https://ornn.chrono-ai.fun/skills/ornn-agent-manual-cli) and follow the install instructions for your agent runtime (Claude Code, OpenAI Codex, Cursor, …). This skill is the **operational manual Ornn ships for AI agents**: once it's loaded into your agent, the agent knows how to drive the full `search → pull → execute → build → upload → share` lifecycle on its own — no further hand-holding required. + +Partway through setup, your agent will prompt you to install [**`nyxid`**](https://github.com/ChronoAIProject/NyxID) — the CLI Ornn calls under the hood to broker authenticated requests. Approve the prompt; the agent finishes onboarding itself. + +### 3. Talk to your agent + +That's it. Your agent now has the full Ornn lifecycle. Try any of these in plain language — no special syntax, no flags to memorise: + +- **Search the registry.** + > *"Find me a skill that converts CSV to JSON."* + + Hits semantic + keyword search across every public skill. + +- **Pull and install a skill.** + > *"Pull and install the skill `pdf-extractor`, then use it on `report.pdf`."* + + Fetches the latest versioned artifact into your local runtime and runs it. + +- **Trigger a security audit.** + > *"Run a security audit on the skill `web-scraper`."* + + Kicks the AgentSeal pipeline against a published version — static analysis, sandbox probe, dependency scan. 
+ +- **Build and publish a new skill.** + > *"Build me a skill that summarises RSS feeds and upload it under my account."* + + Drives `ornn-build` to generate the skill, packages it, and publishes a new version through your NyxID identity. + +For the full API contract (every endpoint, every error code), see [**ornn.chrono-ai.fun/docs**](https://ornn.chrono-ai.fun/docs). + +## Community and Contributing + +- **Questions / how-to** → [Discussions → Q&A](https://github.com/ChronoAIProject/Ornn/discussions/categories/q-a) +- **Ideas / RFCs** → [Discussions → Ideas](https://github.com/ChronoAIProject/Ornn/discussions/categories/ideas) +- **Show off your agent integration** → [Discussions → Show & Tell](https://github.com/ChronoAIProject/Ornn/discussions/categories/show-and-tell) +- **Bug or feature** → [open an issue](https://github.com/ChronoAIProject/Ornn/issues/new/choose) +- **Roadmap** → [Issues](https://github.com/ChronoAIProject/Ornn/issues) · [Milestones](https://github.com/ChronoAIProject/Ornn/milestones) · [Releases](https://github.com/ChronoAIProject/Ornn/releases) +- **Security report** → [Private Vulnerability Reporting](https://github.com/ChronoAIProject/Ornn/security/advisories/new) (see [SECURITY.md](SECURITY.md)) +- **Support guide** → [SUPPORT.md](SUPPORT.md) +- **Pull requests** → read [CONTRIBUTING.md](CONTRIBUTING.md) first — it covers the issue-first workflow, branching, commit decomposition, and the changeset rule (CI blocks PRs without one). By participating you agree to follow our [Code of Conduct](CODE_OF_CONDUCT.md). + +## License + +[Apache License 2.0](LICENSE) + +--- + +## Product Positioning + +## Product Positioning + +**Ornn is an agent-facing skill-lifecycle API, not a human marketplace.** + +The primary customer is the AI agent developer / agentic-system builder. Agents call Ornn directly — over HTTP or MCP — to manage their own skill lifecycle: search → pull → install → execute → build → upload → share. 
Closest analog: **npm registry + npm CLI fused, model-agnostic** (works for Claude / GPT / Gemini / custom — not locked to one model runtime). + +Implications when proposing or building features: + +- Lead with the **agent-API contract** (REST / MCP ergonomics, stable schemas, model-agnostic guarantees) before any human-UX angle. +- `ornn-web` is a *secondary* surface for skill owners and platform admins — it is not the primary product. UI features that don't translate into agent-API value are deprioritized. +- Avoid feature framing that drifts toward "another skill marketplace" (social ranking, browse-style discovery, recommendation feeds, leaderboards) unless we deliberately decide to. When a feature looks marketplace-shaped, surface that tension before building. + +--- + +## Architecture + +# Architecture — chrono-ornn + +> For API v1 and architecture conventions, see [`CONVENTIONS.md`](./CONVENTIONS.md). Active refactor work is tracked under the [`Refactor` milestone](https://github.com/ChronoAIProject/Ornn/milestone/6). + +## Project Overview + +chrono-ornn is an AI skill platform. Users create, publish, search, and execute AI skills (packaged prompts + scripts) via a web UI or API. Authentication and LLM calls go through NyxID. Script execution runs in chrono-sandbox. 
+ +## External Services + +| Service | How ornn-api talks to it | +|---------|---------------------------| +| NyxID | JWT verification (JWKS), API key introspection, LLM Gateway (Responses API) | +| chrono-sandbox | `POST /execute` — script execution with env vars, dependencies, file retrieval | +| chrono-storage | Upload/download/delete skill packages (presigned URLs) | + +## Skill Format + +- Available runtimes: `node`, `python` +- Frontmatter field for dependencies: `runtime-dependency` +- Category types: `plain`, `tool-based`, `runtime-based`, `mixed` +- Output types: `text` (stdout), `file` (generated files retrieved via glob) + +## Audit + analytics (PostHog) + +Issue #271 collapsed every observability surface in Ornn — the +universal API audit middleware (#245), the `activities` Mongo +collection, and the OpenTelemetry placeholder section — into a single +PostHog-driven pipeline. There is **no custom audit code** in Ornn +anymore; everything flows through the `posthog-node` SDK and is +viewed in the PostHog dashboard. 
+ +### Event taxonomy + +Backend events (server-emitted, every event carries `source: "api"` +so dashboards can disambiguate from frontend events of the same name): + +| event | when | properties | +|---|---|---| +| `api.request` | every authenticated `/api/v1/*` request | `userId`, `callerType`, `method`, `path`, `routePattern`, `status`, `durationMs`, `sourceIp` (truncated /24 IPv4, /48 IPv6), `requestId` | +| `api.error` | sampled 5xx responses | `statusCode`, `errorCode`, `method`, `path`, `requestId` | +| `api.skill.pull` | every skill package materialization | `callerType`, `skillId`, `skillName`, `skillVersion` | +| `api.skill.published` | skill create + version publish | `skillId`, `skillVersion`, `isNewSkill` | +| `user.login` / `user.logout` | session open / close | — | +| `skill.created` / `.updated` / `.deleted` / `.version_deleted` | mutation routes | `skillId`, `skillName`, `version`, `adminAction?` | +| `skill.visibility_changed` / `.permissions_changed` | visibility + sharing flips | `skillId`, `isPrivate`, `sharedWithUsers`, `sharedWithOrgs` | +| `skill.refresh` / `.source_linked` / `.source_unlinked` | source-pointer ops | `skillId`, `repo`, `ref`, `commit` | +| `skill.nyxid_service_tied` / `.agentseal_rescanned` | tie + admin-rescan | `skillId`, `isSystemSkill`, `score` | +| `settings.exported` / `.imported` | settings IO | `schemaVersion`, `aggregateStatus`, `dryRun`, `sections` | + +Frontend events (browser SDK — `ornn-web/src/lib/analytics.ts`) carry +auto-pageview + cookie-consent state and the typed event union in +that file. Identity is set via `posthog.identify(userId, traits)` on +every NyxID login. + +### Caller-type detection + +`api.request` is emitted from `apiRequestTrackingMiddleware` mounted +on `/api/v1/*` AFTER `proxyAuthSetup`. 
`callerType` derives from auth +shape: + +| auth shape | `X-Ornn-Caller` | `callerType` | +|---|---|---| +| browser session (NyxID OAuth cookie / browser-scope Bearer) | — | `web` | +| NyxID forwarded user-access token (agent via NyxID proxy) | — | `api` | +| anonymous | `system` / `playground` | matches header | +| anonymous | other | `web` | + +The header is informational only. Source IP is read from +`X-Forwarded-For` (first hop), falls back to `X-Real-IP`, then +truncated to /24 (IPv4) or /48 (IPv6) before emit. + +### Configuration + +PostHog config lives in the admin `telemetry` settings section. +Backend reads it once at boot (`bootstrap.ts`) and falls back to env +vars when the DB section has no API key set: + +| field | env fallback | meaning | +|---|---|---| +| `postHogEnabled` | `POSTHOG_ENABLED` | master switch — off forces NoopTracker even with a key | +| `postHogApiKey` | `POSTHOG_API_KEY` | public project key (`phc_…`); empty disables | +| `postHogHost` | `POSTHOG_HOST` | ingest host (e.g. `https://eu.i.posthog.com`) | +| `postHogProjectId` | `POSTHOG_PROJECT_ID` | informational, surfaced in log lines | +| `postHogErrorSampleRate` | `POSTHOG_ERROR_SAMPLE_RATE` | `[0,1]` sampling for `api.error` | + +Admin DB is canonical: a non-empty `postHogApiKey` in the section +makes the entire DB record authoritative; otherwise env wins. +Restart-required for changes to apply (the SDK is initialized once +at boot). + +### Failure modes accepted + +- **No body archive.** Request/response bodies are not captured. + Forensic body-replay post-incident is not possible. The previous + MinIO-offload pipeline (#245) was removed. +- **Audit retention = PostHog retention.** Cloud free tier is + approximately 1 year of events; paid extends. Self-hosted PostHog + retains as long as the storage volume allows. +- **PostHog-side outages** drop events that miss the in-process + buffer. 
The drain on `shutdown()` flushes the buffer; sigterm + during a backlog can lose tail events. + +### Viewing data + +There is **no in-Ornn activity feed UI**. Admins use the PostHog +dashboard for the full event explorer, funnels, retention, and SQL +queries. The Ornn admin dashboard at `/admin` deep-links to the +PostHog Activity / Insights views via +`ornn-web/src/lib/postHogLinks.ts`, which translates the configured +ingest host (`<region>.i.posthog.com`) into the matching dashboard +host (`<region>.posthog.com`). + +### What about OpenTelemetry? + +Considered and deferred (issue #271 discussion). For Ornn's current +single-service architecture and the requirements covered here +(per-request audit, user activity, who-called-what), PostHog alone +is sufficient. OpenTelemetry's value (distributed tracing, metrics +histograms) doesn't justify standing up a collector + Tempo / Loki / +Jaeger today. Reopen as a separate issue if/when the architecture +splits across services or a concrete tracing pain point appears. + +### User directory + +The unified `users` Mongo collection (built in #271, replaces +`activities` + `admin_users` + `users_meta`) is fed lazily by +`proxyAuthSetup.onAuthSeen` on every authenticated request. It is +NOT audit data — it's an identity cache backing the skill-permissions +typeahead, the admin user list, and the dashboard role partition. +NyxID stays authoritative for permission checks; this collection is +display + indexing only. See +`ornn-api/src/domains/users/repository.ts`. + +--- + +## Using Ornn from an AI Agent (HTTP API) + +--- +name: ornn-agent-manual-http +description: 'Operational manual for AI agents using the Ornn skill-lifecycle API via direct HTTPS with a NyxID bearer token (`curl -H "Authorization: Bearer $TOKEN" …`). Once loaded, the host agent can search / pull / execute / build / upload / share skills end-to-end. Authoritative contract between Ornn and the agent. 
Pair this file with references/api-reference.md (the full per-endpoint catalogue + error legend) — both ship together as one Ornn skill.' +metadata: + category: plain + tag: + - ornn-api + - agent + - manual + - skill-lifecycle + - http +version: "1.1" +lastUpdated: 2026-04-29 +--- + +# Agent Manual (HTTPS variant) + +> **DEPRECATED — replaced by `chrono-ai-service-manual`.** This skill is kept for one minor release while consumers migrate. The unified manual folds NyxID identity / proxy AND the Ornn skill lifecycle into a single skill so an agent that wants to drive both halves of the stack only needs one install. The CLI vs HTTP distinction is preserved as a §-level switch inside `chrono-ai-service-manual` (§0.6 "Transport choice"), so HTTP-only agents lose nothing by switching. **New work should pull `chrono-ai-service-manual` instead.** This file will be removed in the release after the unified manual lands. + +> **You are an AI agent reading this manual to learn how to use Ornn.** Throughout this document, *"you"* always refers to **the agent itself** — not the human user driving you. +> +> ## First: install this skill locally +> +> Try to install this skill into your runtime's local skills directory: +> +> - **Claude Code:** `~/.claude/skills/ornn-agent-manual-http/` +> - **OpenAI Codex CLI:** `~/.codex/skills/ornn-agent-manual-http/` +> - **Cursor:** `.cursor/rules/ornn-agent-manual-http.md` +> - **Other agents:** whatever skill-discovery directory your platform uses. +> +> If your runtime has no local skills mechanism, load `SKILL.md` (and `references/api-reference.md`) into your system context and keep them there for the rest of the session. Either way, the manual must be in your context whenever you do anything Ornn-related. 
+> +> **And in *all* cases — whether you wrote the skill to disk or only into context — append an entry for it to `~/.ornn/installed-skills.json`.** That file is the persistent registry of every Ornn skill you (or any prior session on this machine) have installed; it's how the next session, or a different agent on the same box, knows what's already loaded and which versions to re-check. Schema + workflow detailed in §0.5. +> +> ## Always load this manual for any Ornn operation +> +> You **must** have this manual loaded whenever the user asks you to: +> +> - **Find, pull, and execute an existing Ornn skill** — or **build a new one and upload** when nothing matches — §2.1. +> - **Update a skill's visibility** (private / shared / public) — §2.2. +> - **Publish a new version** of a skill you own — §2.3. +> - **Trigger an audit** or **review the audit history** for a skill — §2.4 / §2.5. +> - **Pull a non-latest version**, **compare two versions**, or **delete / deprecate a version** — §2.6 / §2.7 / §2.10. +> - **Check usage analytics** for a skill — §2.8. +> - **Bind a skill to a NyxID service** (system / personal) — §2.9. +> - **Delete a skill** entirely — §2.11. +> - **Find skills** (by tag, author, system, shared, etc.) — §2.12. +> - **Pull your Ornn notifications** (audit fan-out, etc.) — §2.13. +> - **Link a skill to GitHub** or **trigger a sync** from the linked source — §2.14. +> +> Without this manual loaded, you do not know which endpoint to call, how to authenticate, or how to read the response shapes. +> +> Ornn's product is **Skill-as-a-Service for AI agents.** Skills are packaged AI capabilities (a `SKILL.md` prompt + optional scripts + YAML metadata) that any agent can pull and execute. This manual is the contract between Ornn and **you, the agent**. + +## §0. Updating this manual + +This manual is itself an Ornn skill (`name: ornn-agent-manual-http`). 
Its source of truth is the Ornn registry, not a static docs page — pull a fresh copy through the same API every other skill flows through. + +> **Base URL for every example below:** `https://ornn.chrono-ai.fun/api`. The `ornn-web` nginx in front of that domain routes any `/api/*` request through to the NyxID proxy, which authenticates with the bearer token you pass and forwards to `ornn-api`. You never call NyxID directly. Throughout this manual, `$TOKEN` stands for your NyxID bearer access token. + +**Whenever you want to check for an update, follow these steps verbatim:** + +1. Pull the latest version of this skill from Ornn: + + ```bash + curl -H "Authorization: Bearer $TOKEN" \ + "https://ornn.chrono-ai.fun/api/v1/skills/ornn-agent-manual-http/json" + ``` + +2. The response is `{ data: { name, description, metadata, files: { "SKILL.md": "...", "references/api-reference.md": "..." } } }`. Read the YAML frontmatter at the top of `SKILL.md` and find the `version:` field. + +3. Compare it to the `version:` in **your loaded copy** (the frontmatter at the top of this file). If they match, you are current — stop here. + +4. If the published version is newer: + - List every version available with `curl -H "Authorization: Bearer $TOKEN" "https://ornn.chrono-ai.fun/api/v1/skills/ornn-agent-manual-http/versions"`. The response has one row per version, newest first. + - Ask the user which version they want to load (they may want to pin to an older one for reproducibility). + - Once the user picks, fetch `https://ornn.chrono-ai.fun/api/v1/skills/ornn-agent-manual-http/json?version=<version>` and replace your context with the new `SKILL.md` (and `references/api-reference.md` if you consume it). The new content's frontmatter overwrites the old. + +5. If step 1 returns `404 SKILL_NOT_FOUND`, the registry instance you are pointing at has not published this skill yet. Keep operating on the version you have. 
The Ornn API is backwards-compatible within `/api/v1`, so older manuals continue to produce valid calls — you will only miss capabilities introduced in newer versions. + +If the network is unreachable or the bearer token has expired, keep operating on the version you have. Do not retry-loop the update check; treat it as a once-per-session inquiry the user can re-trigger explicitly. + +--- + +## §0.5 Tracking and re-checking installed Ornn skills + +### The persistent registry: `~/.ornn/installed-skills.json` + +Every Ornn skill you install **must** be recorded in `~/.ornn/installed-skills.json`. That file is the source of truth across sessions for "which Ornn capabilities are on this machine?" — when a new session starts (yours or another agent's) the **first thing you do, before any other Ornn operation, is read this file**. + +The schema is a flat array: + +```json +[ + { + "name": "ornn-agent-manual-http", + "ornnGuid": "1d9bfda2-dea8-4032-85bd-b0cbe1621684", + "installedVersion": "1.0", + "installedAt": "2026-04-29T17:27:55Z", + "localPath": "~/.claude/skills/ornn-agent-manual-http/" + } +] +``` + +Required fields: `name`, `ornnGuid`, `installedVersion`. Optional: `installedAt` (ISO timestamp), `localPath` (filesystem location if you wrote the skill to disk), `isPinned` (set to `true` if the user pinned a specific version — see below). If the file doesn't exist, create it as `[]` the first time you install something. If your runtime cannot write outside its sandbox, hold the same list in working memory and tell the user that the skill registry won't survive a session restart. 
+ +### When to update the registry + +| Event | What to write | +|---|---| +| Installed a new skill | Append a new record | +| Updated an installed skill to a new version | Bump `installedVersion` + `installedAt` | +| Removed / uninstalled a skill | Remove the record | +| User pinned a version | Set `isPinned: true` so future sessions don't auto-prompt to update | + +### Re-checking before each execution + +**Before you actually execute an installed Ornn skill** on the user's task, check whether a newer version exists. One API call: + +```bash +curl -H "Authorization: Bearer $TOKEN" \ + "https://ornn.chrono-ai.fun/api/v1/skills/<idOrName>/versions" +``` + +For public skills you can drop the `Authorization` header and call the same URL anonymously — see §2.1 step 3 for fetch alternatives. + +The response is `{ items: [{ version, skillHash, createdOn, isDeprecated, deprecationNote, releaseNotes, ... }, ...] }` sorted newest-first. Compare `items[0].version` to the `installedVersion` on the matching record in `~/.ornn/installed-skills.json` and act: + +- **Same version** → execute as-is. +- **Newer version available** → tell the user `"Skill <name> has a newer version <latestVersion> (you have <installedVersion>). Release notes: <releaseNotes>. Update? (y/n)"`. If yes, re-fetch the package (§2.1 step 3), overwrite the local copy, update `installedVersion` + `installedAt` in `~/.ornn/installed-skills.json`, then execute. +- **Your installed version is `isDeprecated: true`** → warn with the `deprecationNote` and recommend updating before executing. +- **Skill 404s** → the skill was deleted or hidden from you. Tell the user; if they agree, remove the record from `~/.ornn/installed-skills.json`. Otherwise leave the record (with a note) so the local copy is still usable. + +Skip the version check only when the matching record carries `isPinned: true` — the user has explicitly locked that skill to a specific version for reproducibility. 
+ +### Audit-risk fan-out + +If the skill is tied to a NyxID admin service (a "system skill" — `isSystemSkill: true`), the audit pipeline can also notify you mid-session via `GET /api/v1/notifications` (§2.13). Treat any `audit.risky_for_consumer` notification as a hard signal to stop, surface it to the user, and ask before continuing. + +--- + +## §1. Prerequisites + +Every API call in this manual is executed via direct HTTPS, authenticated with a **NyxID bearer token** that you pass in the `Authorization: Bearer …` header. The base URL `https://ornn.chrono-ai.fun/api` is fronted by an nginx instance that routes every `/api/*` request through to the NyxID proxy, which validates your token, decodes the identity, and forwards the request to `ornn-api`. You never call NyxID directly. + +### 1.1 Get a NyxID bearer token + +You need a valid bearer token from NyxID. Three paths to mint one — pick whichever the user's environment supports. **All involve user interaction** (entering credentials, approving scopes, possibly clicking a verification link), so you cannot complete this step entirely on your own. None of these affect how you call Ornn afterward — they only produce a `$TOKEN` value that you pass to `Authorization: Bearer …` in every subsequent HTTPS call. + +#### Option A — Mint via the `nyxid` binary (NyxID's auth client) + +Ask the user to run: + +```bash +nyxid login +``` + +This opens a browser for the OAuth authorization-code flow. Wait for it to report success. The access token is then on disk: + +```bash +cat ~/.nyxid/access_token +``` + +Save that value as `$TOKEN` and use it for every API call below. + +#### Option B — OAuth flow against NyxID's IdP directly + +If `nyxid` is unavailable, run the OAuth authorization-code flow against NyxID directly (consult NyxID's own docs for the exact `/oauth/authorize` + `/oauth/token` endpoints for your deployment). 
The user must complete the consent step in a browser; once you have the resulting `access_token`, use it as `$TOKEN`. Headless agents typically cannot drive this end-to-end alone. + +#### Option C — Plainly ask the user + +If neither A nor B fits, just ask: *"Please paste a NyxID bearer token. You can get one by running `nyxid login` and reading `~/.nyxid/access_token`, or your NyxID admin can mint one for you."* Save the value as `$TOKEN`. + +### 1.2 Verify the token works + +```bash +curl -H "Authorization: Bearer $TOKEN" \ + "https://ornn.chrono-ai.fun/api/v1/me" +``` + +Expected response (HTTP 200): + +```jsonc +{ + "data": { + "userId": "user_…", + "email": "…", + "displayName": "…", + "roles": ["ornn-user"], + "permissions": ["ornn:skill:read", "ornn:skill:create", "…"] + }, + "error": null +} +``` + +If you get `401 AUTH_MISSING` (or `401 invalid_token`), the bearer is bad or expired — go back to §1.1 and re-mint. If you get a network error, the user's machine cannot reach `https://ornn.chrono-ai.fun` — confirm the endpoint URL with the user (in some deployments it's a different domain) and stop. + +### 1.3 Confirm required permissions + +The `permissions` array on the §1.2 response tells you exactly what the token is authorized for. 
Cross-check against the actions the user is asking you to perform: + +| Action | Required permission | +|---|---| +| Pull a skill's full content (`GET /skills/:idOrName/json`) | `ornn:skill:read` | +| Validate a skill ZIP locally (`POST /skill-format/validate`) | `ornn:skill:read` | +| Upload a new skill (`POST /skills`) or import from GitHub (`POST /skills/pull`) | `ornn:skill:create` | +| Publish a new version (`PUT /skills/:id`), refresh from source, change permissions, toggle deprecation, bind to a NyxID service | `ornn:skill:update` (+ skill author or platform admin) | +| Delete a skill or a single version | `ornn:skill:delete` (+ skill author or platform admin) | +| Generate a skill with AI (`POST /skills/generate*`) | `ornn:skill:build` | +| Use the Playground (`POST /playground/chat`) | `ornn:playground:use` | +| Trigger an audit (`POST /skills/:idOrName/audit`) | none (owner or `ornn:admin:skill`) | +| Admin operations (`/admin/*`, force-audit, platform settings) | `ornn:admin:skill` | +| Manage categories (`/admin/categories/*`) | `ornn:admin:category` | + +Most read operations — browsing public skills, version listings, skill format rules, audit verdicts on visible skills, notifications — **need no scalar permission**; they're open to any authenticated caller (and some are anonymous, in which case `$TOKEN` can be omitted entirely). The exact gates for every endpoint live in `references/api-reference.md`. + +If a required permission is missing, ask the user's NyxID admin to grant the corresponding role (typically `ornn-user`). Without it, the relevant call returns `403 FORBIDDEN` with `Missing permission: ` in the message. + +--- + +## §2. Core Workflows + +You (the agent) are **highly encouraged to connect with Ornn for any skill-related operations**, including but not limited to the thirteen use cases below. Each one is a recipe — read the matching subsection top-to-bottom and execute the API calls in order. 
The per-endpoint contract (request / response shapes, every error code, full auth + authorization rules) lives in `references/api-reference.md` — pull it into context whenever you need the underlying API specifics. + +> Reminder: every command below uses `https://ornn.chrono-ai.fun/api/v1/...` as the base URL and `$TOKEN` as the NyxID bearer token (see §1.1). Public endpoints can drop the `Authorization` header entirely. + +### 2.1 Performing a task — find or build the right skill — *spec: `api-reference.md` §3 Skills CRUD, §5 Skill search, §6 Skill format, §7 Skill generation, §8 Playground* + +This is the master loop. Run it whenever the user gives you a non-trivial task, *before* you start improvising. + +**Step 1 — Check `~/.ornn/installed-skills.json` first.** Read the file. For every record, look at the local `SKILL.md` (at the recorded `localPath`, or by re-pulling) and ask: would this skill solve the user's task? If yes, jump to step 4. If no skills are installed, or none match, continue to step 2. + +**Step 2 — Search Ornn.** Try both keyword and semantic modes with the broadest possible scope (`mixed` covers public + your private + shared-with-you in one call): + +```bash +# Keyword search +curl -H "Authorization: Bearer $TOKEN" \ + "https://ornn.chrono-ai.fun/api/v1/skill-search?query=&mode=keyword&scope=mixed&pageSize=20" + +# Semantic search (natural language) +curl -H "Authorization: Bearer $TOKEN" \ + "https://ornn.chrono-ai.fun/api/v1/skill-search?query=&mode=semantic&scope=mixed&pageSize=20" + +# System skills only — admin-bound, platform-wide. Add to either search above. +curl -H "Authorization: Bearer $TOKEN" \ + "https://ornn.chrono-ai.fun/api/v1/skill-search?systemFilter=only&scope=public&pageSize=20" +``` + +**Try up to 5 different queries** before concluding no skill exists. Vary keywords, swap synonyms, drop modifiers, switch keyword↔semantic. The response is `{ items: [{ guid, name, description, ... }, ...] 
}` — read each candidate's `description` to judge fit. + +**Step 3 — Pull the skill.** Use the `/json` endpoint so you get every file inline: + +```bash +curl -H "Authorization: Bearer $TOKEN" \ + "https://ornn.chrono-ai.fun/api/v1/skills//json" +``` + +The response is `{ data: { name, description, metadata, files: { "SKILL.md": "...", "scripts/...": "..." } } }`. Write each `files[path]` entry to your runtime's local skills directory (e.g. `~/.claude/skills//`), preserving directory structure. Then **append a record to `~/.ornn/installed-skills.json`** with `{ name, ornnGuid, installedVersion, installedAt, localPath }` — see §0.5 for the schema. + +**Step 4 — Load the SKILL.md into context and execute.** Read the SKILL.md you just installed and follow its instructions. For runtime-based / mixed skills, run the scripts under `scripts/` locally as directed; or send them to Ornn's playground for sandboxed execution via `POST /api/v1/playground/chat` (SSE; see `references/api-reference.md` § "Playground" for the event shapes). + +**Step 5 — If steps 2–3 yielded nothing after 5 search attempts**, you may decide your own way to perform the task. **And if the task is definitive and potentially repeatable, build a skill and upload it back to Ornn so future you (or other agents) can find it.** Build flow: + +1. *(Optional)* **Bootstrap with AI generation** — Ornn's LLM can scaffold a skill from a prompt, source code, or an OpenAPI spec via `POST /api/v1/skills/generate*` (SSE). Useful when you need a starter; the generated skill still needs validation + your edits. + +2. **Read the skill format spec** so you write a valid one: + + ```bash + curl -H "Authorization: Bearer $TOKEN" \ + "https://ornn.chrono-ai.fun/api/v1/skill-format/rules" + ``` + + The response is `{ data: { rules: "" } }` — read the markdown carefully; it specifies the package layout, required `SKILL.md` frontmatter fields, naming rules, etc. + +3. 
**Write your skill.** Author `SKILL.md` + any `scripts/`, `references/`, `assets/` the task needs. + +4. **Validate before uploading.** ZIP the package (single root folder named after the skill) and call: + + ```bash + curl -X POST \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/zip" \ + --data-binary @my-skill.zip \ + "https://ornn.chrono-ai.fun/api/v1/skill-format/validate" + ``` + + The response is `{ data: { valid: true } }` on pass, or `{ data: { valid: false, violations: [{ rule, message }, ...] } }` on fail. **If validation fails, fix the violations and call validate again — loop until it passes.** + +5. **Upload.** + + ```bash + curl -X POST \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/zip" \ + --data-binary @my-skill.zip \ + "https://ornn.chrono-ai.fun/api/v1/skills" + ``` + + On success the response is `{ data: { guid, name, isPrivate: true, ... }, error: null }`. **Note: the new skill is private by default** — see §2.2 if you want to share it. + +6. **Install it locally** (because it's now an Ornn skill, the same rules apply): write the same files to your local skills dir + append to `~/.ornn/installed-skills.json` with the GUID returned in step 5. + +7. **Now execute the skill on the original task** — same as step 4 above. + +### 2.2 Update a skill's visibility — *spec: `api-reference.md` §3 Skills CRUD* + +Ornn has three visibility tiers: + +- **Public** — every Ornn user can see + pull this skill. +- **Limited access** — only specific orgs (every member of those orgs) and / or specific users can see + pull. Pick orgs only, users only, or both. +- **Private** — only you (and platform admins) can see + pull. **New skills land here by default.** + +**Step 1 — Check the current visibility.** + +```bash +curl -H "Authorization: Bearer $TOKEN" \ + "https://ornn.chrono-ai.fun/api/v1/skills/" +``` + +If `data.isPrivate: false` → currently public. 
If `isPrivate: true` and either share-list (`sharedWithUsers` / `sharedWithOrgs`) is non-empty → limited. If `isPrivate: true` and both lists empty → private. + +**Step 2 — Decide the target tier.** Confirm with the user if it's not obvious from their request. + +**Step 3a — Set to public.** + +```bash +curl -X PUT \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"isPrivate":false,"sharedWithUsers":[],"sharedWithOrgs":[]}' \ + "https://ornn.chrono-ai.fun/api/v1/skills//permissions" +``` + +**Step 3b — Set to limited access.** First fetch the candidate orgs and users: + +```bash +# Orgs the caller belongs to +curl -H "Authorization: Bearer $TOKEN" \ + "https://ornn.chrono-ai.fun/api/v1/me/orgs" + +# Users searchable by email prefix (typeahead) +curl -H "Authorization: Bearer $TOKEN" \ + "https://ornn.chrono-ai.fun/api/v1/users/search?q=&limit=20" + +# Resolve known user_ids to email + display name +curl -H "Authorization: Bearer $TOKEN" \ + "https://ornn.chrono-ai.fun/api/v1/users/resolve?ids=," +``` + +Pick which orgs / users to share with. **If unclear, confirm with the user** — never grant access to anyone the user didn't name. Then save: + +```bash +curl -X PUT \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"isPrivate":true,"sharedWithUsers":["user_abc"],"sharedWithOrgs":["org_xyz"]}' \ + "https://ornn.chrono-ai.fun/api/v1/skills//permissions" +``` + +**Step 3c — Set to private.** + +```bash +curl -X PUT \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d + +--- + +## API Conventions + +# ornn API & Architecture Conventions + +The contract every `/api/v1/*` endpoint and every `ornn-api` module must follow. All future endpoints and modules MUST conform. Changes that violate a convention are blocked at review. + +This document is normative. It is the authoritative source for decisions that would otherwise be re-litigated per PR. 
When in doubt, this file wins. + +--- + +## Table of Contents + +1. [Response & error format](#1-response--error-format) +2. [URL structure](#2-url-structure) +3. [HTTP semantics](#3-http-semantics) +4. [Query parameters](#4-query-parameters) +5. [Authentication & authorization](#5-authentication--authorization) +6. [SSE streaming](#6-sse-streaming) +7. [Deprecation](#7-deprecation) +8. [Caching](#8-caching) +9. [Observability headers](#9-observability-headers) +10. [OpenAPI](#10-openapi) +11. [Architecture conventions](#11-architecture-conventions) +12. [Every new `/v1/` endpoint checklist](#12-every-new-v1-endpoint-checklist) + +--- + +## 1. Response & error format + +### 1.1 Success — single resource + +Return the resource directly. No envelope. + +```http +GET /v1/skills/abc +200 OK +Content-Type: application/json + +{ + "id": "abc", + "name": "pdf-extract", + "createdOn": "2026-04-22T10:00:00Z", + ... +} +``` + +### 1.2 Success — collection + +Wrap in `{ items, meta }`: + +```http +GET /v1/skills?q=pdf&limit=20 +200 OK +Content-Type: application/json + +{ + "items": [ { "id": "abc", ... }, { "id": "def", ... } ], + "meta": { "nextCursor": "eyJpZCI6...", "hasMore": true, "limit": 20 } +} +``` + +`meta` MUST contain `limit` and `hasMore`. When `hasMore === true`, `nextCursor` MUST be a non-empty opaque string. When `hasMore === false`, `nextCursor` MAY be omitted. Endpoint-specific metadata (e.g. `searchMode`) lives alongside pagination fields in `meta`. + +### 1.3 Errors — RFC 7807 `application/problem+json` + +```http +POST /v1/skills/abc/permissions +400 Bad Request +Content-Type: application/problem+json +X-Request-ID: req_01HXYZ... 
+ +{ + "type": "https://github.com/ChronoAIProject/Ornn/blob/main/docs/ERRORS.md#validation_error", + "title": "Validation failed", + "status": 400, + "detail": "Request body failed validation", + "instance": "/v1/skills/abc/permissions", + "requestId": "req_01HXYZ...", + "errors": [ + { "path": "sharedWithUsers[3]", "code": "invalid_user_id", "message": "..." } + ] +} +``` + +Required fields: `type`, `title`, `status`, `instance`, `requestId`. +Optional: `detail`, `errors[]`. + +### 1.4 Error code catalog (lowercase snake_case) + +| Code | HTTP | Meaning | +|---|---|---| +| `validation_error` | 400 | Body / query / path param validation failed — details in `errors[]` | +| `invalid_zip` | 400 | Uploaded payload is not a parseable ZIP (malformed / unreadable) | +| `unsupported_media_type` | 415 | Request `Content-Type` not accepted | +| `payload_too_large` | 413 | Upload exceeds max size | +| `uncompressed_too_large` | 413 | Uncompressed size or compression ratio of skill ZIP exceeds caps (zip-bomb guard) | +| `too_many_files` | 413 | Skill ZIP entry count exceeds `MAX_PACKAGE_FILE_COUNT` | +| `authentication_required` | 401 | No valid identity | +| `permission_denied` | 403 | Authenticated but lacks required permission | +| `resource_not_found` | 404 | Target resource does not exist or not visible to caller | +| `resource_conflict` | 409 | State conflict (duplicate, concurrent modification, etc.) | +| `rate_limited` | 429 | Caller exceeded rate limit | +| `upstream_unavailable` | 502 / 503 | Dependency (NyxID, LLM, sandbox, ...) failed | +| `org_membership_unavailable` | 503 | NyxID org-membership lookup unresolved — forwarded token absent or lookup failed. Retryable | +| `internal_error` | 500 | Unhandled server error | + +New codes require convention-doc update. Handlers MUST NOT invent ad-hoc codes. + +### 1.5 `X-Request-ID` + +- Generated server-side on every request (or echoed if the client provided one). 
+- Returned as `X-Request-ID` header on **every** response (2xx, 4xx, 5xx). +- Also embedded as `requestId` in every error body. +- Logged with every request/response pair on the server. + +### 1.6 Error `type` URLs + +Point to GitHub markdown anchors in this repository: + +``` +https://github.com/ChronoAIProject/Ornn/blob/main/docs/ERRORS.md#<code> +``` + +The catalog lives in [`docs/ERRORS.md`](ERRORS.md) with `##` headings per code (GitHub auto-generates anchors). Zero infra cost; resolves day one. Future migration to a docs domain (`docs.ornn.xyz`) is a one-time redirect configuration; no client changes required. + +--- + +## 2. URL structure + +### 2.1 Versioning + +All endpoints live under `/api/v1/`. Breaking changes ship under `/api/v2/`. Additive changes ship under `v1`. + +### 2.2 Resource paths + +- Plural resource nouns: `/skills`, `/categories`, `/tags`, `/users`, `/activities`. +- Canonical URL uses the stable ID (GUID). **No polymorphic `:idOrName` on write operations.** +- Name→ID resolution via `GET /v1/{resource}/lookup?name=<name>` (returns `{ id }`). +- Caller-scoped resources under `/v1/me/*`. + +### 2.3 Non-CRUD actions — sub-resource + +Custom actions as sub-resource paths: + +``` +POST /v1/skills/generate +POST /v1/skills/generate/from-openapi +POST /v1/skills/validate +POST /v1/skills/search +POST /v1/playground/chat +``` + +Router config MUST declare static action segments with priority over `:id` params (Hono / Express / Rails default behavior). Skill / category names that collide with reserved action verbs are rejected at create time. + +Reserved action verbs per resource are documented in `ornn-api/src/shared/reservedVerbs.ts`. + +### 2.4 Search — dual-track + +- `GET /v1/{resource}?q=...&<filters>` — simple keyword filter over URL params (cacheable, bookmarkable). +- `POST /v1/{resource}/search` — complex queries with structured body (semantic mode, long queries, compound filters). + +Both return the same collection shape (`{ items, meta }`). + +--- + +## 3. 
HTTP semantics + +### 3.1 Methods + +| Method | Semantics | +|---|---| +| `GET` | Safe, idempotent read | +| `POST` | Create, or custom action | +| `PUT` | Full replace of a resource (idempotent) | +| `PATCH` | Partial update | +| `DELETE` | Remove (idempotent) | + +Partial updates MUST use `PATCH`. `PUT` MUST accept a complete representation. + +### 3.2 Status codes + +| Code | Use | +|---|---| +| `200 OK` | Successful read / update returning a body | +| `201 Created` | Successful create. MUST include `Location: /v1/{resource}/{id}` header | +| `202 Accepted` | Async job accepted (not currently used) | +| `204 No Content` | Successful delete, or update with no body to return | +| `400` | `validation_error` | +| `401` | `authentication_required` | +| `403` | `permission_denied` | +| `404` | `resource_not_found` | +| `409` | `resource_conflict` | +| `413` | `payload_too_large` | +| `415` | `unsupported_media_type` | +| `429` | `rate_limited` | +| `500` | `internal_error` | +| `502` / `503` | `upstream_unavailable` | + +### 3.3 Content negotiation + +When a resource has multiple representations, select via `Accept`: + +``` +GET /v1/skills/abc +Accept: application/json → JSON metadata + file contents +Accept: application/zip → raw ZIP package +``` + +Do not encode representation in the URL path (no `/skills/:id/json`). + +### 3.4 Idempotency + +`POST` creates accept optional `Idempotency-Key: ` header. Server persists the key + response for 24h and returns the cached response on retry. Implementation: middleware layer in `ornn-api/src/middleware/idempotency.ts`. + +### 3.5 Bulk operations + +Bulk-capable endpoints are symmetric: + +``` +POST /v1/{parent}/{id}/{child} { Ids: [...] } # add +DELETE /v1/{parent}/{id}/{child} { Ids: [...] } # remove (body) +``` + +Single-item convenience endpoints MAY exist alongside. + +--- + +## 4. Query parameters + +### 4.1 Naming + +- `camelCase` everywhere (matches JSON body convention). +- Search query param is `q` (never `query`). 
+- Booleans are `true` / `false` — omit for "any". + +### 4.2 Arrays — repeated keys + +``` +?sharedWithOrgs=a&sharedWithOrgs=b&sharedWithOrgs=c +``` + +Never CSV. Never bracket notation. Handler: `c.req.queries('sharedWithOrgs')` returns `string[]`. + +### 4.3 Pagination — cursor-only + +``` +?cursor=<opaque>&limit=<1-100> +``` + +- `cursor` is opaque (base64-encoded server-chosen payload). Clients MUST NOT parse. +- `limit` defaults per-endpoint (typically 20), max 100. +- Absence of `cursor` = first page. +- Response `meta.nextCursor` feeds the next request. +- **Total counts** are NOT part of pagination. Endpoints needing a count expose a sibling (e.g. `GET /v1/skills/counts`) or fold the count into list `meta`. + +### 4.4 Filters + +Endpoint-specific. Rules: + +- Orthogonal filters are separate params. Do NOT overload (avoid `scope=shared-with-me|mine|...`). +- Booleans instead of tri-state enums when possible. +- For `/v1/skills`: + - `visibility` — `public | private` (omit for "any" within caller's reach) + - `owner` — `me | others` (omit for "any") + - `sharedWith` — `me` (filters to skills shared with caller) + - `isSystem` — boolean (omit for "any") + +--- + +## 5. Authentication & authorization + +### 5.1 Transport + +- `Authorization: Bearer <token>` between client and the NyxID proxy. +- `X-NyxID-Identity-Token` and `X-NyxID-*` headers between proxy and `ornn-api` (internal). +- OpenAPI declares one `bearerAuth` scheme; `X-NyxID-*` is not part of the public contract. + +### 5.2 Permission strings + +Format: `ornn:<resource>:<action>`. + +Actions: `read`, `write`, `admin`, plus resource-specific high-cost actions when needed. 
+ +| Permission | Grants | +|---|---| +| `ornn:skill:read` | Read skills (respects visibility) | +| `ornn:skill:write` | Create, update, delete own skills | +| `ornn:skill:admin` | Manage any skill (override ownership); delete any skill | +| `ornn:skill:generate` | Invoke skill generation endpoints (high LLM cost) | +| `ornn:skill:execute` | Invoke playground chat (runs user code) | +| `ornn:category:read` | List categories | +| `ornn:category:admin` | Manage categories | +| `ornn:tag:read` | List tags | +| `ornn:tag:admin` | Manage tags | +| `ornn:user:admin` | User dashboard (list users, aggregate stats per user) | +| `ornn:activity:read` | Platform activity log read access | +| `ornn:stats:read` | Platform-wide dashboard aggregates | + +NyxID composes a **"Platform Admin"** role that grants all `*:admin` + `*:read` permissions above; current platform admins inherit this role with zero UX change. Sub-admin roles (content moderator, tag curator, support) can be + +--- + +## Design System (Overview) + +## Product Context +- **What this is:** A Skill-as-a-Service platform for discovering, installing, publishing, and operating AI agent skills through a web UI, docs, and API-adjacent tooling. +- **Who it is for:** Agent developers, platform builders, technical teams, and operators who expect tools to feel composed and credible rather than playful or trend-driven. +- **Scope of this document:** Whole app, landing-led. The landing page is the flagship expression, and app shell, registry, docs, admin, forms, and data views inherit the same language. +- **Canonical source of truth:** **This document is canonical.** It defines the intended state of the design system. 
Two reference builds are kept aligned with it for visual sanity-checking: + - `design-preview/Ornn-Landing-v3.html` (deployed at `chrono-ornn.surge.sh/Ornn-Landing-v3.html`) — standalone Forge Workshop reference + - The live ornn-web implementation (deployed at `chrono-ornn-web.surge.sh`) — production application +- **When this doc and an implementation disagree, the implementation is wrong.** Bring the implementation back into alignment, then re-verify the build. Do not silently update DESIGN.md to match drifted code; instead, propose the change explicitly (PR description: "DESIGN.md change + impl follows" or "DESIGN.md unchanged, impl regression fix"). This protects the system from lossy round-trips between code and doc. + +## Design Thesis +Ornn should feel like a registry, workshop, and publishing desk for skills. The product is not a generic SaaS dashboard and not a cyberpunk toy. Its visual language is a controlled blend of: + +- **Paper:** editorial warmth, legible reading surfaces, quiet hierarchy +- **Metal:** forged structure, thin separators, instrument-like controls +- **Ember:** selective heat, action emphasis, and directional energy + +The result should read as warm, tactile, precise, industrial, and composed. Interfaces should feel authored, not templated. From 77dc8f2702f0ecca7899d31eeee2ca1ccccaef59 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:09:26 +0800 Subject: [PATCH 015/110] feat(api): extend quota Surface with 'assistant' (reserve/charge only) (#970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Ornn Assistant is a billed LLM surface, so quota must reserve + charge against it. Widen the quota `Surface` type to include 'assistant' and resolve its default allotment from `assistant.defaultMonthlyQuota`. 
Kept surgical to avoid frontend/admin ripple: - `SURFACES` (admin-grant + redemption-code enums, quota-snapshot UI) stays ["playground","skillGen"]. The assistant isn't admin-grantable or redeemable in v1 — its allotment is the section default only. - Introduced `GrantableSurface = SURFACES[number]`; grant/bulkGrant + the redemption grant entry use it, so they stay assignable to the notification layer's narrow surface type. The QuotaSnapshot shape is unchanged. - `QuotaDefaults.defaultAssistantMonthly` is optional so existing resolver mocks keep compiling; the production resolver supplies it. Absent → 0 allotment (fail-closed) until wired. --- ornn-api/src/domains/quota/bootstrap.ts | 4 ++- ornn-api/src/domains/quota/service.ts | 31 +++++++++++++++---- ornn-api/src/domains/quota/types.ts | 20 +++++++++++- .../src/domains/redemption-codes/types.ts | 6 ++-- 4 files changed, 51 insertions(+), 10 deletions(-) diff --git a/ornn-api/src/domains/quota/bootstrap.ts b/ornn-api/src/domains/quota/bootstrap.ts index 2fe59b49..67ad860f 100644 --- a/ornn-api/src/domains/quota/bootstrap.ts +++ b/ornn-api/src/domains/quota/bootstrap.ts @@ -45,13 +45,15 @@ export function wireQuota(deps: { // inside QuotaService; this resolver just hands it the current // section values whenever it asks. 
getQuotaDefaults: async () => { - const [pg, sg] = await Promise.all([ + const [pg, sg, asst] = await Promise.all([ deps.settingsService.getPlayground(), deps.settingsService.getSkillGen(), + deps.settingsService.getAssistant(), ]); return { defaultPlaygroundMonthly: pg.defaultMonthlyQuota, defaultSkillGenMonthly: sg.defaultMonthlyQuota, + defaultAssistantMonthly: asst.defaultMonthlyQuota, }; }, }, diff --git a/ornn-api/src/domains/quota/service.ts b/ornn-api/src/domains/quota/service.ts index 498d8ffb..46a3944e 100644 --- a/ornn-api/src/domains/quota/service.ts +++ b/ornn-api/src/domains/quota/service.ts @@ -24,6 +24,7 @@ import type { QuotaRepository } from "./repository"; import { DEFAULT_WARNING_THRESHOLD, type ChargeOutcome, + type GrantableSurface, type QuotaBucketDoc, type QuotaDecision, type QuotaSnapshot, @@ -38,6 +39,14 @@ const logger = createLogger("quotaService"); export interface QuotaDefaults { defaultPlaygroundMonthly: number; defaultSkillGenMonthly: number; + /** + * Ornn Assistant monthly default (#970). Optional so existing + * `QuotaDefaultsResolver` mocks keep compiling; the production resolver + * always supplies it from `assistant.defaultMonthlyQuota`. When absent, + * the assistant surface resolves to a 0 allotment (fail-closed: every + * non-admin assistant call is denied until the default is wired). + */ + defaultAssistantMonthly?: number; } export interface QuotaDefaultsResolver { @@ -75,9 +84,14 @@ export class QuotaService { private async resolveDefault(surface: Surface): Promise { const def = await this.defaults.getQuotaDefaults(); - return surface === "playground" - ? def.defaultPlaygroundMonthly - : def.defaultSkillGenMonthly; + switch (surface) { + case "playground": + return def.defaultPlaygroundMonthly; + case "skillGen": + return def.defaultSkillGenMonthly; + case "assistant": + return def.defaultAssistantMonthly ?? 
0; + } } /** @@ -193,7 +207,7 @@ export class QuotaService { async grant(params: { admin: { userId: string; email: string; displayName: string }; targetUserId: string; - surface: Surface; + surface: GrantableSurface; amount: number; note?: string; now?: Date; @@ -264,7 +278,7 @@ export class QuotaService { async bulkGrant(params: { admin: { userId: string; email: string; displayName: string }; targetUserIds: readonly string[]; - surface: Surface; + surface: GrantableSurface; amount: number; note?: string; now?: Date; @@ -363,6 +377,11 @@ export class QuotaService { } function buildOverLimitMessage(surface: Surface): string { - const surfaceLabel = surface === "playground" ? "playground" : "skill-generation"; + const surfaceLabel = + surface === "playground" + ? "playground" + : surface === "skillGen" + ? "skill-generation" + : "assistant"; return `You've hit your monthly ${surfaceLabel} limit — contact admin for credits, or upgrade when paid plans launch.`; } diff --git a/ornn-api/src/domains/quota/types.ts b/ornn-api/src/domains/quota/types.ts index 23d3b939..ece3623d 100644 --- a/ornn-api/src/domains/quota/types.ts +++ b/ornn-api/src/domains/quota/types.ts @@ -11,10 +11,28 @@ * @module domains/quota/types */ -export type Surface = "playground" | "skillGen"; +export type Surface = "playground" | "skillGen" | "assistant"; +/** + * Admin-grantable / redeemable surfaces. The Ornn Assistant (#970) is a + * billed surface that reserves + charges like the others, but in v1 it is + * NOT admin-grantable and NOT redeemable — its allotment comes solely from + * the `assistant.defaultMonthlyQuota` section default. So `Surface` (the + * reserve/charge type) includes `assistant`, but this list — which drives + * the admin-grant + redemption-code surface enums and the quota snapshot + * UI — deliberately does not. Add `assistant` here only when those flows + * gain assistant support. 
+ */ export const SURFACES = ["playground", "skillGen"] as const; +/** + * Surfaces an admin can grant credits to / a redemption code can target. + * Narrower than {@link Surface}: the assistant surface (#970) reserves + + * charges but isn't grantable in v1, so grant/bulk-grant accept only this + * subset (and stay assignable to the notification layer's narrow type). + */ +export type GrantableSurface = (typeof SURFACES)[number]; + export const QUOTA_ADMIN_PERMISSION = "ornn:admin:skill" as const; /** diff --git a/ornn-api/src/domains/redemption-codes/types.ts b/ornn-api/src/domains/redemption-codes/types.ts index 75c8938b..6439e27a 100644 --- a/ornn-api/src/domains/redemption-codes/types.ts +++ b/ornn-api/src/domains/redemption-codes/types.ts @@ -15,7 +15,7 @@ */ import { z } from "zod"; -import { SURFACES, type Surface } from "../quota/types"; +import { SURFACES, type GrantableSurface } from "../quota/types"; /** * Length of the random portion of a redemption code. 16 chars over a @@ -41,7 +41,9 @@ export type RedemptionCodeStatus = "active" | "redeemed" | "invalidated"; * the redeem path can apply each grant independently. */ export interface RedemptionGrantEntry { - surface: Surface; + // Redemption codes target only admin-grantable surfaces (the assistant + // surface isn't redeemable in v1 — see quota/types `GrantableSurface`). 
+ surface: GrantableSurface; amount: number; } From f61f9468839f38d74aa52e96a01a56f9fe65be38 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:09:39 +0800 Subject: [PATCH 016/110] feat(api): assistant scoped retrieval + context assembly + chat service (#970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pure (HTTP-free) core of the Ornn Assistant — a non-agentic Q&A pipeline: - retrieval.ts: ScopedSkillRetriever runs visibility-scoped keyword search (scope "mixed") and enforces canReadSkill again at the projection layer, emitting RetrievedSkill — a SAFE projection (name, description, tags, category, createdOn, createdBy person-id). Listing fields explicitly (never spreading the doc) is the data-safety boundary: no createdByEmail/DisplayName, storageKey, skillHash, sharedWith* or any PII can leak. - contextAssembler.ts: ASSISTANT_SYSTEM_PROMPT (grounded-Q&A persona, no fabrication, no cross-user data) + curated KB + scoped skills, all in one leading developer message; conversation turns follow verbatim. - chatService.ts: emits chat_start → text deltas → chat_finish for ONE streamed completion with NO tools (the structural guarantee that the assistant can never trigger an execution loop). Retrieval failure is non-fatal (KB-only still answers); stream error → chat_error with a catalog code; best-effort usage on finish. Tests pin the data-safety projection (incl. a projection-layer drop of an unreadable skill), the no-tools guarantee, fail-soft retrieval, and the event sequence. 
--- .../src/domains/assistant/chatService.test.ts | 203 ++++++++++++++++++ ornn-api/src/domains/assistant/chatService.ts | 197 +++++++++++++++++ .../assistant/contextAssembler.test.ts | 94 ++++++++ .../src/domains/assistant/contextAssembler.ts | 87 ++++++++ .../src/domains/assistant/retrieval.test.ts | 168 +++++++++++++++ ornn-api/src/domains/assistant/retrieval.ts | 133 ++++++++++++ ornn-api/src/domains/assistant/types.ts | 69 ++++++ 7 files changed, 951 insertions(+) create mode 100644 ornn-api/src/domains/assistant/chatService.test.ts create mode 100644 ornn-api/src/domains/assistant/chatService.ts create mode 100644 ornn-api/src/domains/assistant/contextAssembler.test.ts create mode 100644 ornn-api/src/domains/assistant/contextAssembler.ts create mode 100644 ornn-api/src/domains/assistant/retrieval.test.ts create mode 100644 ornn-api/src/domains/assistant/retrieval.ts create mode 100644 ornn-api/src/domains/assistant/types.ts diff --git a/ornn-api/src/domains/assistant/chatService.test.ts b/ornn-api/src/domains/assistant/chatService.test.ts new file mode 100644 index 00000000..43aa16d6 --- /dev/null +++ b/ornn-api/src/domains/assistant/chatService.test.ts @@ -0,0 +1,203 @@ +/** + * UT-ASST-CHAT-* — AssistantChatService (#970). + * + * Verifies the wire-contract event sequence, the structural "no tools" + * guarantee (pure Q&A), fail-soft retrieval, error mapping, and usage. 
+ * + * @module domains/assistant/chatService.test + */ + +import { describe, expect, it } from "bun:test"; +import type { + NyxLlmStreamParams, + ResponsesApiStreamEvent, +} from "../../clients/nyxid/llm"; +import type { ActorContext } from "../skills/crud/authorize"; +import { AssistantChatService, latestUserMessage } from "./chatService"; +import type { AssistantChatEvent, RetrievedSkill } from "./types"; + +const ACTOR: ActorContext = { + userId: "u-1", + memberships: [], + isPlatformAdmin: false, + membershipsResolved: true, +}; + +const DEFAULTS = { model: "default-model", maxOutputTokens: 4096, temperature: 0.4 }; + +class FakeLlm { + lastParams: NyxLlmStreamParams | null = null; + events: ResponsesApiStreamEvent[] = [ + { type: "response.output_text.delta", delta: "Hello" }, + { type: "response.output_text.delta", delta: " world" }, + ]; + throwError: Error | null = null; + async *stream(params: NyxLlmStreamParams): AsyncIterable { + this.lastParams = params; + if (this.throwError) throw this.throwError; + for (const e of this.events) yield e; + } +} + +function fakeKb(text = "Ornn KB.") { + return { load: () => ({ text, estimatedTokens: 2, budgetTokens: 100, truncated: false }) }; +} + +function fakeRetriever(skills: RetrievedSkill[] = [], err?: Error) { + return { + retrieve: async () => { + if (err) throw err; + return skills; + }, + }; +} + +function makeService(llm: FakeLlm, retriever = fakeRetriever()) { + return new AssistantChatService({ + llmClient: llm, + kbLoader: fakeKb(), + retriever, + defaultsResolver: async () => DEFAULTS, + }); +} + +async function collect( + gen: AsyncGenerator, +): Promise { + const out: AssistantChatEvent[] = []; + for await (const e of gen) out.push(e); + return out; +} + +describe("latestUserMessage", () => { + it("UT-ASST-CHAT-000: returns the last user turn", () => { + expect( + latestUserMessage([ + { role: "user", content: "first" }, + { role: "assistant", content: "reply" }, + { role: "user", content: "second" }, 
+ ]), + ).toBe("second"); + expect(latestUserMessage([{ role: "assistant", content: "x" }])).toBe(""); + }); +}); + +describe("AssistantChatService", () => { + it("UT-ASST-CHAT-001: emits chat_start → deltas → chat_finish", async () => { + const llm = new FakeLlm(); + const svc = makeService(llm); + const events = await collect( + svc.chat(ACTOR, { messages: [{ role: "user", content: "hi" }] }, undefined, { + modelId: "m-explicit", + }), + ); + expect(events[0]).toEqual({ type: "chat_start", model: "m-explicit" }); + expect(events.filter((e) => e.type === "chat_text_delta")).toEqual([ + { type: "chat_text_delta", delta: "Hello" }, + { type: "chat_text_delta", delta: " world" }, + ]); + expect(events[events.length - 1]!.type).toBe("chat_finish"); + }); + + it("UT-ASST-CHAT-002: NEVER passes tools to the LLM (pure Q&A, no agentic loop)", async () => { + const llm = new FakeLlm(); + await collect( + makeService(llm).chat( + ACTOR, + { messages: [{ role: "user", content: "hi" }] }, + undefined, + { modelId: "m" }, + ), + ); + expect(llm.lastParams?.tools).toBeUndefined(); + // Grounding developer message is injected first. 
+ expect(llm.lastParams?.input[0]?.role).toBe("developer"); + }); + + it("UT-ASST-CHAT-003: falls back to surface default model when modelId blank", async () => { + const llm = new FakeLlm(); + const events = await collect( + makeService(llm).chat( + ACTOR, + { messages: [{ role: "user", content: "hi" }] }, + undefined, + { modelId: "" }, + ), + ); + expect(events[0]).toEqual({ type: "chat_start", model: "default-model" }); + }); + + it("UT-ASST-CHAT-004: retrieval failure is non-fatal — still answers KB-only", async () => { + const llm = new FakeLlm(); + const svc = makeService(llm, fakeRetriever([], new Error("mongo down"))); + const events = await collect( + svc.chat(ACTOR, { messages: [{ role: "user", content: "hi" }] }, undefined, { + modelId: "m", + }), + ); + expect(events.some((e) => e.type === "chat_text_delta")).toBe(true); + expect(events[events.length - 1]!.type).toBe("chat_finish"); + }); + + it("UT-ASST-CHAT-005: stream error → chat_error with a catalog code, no finish", async () => { + const llm = new FakeLlm(); + llm.throwError = new Error("LLM Gateway error (502): upstream down"); + const events = await collect( + makeService(llm).chat( + ACTOR, + { messages: [{ role: "user", content: "hi" }] }, + undefined, + { modelId: "m" }, + ), + ); + const err = events.find((e) => e.type === "chat_error"); + expect(err).toBeDefined(); + if (err && err.type === "chat_error") { + expect(err.code).toBe("upstream_unavailable"); + expect(err.message).toContain("502"); + } + expect(events.some((e) => e.type === "chat_finish")).toBe(false); + }); + + it("UT-ASST-CHAT-006: reports usage on chat_finish when provider supplies it", async () => { + const llm = new FakeLlm(); + llm.events = [ + { type: "response.output_text.delta", delta: "hi" }, + { + type: "response.completed", + response: { usage: { input_tokens: 12, output_tokens: 7, total_tokens: 19 } }, + }, + ]; + const events = await collect( + makeService(llm).chat( + ACTOR, + { messages: [{ role: "user", 
content: "hi" }] }, + undefined, + { modelId: "m" }, + ), + ); + const finish = events.find((e) => e.type === "chat_finish"); + expect(finish).toEqual({ + type: "chat_finish", + usage: { inputTokens: 12, outputTokens: 7, totalTokens: 19 }, + }); + }); + + it("UT-ASST-CHAT-007: aborted signal stops the stream (no finish)", async () => { + const llm = new FakeLlm(); + const ac = new AbortController(); + ac.abort(); + const events = await collect( + makeService(llm).chat( + ACTOR, + { messages: [{ role: "user", content: "hi" }] }, + ac.signal, + { modelId: "m" }, + ), + ); + // chat_start always emits; the loop bails on the first aborted check. + expect(events[0]!.type).toBe("chat_start"); + expect(events.some((e) => e.type === "chat_finish")).toBe(false); + expect(events.some((e) => e.type === "chat_text_delta")).toBe(false); + }); +}); diff --git a/ornn-api/src/domains/assistant/chatService.ts b/ornn-api/src/domains/assistant/chatService.ts new file mode 100644 index 00000000..5ddcef99 --- /dev/null +++ b/ornn-api/src/domains/assistant/chatService.ts @@ -0,0 +1,197 @@ +/** + * Ornn Assistant chat service (#970) — pure Q&A, ONE streamed completion. + * + * Per request: + * 1. emit `chat_start`, + * 2. run a deterministic, visibility-scoped skill retrieval on the + * latest user message (failures are non-fatal — KB-only still + * answers), + * 3. assemble grounding (curated KB + scoped skills) + the conversation, + * 4. stream ONE completion via `NyxLlmClient.stream` (NO tools, NO + * agentic loop), mapping text deltas → `chat_text_delta`, + * 5. emit `chat_finish` (with usage when the provider reports it) or + * `chat_error` on failure. + * + * This service yields the WIRE-CONTRACT events directly; the route only + * serializes them to SSE frames and reconciles quota. Keeping the mapping + * here (not the route) makes the event sequence unit-testable without HTTP. 
/**
 * @module domains/assistant/chatService
 */

import { createLogger } from "../../shared/logger";
import type {
  NyxLlmClient,
  ResponsesApiStreamEvent,
} from "../../clients/nyxid/llm";
import type { ActorContext } from "../skills/crud/authorize";
import type { AssistantKbLoader } from "./kb/loader";
import type { ScopedSkillRetriever } from "./retrieval";
import { assembleAssistantInput } from "./contextAssembler";
import type {
  AssistantChatEvent,
  AssistantChatRequest,
  AssistantUsage,
  RetrievedSkill,
} from "./types";

const logger = createLogger("assistantChatService");

/** Per-request model + sampling snapshot resolved from settings. */
export interface AssistantChatDefaults {
  readonly model: string;
  readonly maxOutputTokens: number;
  readonly temperature: number;
}

/**
 * Collaborators injected into {@link AssistantChatService}. Each one is
 * narrowed to the single method the service calls (`stream`, `load`,
 * `retrieve`) so tests can pass minimal fakes.
 */
export interface AssistantChatServiceDeps {
  readonly llmClient: Pick<NyxLlmClient, "stream">;
  readonly kbLoader: Pick<AssistantKbLoader, "load">;
  readonly retriever: Pick<ScopedSkillRetriever, "retrieve">;
  readonly defaultsResolver: () => Promise<AssistantChatDefaults>;
}

/**
 * Pure Q&A chat service: one retrieval, one streamed completion, no tools.
 * Yields the wire-contract events (`chat_start`, `chat_text_delta`,
 * `chat_error`, `chat_finish`) directly.
 */
export class AssistantChatService {
  private readonly llmClient: Pick<NyxLlmClient, "stream">;
  private readonly kbLoader: Pick<AssistantKbLoader, "load">;
  private readonly retriever: Pick<ScopedSkillRetriever, "retrieve">;
  private readonly defaultsResolver: () => Promise<AssistantChatDefaults>;

  constructor(deps: AssistantChatServiceDeps) {
    this.llmClient = deps.llmClient;
    this.kbLoader = deps.kbLoader;
    this.retriever = deps.retriever;
    this.defaultsResolver = deps.defaultsResolver;
  }

  /**
   * Stream one grounded completion for the conversation in `request`.
   *
   * Event sequence: `chat_start` first, then zero or more
   * `chat_text_delta`, then exactly one of `chat_finish` (success) or
   * `chat_error` (stream failure). When the client aborts, the generator
   * returns after `chat_start` / the deltas already sent, with no
   * terminal event.
   *
   * @param actor - Caller identity used to scope skill retrieval.
   * @param request - Conversation turns to answer.
   * @param abortSignal - Optional client-disconnect signal.
   * @param options - `modelId` to use; a blank id falls back to the
   *   resolved surface default.
   */
  async *chat(
    actor: ActorContext,
    request: AssistantChatRequest,
    abortSignal: AbortSignal | undefined,
    options: { modelId: string },
  ): AsyncGenerator<AssistantChatEvent> {
    const defaults = await this.defaultsResolver();
    // `||` (not `??`) on purpose: an empty-string modelId must also fall
    // back to the surface default.
    const model = options.modelId || defaults.model;
    yield { type: "chat_start", model };

    // Visibility-scoped retrieval on the latest user message. Non-fatal:
    // a retrieval failure must not deny the user a KB-grounded answer.
    const query = latestUserMessage(request.messages);
    let skills: RetrievedSkill[] = [];
    try {
      skills = await this.retriever.retrieve(query, actor);
    } catch (err) {
      // Narrow instead of asserting: a non-Error throw (even `null`) must
      // not blow up inside the handler and defeat the non-fatal guarantee.
      const reason = err instanceof Error ? err.message : String(err);
      logger.warn(
        { actor: actor.userId, err: reason },
        "assistant skill retrieval failed — proceeding KB-only",
      );
    }

    const kb = this.kbLoader.load();
    const { input } = assembleAssistantInput({
      kbText: kb.text,
      skills,
      messages: request.messages,
    });

    logger.info(
      {
        actor: actor.userId,
        model,
        turns: request.messages.length,
        retrievedSkills: skills.length,
        kbTokens: kb.estimatedTokens,
      },
      "assistant chat starting",
    );

    // If the client is already gone, do not open an upstream completion
    // at all. The in-loop check below only fires once the first event has
    // arrived.
    if (abortSignal?.aborted) {
      logger.info({ actor: actor.userId }, "assistant stream aborted by client");
      return;
    }

    let usage: AssistantUsage | undefined;
    try {
      const stream = this.llmClient.stream({
        model,
        input,
        max_output_tokens: defaults.maxOutputTokens,
        temperature: defaults.temperature,
        // NO tools — pure Q&A. This is the structural guarantee that the
        // assistant can never trigger an agentic tool/execution loop.
      });

      for await (const event of stream) {
        if (abortSignal?.aborted) {
          logger.info({ actor: actor.userId }, "assistant stream aborted by client");
          return;
        }
        const delta = extractTextDelta(event);
        if (delta) {
          yield { type: "chat_text_delta", delta };
          continue;
        }
        const reported = extractUsage(event);
        if (reported) usage = reported;
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : "Assistant stream failed";
      logger.error({ actor: actor.userId, err: message }, "assistant stream error");
      yield { type: "chat_error", code: mapErrorCode(message), message };
      return;
    }

    // Attach `usage` only when the provider reported it.
    yield { type: "chat_finish", ...(usage ? { usage } : {}) };
  }
}

// Latest user-authored message content (empty string if none).
/**
 * Latest user-authored message content.
 *
 * Scans the conversation from the end and returns the `content` of the
 * first turn whose role is `"user"`.
 *
 * @param messages - Conversation turns, oldest first.
 * @returns The most recent user turn's content, or `""` when there is none.
 */
export function latestUserMessage(
  messages: ReadonlyArray<{ role: string; content: string }>,
): string {
  for (let i = messages.length - 1; i >= 0; i--) {
    const turn = messages[i];
    if (turn?.role === "user") return turn.content;
  }
  return "";
}

/**
 * Extract incremental text from a Responses-API stream event, handling
 * both the direct `output_text.delta` and the `content_part.delta`
 * variants. Returns null for non-text events.
 */
function extractTextDelta(event: ResponsesApiStreamEvent): string | null {
  if (event.type === "response.output_text.delta") {
    return typeof event.delta === "string" ? event.delta : null;
  }
  if (event.type === "response.content_part.delta") {
    const delta = event.delta as { type?: string; text?: string } | undefined;
    if (delta?.type === "output_text" && typeof delta.text === "string") {
      return delta.text;
    }
  }
  return null;
}

/**
 * Best-effort token-usage extraction from the terminal `response.completed`
 * event (Responses-API format only — the chat-completion normalizer drops
 * usage, so `chat_finish` simply omits it there).
 *
 * Only finite numeric counts are copied: NaN / Infinity would serialize to
 * `null` in the JSON frame. Returns null when the event is not
 * `response.completed` or carries no usable count.
 */
function extractUsage(event: ResponsesApiStreamEvent): AssistantUsage | null {
  if (event.type !== "response.completed") return null;
  const response = event.response as { usage?: Record<string, unknown> } | undefined;
  const usage = response?.usage;
  if (!usage || typeof usage !== "object") return null;

  const {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    total_tokens: totalTokens,
  } = usage;
  const out: { inputTokens?: number; outputTokens?: number; totalTokens?: number } = {};
  if (typeof inputTokens === "number" && Number.isFinite(inputTokens)) {
    out.inputTokens = inputTokens;
  }
  if (typeof outputTokens === "number" && Number.isFinite(outputTokens)) {
    out.outputTokens = outputTokens;
  }
  if (typeof totalTokens === "number" && Number.isFinite(totalTokens)) {
    out.totalTokens = totalTokens;
  }
  return Object.keys(out).length > 0 ? out : null;
}

// Map a thrown stream error to an SSE `chat_error.code` from the ERRORS.md catalog.
Every LLM/gateway failure (including "no provider configured") + * is an upstream-dependency failure from the caller's perspective. + */ +function mapErrorCode(message: string): string { + void message; // reserved for finer-grained mapping if needed later + return "upstream_unavailable"; +} diff --git a/ornn-api/src/domains/assistant/contextAssembler.test.ts b/ornn-api/src/domains/assistant/contextAssembler.test.ts new file mode 100644 index 00000000..d7ee5bfc --- /dev/null +++ b/ornn-api/src/domains/assistant/contextAssembler.test.ts @@ -0,0 +1,94 @@ +/** + * UT-ASST-CTX-* — assembleAssistantInput (#970). + * + * @module domains/assistant/contextAssembler.test + */ + +import { describe, expect, it } from "bun:test"; +import { + ASSISTANT_SYSTEM_PROMPT, + assembleAssistantInput, +} from "./contextAssembler"; +import type { RetrievedSkill } from "./types"; + +const SKILL: RetrievedSkill = { + name: "slack-poster", + description: "Post messages to Slack", + tags: ["slack"], + category: "messaging", + createdOn: "2026-01-02T03:04:05.000Z", + createdBy: "user-author", +}; + +describe("assembleAssistantInput", () => { + it("UT-ASST-CTX-001: leads with one developer grounding message, then turns", () => { + const { input } = assembleAssistantInput({ + kbText: "Ornn is a skill-lifecycle API.", + skills: [SKILL], + messages: [ + { role: "user", content: "What is Ornn?" }, + { role: "assistant", content: "It's an API." }, + { role: "user", content: "Tell me more." 
}, + ], + }); + expect(input[0]!.role).toBe("developer"); + expect(input.slice(1).map((m) => m.role)).toEqual([ + "user", + "assistant", + "user", + ]); + expect(input.length).toBe(4); + }); + + it("UT-ASST-CTX-002: grounding carries persona + KB + the safe skill fields", () => { + const { input } = assembleAssistantInput({ + kbText: "Ornn KB body here.", + skills: [SKILL], + messages: [{ role: "user", content: "hi" }], + }); + const grounding = input[0]!.content as string; + expect(grounding).toContain(ASSISTANT_SYSTEM_PROMPT.slice(0, 30)); + expect(grounding).toContain("Ornn KB body here."); + expect(grounding).toContain("slack-poster"); + expect(grounding).toContain("Post messages to Slack"); + expect(grounding).toContain("messaging"); + expect(grounding).toContain("user-author"); + }); + + it("UT-ASST-CTX-003: empty skills → explicit 'no skills' line, no fabrication", () => { + const { input } = assembleAssistantInput({ + kbText: "KB.", + skills: [], + messages: [{ role: "user", content: "hi" }], + }); + const grounding = input[0]!.content as string; + expect(grounding.toLowerCase()).toContain("no skills"); + }); + + it("UT-ASST-CTX-004: blank KB → grounding still has persona + turns", () => { + const { input } = assembleAssistantInput({ + kbText: " ", + skills: [], + messages: [{ role: "user", content: "hi" }], + }); + expect(input[0]!.content).toContain(ASSISTANT_SYSTEM_PROMPT.slice(0, 30)); + expect(input.length).toBe(2); + }); + + it("UT-ASST-CTX-005: a RetrievedSkill cannot leak forbidden fields by construction", () => { + // RetrievedSkill is the only skill shape the assembler accepts, and it + // has no PII/secret fields. This guards the type-level boundary: even a + // skill carrying secret-looking text in SAFE fields renders only those. 
+ const { input } = assembleAssistantInput({ + kbText: "", + skills: [SKILL], + messages: [{ role: "user", content: "hi" }], + }); + const grounding = input[0]!.content as string; + // None of these substrings exist anywhere because RetrievedSkill omits + // the source document's sensitive fields entirely. + for (const forbidden of ["@", "storageKey", "skillHash", "sharedWith"]) { + expect(grounding.includes(forbidden)).toBe(false); + } + }); +}); diff --git a/ornn-api/src/domains/assistant/contextAssembler.ts b/ornn-api/src/domains/assistant/contextAssembler.ts new file mode 100644 index 00000000..ea594d3e --- /dev/null +++ b/ornn-api/src/domains/assistant/contextAssembler.ts @@ -0,0 +1,87 @@ +/** + * Context assembly for the Ornn Assistant (#970). + * + * Turns (curated KB grounding + visibility-scoped skills + the + * conversation) into the `input` message array for ONE streamed + * completion. There is NO tool loop and NO agentic behaviour — the model + * sees the grounding once and answers. + * + * The persona + grounding are injected as a single leading `developer` + * message (the playground does the same — the upstream gateway ignores + * the Responses-API `instructions` field, so grounding must ride in the + * message list). The user/assistant turns follow verbatim. + * + * @module domains/assistant/contextAssembler + */ + +import type { ResponsesApiInputMessage } from "../../clients/nyxid/llm"; +import type { AssistantMessage, RetrievedSkill } from "./types"; + +/** + * Assistant persona + guardrails. Constrains the model to grounded Q&A + * and forbids inventing facts or leaking anything outside the grounding. + */ +export const ASSISTANT_SYSTEM_PROMPT = `You are the Ornn Assistant, a helpful Q&A guide for Ornn — the agent-facing skill-lifecycle API (think "npm registry + npm CLI, fused, model-agnostic"). 
+ +Your job: +- Answer questions about what Ornn is, how it is different, and how to use it (search, pull, install, execute, build, upload, share skills). +- Help the user understand the skills that appear in the "relevant skills" section below — these are already filtered to what THIS user is allowed to see. + +Hard rules: +- You are a read-only Q&A assistant. You cannot run, install, modify, upload, or execute anything. If asked to perform an action, explain how the user/agent can do it via the Ornn API instead. +- Ground every answer in the knowledge base and the relevant-skills section below. If the answer is not supported there, say you don't know and point the user to the docs or the relevant API endpoint — never invent endpoints, fields, behaviour, or skills. +- Only discuss skills that appear in the relevant-skills section. Never speculate about skills that aren't listed, and never reveal author emails, internal IDs, storage details, sharing lists, secrets, quotas, or any other user's private data. +- Be concise and technical — the audience is agent developers.`; + +const SEPARATOR = "\n\n---\n\n"; + +/** + * Build the LLM `input` for one assistant completion. + */ +export function assembleAssistantInput(opts: { + readonly kbText: string; + readonly skills: ReadonlyArray; + readonly messages: ReadonlyArray; +}): { input: ResponsesApiInputMessage[] } { + const grounding = buildGroundingBlock(opts.kbText, opts.skills); + const input: ResponsesApiInputMessage[] = [ + { role: "developer", content: grounding }, + ]; + for (const m of opts.messages) { + input.push({ role: m.role, content: m.content }); + } + return { input }; +} + +/** Assemble persona + KB + scoped-skills into one developer message. 
*/ +function buildGroundingBlock( + kbText: string, + skills: ReadonlyArray, +): string { + const parts: string[] = [ASSISTANT_SYSTEM_PROMPT]; + + const kb = kbText.trim(); + if (kb.length > 0) { + parts.push(`# Ornn knowledge base\n\n${kb}`); + } + + if (skills.length > 0) { + const rendered = skills.map(renderSkill).join("\n"); + parts.push( + `# Relevant skills (already filtered to what this user may see)\n\n${rendered}`, + ); + } else { + parts.push( + `# Relevant skills\n\n(No skills matching the question are visible to this user.)`, + ); + } + + return parts.join(SEPARATOR); +} + +/** One-line, SAFE rendering of a retrieved skill. */ +function renderSkill(s: RetrievedSkill): string { + const category = s.category ? ` (category: ${s.category})` : ""; + const tags = s.tags.length > 0 ? ` [tags: ${s.tags.join(", ")}]` : ""; + return `- ${s.name}: ${s.description}${category}${tags} — created ${s.createdOn} by ${s.createdBy}`; +} diff --git a/ornn-api/src/domains/assistant/retrieval.test.ts b/ornn-api/src/domains/assistant/retrieval.test.ts new file mode 100644 index 00000000..ca363cef --- /dev/null +++ b/ornn-api/src/domains/assistant/retrieval.test.ts @@ -0,0 +1,168 @@ +/** + * UT-ASST-RETR-* — ScopedSkillRetriever + projectSafeSkill (#970). + * + * The data-safety boundary: these tests pin that retrieval is + * visibility-scoped at BOTH layers and that the projection NEVER carries + * a PII / secret / private-membership field into the result. 
+ * + * @module domains/assistant/retrieval.test + */ + +import { describe, expect, it } from "bun:test"; +import type { SkillDocument } from "../../shared/types/index"; +import type { ActorContext } from "../skills/crud/authorize"; +import { + ScopedSkillRetriever, + projectSafeSkill, + type SkillSearchPort, +} from "./retrieval"; + +function skillDoc(overrides: Partial = {}): SkillDocument { + return { + guid: "g-1", + name: "slack-poster", + description: "Post messages to Slack", + license: "MIT", + compatibility: null, + metadata: { category: "messaging", tags: ["slack", "chat"] }, + skillHash: "sha256:DEADBEEFsecrethash", + storageKey: "skills/g-1/1.0.0.zip", + createdBy: "user-author", + createdByEmail: "author@secret.example", + createdByDisplayName: "Author Secret Name", + createdOn: new Date("2026-01-02T03:04:05.000Z"), + updatedBy: "user-author", + updatedOn: new Date("2026-01-02T03:04:05.000Z"), + isPrivate: false, + sharedWithUsers: ["secret-grantee"], + sharedWithOrgs: ["secret-org"], + latestVersion: "1.0.0", + ...overrides, + }; +} + +const ACTOR: ActorContext = { + userId: "u-caller", + memberships: [{ userId: "org-a", role: "member", displayName: "Org A" }], + isPlatformAdmin: false, + membershipsResolved: true, +}; + +class FakeSearch implements SkillSearchPort { + lastArgs: unknown[] = []; + next: SkillDocument[] = []; + async keywordSearch( + query: string, + scope: string, + currentUserId: string, + userOrgIds: string[], + page: number, + pageSize: number, + ) { + this.lastArgs = [query, scope, currentUserId, userOrgIds, page, pageSize]; + return { skills: this.next, total: this.next.length }; + } +} + +describe("projectSafeSkill", () => { + it("UT-ASST-RETR-001: keeps only SAFE fields, drops all PII/secret fields", () => { + const projected = projectSafeSkill(skillDoc()); + expect(projected).toEqual({ + name: "slack-poster", + description: "Post messages to Slack", + tags: ["slack", "chat"], + category: "messaging", + createdOn: 
"2026-01-02T03:04:05.000Z", + createdBy: "user-author", + }); + // Belt: the serialized projection must not carry any forbidden field. + const json = JSON.stringify(projected); + for (const forbidden of [ + "author@secret.example", + "Author Secret Name", + "DEADBEEF", + "storage", + "secret-grantee", + "secret-org", + "isPrivate", + "skillHash", + ]) { + expect(json.includes(forbidden)).toBe(false); + } + }); + + it("UT-ASST-RETR-002: missing tags → empty array, never undefined", () => { + const projected = projectSafeSkill( + skillDoc({ metadata: { category: "misc" } }), + ); + expect(projected.tags).toEqual([]); + expect(projected.category).toBe("misc"); + }); +}); + +describe("ScopedSkillRetriever", () => { + it("UT-ASST-RETR-003: queries with the 'mixed' scope + actor org ids", async () => { + const search = new FakeSearch(); + const retriever = new ScopedSkillRetriever({ search, maxResults: 5 }); + await retriever.retrieve("how do I post to slack", ACTOR); + expect(search.lastArgs[1]).toBe("mixed"); + expect(search.lastArgs[2]).toBe("u-caller"); + expect(search.lastArgs[3]).toEqual(["org-a"]); + expect(search.lastArgs[5]).toBe(5); // pageSize == maxResults + }); + + it("UT-ASST-RETR-004: blank query → no search call, empty result", async () => { + const search = new FakeSearch(); + const retriever = new ScopedSkillRetriever({ search }); + expect(await retriever.retrieve(" ", ACTOR)).toEqual([]); + // FakeSearch records args only when called — empty means never invoked. + expect(search.lastArgs).toEqual([]); + }); + + it("UT-ASST-RETR-005: projection-layer canReadSkill drops an unreadable doc", async () => { + // Simulate a query-layer regression that returned a private skill the + // actor cannot read. The projection-layer guard MUST drop it. 
+ const search = new FakeSearch(); + search.next = [ + skillDoc({ guid: "pub", name: "public-skill", isPrivate: false }), + skillDoc({ + guid: "priv", + name: "someone-elses-private", + isPrivate: true, + createdBy: "other-user", + sharedWithUsers: [], + sharedWithOrgs: [], + }), + ]; + const retriever = new ScopedSkillRetriever({ search }); + const result = await retriever.retrieve("anything", ACTOR); + expect(result.map((r) => r.name)).toEqual(["public-skill"]); + }); + + it("UT-ASST-RETR-006: caps results at maxResults", async () => { + const search = new FakeSearch(); + search.next = Array.from({ length: 10 }, (_, i) => + skillDoc({ guid: `g${i}`, name: `skill-${i}`, isPrivate: false }), + ); + const retriever = new ScopedSkillRetriever({ search, maxResults: 3 }); + const result = await retriever.retrieve("x", ACTOR); + expect(result.length).toBe(3); + }); + + it("UT-ASST-RETR-007: private skill shared with actor's org IS readable", async () => { + const search = new FakeSearch(); + search.next = [ + skillDoc({ + guid: "shared", + name: "org-shared-skill", + isPrivate: true, + createdBy: "other", + sharedWithUsers: [], + sharedWithOrgs: ["org-a"], // actor is a member of org-a + }), + ]; + const retriever = new ScopedSkillRetriever({ search }); + const result = await retriever.retrieve("x", ACTOR); + expect(result.map((r) => r.name)).toEqual(["org-shared-skill"]); + }); +}); diff --git a/ornn-api/src/domains/assistant/retrieval.ts b/ornn-api/src/domains/assistant/retrieval.ts new file mode 100644 index 00000000..05ae5a93 --- /dev/null +++ b/ornn-api/src/domains/assistant/retrieval.ts @@ -0,0 +1,133 @@ +/** + * Visibility-scoped skill retrieval for the Ornn Assistant (#970). + * + * Given the caller's latest question, return up to N skills the caller is + * allowed to see, projected down to SAFE fields only. 
This is the most + * security-sensitive part of the assistant: the retrieved skills are fed + * verbatim into the LLM context and streamed back to the user, so the + * scoping + projection here is the data-safety boundary. + * + * Two independent guards (belt-and-suspenders, per the issue): + * 1. QUERY layer — `keywordSearch(..., scope: "mixed", ...)` runs + * `applyScope`, which restricts the Mongo match to public skills + + * private skills the actor authored / was shared / is an org member + * of. A private skill the actor can't see never leaves the DB. + * 2. PROJECTION layer — every surviving doc is re-checked with + * `canReadSkill(actor)` and then stripped to SAFE fields. Even if a + * future query-layer regression widened the match, the projection + * gate drops anything the actor can't read and never copies a + * PII/secret field. + * + * Deterministic: same (query, actor, corpus) → same result set (the repo + * sorts by `createdOn desc`). + * + * @module domains/assistant/retrieval + */ + +import { createLogger } from "../../shared/logger"; +import type { SkillDocument } from "../../shared/types/index"; +import { canReadSkill, type ActorContext } from "../skills/crud/authorize"; +import type { RetrievedSkill } from "./types"; + +const logger = createLogger("assistantRetrieval"); + +/** Default top-N skills injected into the grounding. */ +export const DEFAULT_MAX_RETRIEVED_SKILLS = 5; + +/** + * Cap the keyword query length. The latest user message is used verbatim + * as the (escaped) search term; bounding it keeps the regex sane and the + * query cheap regardless of how long the user's message is. + */ +const MAX_QUERY_CHARS = 256; + +/** + * Narrow port over the one `SkillRepository` method we use. Keeping the + * dependency surface tiny makes the retriever trivially fakeable in tests + * and decouples it from the full repository. 
+ */ +export interface SkillSearchPort { + keywordSearch( + query: string, + scope: "public" | "private" | "mixed" | "shared-with-me" | "mine", + currentUserId: string, + userOrgIds: string[], + page: number, + pageSize: number, + ): Promise<{ skills: SkillDocument[]; total: number }>; +} + +export interface ScopedSkillRetrieverDeps { + readonly search: SkillSearchPort; + readonly maxResults?: number; +} + +export class ScopedSkillRetriever { + private readonly search: SkillSearchPort; + private readonly maxResults: number; + + constructor(deps: ScopedSkillRetrieverDeps) { + this.search = deps.search; + this.maxResults = deps.maxResults ?? DEFAULT_MAX_RETRIEVED_SKILLS; + } + + /** + * Retrieve up to `maxResults` SAFE-projected skills the actor may see, + * matching the query. Empty / blank query → no retrieval. + */ + async retrieve(query: string, actor: ActorContext): Promise { + const q = query.trim().slice(0, MAX_QUERY_CHARS); + if (q.length === 0) return []; + + const orgIds = actor.memberships.map((m) => m.userId); + // QUERY-layer visibility: "mixed" = public + private-the-actor-can-read. + const { skills } = await this.search.keywordSearch( + q, + "mixed", + actor.userId, + orgIds, + 1, + this.maxResults, + ); + + // PROJECTION-layer enforcement: re-check readability, strip to SAFE + // fields. A doc that somehow slipped past the scope filter but fails + // `canReadSkill` is dropped and logged — it must never reach context. 
+ const safe: RetrievedSkill[] = []; + for (const s of skills) { + if (!canReadSkill(s, actor)) { + logger.warn( + { actor: actor.userId, skill: s.name }, + "skill passed query scope but failed canReadSkill — dropping (data-safety)", + ); + continue; + } + safe.push(projectSafeSkill(s)); + if (safe.length >= this.maxResults) break; + } + logger.debug( + { actor: actor.userId, matched: skills.length, returned: safe.length }, + "assistant skill retrieval complete", + ); + return safe; + } +} + +/** + * Strip a full skill document to the SAFE projection (#970). This is the + * ONLY place a `SkillDocument` becomes assistant-visible — by listing + * fields explicitly (never spreading) a newly-added sensitive field on + * `SkillDocument` can't silently leak into the grounding. + */ +export function projectSafeSkill(s: SkillDocument): RetrievedSkill { + const tags = Array.isArray(s.metadata?.tags) ? [...s.metadata.tags] : []; + return { + name: s.name, + description: s.description, + tags, + category: s.metadata?.category ?? "", + createdOn: + s.createdOn instanceof Date ? s.createdOn.toISOString() : String(s.createdOn), + createdBy: s.createdBy, + }; +} diff --git a/ornn-api/src/domains/assistant/types.ts b/ornn-api/src/domains/assistant/types.ts new file mode 100644 index 00000000..59318c7a --- /dev/null +++ b/ornn-api/src/domains/assistant/types.ts @@ -0,0 +1,69 @@ +/** + * Ornn Assistant domain types (#970). + * + * The assistant is a pure, non-agentic Q&A chatbot: it answers questions + * about Ornn (grounded in the curated KB) and about skills the caller is + * allowed to see (grounded in a visibility-scoped retrieval). It never + * runs tools, executes skills, or mutates state. + * + * The SSE event union here is the WIRE CONTRACT the frontend is built + * against — `chat_start` / `chat_text_delta` / `chat_error` / `chat_finish` + * (+ keepalive comment frames). 
The route serializes each event to an SSE + * frame whose `event:` line equals the `type` field (CONVENTIONS §6). + * + * @module domains/assistant/types + */ + +/** The LLM surface key this domain reserves/charges/resolves against. */ +export const ASSISTANT_SURFACE = "assistant" as const; + +/** Inbound chat turn. Only user/assistant roles — no tool/system turns. */ +export interface AssistantMessage { + readonly role: "user" | "assistant"; + readonly content: string; +} + +export interface AssistantChatRequest { + readonly messages: ReadonlyArray; + /** + * Optional admin-curated model id; falls back to the surface default. + * Widened to `| undefined` so a Zod `.optional()`-inferred body assigns + * cleanly under exactOptionalPropertyTypes (#657). + */ + readonly modelId?: string | undefined; +} + +/** Optional token-usage report attached to `chat_finish`. */ +export interface AssistantUsage { + readonly inputTokens?: number; + readonly outputTokens?: number; + readonly totalTokens?: number; +} + +/** + * SSE event union (the wire contract). Every event carries `type`; the + * route mirrors it onto the SSE `event:` line. + */ +export type AssistantChatEvent = + | { readonly type: "chat_start"; readonly model: string } + | { readonly type: "chat_text_delta"; readonly delta: string } + | { readonly type: "chat_error"; readonly code: string; readonly message: string } + | { readonly type: "chat_finish"; readonly usage?: AssistantUsage }; + +/** + * SAFE projection of a skill for grounding (#970 data-safety). ONLY these + * fields ever reach the LLM context or the user. Deliberately excludes + * every PII / secret / private-membership field on the source document: + * createdByEmail, createdByDisplayName, storageKey, skillHash, + * sharedWithUsers, sharedWithOrgs, isPrivate, license, and so on. 
+ */ +export interface RetrievedSkill { + readonly name: string; + readonly description: string; + readonly tags: ReadonlyArray; + readonly category: string; + /** ISO-8601 string. */ + readonly createdOn: string; + /** Author person user_id only — never an email/display name. */ + readonly createdBy: string; +} From 37212246ed6ef973f71e7be9f641d5efe6adb2ee Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:09:51 +0800 Subject: [PATCH 017/110] feat(api): POST /api/v1/assistant/chat SSE endpoint + wiring (#970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mounts the assistant endpoint following the playground reference + CONVENTIONS pipeline: nyxidAuth → rateLimit(30/min) → validateBody → resolveModel(assistant) → buildActorContext → quota reserve(assistant) → SSE stream → chargeOnCompletion. Model resolution + quota reserve run before the stream so a misconfig/cap-hit returns clean RFC 7807 JSON; in-stream failures surface as a chat_error event. SSE frames carry both the native `event:` line and a JSON `data:` line whose type matches (CONVENTIONS §6.3): chat_start / chat_text_delta / chat_error / chat_finish + keepalive comment frames. Quota is reconciled in a finally that always runs (commit on success / billed-then-aborted, release on pre-token system error). bootstrap wires wireAssistant over the shared NyxLlmClient + SkillRepo + QuotaService, resolving the assistant surface defaults + sseKeepAliveMs from settings, and warms the KB cache at boot. Route mounted under /api/v1. Integration tests cover framing, auth/validation/model/quota gates, and — mandatory — that a private skill + PII never reach the streamed context through the real pipeline. 
--- ornn-api/src/bootstrap.ts | 18 ++ ornn-api/src/domains/assistant/bootstrap.ts | 68 +++++ ornn-api/src/domains/assistant/routes.test.ts | 276 ++++++++++++++++++ ornn-api/src/domains/assistant/routes.ts | 244 ++++++++++++++++ 4 files changed, 606 insertions(+) create mode 100644 ornn-api/src/domains/assistant/bootstrap.ts create mode 100644 ornn-api/src/domains/assistant/routes.test.ts create mode 100644 ornn-api/src/domains/assistant/routes.ts diff --git a/ornn-api/src/bootstrap.ts b/ornn-api/src/bootstrap.ts index 79e3f5f5..b9aa0df6 100644 --- a/ornn-api/src/bootstrap.ts +++ b/ornn-api/src/bootstrap.ts @@ -89,6 +89,9 @@ import { wireSkillGeneration } from "./domains/skills/generation/bootstrap"; // Domain: Playground import { wirePlayground } from "./domains/playground/bootstrap"; +// Domain: Assistant (#970 — repo-aware Q&A chatbot) +import { wireAssistant } from "./domains/assistant/bootstrap"; + // Domain: Admin import { createAdminRoutes } from "./domains/admin/routes"; @@ -764,6 +767,20 @@ export async function bootstrap( llmProvidersService, }); + // ---- Domain: Assistant (#970) ---- + // Repo-aware Q&A chatbot. Reuses the shared NyxLlmClient, the assistant + // LLM surface (resolver + quota), and a visibility-scoped retrieval over + // the same SkillRepository. Pure Q&A — no agentic tool loop. 
+ const { routes: assistantRoutes } = wireAssistant({ + llmClient: nyxLlmClient, + skillRepo, + quotaService, + llmProvidersService, + defaultsResolver: async () => resolveSurfaceDefaults("assistant"), + keepAliveIntervalMsResolver: async () => + (await settingsService.getAssistant()).sseKeepAliveMs, + }); + // ---- Domain: Admin ---- const adminRoutes = createAdminRoutes({ analyticsEmitter, @@ -918,6 +935,7 @@ export async function bootstrap( apiApp.route("/", searchRoutes); apiApp.route("/", generationRoutes); apiApp.route("/", playgroundRoutes); + apiApp.route("/", assistantRoutes); apiApp.route("/", adminRoutes); apiApp.route("/", adminDashboardRoutes); apiApp.route("/", adminUsersRoutes); diff --git a/ornn-api/src/domains/assistant/bootstrap.ts b/ornn-api/src/domains/assistant/bootstrap.ts new file mode 100644 index 00000000..ec4272c9 --- /dev/null +++ b/ornn-api/src/domains/assistant/bootstrap.ts @@ -0,0 +1,68 @@ +/** + * Wire the Ornn Assistant domain (#970). + * + * Composition root: build the KB loader (cache warmed at boot), the + * visibility-scoped skill retriever (over the shared `SkillRepository`), + * the chat service, and mount the SSE route. Quota + model resolution + + * SSE keep-alive are injected as resolvers so admin settings edits land on + * the next request without a restart. 
+ * + * @module domains/assistant/bootstrap + */ + +import type { Hono } from "hono"; +import type { AuthVariables } from "../../middleware/nyxidAuth"; +import type { NyxLlmClient } from "../../clients/nyxid/llm"; +import type { SkillRepository } from "../skills/crud/repository"; +import type { QuotaService } from "../quota/service"; +import type { LlmProvidersService } from "../settings/llmProviders/service"; +import { AssistantKbLoader } from "./kb/loader"; +import { ScopedSkillRetriever, type SkillSearchPort } from "./retrieval"; +import { + AssistantChatService, + type AssistantChatDefaults, +} from "./chatService"; +import { createAssistantRoutes } from "./routes"; + +export interface AssistantWiring { + readonly routes: Hono<{ Variables: AuthVariables }>; +} + +export function wireAssistant(deps: { + llmClient: NyxLlmClient; + skillRepo: SkillRepository; + quotaService: QuotaService; + llmProvidersService: LlmProvidersService; + /** Resolve the per-request model + sampling snapshot (assistant surface). */ + defaultsResolver: () => Promise; + /** Resolve the SSE keep-alive cadence (assistant section). */ + keepAliveIntervalMsResolver: () => Promise; + /** Optional KB loader override (tests inject a fake digest reader). */ + kbLoader?: AssistantKbLoader; +}): AssistantWiring { + const kbLoader = deps.kbLoader ?? new AssistantKbLoader(); + // Warm the cache at boot so the first chat doesn't pay the artifact read + // (and any read/budget warning surfaces in boot logs, not mid-stream). + kbLoader.load(); + + const retriever = new ScopedSkillRetriever({ + // SkillRepository structurally satisfies the narrow SkillSearchPort. 
+ search: deps.skillRepo as SkillSearchPort, + }); + + const chatService = new AssistantChatService({ + llmClient: deps.llmClient, + kbLoader, + retriever, + defaultsResolver: deps.defaultsResolver, + }); + + const routes = createAssistantRoutes({ + chatService, + quotaService: deps.quotaService, + llmProvidersService: deps.llmProvidersService, + keepAliveIntervalMsResolver: deps.keepAliveIntervalMsResolver, + }); + + return { routes }; +} diff --git a/ornn-api/src/domains/assistant/routes.test.ts b/ornn-api/src/domains/assistant/routes.test.ts new file mode 100644 index 00000000..6e39070c --- /dev/null +++ b/ornn-api/src/domains/assistant/routes.test.ts @@ -0,0 +1,276 @@ +/** + * IT-ASST-* — POST /api/v1/assistant/chat route integration (#970). + * + * Covers the CONVENTIONS pipeline (auth → validate → model → quota → + * SSE), the wire-contract SSE framing, and — mandatory — the end-to-end + * data-safety guarantee that no private skill / PII / secret reaches the + * streamed context. 
+ * + * @module domains/assistant/routes.test + */ + +import { describe, expect, it } from "bun:test"; +import { Hono } from "hono"; +import { buildProblemJsonBody } from "../../shared/types/index"; +import type { + NyxLlmStreamParams, + ResponsesApiStreamEvent, +} from "../../clients/nyxid/llm"; +import type { SkillDocument } from "../../shared/types/index"; +import type { ModelResolution } from "../settings/llmProviders/service"; +import type { ChargeOutcome } from "../quota/types"; +import { AssistantChatService } from "./chatService"; +import { ScopedSkillRetriever, type SkillSearchPort } from "./retrieval"; +import { createAssistantRoutes } from "./routes"; +import type { AssistantChatEvent } from "./types"; + +const AUTH = { + userId: "u-caller", + email: "caller@test.local", + displayName: "Caller", + permissions: [] as string[], +}; + +// ---- fakes ----------------------------------------------------------------- + +class FakeQuota { + allow = true; + charges: Array<{ surface: string; outcome: ChargeOutcome }> = []; + private chargeResolvers: Array<() => void> = []; + async checkAllowed(p: { surface: string }) { + return this.allow + ? { allowed: true as const, isAdminBypass: false as const } + : { + allowed: false as const, + isAdminBypass: false as const, + surface: p.surface as never, + message: "over limit", + }; + } + async chargeOnCompletion(p: { surface: string; outcome: ChargeOutcome }) { + this.charges.push({ surface: p.surface, outcome: p.outcome }); + this.chargeResolvers.splice(0).forEach((r) => r()); + } + /** Resolves once chargeOnCompletion has been invoked. 
*/ + charged(): Promise { + if (this.charges.length > 0) return Promise.resolve(); + return new Promise((r) => this.chargeResolvers.push(r)); + } +} + +class FakeProviders { + resolution: ModelResolution = { + kind: "ok", + modelId: "m-1", + displayName: "M1", + providerId: "p-1", + }; + async resolveModel(): Promise { + return this.resolution; + } +} + +/** Chat service that yields a fixed event list. */ +class FixedChat { + constructor(private readonly events: AssistantChatEvent[]) {} + async *chat(): AsyncGenerator { + for (const e of this.events) yield e; + } +} + +function makeApp(opts: { + withAuth?: boolean; + chatService: unknown; + quota?: FakeQuota; + providers?: FakeProviders; +}) { + const quota = opts.quota ?? new FakeQuota(); + const providers = opts.providers ?? new FakeProviders(); + const routes = createAssistantRoutes({ + chatService: opts.chatService as never, + quotaService: quota as never, + llmProvidersService: providers as never, + keepAliveIntervalMsResolver: async () => 15_000, + }); + const app = new Hono(); + if (opts.withAuth !== false) { + app.use("*", async (c, next) => { + c.set("auth" as never, AUTH as never); + await next(); + }); + } + app.route("/api/v1", routes); + app.onError((err, c) => { + const code = (err as { code?: string }).code ?? "internal_error"; + const status = (err as { statusCode?: number }).statusCode ?? 
500; + return c.json( + buildProblemJsonBody({ + statusCode: status, + code, + message: err.message, + instance: c.req.path, + requestId: null, + }), + status as never, + ); + }); + return { app, quota, providers }; +} + +async function postChat(app: Hono, body: unknown) { + return app.request("/api/v1/assistant/chat", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(body), + }); +} + +// ---- tests ----------------------------------------------------------------- + +describe("POST /assistant/chat", () => { + it("IT-ASST-001: streams chat_start/text_delta/finish with event: + data: framing", async () => { + const chat = new FixedChat([ + { type: "chat_start", model: "m-1" }, + { type: "chat_text_delta", delta: "Ornn is an API." }, + { type: "chat_finish", usage: { totalTokens: 5 } }, + ]); + const { app } = makeApp({ chatService: chat }); + const res = await postChat(app, { messages: [{ role: "user", content: "hi" }] }); + expect(res.status).toBe(200); + expect(res.headers.get("content-type")).toContain("text/event-stream"); + const text = await res.text(); + expect(text).toContain("event: chat_start"); + expect(text).toContain('data: {"type":"chat_start","model":"m-1"}'); + expect(text).toContain("event: chat_text_delta"); + expect(text).toContain("Ornn is an API."); + expect(text).toContain("event: chat_finish"); + }); + + it("IT-ASST-002: charges quota with the assistant surface + success outcome", async () => { + const chat = new FixedChat([ + { type: "chat_start", model: "m-1" }, + { type: "chat_text_delta", delta: "hello" }, + { type: "chat_finish" }, + ]); + const quota = new FakeQuota(); + const { app } = makeApp({ chatService: chat, quota }); + const res = await postChat(app, { messages: [{ role: "user", content: "hi" }] }); + await res.text(); + await quota.charged(); + expect(quota.charges).toEqual([{ surface: "assistant", outcome: "success" }]); + }); + + it("IT-ASST-003: 401 when unauthenticated", async () 
=> { + const { app } = makeApp({ chatService: new FixedChat([]), withAuth: false }); + const res = await postChat(app, { messages: [{ role: "user", content: "hi" }] }); + expect(res.status).toBe(401); + }); + + it("IT-ASST-004: 400 on empty messages array", async () => { + const { app } = makeApp({ chatService: new FixedChat([]) }); + const res = await postChat(app, { messages: [] }); + expect(res.status).toBe(400); + }); + + it("IT-ASST-005: 503 when no model is enabled for the assistant surface", async () => { + const providers = new FakeProviders(); + providers.resolution = { kind: "no-models-enabled", surface: "assistant" }; + const { app } = makeApp({ chatService: new FixedChat([]), providers }); + const res = await postChat(app, { messages: [{ role: "user", content: "hi" }] }); + expect(res.status).toBe(503); + }); + + it("IT-ASST-006: 429 when over quota", async () => { + const quota = new FakeQuota(); + quota.allow = false; + const { app } = makeApp({ chatService: new FixedChat([]), quota }); + const res = await postChat(app, { messages: [{ role: "user", content: "hi" }] }); + expect(res.status).toBe(429); + }); + + // ---- data-safety through the REAL pipeline ------------------------------- + + it("IT-ASST-007: private skill + PII never reach the streamed context", async () => { + // A fake LLM that echoes the assembled developer grounding back as a + // text delta. Whatever the model "sees" is what the SSE body carries — + // so the SSE output IS the assembled context, asserted directly. + class EchoLlm { + async *stream(p: NyxLlmStreamParams): AsyncIterable { + const grounding = String(p.input[0]?.content ?? 
""); + yield { type: "response.output_text.delta", delta: grounding }; + } + } + + const publicSkill: SkillDocument = baseSkill({ + guid: "pub", + name: "public-weather-skill", + isPrivate: false, + }); + const privateSkill: SkillDocument = baseSkill({ + guid: "priv", + name: "TOP-SECRET-private-skill", + isPrivate: true, + createdBy: "someone-else", + createdByEmail: "victim@private.example", + sharedWithUsers: [], + sharedWithOrgs: [], + }); + + const search: SkillSearchPort = { + // Simulate a query-layer that returned BOTH (regression scenario): + // the projection-layer canReadSkill must still drop the private one. + async keywordSearch() { + return { skills: [publicSkill, privateSkill], total: 2 }; + }, + }; + const chatService = new AssistantChatService({ + llmClient: new EchoLlm(), + kbLoader: { load: () => ({ text: "Ornn KB.", estimatedTokens: 1, budgetTokens: 100, truncated: false }) }, + retriever: new ScopedSkillRetriever({ search }), + defaultsResolver: async () => ({ model: "m-1", maxOutputTokens: 1000, temperature: 0.3 }), + }); + + const { app } = makeApp({ chatService }); + const res = await postChat(app, { + messages: [{ role: "user", content: "what skills can I use?" }], + }); + const body = await res.text(); + + // Public skill IS present; private skill + every PII/secret marker is NOT. 
+ expect(body).toContain("public-weather-skill"); + expect(body).not.toContain("TOP-SECRET-private-skill"); + for (const forbidden of [ + "victim@private.example", + "author@secret.example", + "storage/key", + "sha256:SECRETHASH", + "someone-else", + ]) { + expect(body.includes(forbidden)).toBe(false); + } + }); +}); + +function baseSkill(overrides: Partial): SkillDocument { + return { + guid: "g", + name: "skill", + description: "a skill", + license: null, + compatibility: null, + metadata: { category: "misc", tags: ["t"] }, + skillHash: "sha256:SECRETHASH", + storageKey: "storage/key/zip", + createdBy: "u-author", + createdByEmail: "author@secret.example", + createdByDisplayName: "Author Name", + createdOn: new Date("2026-01-01T00:00:00.000Z"), + updatedBy: "u-author", + updatedOn: new Date("2026-01-01T00:00:00.000Z"), + isPrivate: false, + sharedWithUsers: [], + sharedWithOrgs: [], + latestVersion: "1.0.0", + ...overrides, + }; +} diff --git a/ornn-api/src/domains/assistant/routes.ts b/ornn-api/src/domains/assistant/routes.ts new file mode 100644 index 00000000..25b5eb08 --- /dev/null +++ b/ornn-api/src/domains/assistant/routes.ts @@ -0,0 +1,244 @@ +/** + * Ornn Assistant routes (#970). + * + * POST /assistant/chat — AUTH REQUIRED, SSE. + * + * Pipeline (mirrors the playground reference, CONVENTIONS-compliant): + * nyxidAuth → rateLimit → validateBody → resolveModel(assistant) → + * buildActorContext → quota reserve(assistant) → stream → charge. + * + * Model resolution + the quota reserve run BEFORE the stream opens, so a + * misconfig / cap-hit returns a clean RFC 7807 JSON error (never a broken + * SSE stream). Once the stream opens, in-stream failures surface as a + * `chat_error` event. Everything from the quota reserve to the producer's + * `finally` is await-safe, so a reserved slot is always reconciled. + * + * SSE frames carry BOTH the native `event:` line and a JSON `data:` line + * whose `type` equals the event name (CONVENTIONS §6.3). 
+ * + * @module domains/assistant/routes + */ + +import { Hono } from "hono"; +import { z } from "zod"; +import { + type AuthVariables, + nyxidAuthMiddleware, + getAuth, +} from "../../middleware/nyxidAuth"; +import { validateBody, getValidatedBody } from "../../middleware/validate"; +import { rateLimit } from "../../middleware/rateLimit"; +import { createLogger } from "../../shared/logger"; +import { buildActorContext } from "../skills/crud/authorize"; +import { throwQuotaError } from "../quota/routes"; +import { throwModelResolutionError } from "../settings/llmProviders/routes"; +import type { ChargeOutcome } from "../quota/types"; +import type { QuotaService } from "../quota/service"; +import type { LlmProvidersService } from "../settings/llmProviders/service"; +import type { AssistantChatService } from "./chatService"; +import { ASSISTANT_SURFACE, type AssistantChatRequest } from "./types"; + +const logger = createLogger("assistantRoutes"); + +/** + * Per-message content cap — mirrors the playground's `MAX_CHAT_MESSAGE_CHARS` + * (~8k tokens at 4 chars/token). The backend enforces it independently of + * any frontend `maxLength` so a non-browser client can't slip past. + */ +const MAX_CHAT_MESSAGE_CHARS = 32_000; + +const assistantMessageSchema = z.object({ + role: z.enum(["user", "assistant"]), + content: z + .string() + .max( + MAX_CHAT_MESSAGE_CHARS, + `Message content exceeds ${MAX_CHAT_MESSAGE_CHARS} character limit`, + ), +}); + +export const assistantChatRequestSchema = z.object({ + messages: z.array(assistantMessageSchema).min(1).max(100), + modelId: z.string().optional(), +}); + +export interface AssistantRoutesConfig { + readonly chatService: AssistantChatService; + readonly quotaService: QuotaService; + readonly llmProvidersService: LlmProvidersService; + /** SSE keep-alive interval (ms); resolved per-request from settings. 
*/ + readonly keepAliveIntervalMsResolver: () => Promise; +} + +export function createAssistantRoutes( + config: AssistantRoutesConfig, +): Hono<{ Variables: AuthVariables }> { + const { chatService, quotaService, llmProvidersService, keepAliveIntervalMsResolver } = + config; + const app = new Hono<{ Variables: AuthVariables }>(); + const auth = nyxidAuthMiddleware(); + + app.post( + "/assistant/chat", + auth, + // Per-user rate limit (#809 class). Assistant Q&A is one completion + // per request — cheaper than the playground tool loop — but still an + // LLM call, so it's capped. Mounted before validateBody so a flood of + // malformed bodies 429s before Zod and before any LLM cost. + rateLimit({ windowMs: 60_000, max: 30, label: "assistant-chat" }), + validateBody(assistantChatRequestSchema, "VALIDATION_ERROR"), + async (c) => { + const authCtx = getAuth(c); + const parsed = getValidatedBody>(c); + + logger.info( + { userId: authCtx.userId, messageCount: parsed.messages.length }, + "Assistant chat request", + ); + + // Resolve model (assistant surface) BEFORE the quota reserve so a + // model/config failure can't strand a reserved slot. Pure read — no + // LLM cost — so "429 before LLM cost" still holds. + const resolution = await llmProvidersService.resolveModel({ + surface: ASSISTANT_SURFACE, + ...(parsed.modelId !== undefined ? { requested: parsed.modelId } : {}), + }); + if (resolution.kind !== "ok") throwModelResolutionError(resolution); + const resolvedModelId = resolution.modelId; + + // Object-level actor (org memberships resolved via the lookup + // middleware mounted ahead of these routes in bootstrap). Used by + // the scoped skill retrieval inside the chat service. + const actor = await buildActorContext(c); + + // Quota reserve (assistant surface) — atomic cap-guarded claim, + // rejects with 429 BEFORE any LLM cost. Admins bypass inside the + // service. 
Capture the instant so the eventual charge lands in the + // same month bucket the slot was reserved against (#827). + const reservedAt = new Date(); + const decision = await quotaService.checkAllowed({ + userId: authCtx.userId, + permissions: authCtx.permissions, + surface: ASSISTANT_SURFACE, + now: reservedAt, + }); + if (!decision.allowed) throwQuotaError(decision); + + // Outcome defaults to system_error (refundable); flips to success + // on a clean finish. `chargeableStarted` flips on the first real + // text delta — once tokens stream the LLM has billed, so an + // abort/error after that commits instead of refunding (#766). + let outcome: ChargeOutcome = "system_error"; + let chargeableStarted = false; + + const encoder = new TextEncoder(); + const signal = c.req.raw.signal; + const chatRequest: AssistantChatRequest = parsed; + + const { readable, writable } = new TransformStream(); + const writer = writable.getWriter(); + + let writerClosed = false; + const closeOnce = async () => { + if (writerClosed) return; + writerClosed = true; + try { + await writer.close(); + } catch { + /* already closed */ + } + }; + const writeFrame = async (frame: string) => { + if (writerClosed) return; + try { + await writer.write(encoder.encode(frame)); + } catch { + writerClosed = true; + } + }; + // Each SSE frame carries the native `event:` line + a JSON `data:` + // line whose `type` equals the event name (CONVENTIONS §6.3). + const writeEvent = (event: { type: string; [k: string]: unknown }) => + writeFrame(`event: ${event.type}\ndata: ${JSON.stringify(event)}\n\n`); + + // Pre-flush a padded comment so headers + first chunk hit the wire + // immediately and proxies that buffer until ~2-4KB release early. 
+ const padding = " ".repeat(2048); + void writeFrame(`: stream-open ${Date.now()} ${padding}\n\n`); + + let keepAliveMs = 15_000; + try { + const resolved = await keepAliveIntervalMsResolver(); + if (Number.isFinite(resolved) && resolved > 0) keepAliveMs = resolved; + } catch (err) { + logger.warn( + { err: (err as Error).message }, + "Failed to resolve assistant sseKeepAliveMs; using 15s default", + ); + } + const keepAlive = setInterval(() => { + void writeFrame(`: keepalive ${Date.now()}\n\n`); + }, keepAliveMs); + + const onAbort = () => { + clearInterval(keepAlive); + void closeOnce(); + }; + signal.addEventListener("abort", onAbort); + + void (async () => { + try { + for await (const event of chatService.chat(actor, chatRequest, signal, { + modelId: resolvedModelId, + })) { + await writeEvent(event); + if (event.type === "chat_text_delta" && event.delta.length > 0) { + chargeableStarted = true; + } + if (event.type === "chat_finish") outcome = "success"; + } + } catch (err) { + const message = err instanceof Error ? err.message : "Assistant stream failed"; + logger.error({ userId: authCtx.userId, err: message }, "Assistant stream error"); + await writeEvent({ type: "chat_error", code: "upstream_unavailable", message }); + } finally { + signal.removeEventListener("abort", onAbort); + clearInterval(keepAlive); + await closeOnce(); + if (chargeableStarted && outcome === "system_error") { + // Tokens already streamed (billed) before an abort/error — + // commit the reserved slot instead of refunding it (#766). 
+ outcome = "skill_error"; + } + await quotaService + .chargeOnCompletion({ + userId: authCtx.userId, + permissions: authCtx.permissions, + surface: ASSISTANT_SURFACE, + outcome, + modelId: resolvedModelId, + now: reservedAt, + }) + .catch((err) => { + logger.warn( + { userId: authCtx.userId, err: (err as Error).message }, + "Quota charge after assistant chat failed", + ); + }); + } + })(); + + return new Response(readable, { + status: 200, + headers: { + "Content-Type": "text/event-stream; charset=utf-8", + "Cache-Control": "no-cache, no-transform", + Connection: "keep-alive", + "X-Accel-Buffering": "no", + }, + }); + }, + ); + + return app; +} From dfad20ab6b7b2c6e45b666f490858bd8f09c13f8 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:09:58 +0800 Subject: [PATCH 018/110] feat(api): register assistant/chat in the OpenAPI spec (#970) Adds assistantChatRequestBodySchema + assistantChatEventSchema and the `POST /api/v1/assistant/chat` path (SSE response, 400/401/429/503 error responses, "Assistant" tag) so the generated openapi.json documents the new endpoint and its event contract. 
--- ornn-api/src/openapi/schemas.ts | 33 +++++++++++++++++++++++++++++ ornn-api/src/openapi/specBuilder.ts | 25 ++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/ornn-api/src/openapi/schemas.ts b/ornn-api/src/openapi/schemas.ts index fbf5bf54..378efb94 100644 --- a/ornn-api/src/openapi/schemas.ts +++ b/ornn-api/src/openapi/schemas.ts @@ -183,6 +183,39 @@ export const playgroundChatEventSchema = z.discriminatedUnion("type", [ z.object({ type: z.literal("finish"), finishReason: z.string() }), ]); +// --------------------------------------------------------------------------- +// Assistant (#970) — repo-aware Q&A chatbot +// --------------------------------------------------------------------------- + +export const assistantChatRequestBodySchema = z.object({ + messages: z + .array( + z.object({ + role: z.enum(["user", "assistant"]), + content: z.string(), + }), + ) + .min(1) + .max(100), + modelId: z.string().optional(), +}); + +export const assistantChatEventSchema = z.discriminatedUnion("type", [ + z.object({ type: z.literal("chat_start"), model: z.string() }), + z.object({ type: z.literal("chat_text_delta"), delta: z.string() }), + z.object({ type: z.literal("chat_error"), code: z.string(), message: z.string() }), + z.object({ + type: z.literal("chat_finish"), + usage: z + .object({ + inputTokens: z.number().optional(), + outputTokens: z.number().optional(), + totalTokens: z.number().optional(), + }) + .optional(), + }), +]); + // --------------------------------------------------------------------------- // Admin // --------------------------------------------------------------------------- diff --git a/ornn-api/src/openapi/specBuilder.ts b/ornn-api/src/openapi/specBuilder.ts index d9e91bcd..4fa1e7e1 100644 --- a/ornn-api/src/openapi/specBuilder.ts +++ b/ornn-api/src/openapi/specBuilder.ts @@ -303,6 +303,29 @@ function playgroundChatPath(): PathItem { }; } +function assistantChatPath(): PathItem { + return { + post: { + summary: "Ornn Assistant 
— repo-aware Q&A chat (SSE stream)", + description: + "Pure, non-agentic Q&A about Ornn and the skills the caller may see. Grounds answers in a curated knowledge-base digest plus a visibility-scoped skill retrieval (SAFE fields only). SSE event types: 'chat_start', 'chat_text_delta', 'chat_error', 'chat_finish' (+ keepalive comment frames). No tools / no execution.", + operationId: "assistantChat", + tags: ["Assistant"], + security: bearerAuth(), + requestBody: { + required: true, + content: { + "application/json": { schema: toSchema(S.assistantChatRequestBodySchema) }, + }, + }, + responses: { + ...sseResponse("SSE stream of assistant chat events"), + ...errorResponses(400, 401, 429, 503), + }, + }, + }; +} + function categoriesListCreatePath(): PathItem { return { get: { @@ -444,6 +467,8 @@ export function buildSpec(): OpenApiSpec { [`${prefix}/skill-manifest-schema.json`]: formatSchemaPath(), // Playground [`${prefix}/playground/chat`]: playgroundChatPath(), + // Assistant (#970) + [`${prefix}/assistant/chat`]: assistantChatPath(), // Admin [`${prefix}/admin/categories`]: categoriesListCreatePath(), [`${prefix}/admin/categories/{id}`]: categoryUpdateDeletePath(), From d641c97d7c4e1a8ccfc721840f2ca4ace8191365 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:13:55 +0800 Subject: [PATCH 019/110] docs: changeset for Ornn Assistant (#970) Single minor changeset (fixed-versioned ornn-api + ornn-web) describing the Ornn Assistant feature. Required by the check-changeset CI gate. 
--- .changeset/feat-970-ornn-assistant.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changeset/feat-970-ornn-assistant.md diff --git a/.changeset/feat-970-ornn-assistant.md b/.changeset/feat-970-ornn-assistant.md new file mode 100644 index 00000000..166714ba --- /dev/null +++ b/.changeset/feat-970-ornn-assistant.md @@ -0,0 +1,6 @@ +--- +"ornn-api": minor +"ornn-web": minor +--- + +Add Ornn Assistant — an authenticated, repo-aware Q&A assistant. A new `/api/v1/assistant/chat` SSE endpoint and an in-app chat widget answer questions about Ornn and the skill catalog, grounded in a curated knowledge base plus visibility-scoped skill search (no private or PII data exposed). Admins can select the assistant model per provider. (#970) From 012a558f00521e853be631e1938dd9aff3b2eff3 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:19:37 +0800 Subject: [PATCH 020/110] docs: document /assistant/chat SSE + error reuse (#970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Ornn Assistant endpoint POST /v1/assistant/chat ships without any new contract surface to document, but its SSE event set and error behaviour were undocumented. Two precise additions: - CONVENTIONS.md §6.2: add the assistant row to the SSE endpoint mapping table — events chat_start, chat_text_delta, chat_error, chat_finish (the tool/file events from playground/chat do not apply here). - ERRORS.md: note under upstream_unavailable that /assistant/chat introduces NO new error codes. Pre-stream failures reuse validation_error (400), authentication_required (401), rate_limited (429), and MODEL_NOT_ENABLED / MODEL_NOT_FOUND (400, only when an explicit modelId is supplied). In-stream LLM failure surfaces as an SSE chat_error event with code upstream_unavailable and no chat_finish. 
Verified against ornn-api/src/domains/assistant/{routes,chatService}.ts and the shared throwModelResolutionError helper. --- docs/CONVENTIONS.md | 1 + docs/ERRORS.md | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/docs/CONVENTIONS.md b/docs/CONVENTIONS.md index 1e93d66e..3604267b 100644 --- a/docs/CONVENTIONS.md +++ b/docs/CONVENTIONS.md @@ -331,6 +331,7 @@ Endpoints pick a subset and MAY add endpoint-specific events with the same prefi |---|---| | `POST /v1/skills/generate` | `generation_start`, `generation_delta`, `generation_validation_error`, `generation_error`, `generation_complete` | | `POST /v1/playground/chat` | `chat_start`, `chat_text_delta`, `chat_tool_call`, `chat_tool_result`, `chat_file_output`, `chat_error`, `chat_finish` | +| `POST /v1/assistant/chat` | `chat_start`, `chat_text_delta`, `chat_error`, `chat_finish` | ### 6.3 Transport rules diff --git a/docs/ERRORS.md b/docs/ERRORS.md index be5c5d7f..c178e513 100644 --- a/docs/ERRORS.md +++ b/docs/ERRORS.md @@ -176,6 +176,10 @@ A dependency Ornn relies on (NyxID, OpenSandbox, LLM provider, mirror target, **Client action:** retry with exponential backoff. If the failure persists, check [status.chrono-ai.fun](https://status.chrono-ai.fun) (when published) or [Discussions → Q&A](https://github.com/ChronoAIProject/Ornn/discussions/categories/q-a). +### chat_error (SSE) — `/assistant/chat` + +`POST /v1/assistant/chat` (SSE; see [`docs/CONVENTIONS.md`](CONVENTIONS.md) §6.2) introduces **no new error codes** — it reuses the existing catalog. Failures before the stream opens use the normal `application/problem+json` envelope: `validation_error` (400, bad body), `authentication_required` (401), `rate_limited` (429), and — only when the caller supplies an explicit `modelId` — `MODEL_NOT_ENABLED` / `MODEL_NOT_FOUND` (400, from the per-surface model resolver). 
The same `application/problem+json` envelope also carries the 503 returned when no model is enabled for the `assistant` surface and the 429 returned when the caller's assistant quota is exhausted. Once the stream is open, an in-stream LLM failure is delivered as an SSE `chat_error` event with `code: "upstream_unavailable"` and no terminal `chat_finish`, mirroring this section's parent code. + --- ## Appendix: pre-#585 migration map From d600e657f136b918aa05dadc9477a319e84a3bc7 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:26:38 +0800 Subject: [PATCH 021/110] fix(web): align assistant usage fields to backend contract (#970) Quality-review nit: the frontend assistantUsageSchema used promptTokens/completionTokens while the backend AssistantUsage contract (ornn-api domains/assistant/types.ts) and the OpenAPI spec emit inputTokens/outputTokens/totalTokens. Rename the two drifted fields so a populated chat_finish.usage validates instead of being silently stripped. totalTokens was already correct. --- ornn-web/src/types/assistant.test.ts | 4 +++- ornn-web/src/types/assistant.ts | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/ornn-web/src/types/assistant.test.ts b/ornn-web/src/types/assistant.test.ts index 4684973a..6138fbbf 100644 --- a/ornn-web/src/types/assistant.test.ts +++ b/ornn-web/src/types/assistant.test.ts @@ -45,11 +45,13 @@ describe("assistant SSE event union", () => { it("accepts chat_finish with usage and tolerates extra usage keys", () => { const r = assistantChatEventSchema.safeParse({ type: "chat_finish", - usage: { promptTokens: 10, completionTokens: 5, totalTokens: 15, costUsd: 0.01 }, + usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15, costUsd: 0.01 }, }); expect(r.success).toBe(true); if (r.success && r.data.type === "chat_finish") { expect(r.data.usage?.totalTokens).toBe(15); + expect(r.data.usage?.inputTokens).toBe(10); + expect(r.data.usage?.outputTokens).toBe(5); // Forward-compatible extra key is stripped, not retained.
expect(r.data.usage).not.toHaveProperty("costUsd"); } diff --git a/ornn-web/src/types/assistant.ts b/ornn-web/src/types/assistant.ts index 3fd2cc4e..92509b3d 100644 --- a/ornn-web/src/types/assistant.ts +++ b/ornn-web/src/types/assistant.ts @@ -63,14 +63,16 @@ export const assistantChatErrorSchema = z.object({ }); /** - * Usage accounting attached to `chat_finish`. Shape is advisory — the + * Usage accounting attached to `chat_finish`. Field names mirror the + * backend AssistantUsage contract (ornn-api domains/assistant/types.ts): + * inputTokens / outputTokens / totalTokens. Shape is advisory — the * backend may add fields, so unknown keys are tolerated (Zod strips them) * and every known field is optional. */ export const assistantUsageSchema = z .object({ - promptTokens: z.number().optional(), - completionTokens: z.number().optional(), + inputTokens: z.number().optional(), + outputTokens: z.number().optional(), totalTokens: z.number().optional(), }) .optional(); From 447bbc93d8d907008ad8c78e628e6dddebd0c6a0 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:26:38 +0800 Subject: [PATCH 022/110] =?UTF-8?q?fix(web):=20a11y=20=E2=80=94=20dialog?= =?UTF-8?q?=20focus=20trap=20+=2044px=20header=20touch=20targets=20(#970)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two quality-review a11y nits on the assistant panel: - The panel declares role=dialog / aria-modal=true but had no focus trap, so Tab / Shift+Tab escaped to the backdrop'd page behind it. Add a trap scoped to the dialog via onKeyDown (currentTarget — no ref threading) that wraps focus at the first/last focusable. The existing focus-move-into-composer-on-open and restore-to-launcher-on-close behavior is unchanged. - The header close / clear IconButtons were h-8 w-8 (32px), under the 44px mobile touch-target guideline in docs/DESIGN.md. 
Bump the hit area to h-11 w-11 (44px); the launcher and suggestion rows already met it. --- .../assistant/AssistantWidget.test.tsx | 31 +++++++++++++++++++ .../components/assistant/AssistantWidget.tsx | 28 ++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/ornn-web/src/components/assistant/AssistantWidget.test.tsx b/ornn-web/src/components/assistant/AssistantWidget.test.tsx index 0db52227..3a25ccc1 100644 --- a/ornn-web/src/components/assistant/AssistantWidget.test.tsx +++ b/ornn-web/src/components/assistant/AssistantWidget.test.tsx @@ -134,4 +134,35 @@ describe("AssistantWidget", () => { fireEvent.click(screen.getByLabelText("Close assistant")); expect(screen.queryByRole("dialog")).not.toBeInTheDocument(); }); + + it("traps Tab focus inside the dialog", () => { + render(); + openPanel(); + const dialog = screen.getByRole("dialog"); + const focusables = dialog.querySelectorAll( + 'a[href], button:not([disabled]), textarea:not([disabled]), input:not([disabled]), select:not([disabled]), [tabindex]:not([tabindex="-1"])', + ); + expect(focusables.length).toBeGreaterThan(1); + const first = focusables[0]!; + const last = focusables[focusables.length - 1]!; + + // Tab off the last element wraps to the first. + last.focus(); + fireEvent.keyDown(dialog, { key: "Tab" }); + expect(document.activeElement).toBe(first); + + // Shift+Tab off the first element wraps to the last. + first.focus(); + fireEvent.keyDown(dialog, { key: "Tab", shiftKey: true }); + expect(document.activeElement).toBe(last); + }); + + it("gives the close control a >=44px touch target", () => { + render(); + openPanel(); + const close = screen.getByLabelText("Close assistant"); + // h-11 w-11 = 44px (docs/DESIGN.md mobile touch-target guideline). 
+ expect(close.className).toContain("h-11"); + expect(close.className).toContain("w-11"); + }); }); diff --git a/ornn-web/src/components/assistant/AssistantWidget.tsx b/ornn-web/src/components/assistant/AssistantWidget.tsx index e47674b6..ae09d3a3 100644 --- a/ornn-web/src/components/assistant/AssistantWidget.tsx +++ b/ornn-web/src/components/assistant/AssistantWidget.tsx @@ -152,6 +152,30 @@ function AssistantPanel({ isOpen, onClose }: { isOpen: boolean; onClose: () => v inputRef.current?.setValue(text); }; + // Focus trap — keep Tab / Shift+Tab cycling inside the dialog so focus + // can't escape to the backdrop'd page behind it. Paired with the + // focus-in-on-open / restore-on-close effects above. Scoped to the + // panel via currentTarget so no ref threading is needed. + const handlePanelKeyDown = (e: React.KeyboardEvent) => { + if (e.key !== "Tab") return; + const focusables = e.currentTarget.querySelectorAll( + 'a[href], button:not([disabled]), textarea:not([disabled]), input:not([disabled]), select:not([disabled]), [tabindex]:not([tabindex="-1"])', + ); + if (focusables.length === 0) return; + const first = focusables[0]!; + const last = focusables[focusables.length - 1]!; + const active = document.activeElement; + if (e.shiftKey) { + if (active === first || !e.currentTarget.contains(active)) { + e.preventDefault(); + last.focus(); + } + } else if (active === last) { + e.preventDefault(); + first.focus(); + } + }; + return createPortal( {isOpen && ( @@ -172,6 +196,7 @@ function AssistantPanel({ isOpen, onClose }: { isOpen: boolean; onClose: () => v role="dialog" aria-modal="true" aria-label={t("assistant.title")} + onKeyDown={handlePanelKeyDown} initial={ reduceMotion ? 
{ opacity: 0 } : { opacity: 0, y: 16, scale: 0.98 } } @@ -353,7 +378,8 @@ function IconButton({ onClick={onClick} aria-label={label} title={label} - className="inline-flex h-8 w-8 items-center justify-center rounded-sm text-meta transition-colors hover:bg-elevated hover:text-strong focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent" + // h-11 w-11 = 44px min touch target (docs/DESIGN.md a11y guideline). + className="inline-flex h-11 w-11 items-center justify-center rounded-sm text-meta transition-colors hover:bg-elevated hover:text-strong focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent" > {children} From 33ed2661ad6f078e5544fd3e032bbda0be7839f6 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:26:55 +0800 Subject: [PATCH 023/110] fix(api): restrict assistant KB to user-relevant doc sections (#970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Security review finding #1 (LOW): the architecture + conventions KB sources distilled whole docs, so internal infra surfaced in the digest any authenticated user can elicit — env-var catalogs, internal proxy header names (X-NyxID-*/X-Ornn-Caller-*), telemetry/observability internals, and the user-directory section. No secret VALUES, but needless internal-recon surface. Add a `headings` allow-list to both sources (mirroring README/DESIGN): - architecture → Project Overview, External Services, Skill Format only (drops the PostHog/telemetry, caller-type-detection, env-var config, and user-directory sections). - conventions → §1 Response/error, §2 URL structure, §3 HTTP semantics, §4 Query params, §6 SSE only (drops §5 Auth's internal X-NyxID-* note and §7–§12 deprecation/caching/observability/architecture internals). Regenerated digest is clean of every internal-recon marker (grep ✓) and still carries the user-relevant grounding; ~10.8k tokens, deterministic. 
--- .../domains/assistant/kb/digest.generated.md | 200 +++--------------- ornn-api/src/domains/assistant/kb/sources.ts | 27 ++- 2 files changed, 58 insertions(+), 169 deletions(-) diff --git a/ornn-api/src/domains/assistant/kb/digest.generated.md b/ornn-api/src/domains/assistant/kb/digest.generated.md index dc073472..fc96baf3 100644 --- a/ornn-api/src/domains/assistant/kb/digest.generated.md +++ b/ornn-api/src/domains/assistant/kb/digest.generated.md @@ -3,13 +3,13 @@ Produced by ornn-api/scripts/build-assistant-kb.ts (#970). Re-run: `bun run scripts/build-assistant-kb.ts` from ornn-api/. budgetTokens: 18000 - estimatedTokens: 12433 + estimatedTokens: 10761 sources: - readme: ~1883 tok - claude-positioning: ~272 tok - - architecture: ~1659 tok + - architecture: ~219 tok - agent-manual-http: ~5486 tok (clipped) - - conventions: ~2598 tok (clipped) + - conventions: ~2366 tok - design-overview: ~487 tok --> @@ -189,10 +189,6 @@ Implications when proposing or building features: ## Architecture -# Architecture — chrono-ornn - -> For API v1 and architecture conventions, see [`CONVENTIONS.md`](./CONVENTIONS.md). Active refactor work is tracked under the [`Refactor` milestone](https://github.com/ChronoAIProject/Ornn/milestone/6). - ## Project Overview chrono-ornn is an AI skill platform. Users create, publish, search, and execute AI skills (packaged prompts + scripts) via a web UI or API. Authentication and LLM calls go through NyxID. Script execution runs in chrono-sandbox. @@ -212,117 +208,6 @@ chrono-ornn is an AI skill platform. Users create, publish, search, and execute - Category types: `plain`, `tool-based`, `runtime-based`, `mixed` - Output types: `text` (stdout), `file` (generated files retrieved via glob) -## Audit + analytics (PostHog) - -Issue #271 collapsed every observability surface in Ornn — the -universal API audit middleware (#245), the `activities` Mongo -collection, and the OpenTelemetry placeholder section — into a single -PostHog-driven pipeline. 
There is **no custom audit code** in Ornn -anymore; everything flows through the `posthog-node` SDK and is -viewed in the PostHog dashboard. - -### Event taxonomy - -Backend events (server-emitted, every event carries `source: "api"` -so dashboards can disambiguate from frontend events of the same name): - -| event | when | properties | -|---|---|---| -| `api.request` | every authenticated `/api/v1/*` request | `userId`, `callerType`, `method`, `path`, `routePattern`, `status`, `durationMs`, `sourceIp` (truncated /24 IPv4, /48 IPv6), `requestId` | -| `api.error` | sampled 5xx responses | `statusCode`, `errorCode`, `method`, `path`, `requestId` | -| `api.skill.pull` | every skill package materialization | `callerType`, `skillId`, `skillName`, `skillVersion` | -| `api.skill.published` | skill create + version publish | `skillId`, `skillVersion`, `isNewSkill` | -| `user.login` / `user.logout` | session open / close | — | -| `skill.created` / `.updated` / `.deleted` / `.version_deleted` | mutation routes | `skillId`, `skillName`, `version`, `adminAction?` | -| `skill.visibility_changed` / `.permissions_changed` | visibility + sharing flips | `skillId`, `isPrivate`, `sharedWithUsers`, `sharedWithOrgs` | -| `skill.refresh` / `.source_linked` / `.source_unlinked` | source-pointer ops | `skillId`, `repo`, `ref`, `commit` | -| `skill.nyxid_service_tied` / `.agentseal_rescanned` | tie + admin-rescan | `skillId`, `isSystemSkill`, `score` | -| `settings.exported` / `.imported` | settings IO | `schemaVersion`, `aggregateStatus`, `dryRun`, `sections` | - -Frontend events (browser SDK — `ornn-web/src/lib/analytics.ts`) carry -auto-pageview + cookie-consent state and the typed event union in -that file. Identity is set via `posthog.identify(userId, traits)` on -every NyxID login. - -### Caller-type detection - -`api.request` is emitted from `apiRequestTrackingMiddleware` mounted -on `/api/v1/*` AFTER `proxyAuthSetup`. 
`callerType` derives from auth -shape: - -| auth shape | `X-Ornn-Caller` | `callerType` | -|---|---|---| -| browser session (NyxID OAuth cookie / browser-scope Bearer) | — | `web` | -| NyxID forwarded user-access token (agent via NyxID proxy) | — | `api` | -| anonymous | `system` / `playground` | matches header | -| anonymous | other | `web` | - -The header is informational only. Source IP is read from -`X-Forwarded-For` (first hop), falls back to `X-Real-IP`, then -truncated to /24 (IPv4) or /48 (IPv6) before emit. - -### Configuration - -PostHog config lives in the admin `telemetry` settings section. -Backend reads it once at boot (`bootstrap.ts`) and falls back to env -vars when the DB section has no API key set: - -| field | env fallback | meaning | -|---|---|---| -| `postHogEnabled` | `POSTHOG_ENABLED` | master switch — off forces NoopTracker even with a key | -| `postHogApiKey` | `POSTHOG_API_KEY` | public project key (`phc_…`); empty disables | -| `postHogHost` | `POSTHOG_HOST` | ingest host (e.g. `https://eu.i.posthog.com`) | -| `postHogProjectId` | `POSTHOG_PROJECT_ID` | informational, surfaced in log lines | -| `postHogErrorSampleRate` | `POSTHOG_ERROR_SAMPLE_RATE` | `[0,1]` sampling for `api.error` | - -Admin DB is canonical: a non-empty `postHogApiKey` in the section -makes the entire DB record authoritative; otherwise env wins. -Restart-required for changes to apply (the SDK is initialized once -at boot). - -### Failure modes accepted - -- **No body archive.** Request/response bodies are not captured. - Forensic body-replay post-incident is not possible. The previous - MinIO-offload pipeline (#245) was removed. -- **Audit retention = PostHog retention.** Cloud free tier is - approximately 1 year of events; paid extends. Self-hosted PostHog - retains as long as the storage volume allows. -- **PostHog-side outages** drop events that miss the in-process - buffer. 
The drain on `shutdown()` flushes the buffer; sigterm - during a backlog can lose tail events. - -### Viewing data - -There is **no in-Ornn activity feed UI**. Admins use the PostHog -dashboard for the full event explorer, funnels, retention, and SQL -queries. The Ornn admin dashboard at `/admin` deep-links to the -PostHog Activity / Insights views via -`ornn-web/src/lib/postHogLinks.ts`, which translates the configured -ingest host (`.i.posthog.com`) into the matching dashboard -host (`.posthog.com`). - -### What about OpenTelemetry? - -Considered and deferred (issue #271 discussion). For Ornn's current -single-service architecture and the requirements covered here -(per-request audit, user activity, who-called-what), PostHog alone -is sufficient. OpenTelemetry's value (distributed tracing, metrics -histograms) doesn't justify standing up a collector + Tempo / Loki / -Jaeger today. Reopen as a separate issue if/when the architecture -splits across services or a concrete tracing pain point appears. - -### User directory - -The unified `users` Mongo collection (built in #271, replaces -`activities` + `admin_users` + `users_meta`) is fed lazily by -`proxyAuthSetup.onAuthSeen` on every authenticated request. It is -NOT audit data — it's an identity cache backing the skill-permissions -typeahead, the admin user list, and the dashboard role partition. -NyxID stays authoritative for permission checks; this collection is -display + indexing only. See -`ornn-api/src/domains/users/repository.ts`. - --- ## Using Ornn from an AI Agent (HTTP API) @@ -698,31 +583,6 @@ curl -X PUT \ ## API Conventions -# ornn API & Architecture Conventions - -The contract every `/api/v1/*` endpoint and every `ornn-api` module must follow. All future endpoints and modules MUST conform. Changes that violate a convention are blocked at review. - -This document is normative. It is the authoritative source for decisions that would otherwise be re-litigated per PR. When in doubt, this file wins. 
- ---- - -## Table of Contents - -1. [Response & error format](#1-response--error-format) -2. [URL structure](#2-url-structure) -3. [HTTP semantics](#3-http-semantics) -4. [Query parameters](#4-query-parameters) -5. [Authentication & authorization](#5-authentication--authorization) -6. [SSE streaming](#6-sse-streaming) -7. [Deprecation](#7-deprecation) -8. [Caching](#8-caching) -9. [Observability headers](#9-observability-headers) -10. [OpenAPI](#10-openapi) -11. [Architecture conventions](#11-architecture-conventions) -12. [Every new `/v1/` endpoint checklist](#12-every-new-v1-endpoint-checklist) - ---- - ## 1. Response & error format ### 1.1 Success — single resource @@ -965,36 +825,44 @@ Endpoint-specific. Rules: --- -## 5. Authentication & authorization +## 6. SSE streaming + +### 6.1 Event naming -### 5.1 Transport +Format: `_`, snake_case. -- `Authorization: Bearer ` between client and the NyxID proxy. -- `X-NyxID-Identity-Token` and `X-NyxID-*` headers between proxy and `ornn-api` (internal). -- OpenAPI declares one `bearerAuth` scheme; `X-NyxID-*` is not part of the public contract. +Shared event vocabulary across endpoints: -### 5.2 Permission strings +| Suffix | Meaning | +|---|---| +| `_start` | Stream opened | +| `_text_delta` | Incremental text content | +| `_tool_call` | Model requests tool invocation | +| `_tool_result` | Tool output | +| `_file_output` | File produced during run | +| `_validation_error` | Recoverable validation failure | +| `_error` | Terminal error | +| `_complete` / `_finish` | Stream ended normally | -Format: `ornn::`. +Endpoints pick a subset and MAY add endpoint-specific events with the same prefix. -Actions: `read`, `write`, `admin`, plus resource-specific high-cost actions when needed. 
+### 6.2 Current endpoint mapping -| Permission | Grants | +| Endpoint | Events | |---|---| -| `ornn:skill:read` | Read skills (respects visibility) | -| `ornn:skill:write` | Create, update, delete own skills | -| `ornn:skill:admin` | Manage any skill (override ownership); delete any skill | -| `ornn:skill:generate` | Invoke skill generation endpoints (high LLM cost) | -| `ornn:skill:execute` | Invoke playground chat (runs user code) | -| `ornn:category:read` | List categories | -| `ornn:category:admin` | Manage categories | -| `ornn:tag:read` | List tags | -| `ornn:tag:admin` | Manage tags | -| `ornn:user:admin` | User dashboard (list users, aggregate stats per user) | -| `ornn:activity:read` | Platform activity log read access | -| `ornn:stats:read` | Platform-wide dashboard aggregates | - -NyxID composes a **"Platform Admin"** role that grants all `*:admin` + `*:read` permissions above; current platform admins inherit this role with zero UX change. Sub-admin roles (content moderator, tag curator, support) can be +| `POST /v1/skills/generate` | `generation_start`, `generation_delta`, `generation_validation_error`, `generation_error`, `generation_complete` | +| `POST /v1/playground/chat` | `chat_start`, `chat_text_delta`, `chat_tool_call`, `chat_tool_result`, `chat_file_output`, `chat_error`, `chat_finish` | +| `POST /v1/assistant/chat` | `chat_start`, `chat_text_delta`, `chat_error`, `chat_finish` | + +### 6.3 Transport rules + +- `Content-Type: text/event-stream` +- Each event has a `type` field in the JSON payload plus SSE-native `event:` line set to the same value +- Keep-alive events every `config.sseKeepAliveIntervalMs` milliseconds (JSON `{ type: "keepalive" }`) +- Clients abort via `AbortSignal` / closing the connection +- `Last-Event-ID` reconnection: **not supported** in v1; clients start over on reconnect + +--- --- diff --git a/ornn-api/src/domains/assistant/kb/sources.ts b/ornn-api/src/domains/assistant/kb/sources.ts index a4a92b1b..121ab43b 100644 
--- a/ornn-api/src/domains/assistant/kb/sources.ts +++ b/ornn-api/src/domains/assistant/kb/sources.ts @@ -55,11 +55,18 @@ export const KB_SOURCE_MANIFEST: ReadonlyArray = [ headings: ["Product Positioning"], }, { - // External services + skill format + observability pipeline. + // User-relevant architecture only: what Ornn is, the high-level + // external-service overview, and the skill format. The internal infra + // sections (PostHog/telemetry internals, env-var catalogs, internal + // request-header names like X-NyxID-*/X-Ornn-Caller-*, the user + // directory) are EXCLUDED via this allow-list — they're needless + // internal-recon surface for an assistant any authenticated user can + // query (security review #970, finding #1). id: "architecture", title: "Architecture", repoRelPath: "docs/ARCHITECTURE.md", - maxTokens: 2_400, + maxTokens: 1_800, + headings: ["Project Overview", "External Services", "Skill Format"], }, { // The authoritative agent contract: search → pull → execute → build → @@ -71,11 +78,25 @@ export const KB_SOURCE_MANIFEST: ReadonlyArray = [ maxTokens: 5_500, }, { - // Normative /api/v1 contract — envelope, errors, paths, auth. + // User-relevant /api/v1 contract sections only: response/error + // envelope, URL structure, HTTP semantics, query params, SSE. The + // §5 Authentication section carries an INTERNAL transport note + // (`X-NyxID-*` proxy headers, "not part of the public contract"), and + // §7–§12 are deprecation/caching/observability/architecture + // internals — all EXCLUDED via this allow-list so the same internal + // header names the architecture source dropped don't re-enter the + // digest here (security review #970, finding #1). id: "conventions", title: "API Conventions", repoRelPath: "docs/CONVENTIONS.md", maxTokens: 2_600, + headings: [ + "1. Response & error format", + "2. URL structure", + "3. HTTP semantics", + "4. Query parameters", + "6. 
SSE streaming", + ], }, { // Visual spec is mostly irrelevant to Q&A; keep only the opening From 78a3f460899b6fe92fb935a06566b8d14550fc0f Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:27:08 +0800 Subject: [PATCH 024/110] ci: gate assistant KB digest freshness (#970, MAJOR-1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The grounding digest (digest.generated.md) is a committed build artifact distilled from the repo docs. If a source doc or the source manifest changes but the digest isn't regenerated, the assistant silently ships stale grounding. Add an `assistant-kb-freshness` CI job that rebuilds the digest with `bun run build:assistant-kb` and `git diff --exit-code`s the artifact — the build is deterministic (no timestamp/RNG), so a clean tree means the committed digest is in sync. A stale digest fails the PR with a clear "re-run build:assistant-kb and commit" annotation. --- .github/workflows/ci.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 11156bd5..a423505f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,6 +22,26 @@ jobs: - run: bun install --frozen-lockfile - run: bun run typecheck + # Assistant KB freshness (#970, MAJOR-1). The grounding digest + # (digest.generated.md) is a committed build artifact distilled from the + # repo docs by scripts/build-assistant-kb.ts. If a source doc or the + # source manifest changes but the digest isn't regenerated, the assistant + # ships stale grounding. Rebuild it here and fail if the committed copy + # differs — the build is deterministic, so a clean tree means in-sync. 
+ assistant-kb-freshness: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: oven-sh/setup-bun@v2 + - run: bun install --frozen-lockfile + - name: Rebuild assistant KB digest + working-directory: ornn-api + run: bun run build:assistant-kb + - name: Fail if the committed digest is stale + run: | + git diff --exit-code -- ornn-api/src/domains/assistant/kb/digest.generated.md \ + || { echo "::error::assistant KB digest is stale — run 'bun run build:assistant-kb' in ornn-api/ and commit the result"; exit 1; } + test: runs-on: ubuntu-latest steps: From 758eed437774c8fac5b0adda4bc4bdf90116d06c Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:29:12 +0800 Subject: [PATCH 025/110] test(web): dedicated unit test for assistantStreamApi (#970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quality review flagged that the SSE client had only indirect coverage via the hook. Add a focused transport test that mocks fetch + the auth store and exercises the four load-bearing behaviors: - POSTs to /api/v1/assistant/chat with the Bearer header and the {messages, modelId} JSON body - parses chat_* frames off the stream via the real sseParser, in order (incl. a frame split across two chunk boundaries → remainder buffering) - maps a non-OK RFC 7807 problem+json response to a synthetic chat_error carrying detail + code - wires the AbortController so handle.abort() cancels the request and the AbortError path stays silent (no spurious chat_error) The sseParser and event schema are the real modules, so this also guards the parse → validate → dispatch seam end to end. 
--- .../src/services/assistantStreamApi.test.ts | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 ornn-web/src/services/assistantStreamApi.test.ts diff --git a/ornn-web/src/services/assistantStreamApi.test.ts b/ornn-web/src/services/assistantStreamApi.test.ts new file mode 100644 index 00000000..8c5b9c96 --- /dev/null +++ b/ornn-web/src/services/assistantStreamApi.test.ts @@ -0,0 +1,179 @@ +/** + * UT-WEB-ASSISTANT-STREAM-001 — assistantStreamApi transport (#970). + * + * Dedicated unit coverage for the SSE client (previously only exercised + * indirectly through useAssistantChat). Asserts the four load-bearing + * behaviors: + * 1. POSTs to /api/v1/assistant/chat with the Bearer header + the + * {messages, modelId} body. + * 2. Parses chat_* frames off the stream via the real sseParser and + * forwards them in arrival order. + * 3. Maps a non-OK RFC 7807 problem+json response to a synthetic + * chat_error carrying detail + code. + * 4. Wires the AbortController so handle.abort() cancels the request + * and the AbortError path stays silent (no spurious chat_error). + * + * The auth store is mocked (fresh token + Bearer); fetch is stubbed; the + * sseParser + event schema are the real modules. + * + * @module services/assistantStreamApi.test + */ + +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import type { AssistantChatEvent } from "@/types/assistant"; + +vi.mock("@/stores/authStore", () => ({ + useAuthStore: { + getState: () => ({ + ensureFreshToken: async () => {}, + accessToken: "test-token", + }), + }, +})); + +import { streamAssistantChat } from "./assistantStreamApi"; + +/** Build a fake streaming Response whose body yields the given SSE frames. 
*/ +function sseResponse(frames: string[]) { + const enc = new TextEncoder(); + const chunks = frames.map((f) => enc.encode(f)); + let i = 0; + return { + ok: true, + status: 200, + statusText: "OK", + body: { + getReader: () => ({ + read: () => + i < chunks.length + ? Promise.resolve({ done: false, value: chunks[i++] }) + : Promise.resolve({ done: true, value: undefined }), + }), + }, + }; +} + +/** Run a stream to a terminal event, collecting everything onEvent saw. */ +function collect( + params: Parameters[0], +): Promise { + return new Promise((resolve) => { + const events: AssistantChatEvent[] = []; + streamAssistantChat(params, (e) => { + events.push(e); + if (e.type === "chat_finish" || e.type === "chat_error") resolve(events); + }); + }); +} + +const fetchMock = vi.fn(); + +beforeEach(() => { + fetchMock.mockReset(); + vi.stubGlobal("fetch", fetchMock); +}); + +afterEach(() => { + vi.unstubAllGlobals(); +}); + +describe("assistantStreamApi", () => { + it("POSTs to /api/v1/assistant/chat with auth + body, parsing chat_* frames", async () => { + fetchMock.mockResolvedValue( + sseResponse([ + 'data: {"type":"chat_start","model":"gpt-5"}\n\n', + 'data: {"type":"chat_text_delta","delta":"Ornn"}\n\n', + 'data: {"type":"chat_finish","usage":{"totalTokens":5}}\n\n', + ]), + ); + + const events = await collect({ + messages: [{ role: "user", content: "What is Ornn?" }], + modelId: "gpt-5", + }); + + // Request shape. + expect(fetchMock).toHaveBeenCalledTimes(1); + const [url, opts] = fetchMock.mock.calls[0]!; + expect(String(url)).toMatch(/\/api\/v1\/assistant\/chat$/); + expect(opts.method).toBe("POST"); + expect(opts.headers.Authorization).toBe("Bearer test-token"); + expect(opts.headers["Content-Type"]).toBe("application/json"); + expect(opts.headers.Accept).toBe("text/event-stream"); + expect(JSON.parse(opts.body)).toEqual({ + messages: [{ role: "user", content: "What is Ornn?" }], + modelId: "gpt-5", + }); + + // Frames parsed via sseParser, in order. 
+ expect(events.map((e) => e.type)).toEqual([ + "chat_start", + "chat_text_delta", + "chat_finish", + ]); + const delta = events[1]; + expect(delta.type === "chat_text_delta" && delta.delta).toBe("Ornn"); + }); + + it("splits frames that arrive across chunk boundaries", async () => { + // The delta frame is delivered in two reads — sseParser must buffer + // the partial remainder until the terminating blank line lands. + fetchMock.mockResolvedValue( + sseResponse([ + 'data: {"type":"chat_text_delta","del', + 'ta":"Hi"}\n\n', + 'data: {"type":"chat_finish"}\n\n', + ]), + ); + + const events = await collect({ messages: [{ role: "user", content: "hi" }] }); + expect(events.map((e) => e.type)).toEqual(["chat_text_delta", "chat_finish"]); + const delta = events[0]; + expect(delta.type === "chat_text_delta" && delta.delta).toBe("Hi"); + }); + + it("maps a non-OK RFC 7807 response to a synthetic chat_error", async () => { + fetchMock.mockResolvedValue({ + ok: false, + status: 429, + statusText: "Too Many Requests", + text: async () => JSON.stringify({ detail: "Rate limited", code: "rate_limited" }), + }); + + const events = await collect({ messages: [{ role: "user", content: "hi" }] }); + expect(events).toEqual([ + { type: "chat_error", code: "rate_limited", message: "Rate limited" }, + ]); + }); + + it("honors AbortController cancel and stays silent on AbortError", async () => { + let capturedSignal: AbortSignal | undefined; + fetchMock.mockImplementation((_url: string, opts: { signal: AbortSignal }) => { + capturedSignal = opts.signal; + return new Promise((_resolve, reject) => { + opts.signal.addEventListener("abort", () => { + const err = new Error("aborted"); + err.name = "AbortError"; + reject(err); + }); + }); + }); + + const onEvent = vi.fn(); + const handle = streamAssistantChat( + { messages: [{ role: "user", content: "hi" }] }, + onEvent, + ); + + // Let getAuthHeaders + fetch run so the signal is captured. 
+ await new Promise((r) => setTimeout(r, 0)); + expect(capturedSignal?.aborted).toBe(false); + + handle.abort(); + await new Promise((r) => setTimeout(r, 0)); + + expect(capturedSignal?.aborted).toBe(true); + // AbortError must not surface as a chat_error. + expect(onEvent).not.toHaveBeenCalled(); + }); +}); From 7862409ec51e60f55ae904e6ff80dbb1362b1f13 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:35:36 +0800 Subject: [PATCH 026/110] feat(web): add assistant sign-in + mascot greeting i18n keys (#976) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the copy the redesigned Ornn Assistant needs (en + zh): - assistant.signIn.{title,body,cta,dismiss} — the inline prompt shown when a signed-out visitor tries to send (the chat backend is authed-only, so we gate the send rather than 401). - assistant.greeting — the mascot's friendly empty-state hello. - assistant.mascotAlt — alt text for the mascot in non-decorative spots. Tone matches the existing "Repo-aware answers" assistant voice. --- ornn-web/src/i18n/en.json | 8 ++++++++ ornn-web/src/i18n/zh.json | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/ornn-web/src/i18n/en.json b/ornn-web/src/i18n/en.json index b13b93de..ef53b4e2 100644 --- a/ornn-web/src/i18n/en.json +++ b/ornn-web/src/i18n/en.json @@ -695,6 +695,8 @@ "close": "Close assistant", "clear": "Clear conversation", "thinking": "Thinking…", + "mascotAlt": "Ornn mascot", + "greeting": "Hi, I'm Ornn 👋", "empty": { "title": "Ask Ornn anything", "subtitle": "Repo-aware answers about what Ornn is, how it works, and which skills fit your agent.", @@ -704,6 +706,12 @@ "what": "What is Ornn?", "different": "How is Ornn different?", "findSkill": "Find a skill that does X" + }, + "signIn": { + "title": "Sign in to chat with Ornn", + "body": "Ornn's repo-aware answers are for signed-in builders. 
Sign in to start the conversation.", + "cta": "Sign in", + "dismiss": "Maybe later" } }, "chatInput": { diff --git a/ornn-web/src/i18n/zh.json b/ornn-web/src/i18n/zh.json index cffddeac..9b5ecf5c 100644 --- a/ornn-web/src/i18n/zh.json +++ b/ornn-web/src/i18n/zh.json @@ -695,6 +695,8 @@ "close": "关闭助手", "clear": "清空对话", "thinking": "思考中……", + "mascotAlt": "Ornn 吉祥物", + "greeting": "你好,我是 Ornn 👋", "empty": { "title": "向 Ornn 提问", "subtitle": "基于代码库回答关于 Ornn 是什么、如何运作,以及哪些技能适合你的智能体。", @@ -704,6 +706,12 @@ "what": "Ornn 是什么?", "different": "Ornn 有何不同?", "findSkill": "找一个能做某事的技能" + }, + "signIn": { + "title": "登录后与 Ornn 对话", + "body": "Ornn 基于代码库的回答仅向已登录的开发者开放。登录即可开始对话。", + "cta": "登录", + "dismiss": "以后再说" } }, "chatInput": { From 3d9abd4dd98375825b66e9b25ea33d0c1630c1d2 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:35:36 +0800 Subject: [PATCH 027/110] =?UTF-8?q?feat(web):=20redesign=20AssistantWidget?= =?UTF-8?q?=20=E2=80=94=20mascot=20launcher,=20anon=20gate,=20auto-open=20?= =?UTF-8?q?(#976)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Substantial rewrite of the Ornn Assistant widget (UX redesign, #976). Backend/data layer untouched. - Mascot launcher: the floating pill is replaced by the Ornn mascot (src/assets/ornn-mascot.webp, the same character as the landing hero video) — ~88px, ambient ember/arc glow, motion-safe idle bob, press-down on tap, and an "Ask Ornn" hover/focus speech bubble. 
It is a real + + + ); } @@ -390,15 +702,6 @@ function IconButton({ // Icons (inline — no external icon dependency) // --------------------------------------------------------------------------- -function SparkIcon({ className }: { className?: string }) { - return ( - - ); -} - function CloseIcon({ className }: { className?: string }) { return ( Date: Tue, 9 Jun 2026 16:35:36 +0800 Subject: [PATCH 028/110] feat(web): mount AssistantWidget globally for anonymous + landing visitors (#976) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moves the assistant mount from RootLayout (authed app shell only) to AnalyticsRoot in App.tsx, so the mascot launcher floats over EVERY page — including the landing page (which owns its own layout, not RootLayout) and for anonymous visitors. AnalyticsRoot lives inside the router, so it suppresses the widget on the auth-handshake routes via useLocation (`/login`, `/oauth/*`) where a floating chatbot would be noise. RootLayout drops the import + `{isAuthenticated && }`; useIsAuthenticated stays (still gates the QuotaChip). 
--- ornn-web/src/App.tsx | 10 ++++++++++ ornn-web/src/components/layout/RootLayout.tsx | 4 ---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ornn-web/src/App.tsx b/ornn-web/src/App.tsx index d8deb060..46bde03e 100644 --- a/ornn-web/src/App.tsx +++ b/ornn-web/src/App.tsx @@ -21,6 +21,7 @@ import { Outlet, Route, RouterProvider, + useLocation, } from "react-router-dom"; import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; import { RootLayout } from "@/components/layout/RootLayout"; @@ -33,14 +34,22 @@ import { HighlighterMarkFilter } from "@/pages/landing/HighlighterMark"; import { VersionUpdateBanner } from "@/components/layout/VersionUpdateBanner"; import { PostHogProvider } from "@/components/analytics/PostHogProvider"; import { CookieConsentBanner } from "@/components/analytics/CookieConsentBanner"; +import { AssistantWidget } from "@/components/assistant/AssistantWidget"; /** * Top-level wrapper rendered as the root route's element. Lives INSIDE * the router tree so child analytics hooks (`useLocation`) work, and * renders the consent banner above every page. PostHogProvider has no * DOM output — it just wires init / identify / pageview tracking. + * + * The Ornn Assistant mounts here (not RootLayout) so its mascot launcher + * floats over EVERY page — including the landing page and for anonymous + * visitors (#976). Suppressed only on the auth handshake routes + * (`/login`, `/oauth/*`) where a floating chatbot would be noise. */ function AnalyticsRoot() { + const { pathname } = useLocation(); + const hideAssistant = pathname === "/login" || pathname.startsWith("/oauth"); return ( <> @@ -48,6 +57,7 @@ function AnalyticsRoot() { {/* Global announcement surface — top-right headline pill on every page. 
*/} + {!hideAssistant && } ); } diff --git a/ornn-web/src/components/layout/RootLayout.tsx b/ornn-web/src/components/layout/RootLayout.tsx index dbe58f3f..a9c0078a 100644 --- a/ornn-web/src/components/layout/RootLayout.tsx +++ b/ornn-web/src/components/layout/RootLayout.tsx @@ -3,7 +3,6 @@ import { useTranslation } from "react-i18next"; import { Navbar } from "./Navbar"; import { ToastContainer } from "@/components/ui/Toast"; import { QuotaChip } from "@/components/quota/QuotaChip"; -import { AssistantWidget } from "@/components/assistant/AssistantWidget"; import { useIsAuthenticated } from "@/stores/authStore"; import { useSkill } from "@/hooks/useSkills"; @@ -174,9 +173,6 @@ export function RootLayout() { - {/* Ornn Assistant — authed-only floating chatbot (#970). Renders its - own launcher + slide-in panel via a portal; self-guards on auth. */} - {isAuthenticated && } ); } From ae666a76e59f089f40307a21cf0205aee72745ad Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:38:04 +0800 Subject: [PATCH 029/110] docs: changeset for Ornn Assistant UX redesign (#976) --- .changeset/assistant-mascot-ux.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/assistant-mascot-ux.md diff --git a/.changeset/assistant-mascot-ux.md b/.changeset/assistant-mascot-ux.md new file mode 100644 index 00000000..d146e336 --- /dev/null +++ b/.changeset/assistant-mascot-ux.md @@ -0,0 +1,5 @@ +--- +"ornn-web": minor +--- + +Ornn Assistant is now a branded, always-available presence: the widget appears for anonymous visitors (including the landing page) as a draggable, animated Ornn-mascot launcher, auto-expands once on a first visit, and prompts sign-in when a logged-out visitor tries to send. Backend remains authenticated-only. 
From 39e29a042eac5f70c304babcac0ef5ffca658241 Mon Sep 17 00:00:00 2001 From: Shining <250120269+chronoai-shining@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:45:37 +0800 Subject: [PATCH 030/110] fix(web): tone down assistant launcher glow for DESIGN.md compliance (#976) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-merge design-review fix. The launcher's ambient halo violated three Forge Workshop rules: - Dropped the arc-blue (`--color-arc-soft`) gradient stop — arc-blue as a decorative gradient wash is explicitly forbidden (DESIGN.md L54). The aura is now EMBER-ONLY via the sanctioned `--color-ember-glow` accent token. - Removed the perpetual `animate-pulse` — an always-on blurred halo on every page is the omnipresent-bloom anti-pattern. The glow is now static at rest and only intensifies on hover / focus-visible / active (group-state opacity), so it signals interaction, not baseline. - Trimmed intensity for restraint: rest opacity 0.70→0.25, blur-xl→ blur-lg, tighter inset — a quiet warm aura at rest. prefers-reduced-motion still honored (opacity transition is motion-safe; the state change itself still applies instantly under reduced motion). typecheck:web + lint + test:web all green. --- ornn-web/src/components/assistant/AssistantWidget.tsx | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ornn-web/src/components/assistant/AssistantWidget.tsx b/ornn-web/src/components/assistant/AssistantWidget.tsx index 869308b4..dfa15af7 100644 --- a/ornn-web/src/components/assistant/AssistantWidget.tsx +++ b/ornn-web/src/components/assistant/AssistantWidget.tsx @@ -232,15 +232,18 @@ function AssistantLauncher({ isOpen, onOpen }: { isOpen: boolean; onOpen: () => exit={reduceMotion ? 
{ opacity: 0 } : { opacity: 0, scale: 0.6 }} transition={{ type: "spring", stiffness: 320, damping: 24, mass: 0.7 }} style={{ x, y, width: LAUNCHER_W, height: LAUNCHER_H }} - className="pointer-events-auto absolute left-0 top-0 flex cursor-grab touch-none items-end justify-center rounded-2xl focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-2 focus-visible:ring-offset-page" + className="group pointer-events-auto absolute left-0 top-0 flex cursor-grab touch-none items-end justify-center rounded-2xl focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-2 focus-visible:ring-offset-page" > - {/* Ambient ember/arc glow behind the mascot — accent halo only. */} + {/* Quiet ember aura behind the mascot — ember-only, static at + rest, and only intensifying on hover/focus/active so the + glow signals interaction rather than baseline bloom. No + perpetual pulse, no arc-blue wash (docs/DESIGN.md). */}
@@ -168,6 +170,9 @@ export function ProviderModelsDrawer({ Skill-Gen + Assistant +
+ Archived (no longer in upstream catalog)
+
+ + onPatch({ enabledForAssistant: !model.enabledForAssistant }) + } + /> + onPatch({ defaultForAssistant: true })} + /> +
+