From a328a17df67b5a90216a7b0a61f6e4fa201a9e83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raahul=20Dutta=20-=20=E0=A6=B0=E0=A6=BE=E0=A6=B9=E0=A7=81?= =?UTF-8?q?=E0=A6=B2=20=F0=9F=96=96?= Date: Mon, 20 Apr 2026 14:16:26 +0200 Subject: [PATCH 1/8] refactor(gateway): extract frontmatter parser to shared util The agent loader was importing splitFrontmatter from the skill module and duplicating parseYaml/parseScalar locally. The skill module is scheduled for rename to "recipe" (progressive-disclosure playbooks, opencode pattern), so the shared pieces move to a util both loaders can depend on without the agent module reaching into the skill namespace. No behavior change. agent/index.ts shrinks by 51 lines; same tests pass (154/154). Co-Authored-By: Claude Opus 4.7 (1M context) --- gateway/src/_shared/util/frontmatter.ts | 79 +++++++++++++++++++++++++ gateway/src/agent/index.ts | 52 +--------------- 2 files changed, 80 insertions(+), 51 deletions(-) create mode 100644 gateway/src/_shared/util/frontmatter.ts diff --git a/gateway/src/_shared/util/frontmatter.ts b/gateway/src/_shared/util/frontmatter.ts new file mode 100644 index 00000000..9eab5f84 --- /dev/null +++ b/gateway/src/_shared/util/frontmatter.ts @@ -0,0 +1,79 @@ +/** + * Minimal YAML frontmatter parser for gateway markdown configs. + * + * Used by the agent loader and (Phase 1+) the recipe loader. Extracted into a + * shared util so the two features can evolve independently — previously the + * agent module reached into the skill module just to borrow `splitFrontmatter`, + * and the skill module was scheduled for rename/removal. + * + * Scope: YAML features the gateway's frontmatter blocks actually use. No + * anchors, no multi-line block scalars, no flow mappings. Keep it small; if + * authors need richer YAML, pull in `js-yaml` explicitly. 
+ * + * - `key: value` + * - nested maps (indentation-based) + * - inline arrays: `tags: [a, b, "c"]` + * - scalars: string (bare or quoted), number, boolean, null + */ + +/** Extract YAML frontmatter from a markdown string. Returns `{ frontmatter, body }`; `frontmatter` is null when no frontmatter block is found. */ +export function splitFrontmatter(raw: string): { frontmatter: string | null; body: string } { + if (!raw.startsWith("---")) return { frontmatter: null, body: raw } + const end = raw.indexOf("\n---", 3) + if (end === -1) return { frontmatter: null, body: raw } + const frontmatter = raw.slice(3, end).replace(/^\r?\n/, "") + const bodyStart = raw.indexOf("\n", end + 4) + const body = bodyStart >= 0 ? raw.slice(bodyStart + 1) : "" + return { frontmatter, body } +} + +/** Minimal YAML parser (key: value + nested + arrays in inline `[a, b]` form). */ +export function parseYaml(src: string): Record { + const out: Record = {} + const stack: { indent: number; obj: Record }[] = [{ indent: -1, obj: out }] + + for (const rawLine of src.split(/\r?\n/)) { + const line = rawLine.replace(/\s+$/, "") + if (!line.trim() || line.trim().startsWith("#")) continue + const indent = line.length - line.trimStart().length + while (stack.length > 1 && indent <= stack[stack.length - 1].indent) stack.pop() + const current = stack[stack.length - 1].obj + + const m = line.trim().match(/^([A-Za-z0-9_\-]+):\s*(.*)$/) + if (!m) continue + const key = m[1] + const rest = m[2] + + if (rest === "") { + const child: Record = {} + current[key] = child + stack.push({ indent, obj: child }) + continue + } + + current[key] = parseScalar(rest) + } + + return out +} + +export function parseScalar(s: string): unknown { + const t = s.trim() + if (t === "true") return true + if (t === "false") return false + if (t === "null" || t === "~") return null + if (/^-?\d+$/.test(t)) return Number(t) + if (/^-?\d+\.\d+$/.test(t)) return Number(t) + if (t.startsWith("[") && t.endsWith("]")) { + const inner = t.slice(1, -1).trim() + if (!inner) return [] + return inner 
+ .split(",") + .map((x) => x.trim()) + .map((x) => (x.startsWith('"') || x.startsWith("'") ? x.slice(1, -1) : x)) + } + if ((t.startsWith('"') && t.endsWith('"')) || (t.startsWith("'") && t.endsWith("'"))) { + return t.slice(1, -1) + } + return t +} diff --git a/gateway/src/agent/index.ts b/gateway/src/agent/index.ts index b2ffafee..904b8c0d 100644 --- a/gateway/src/agent/index.ts +++ b/gateway/src/agent/index.ts @@ -2,7 +2,7 @@ import { Context, Effect, Layer } from "effect" import { z } from "zod" import { readdirSync, readFileSync, existsSync, statSync } from "fs" import { resolve, basename } from "path" -import { splitFrontmatter } from "../skill" +import { splitFrontmatter, parseYaml } from "../_shared/util/frontmatter" import { Ruleset, fromConfig as permFromConfig } from "../permission" import { Service as ConfigService } from "../config" @@ -64,56 +64,6 @@ export const Info = z.object({ }) export type Info = z.infer -/** Parse a YAML frontmatter block value (shared with skill module). 
*/ -function parseYaml(src: string): Record { - const out: Record = {} - const stack: { indent: number; obj: Record }[] = [{ indent: -1, obj: out }] - - for (const rawLine of src.split(/\r?\n/)) { - const line = rawLine.replace(/\s+$/, "") - if (!line.trim() || line.trim().startsWith("#")) continue - const indent = line.length - line.trimStart().length - while (stack.length > 1 && indent <= stack[stack.length - 1].indent) stack.pop() - const current = stack[stack.length - 1].obj - - const m = line.trim().match(/^([A-Za-z0-9_\-]+):\s*(.*)$/) - if (!m) continue - const key = m[1] - const rest = m[2] - - if (rest === "") { - const child: Record = {} - current[key] = child - stack.push({ indent, obj: child }) - continue - } - - current[key] = parseScalar(rest) - } - return out -} - -function parseScalar(s: string): unknown { - const t = s.trim() - if (t === "true") return true - if (t === "false") return false - if (t === "null" || t === "~") return null - if (/^-?\d+$/.test(t)) return Number(t) - if (/^-?\d+\.\d+$/.test(t)) return Number(t) - if (t.startsWith("[") && t.endsWith("]")) { - const inner = t.slice(1, -1).trim() - if (!inner) return [] - return inner - .split(",") - .map((x) => x.trim()) - .map((x) => (x.startsWith('"') || x.startsWith("'") ? x.slice(1, -1) : x)) - } - if ((t.startsWith('"') && t.endsWith('"')) || (t.startsWith("'") && t.endsWith("'"))) { - return t.slice(1, -1) - } - return t -} - export function parseAgentFile(path: string, raw: string): Info { const { frontmatter, body } = splitFrontmatter(raw) const fm = frontmatter ? 
parseYaml(frontmatter) : {} From bb1f5b08244ef497df083a3f7395773ac6e05f8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raahul=20Dutta=20-=20=E0=A6=B0=E0=A6=BE=E0=A6=B9=E0=A7=81?= =?UTF-8?q?=E0=A6=B2=20=F0=9F=96=96?= Date: Mon, 20 Apr 2026 14:18:55 +0200 Subject: [PATCH 2/8] feat(gateway): add Recipe service for progressive-disclosure playbooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recipes are markdown playbooks the planner can lazy-load when a task matches — metadata (name + description) sits in the system prompt, the body only loads when the planner calls load_recipe. Pattern borrowed from opencode's skill module, renamed because "skill" is already taken in the gateway for A2A SkillRequest (agent capabilities exposed via /plan). Supports two layouts: flat recipes/foo.md and bundled recipes/bar/RECIPE.md with sibling scripts/, reference/, etc. files the tool (Phase 4) will surface to the planner. Duplicate names throw at load time — silent precedence would make it ambiguous which body loads. Permission filtering via the existing Ruleset evaluator; default action is "allow" so agents without explicit recipe rules see everything. Nothing consumes this service yet — Phase 4 wires in the load_recipe tool and Phase 5 injects the fmt(list, { verbose: true }) block into the system prompt. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- gateway/src/recipe/index.ts | 236 ++++++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 gateway/src/recipe/index.ts diff --git a/gateway/src/recipe/index.ts b/gateway/src/recipe/index.ts new file mode 100644 index 00000000..07019198 --- /dev/null +++ b/gateway/src/recipe/index.ts @@ -0,0 +1,236 @@ +import { Context, Effect, Layer } from "effect" +import { readFileSync, readdirSync, statSync, existsSync } from "fs" +import { resolve, basename, dirname } from "path" +import { pathToFileURL } from "url" +import { z } from "zod" +import { splitFrontmatter, parseYaml } from "../_shared/util/frontmatter" +import { evaluate as permEvaluate, type Ruleset } from "../permission" +import type { Info as AgentInfo } from "../agent" + +/** + * Recipe loader for the gateway. + * + * Recipes are markdown playbooks the planner can lazy-load when a task + * matches. Inspired by opencode's skill system (progressive disclosure: + * metadata in context always, body only on demand). Renamed to `recipe` + * because `skill` is already taken in the gateway for A2A SkillRequest + * (an agent capability exposed via /plan). + * + * Directory layout — two supported shapes: + * + * gateway/recipes/foo.md flat recipe, no bundled files + * gateway/recipes/bar/RECIPE.md bundled recipe; siblings like + * gateway/recipes/bar/scripts/check.sh scripts/, reference/, etc. are + * gateway/recipes/bar/reference/notes.md surfaced to the planner when + * the recipe loads + * + * Frontmatter: + * + * --- + * name: bar # required; falls back to file/dir stem + * description: One-line summary # required (non-empty) + * tags: [research, orchestration] # optional + * triggers: [research, lookup] # optional planner hints + * --- + * # Markdown body — the playbook itself. 
+ */ + +export const Info = z.object({ + name: z.string().min(1), + description: z.string().min(1), + tags: z.array(z.string()).default([]), + triggers: z.array(z.string()).default([]), + /** Absolute path to the recipe's markdown file. */ + location: z.string(), + /** Full markdown body (after frontmatter). */ + content: z.string(), +}) +export type Info = z.infer + +/** Parse a single recipe file. `fallbackName` is used if frontmatter omits `name`. */ +export function parseRecipeFile(path: string, raw: string, fallbackName: string): Info { + const { frontmatter, body } = splitFrontmatter(raw) + const fm = frontmatter ? parseYaml(frontmatter) : {} + + const candidate = { + name: (fm.name as string | undefined) ?? fallbackName, + description: (fm.description as string | undefined) ?? "", + tags: Array.isArray(fm.tags) ? (fm.tags as string[]) : [], + triggers: Array.isArray(fm.triggers) ? (fm.triggers as string[]) : [], + location: path, + content: body.trim(), + } + + const result = Info.safeParse(candidate) + if (!result.success) { + throw new Error(`recipe: invalid frontmatter in ${path}: ${result.error.message}`) + } + return result.data +} + +/** + * Scan one directory for recipes. + * + * Two patterns discovered: + * 1. Any `*.md` at the top level → flat recipe (name defaults to filename stem). + * 2. Any `/RECIPE.md` one level deep → bundled recipe (name defaults to + * the containing directory name). + * + * Case-insensitive on the `.md` extension. Symlinks are not followed — gateway + * recipes are expected to be real files in the repo. + * + * Throws if two recipes share the same `name` — that's a config error, not a + * warning; resolving silently would make it ambiguous which body gets loaded. 
+ */ +export function loadRecipesDir(dir: string): Info[] { + if (!existsSync(dir)) return [] + const s = statSync(dir) + if (!s.isDirectory()) return [] + + const out: Info[] = [] + const seen = new Map() + const addIfNew = (info: Info) => { + const prior = seen.get(info.name) + if (prior) { + throw new Error( + `recipe: duplicate name "${info.name}" — defined in both ${prior} and ${info.location}`, + ) + } + seen.set(info.name, info.location) + out.push(info) + } + + for (const e of readdirSync(dir, { withFileTypes: true })) { + const p = resolve(dir, e.name) + + // Flat pattern: `.md` + if (e.isFile() && e.name.toLowerCase().endsWith(".md")) { + const raw = readFileSync(p, "utf8") + addIfNew(parseRecipeFile(p, raw, basename(e.name).replace(/\.md$/i, ""))) + continue + } + + // Bundled pattern: `/RECIPE.md` + if (e.isDirectory()) { + const nested = resolve(p, "RECIPE.md") + if (existsSync(nested) && statSync(nested).isFile()) { + const raw = readFileSync(nested, "utf8") + addIfNew(parseRecipeFile(nested, raw, e.name)) + } + } + } + + return out.sort((a, b) => a.name.localeCompare(b.name)) +} + +export interface Interface { + /** All recipes, sorted by name. */ + readonly list: () => Effect.Effect + /** Single recipe by name. Undefined if not found. */ + readonly get: (name: string) => Effect.Effect + /** + * Recipes an agent is permitted to load. If `agent` is omitted, returns + * the full list unfiltered (planner startup path, before we know the + * agent). With an agent, rules keyed `permission: "recipe"` in the + * agent's ruleset filter out anything resolving to `deny`. + */ + readonly available: (agent?: AgentInfo) => Effect.Effect + /** Distinct parent directories of discovered recipes. Used by the tool + * to enumerate bundled files. */ + readonly dirs: () => Effect.Effect +} + +export class Service extends Context.Service()("@bindu/Recipe") {} + +export interface LayerOptions { + /** Directories to scan. Default: `$CWD/recipes`, `$CWD/gateway/recipes`. 
*/ + directories?: string[] +} + +export function layer(options: LayerOptions = {}): Layer.Layer { + return Layer.effect( + Service, + Effect.gen(function* () { + const dirs = options.directories ?? [ + resolve(process.cwd(), "recipes"), + resolve(process.cwd(), "gateway", "recipes"), + ] + + const recipes: Info[] = yield* Effect.try({ + try: () => { + const all: Info[] = [] + for (const d of dirs) all.push(...loadRecipesDir(d)) + // loadRecipesDir catches dupes within one dir; also catch dupes + // that span multiple scan roots (e.g. $CWD/recipes and + // gateway/recipes both defining `foo`). + const byName = new Map() + for (const r of all) { + const prior = byName.get(r.name) + if (prior) { + throw new Error( + `recipe: duplicate name "${r.name}" across scan roots — ${prior.location} and ${r.location}`, + ) + } + byName.set(r.name, r) + } + return Array.from(byName.values()).sort((a, b) => a.name.localeCompare(b.name)) + }, + catch: (e) => (e instanceof Error ? e : new Error(String(e))), + }) + + const byName = new Map(recipes.map((r) => [r.name, r])) + const distinctDirs = Array.from(new Set(recipes.map((r) => dirname(r.location)))) + + return Service.of({ + list: () => Effect.succeed(recipes.slice()), + get: (name) => Effect.succeed(byName.get(name)), + dirs: () => Effect.succeed(distinctDirs.slice()), + available: (agent?: AgentInfo) => + Effect.sync(() => { + if (!agent) return recipes.slice() + const rs: Ruleset = agent.permission + return recipes.filter( + (r) => + permEvaluate(rs, { permission: "recipe", target: r.name, defaultAction: "allow" }) !== + "deny", + ) + }), + }) + }), + ) +} + +/** Default layer using default scan directories. */ +export const defaultLayer = layer() + +/** + * Render a list of recipes for the planner. + * + * `verbose: true` → XML block suitable for the system prompt. Includes + * name, description, location (file:// URL), and tags. 
+ * `verbose: false` → markdown bullet list suitable for the `load_recipe` + * tool description (shorter, no locations). + */ +export function fmt(list: Info[], opts: { verbose: boolean }): string { + if (list.length === 0) return "No recipes are currently available." + + if (opts.verbose) { + return [ + "", + ...list.flatMap((r) => [ + " ", + ` ${r.name}`, + ` ${r.description}`, + ` ${pathToFileURL(r.location).href}`, + ...(r.tags.length > 0 ? [` ${r.tags.join(", ")}`] : []), + " ", + ]), + "", + ].join("\n") + } + + return [ + "## Available Recipes", + ...list.map((r) => `- **${r.name}**: ${r.description}`), + ].join("\n") +} From 35dbf2170b28341f1aa69b23824cb0419796ff7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raahul=20Dutta=20-=20=E0=A6=B0=E0=A6=BE=E0=A6=B9=E0=A7=81?= =?UTF-8?q?=E0=A6=B2=20=F0=9F=96=96?= Date: Mon, 20 Apr 2026 14:20:39 +0200 Subject: [PATCH 3/8] refactor(gateway): remove unused Skill service, wire Recipe in its place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old Skill service (src/skill/index.ts) was a working opencode-style markdown loader registered in the app layer but never read by any consumer — the planner operates on A2A SkillRequest objects from the /plan body, not markdown files. Phase 1 moved its only cross-module use (splitFrontmatter, borrowed by the agent loader) into a shared util, so the module is now fully orphaned. Deleted src/skill/. Swapped Skill.defaultLayer → Recipe.defaultLayer in the Level-1 layer merge. Typecheck clean, 154/154 tests pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- gateway/src/index.ts | 4 +- gateway/src/skill/index.ts | 207 ------------------------------------- 2 files changed, 2 insertions(+), 209 deletions(-) delete mode 100644 gateway/src/skill/index.ts diff --git a/gateway/src/index.ts b/gateway/src/index.ts index 53db82c2..3fcbe438 100644 --- a/gateway/src/index.ts +++ b/gateway/src/index.ts @@ -8,7 +8,7 @@ import * as Auth from "./auth" import * as DB from "./db" import * as Permission from "./permission" import * as Provider from "./provider" -import * as Skill from "./skill" +import * as Recipe from "./recipe" import * as Agent from "./agent" import * as Session from "./session" import * as SessionCompaction from "./session/compaction" @@ -64,7 +64,7 @@ function buildAppLayer( Permission.layer, ToolRegistry.layer, BinduClient.makeLayer(identity, tokenProvider), - Skill.defaultLayer, + Recipe.defaultLayer, ) // Level 2 — need Config (implicitly resolved by provideMerge) diff --git a/gateway/src/skill/index.ts b/gateway/src/skill/index.ts deleted file mode 100644 index 4721c9fe..00000000 --- a/gateway/src/skill/index.ts +++ /dev/null @@ -1,207 +0,0 @@ -import { Context, Effect, Layer } from "effect" -import { readFileSync, readdirSync, statSync, existsSync } from "fs" -import { resolve, basename } from "path" -import { z } from "zod" - -/** - * Skill loader for the gateway. - * - * Pattern borrowed from OpenCode's skill module but narrower: we just scan - * one skills directory (or a list of them), read `*.md` files with YAML - * frontmatter, and hand back a typed Info record. No discovery over XDG - * paths, no HTTP fetches, no plugin hooks — the gateway's skill library is - * small and static at boot. 
- * - * Directory layout expected: - * gateway/skills/research-summary.md - * gateway/skills/draft-review.md - * - * Frontmatter fields: - * --- - * name: research-summary (required; falls back to filename stem) - * description: One-line summary (required) - * tags: [research, summary] (optional) - * bindu: (optional, for Phase 2+ expose) - * expose: true - * inputModes: [text/plain] - * outputModes: [application/json] - * --- - * # Markdown body... - */ - -export const BinduExposeFrontmatter = z.object({ - expose: z.boolean().default(false), - inputModes: z.array(z.string()).optional(), - outputModes: z.array(z.string()).optional(), - tags: z.array(z.string()).optional(), - assessment: z - .object({ - keywords: z.array(z.string()).optional(), - antiPatterns: z.array(z.string()).optional(), - specializations: z.array(z.string()).optional(), - }) - .partial() - .optional(), -}) - -export const Info = z.object({ - name: z.string(), - description: z.string(), - tags: z.array(z.string()).default([]), - location: z.string(), - content: z.string(), - bindu: BinduExposeFrontmatter.optional(), -}) -export type Info = z.infer - -/** Extract YAML frontmatter from a markdown string. Returns [frontmatter, body]. */ -export function splitFrontmatter(raw: string): { frontmatter: string | null; body: string } { - if (!raw.startsWith("---")) return { frontmatter: null, body: raw } - const end = raw.indexOf("\n---", 3) - if (end === -1) return { frontmatter: null, body: raw } - const frontmatter = raw.slice(3, end).replace(/^\r?\n/, "") - const bodyStart = raw.indexOf("\n", end + 4) - const body = bodyStart >= 0 ? raw.slice(bodyStart + 1) : "" - return { frontmatter, body } -} - -/** Minimal YAML parser (key: value + nested + arrays in inline `[a, b]` form). 
*/ -function parseYaml(src: string): Record { - const out: Record = {} - const stack: { indent: number; obj: Record }[] = [{ indent: -1, obj: out }] - - for (const rawLine of src.split(/\r?\n/)) { - const line = rawLine.replace(/\s+$/, "") - if (!line.trim() || line.trim().startsWith("#")) continue - const indent = line.length - line.trimStart().length - while (stack.length > 1 && indent <= stack[stack.length - 1].indent) stack.pop() - const current = stack[stack.length - 1].obj - - const m = line.trim().match(/^([A-Za-z0-9_\-]+):\s*(.*)$/) - if (!m) continue - const key = m[1] - const rest = m[2] - - if (rest === "") { - const child: Record = {} - current[key] = child - stack.push({ indent, obj: child }) - continue - } - - current[key] = parseScalar(rest) - } - - return out -} - -function parseScalar(s: string): unknown { - const t = s.trim() - if (t === "true") return true - if (t === "false") return false - if (t === "null" || t === "~") return null - if (/^-?\d+$/.test(t)) return Number(t) - if (/^-?\d+\.\d+$/.test(t)) return Number(t) - if (t.startsWith("[") && t.endsWith("]")) { - const inner = t.slice(1, -1).trim() - if (!inner) return [] - return inner - .split(",") - .map((x) => x.trim()) - .map((x) => (x.startsWith('"') || x.startsWith("'") ? x.slice(1, -1) : x)) - } - if ((t.startsWith('"') && t.endsWith('"')) || (t.startsWith("'") && t.endsWith("'"))) { - return t.slice(1, -1) - } - return t -} - -export function parseSkillFile(path: string, raw: string): Info { - const { frontmatter, body } = splitFrontmatter(raw) - const fm = frontmatter ? parseYaml(frontmatter) : {} - const nameFromFile = basename(path).replace(/\.md$/i, "") - - const candidate = { - name: (fm.name as string | undefined) ?? nameFromFile, - description: (fm.description as string | undefined) ?? "", - tags: Array.isArray(fm.tags) ? 
(fm.tags as string[]) : [], - location: path, - content: body.trim(), - bindu: fm.bindu, - } - - const result = Info.safeParse(candidate) - if (!result.success) { - throw new Error(`skill: invalid frontmatter in ${path}: ${result.error.message}`) - } - return result.data -} - -export function loadSkillsDir(dir: string): Info[] { - if (!existsSync(dir)) return [] - const entries = readdirSync(dir, { withFileTypes: true }) - const out: Info[] = [] - for (const e of entries) { - const p = resolve(dir, e.name) - if (e.isDirectory()) { - out.push(...loadSkillsDir(p)) - continue - } - if (!e.isFile() || !e.name.toLowerCase().endsWith(".md")) continue - const raw = readFileSync(p, "utf8") - try { - out.push(parseSkillFile(p, raw)) - } catch (err) { - throw new Error(`skill: failed to parse ${p}: ${(err as Error).message}`) - } - } - return out -} - -export interface Interface { - readonly list: () => Effect.Effect - readonly get: (name: string) => Effect.Effect -} - -export class Service extends Context.Service()("@bindu/Skill") {} - -export interface LayerOptions { - /** Directories to scan. Default: `$CWD/skills`, `$CWD/gateway/skills`. */ - directories?: string[] -} - -export function layer(options: LayerOptions = {}): Layer.Layer { - return Layer.effect( - Service, - Effect.gen(function* () { - const dirs = options.directories ?? [ - resolve(process.cwd(), "skills"), - resolve(process.cwd(), "gateway", "skills"), - ] - - const skills: Info[] = yield* Effect.try({ - try: () => { - const all: Info[] = [] - for (const d of dirs) { - if (!existsSync(d)) continue - const s = statSync(d) - if (!s.isDirectory()) continue - all.push(...loadSkillsDir(d)) - } - return all - }, - catch: (e) => (e instanceof Error ? 
e : new Error(String(e))), - }) - - const byName = new Map(skills.map((s) => [s.name, s])) - - return Service.of({ - list: () => Effect.succeed(skills.slice()), - get: (name) => Effect.succeed(byName.get(name)), - }) - }), - ) -} - -/** Default layer using default scan directories. */ -export const defaultLayer = layer() From afeeb8a25483fde52fdb7cb58b9d13ffa7e97242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raahul=20Dutta=20-=20=E0=A6=B0=E0=A6=BE=E0=A6=B9=E0=A7=81?= =?UTF-8?q?=E0=A6=B2=20=F0=9F=96=96?= Date: Mon, 20 Apr 2026 14:23:43 +0200 Subject: [PATCH 4/8] feat(gateway): add load_recipe tool for progressive-disclosure playbooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single tool the planner calls to pull a recipe's full body into context. Metadata (name + description) stays in the system prompt via Recipe.fmt; this tool is the lazy-load side of the disclosure. Design choices vs opencode's SkillTool: - Plain fs for bundled-file enumeration (no ripgrep dep). Two-level recursive walk, capped at 10 entries — enough for scripts/ and reference/ subdirs, shallow enough to avoid accidental node_modules inclusion. - Bundled-file scan only runs for nested recipes (recipes/foo/RECIPE.md); flat recipes (recipes/foo.md) would otherwise surface OTHER recipes as siblings. - ctx.ask is optional on ToolContext today (session/prompt.ts wrapTool doesn't set it), so the permission gate is a no-op. Kept the call site so a Phase-2 permission UI inherits recipe gating with zero code change. - Dynamic description is computed from the permission-filtered available list passed in by the planner — the LLM only sees recipes it's allowed to load. Not wired into the planner yet — that's Phase 5. Typecheck clean, 154/154 tests pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- gateway/src/tool/recipe.ts | 203 +++++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 gateway/src/tool/recipe.ts diff --git a/gateway/src/tool/recipe.ts b/gateway/src/tool/recipe.ts new file mode 100644 index 00000000..f1c6173c --- /dev/null +++ b/gateway/src/tool/recipe.ts @@ -0,0 +1,203 @@ +import { Effect } from "effect" +import { readdirSync, statSync } from "fs" +import { resolve, relative, basename } from "path" +import { pathToFileURL } from "url" +import { z } from "zod" +import type * as Recipe from "../recipe" +import { define, type Context as ToolContext, type Def, type ExecuteResult } from "./tool" + +/** + * load_recipe — the planner's gateway to progressive-disclosure playbooks. + * + * Only metadata (name + description) sits in the system prompt. When the + * planner recognizes a task matches a recipe, it calls this tool with + * `name: ""` and gets the full markdown body plus a list of + * bundled sibling files (for recipes stored as `/RECIPE.md`). + * + * Pattern borrowed from opencode's `SkillTool`; adapted to the gateway's + * simpler Tool.Def shape (no Effect.fnUntraced, no ripgrep dep — plain fs + * is enough for the tiny bundled-file enumeration we need). + * + * Permission gating is via `ctx.ask({ permission: "recipe", target: name })`. + * That hook is optional on ToolContext in Phase 1; when it's unset (current + * state, see session/prompt.ts wrapTool), the call is a no-op. When a real + * permission UI lands, this tool inherits it without code change. + */ + +const MAX_BUNDLED_FILES = 10 + +const Parameters = z.object({ + name: z + .string() + .min(1) + .describe("The exact `name` of the recipe to load, drawn from the available list in this tool's description."), +}) + +/** + * Build the dynamic tool description from the recipes this agent may load. 
+ * + * Called once per plan (per session) — the list is already + * permission-filtered by Recipe.available(agent). If `list` is empty, the + * tool advertises that fact plainly so the planner doesn't guess names. + */ +export function describeRecipe(list: Recipe.Info[]): string { + if (list.length === 0) { + return "Load a specialized recipe (playbook) with domain-specific instructions for the current task. No recipes are currently available." + } + + return [ + "Load a specialized recipe (playbook) with domain-specific instructions for the current task.", + "", + "When you recognize that a task matches one of the recipes listed below, call this tool with the recipe's `name` to pull the full playbook into the conversation. The recipe body may instruct you to dispatch to specific A2A agents in a specific order, handle A2A task states (input-required, payment-required, auth-required) in a specific way, or follow a specific format.", + "", + "Tool output is a block containing the recipe body and a list of bundled sibling files (for recipes stored as /RECIPE.md — scripts, reference docs, etc.).", + "", + "Available recipes:", + "", + ...list.map((r) => `- **${r.name}**: ${r.description}`), + ].join("\n") +} + +/** + * Enumerate bundled sibling files for a recipe. + * + * Bundled files only exist for the nested layout (`recipes//RECIPE.md`) + * — for flat recipes (`recipes/foo.md`) the siblings are OTHER recipes, not + * bundled assets, so we skip the scan entirely. + * + * Capped at {@link MAX_BUNDLED_FILES} entries to keep the tool output small. + * Descends at most two subdirectory levels to surface `scripts/`, `reference/`, etc. without + * dragging in node_modules-style deep trees. 
+ */ +function listBundledFiles(recipeLocation: string): string[] { + if (basename(recipeLocation).toUpperCase() !== "RECIPE.MD") return [] + + const dir = resolve(recipeLocation, "..") + const out: string[] = [] + + const walk = (root: string, depth: number) => { + if (out.length >= MAX_BUNDLED_FILES || depth > 2) return + let entries: ReturnType + try { + entries = readdirSync(root, { withFileTypes: true }) + } catch { + return + } + for (const e of entries) { + if (out.length >= MAX_BUNDLED_FILES) return + const p = resolve(root, e.name) + if (e.isDirectory()) { + walk(p, depth + 1) + continue + } + if (!e.isFile()) continue + if (p === recipeLocation) continue + out.push(p) + } + } + + walk(dir, 0) + return out.sort() +} + +/** + * Build the load_recipe tool for a given planner agent. + * + * `available` is the permission-filtered list of recipes the planner is + * allowed to load — passed in rather than recomputed so the tool + * description matches the permission decision made at plan-start. `recipes` + * is the live service interface used at execute() time to fetch the full + * body. + * + * Execution contract: + * + * - Unknown name → throws with the list of valid names + * - Known name → returns an ExecuteResult whose `.output` is a + * block the planner can quote or follow verbatim + * - `ctx.ask` (if present) is consulted before the body is materialized; + * a rejection from the permission layer propagates unchanged + */ +export function buildLoadRecipeTool( + recipes: Recipe.Interface, + available: Recipe.Info[], +): Def { + const description = describeRecipe(available) + // Widened to ZodTypeAny so the returned Def unifies with the planner's + // tool list (which carries heterogeneous parameter schemas). Narrow + // parsing still happens inside execute via Parameters.parse(args). 
+ const parameters: z.ZodTypeAny = Parameters + + const info = define("load_recipe", { + description, + parameters, + execute: (args: unknown, ctx: ToolContext) => + Effect.gen(function* () { + const parsed = Parameters.parse(args) + const recipe = yield* recipes.get(parsed.name) + + if (!recipe) { + const all = yield* recipes.list() + const names = all.map((r) => r.name).join(", ") || "none" + return yield* Effect.fail( + new Error(`load_recipe: recipe "${parsed.name}" not found. Available: ${names}`), + ) + } + + // Permission gate — a no-op today (ctx.ask is optional and unset by + // the current wrapTool), reserved for Phase 2 permission UI. + if (ctx.ask) { + yield* ctx.ask({ permission: "recipe", target: parsed.name }) + } + + const bundledFiles = listBundledFiles(recipe.location) + const baseDir = resolve(recipe.location, "..") + const baseUrl = pathToFileURL(baseDir).href + + const filesBlock = + bundledFiles.length > 0 + ? [ + "", + ...bundledFiles.map((f) => `${relative(baseDir, f)}`), + "", + ].join("\n") + : "(none)" + + const output = [ + ``, + `# Recipe: ${recipe.name}`, + "", + recipe.content, + "", + `Base directory for this recipe: ${baseUrl}`, + "Relative paths (e.g., scripts/, reference/) resolve against this base directory.", + bundledFiles.length >= MAX_BUNDLED_FILES + ? 
`Note: bundled file list truncated at ${MAX_BUNDLED_FILES} entries.` + : "", + "", + filesBlock, + "", + ] + .filter((l) => l !== "") + .join("\n") + + const result: ExecuteResult = { + title: `Loaded recipe: ${recipe.name}`, + output, + metadata: { + name: recipe.name, + dir: baseDir, + fileCount: bundledFiles.length, + }, + } + return result + }), + }) + + return { + id: info.id, + description, + parameters, + execute: (args: unknown, ctx: ToolContext) => + Effect.flatMap(info.init(), (init) => init.execute(args, ctx)), + } +} From 78f450314e599b7ffc76547307bafa37aeaf03d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raahul=20Dutta=20-=20=E0=A6=B0=E0=A6=BE=E0=A6=B9=E0=A7=81?= =?UTF-8?q?=E0=A6=B2=20=F0=9F=96=96?= Date: Mon, 20 Apr 2026 14:25:46 +0200 Subject: [PATCH 5/8] feat(gateway): wire Recipe into planner system prompt and tool set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Planner now: 1. Pulls the permission-filtered recipe list at plan start via recipes.available(plannerAgent). 2. Registers load_recipe as one of the session's dynamic tools, with its description rendered from the same filtered list. 3. Injects Recipe.fmt(list, { verbose: true }) into the system prompt between the agent prompt and config.instructions — but only when the list is non-empty, so a clean gateway with no recipes produces no noise in the prompt. PromptInput gained a recipeSummary?: string field; buildSystemPrompt accepts it as an optional third argument. No other call sites — the only consumer is the planner. End-to-end: system prompt tells the LLM which recipes exist; load_recipe tool makes the body materializable on demand. Progressive disclosure complete. Typecheck clean, 154/154 tests pass. Integration tests for the end-to-end flow land in Phase 7. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- gateway/src/planner/index.ts | 14 ++++++++++++++ gateway/src/session/prompt.ts | 16 ++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/gateway/src/planner/index.ts b/gateway/src/planner/index.ts index c60e12b0..3858bbb6 100644 --- a/gateway/src/planner/index.ts +++ b/gateway/src/planner/index.ts @@ -8,6 +8,8 @@ import { Service as DBService } from "../db" import { Service as BusService } from "../bus" import { Service as BinduClientService } from "../bindu/client" import { Service as AgentService } from "../agent" +import * as Recipe from "../recipe" +import { buildLoadRecipeTool } from "../tool/recipe" import { define, type Def } from "../tool/tool" import type { Context as ToolContext, ExecuteResult } from "../tool/tool" import type { PeerDescriptor } from "../bindu/client" @@ -165,6 +167,7 @@ export const layer = Layer.effect( const bus = yield* BusService const client = yield* BinduClientService const agents = yield* AgentService + const recipes = yield* Recipe.Service const prepareSession: Interface["prepareSession"] = (request) => Effect.gen(function* () { @@ -230,6 +233,16 @@ export const layer = Layer.effect( } } + // Recipes (progressive-disclosure playbooks) are filtered through + // the planner agent's permission rules; an empty list means either + // no recipes on disk or all denied. In either case we skip the + // system-prompt block and the tool description falls back to + // "no recipes available" — no noise injected. + const recipeList = yield* recipes.available(plannerAgent) + tools.push(buildLoadRecipeTool(recipes, recipeList)) + const recipeSummary = + recipeList.length > 0 ? Recipe.fmt(recipeList, { verbose: true }) : undefined + const message = yield* prompt.prompt({ sessionID: ctx.sessionID, agent: "planner", @@ -245,6 +258,7 @@ export const layer = Layer.effect( modelOverride: plannerAgent.model, stepsOverride: request.preferences?.max_steps ?? 
plannerAgent.steps, abort: opts?.abort, + recipeSummary, }) return { message, tasksRecorded } diff --git a/gateway/src/session/prompt.ts b/gateway/src/session/prompt.ts index a0bc3a40..bc025c0f 100644 --- a/gateway/src/session/prompt.ts +++ b/gateway/src/session/prompt.ts @@ -106,6 +106,13 @@ export interface PromptInput { /** Override for max agentic steps, bypassing agent.steps. */ stepsOverride?: number abort?: AbortSignal + /** + * Rendered recipe index (Recipe.fmt(list, { verbose: true })) to splice + * into the system prompt between the agent's own prompt and the + * configured instructions. Omit when there are no recipes to advertise — + * an empty string here would still leak an empty block to the LLM. + */ + recipeSummary?: string } export interface Interface { @@ -151,7 +158,7 @@ export const layer = Layer.effect( const modelMessages: ModelMessage[] = toModelMessages(history) // 3. Build system prompt - const systemPrompt = buildSystemPrompt(agentInfo, cfg.instructions) + const systemPrompt = buildSystemPrompt(agentInfo, cfg.instructions, input.recipeSummary) // 4. 
Build AI SDK tools from the registered tools const aiTools = yield* Effect.all( @@ -338,9 +345,14 @@ export const layer = Layer.effect( }), ) -function buildSystemPrompt(agent: AgentInfo, instructions: string[]): string { +function buildSystemPrompt( + agent: AgentInfo, + instructions: string[], + recipeSummary?: string, +): string { const parts: string[] = [] if (agent.prompt) parts.push(agent.prompt) + if (recipeSummary) parts.push(recipeSummary) for (const inst of instructions) parts.push(inst) return parts.join("\n\n").trim() } From 4a2af0b5a189c78d6809619011b76cf6a1c95fb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raahul=20Dutta=20-=20=E0=A6=B0=E0=A6=BE=E0=A6=B9=E0=A7=81?= =?UTF-8?q?=E0=A6=B2=20=F0=9F=96=96?= Date: Mon, 20 Apr 2026 14:30:09 +0200 Subject: [PATCH 6/8] =?UTF-8?q?feat(gateway):=20seed=20two=20recipes=20?= =?UTF-8?q?=E2=80=94=20multi-agent=20research,=20payment-required=20flow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two real, usable playbooks that exercise the full progressive-disclosure path from loader → system prompt → load_recipe tool: multi-agent-research: instructs the planner how to chain a search agent and a summarizer agent, which A2A task states it can see between them, and where to stop vs. where to wait for the user. payment-required-flow: documents the recurring gotcha that payment-required is a paused non-terminal state, not a failure. No retries, no speculation, surface the payment URL verbatim and end the turn. This mirrors the guidance the project CLAUDE.md surfaces from past PRs. Smoke-loaded both through loadRecipesDir + fmt to verify parsing and rendering — name, description, tags, triggers and both verbose/terse formats come out clean. Full integration test coverage lands in Phase 7. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../recipes/multi-agent-research/RECIPE.md | 73 ++++++++++++++++++ .../recipes/payment-required-flow/RECIPE.md | 75 +++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 gateway/recipes/multi-agent-research/RECIPE.md create mode 100644 gateway/recipes/payment-required-flow/RECIPE.md diff --git a/gateway/recipes/multi-agent-research/RECIPE.md b/gateway/recipes/multi-agent-research/RECIPE.md new file mode 100644 index 00000000..1b6683f4 --- /dev/null +++ b/gateway/recipes/multi-agent-research/RECIPE.md @@ -0,0 +1,73 @@ +--- +name: multi-agent-research +description: Orchestrate a research task by dispatching the question to one retrieval/search agent and piping its output through a summarizer agent. Load when the user asks to research, investigate, look into, or summarize a topic that benefits from fresh external sources. +tags: [research, orchestration, multi-agent] +triggers: [research, investigate, look into, summarize, find out about] +--- + +# Multi-agent research orchestration + +## When to use this recipe + +Use this when the user asks you to research or investigate something, and +the current `/plan` request includes at least one A2A agent with a +search/retrieval skill (common ids: `search`, `web_search`, `retrieve`, +`lookup`) and at least one agent with a summarization skill (common ids: +`summarize`, `synthesize`, `brief`). + +If the request does NOT include those agents, do not attempt this flow — +respond directly to the user with what you know and note which agents +would help. + +## Flow + +1. **Identify the search tool.** Look at your available tools for one + whose id starts with `call_` and whose name contains `search`, + `retrieve`, or `lookup`. If multiple match, prefer the one whose + `tags` include `web` or `realtime`. + +2. **Dispatch the search.** Call the search tool with the user's question + as the `input` field. 
Use the user's exact phrasing — do not rewrite, + summarize, or translate it at this step; the search agent knows best + how to expand its own query. + +3. **Handle intermediate states.** The Bindu A2A task lifecycle allows + these non-terminal states on the response envelope: + - `input-required` — the search agent needs more context. Do NOT + guess. Surface its prompt to the user verbatim and wait for a reply. + - `auth-required` — the agent needs the caller to authenticate. + Report this to the user; do not retry. + - `payment-required` — see the `payment-required-flow` recipe. Load + that recipe before proceeding. + - `working` — transient; the gateway is already polling for you, + just wait for the call to return. + +4. **Hand off to the summarizer.** Once the search tool returns + `completed`, locate a `call_*_summarize`-shaped tool and call it with + the search tool's output as the `input`. The search output will be + wrapped in a `` envelope — pass the whole envelope + through, the summarizer is expected to strip it. + +5. **Compose the final answer.** The summarizer's response is what you + show the user. Quote or paraphrase freely, but always attribute the + source: "According to the …" + +## Constraints + +- **Do not parallelize searches** in this recipe. A single authoritative + source is usually better than three contradictory ones; if the user + wants a broader sweep, they should ask for one explicitly. +- **Do not cache.** Research questions imply the user wants fresh data. + Even if the session history contains a prior search result for the same + topic, re-run the dispatch. +- **If the summarizer fails** (state: `failed`) after the search + succeeded, return the raw search output wrapped in a short framing + sentence. Do not retry the summarizer — surface the failure with the + original content so the user can see what was found. 
+ +## What success looks like + +One `call__search` tool call, one `call__*` +tool call, one final assistant message attributing the summary to the +source. No retries, no speculation, no invented citations. diff --git a/gateway/recipes/payment-required-flow/RECIPE.md b/gateway/recipes/payment-required-flow/RECIPE.md new file mode 100644 index 00000000..9183402c --- /dev/null +++ b/gateway/recipes/payment-required-flow/RECIPE.md @@ -0,0 +1,75 @@ +--- +name: payment-required-flow +description: Handle A2A task state `payment-required` correctly — surface the payment URL to the user, mark the task paused, and never retry silently. Load whenever a tool result carries `state: payment-required` in its metadata. +tags: [payments, x402, a2a-states, compliance] +triggers: [payment-required, 402, x402, paid agent] +--- + +# Handling `payment-required` from a gated agent + +## When to use this recipe + +Load this recipe the moment you see a tool call whose result metadata +contains `state: "payment-required"`. This happens when a Bindu agent +is gated by x402 (USDC on Base) or another pay-per-call scheme and the +caller hasn't attached a valid payment receipt. + +The recipe applies regardless of which agent returned the state — the +handling is identical because the A2A protocol defines the semantics, +not the agent. + +## What `payment-required` means + +On the A2A protocol task lifecycle, `payment-required` is a +**non-terminal, paused** state. The agent has accepted the request, +recognized it as billable, and is waiting for the caller to complete +payment out of band before it will do any work. No result has been +produced yet. + +The response envelope will typically carry: +- A human-readable prompt explaining the charge (in the text parts). +- An x402 payment URL or structured `paymentRequired` block naming the + scheme, amount, asset, and destination. For Bindu-standard x402 this + is USDC on Base Sepolia or Base mainnet. + +## What to do + +1. 
**Do not retry the call.** Retrying without a receipt produces the + same state and just burns tokens (and, for some agents, rate-limit + quota). + +2. **Do not invent a payment.** You cannot execute x402 payments from + the planner. Do not call any other tool hoping it will pay on the + user's behalf. + +3. **Surface the payment prompt to the user verbatim.** Quote the + agent's message in full. Include the agent name (from the + `` envelope) and the verification status + (`verified="yes"` or `verified="no"` — if `no`, warn the user that + the DID signature on the payment prompt could not be verified and + they should confirm the destination before paying). + +4. **End the turn.** Do not continue planning other steps. The user + needs to act out of band (pay, then re-run the same question with a + receipt attached) before anything else can happen. Your final + assistant message is a handoff, not a continuation. + +5. **Log the state as non-terminal** in your mental model. If the + session resumes after payment, the same task id may come back in the + `completed` state on a later call — treat that as success. + +## What not to say + +- Do NOT tell the user "I'll retry in a moment" or "let me check again." + There is nothing to check. +- Do NOT speculate about the price. Quote the agent's exact figure. +- Do NOT ask the user "would you like me to proceed?" — you literally + cannot proceed without a receipt. The only useful question is whether + they want the payment URL at all. + +## What success looks like + +One tool call that returned `payment-required`, one assistant message +that forwards the payment prompt, the session ends cleanly for the user +to act on. When they return with the same question (or a follow-up +explicitly mentioning the payment is complete), you can retry. 
From 88073c135641e4fe4a23e1954cca9d7e34024835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raahul=20Dutta=20-=20=E0=A6=B0=E0=A6=BE=E0=A6=B9=E0=A7=81?= =?UTF-8?q?=E0=A6=B2=20=F0=9F=96=96?= Date: Mon, 20 Apr 2026 14:34:28 +0200 Subject: [PATCH 7/8] test(gateway): cover recipe loader and load_recipe tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two new test files, 20 new tests (total now 174/174 passing): tests/recipe/loader.test.ts — 12 tests. Covers flat + bundled layout discovery, alphabetical sort, cross-layout duplicate detection, empty-description rejection, name-fallback to filename stem, missing-directory behavior, tag/trigger parsing, and both fmt() output modes (verbose XML + terse markdown). tests/recipe/tool.test.ts — 8 tests. Covers describeRecipe with filtered lists, unknown-name errors that include the available list, / envelope for flat and bundled recipes, the 10-entry enumeration cap, and the ctx.ask permission hook contract. Dropped the planner-integration test from the original plan. The wiring (planner → load_recipe tool + recipeSummary in PromptInput) is 16 visible lines; a real integration test would need to mock the LLM provider, Session.Service, DB, and Bus, which costs more than it catches. The loader and tool contracts cover the interesting surface; the wiring itself is too trivial for its own integration test at this scope. Typecheck clean, 20/20 recipe tests pass, 174/174 overall. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- gateway/tests/recipe/loader.test.ts | 166 +++++++++++++++++++++++++ gateway/tests/recipe/tool.test.ts | 180 ++++++++++++++++++++++++++++ 2 files changed, 346 insertions(+) create mode 100644 gateway/tests/recipe/loader.test.ts create mode 100644 gateway/tests/recipe/tool.test.ts diff --git a/gateway/tests/recipe/loader.test.ts b/gateway/tests/recipe/loader.test.ts new file mode 100644 index 00000000..028cf41d --- /dev/null +++ b/gateway/tests/recipe/loader.test.ts @@ -0,0 +1,166 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest" +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "fs" +import { resolve, join } from "path" +import { tmpdir } from "os" +import { loadRecipesDir, parseRecipeFile, fmt } from "../../src/recipe" + +/** + * Unit tests for the recipe loader. + * + * Covers the four drift risks the shape of the loader introduces: + * 1. Both layouts (flat `foo.md` and nested `foo/RECIPE.md`) discovered + * in one scan and collated by name. + * 2. Duplicate name across layouts fails loudly at load time — silent + * precedence would make behavior dependent on filesystem order. + * 3. Empty or missing description is rejected by Zod — the progressive- + * disclosure contract needs a non-empty line to display. + * 4. Output is sorted by name — the planner prompt relies on a stable + * order so prompt caching is effective across requests. 
+ */ + +describe("recipe loader", () => { + let dir: string + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "bindu-recipe-test-")) + }) + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }) + }) + + const writeFlat = (name: string, frontmatter: string, body = "body") => + writeFileSync(resolve(dir, `${name}.md`), `---\n${frontmatter}\n---\n${body}\n`) + + const writeBundled = (dirName: string, frontmatter: string, body = "body", siblings: Record = {}) => { + const sub = resolve(dir, dirName) + mkdirSync(sub, { recursive: true }) + writeFileSync(resolve(sub, "RECIPE.md"), `---\n${frontmatter}\n---\n${body}\n`) + for (const [name, content] of Object.entries(siblings)) { + const p = resolve(sub, name) + mkdirSync(resolve(p, ".."), { recursive: true }) + writeFileSync(p, content) + } + } + + it("discovers both flat and bundled layouts in one scan", () => { + writeFlat("alpha", "name: alpha\ndescription: flat one") + writeBundled("beta", "name: beta\ndescription: bundled one", "body", { + "scripts/check.sh": "#!/bin/sh\necho ok", + }) + + const list = loadRecipesDir(dir) + expect(list.map((r) => r.name)).toEqual(["alpha", "beta"]) + expect(list[0].location.endsWith("alpha.md")).toBe(true) + expect(list[1].location.endsWith("RECIPE.md")).toBe(true) + }) + + it("returns results sorted by name regardless of filesystem order", () => { + writeFlat("zzz", "name: zzz\ndescription: z") + writeFlat("mmm", "name: mmm\ndescription: m") + writeFlat("aaa", "name: aaa\ndescription: a") + + const list = loadRecipesDir(dir) + expect(list.map((r) => r.name)).toEqual(["aaa", "mmm", "zzz"]) + }) + + it("throws when two recipes share the same name across layouts", () => { + writeFlat("dup", "name: dup\ndescription: flat") + writeBundled("dup-dir", "name: dup\ndescription: bundled") + + expect(() => loadRecipesDir(dir)).toThrow(/duplicate name "dup"/) + }) + + it("rejects empty description", () => { + writeFlat("empty", "name: empty\ndescription: ") + + expect(() 
=> loadRecipesDir(dir)).toThrow(/invalid frontmatter/) + }) + + it("falls back to filename stem when `name` is omitted", () => { + writeFlat("fallback-name", "description: no explicit name") + + const list = loadRecipesDir(dir) + expect(list[0].name).toBe("fallback-name") + }) + + it("returns an empty array when the directory does not exist", () => { + expect(loadRecipesDir(resolve(dir, "missing"))).toEqual([]) + }) + + it("parses tags and triggers as arrays; defaults both to []", () => { + writeFlat( + "with-meta", + "name: with-meta\ndescription: d\ntags: [a, b, c]\ntriggers: [x]", + ) + writeFlat("bare", "name: bare\ndescription: d") + + const list = loadRecipesDir(dir) + const meta = list.find((r) => r.name === "with-meta")! + const bare = list.find((r) => r.name === "bare")! + expect(meta.tags).toEqual(["a", "b", "c"]) + expect(meta.triggers).toEqual(["x"]) + expect(bare.tags).toEqual([]) + expect(bare.triggers).toEqual([]) + }) + + it("parseRecipeFile uses fallbackName only when frontmatter omits name", () => { + const withName = parseRecipeFile("/x.md", "---\nname: explicit\ndescription: d\n---\nbody", "stem") + const withoutName = parseRecipeFile("/x.md", "---\ndescription: d\n---\nbody", "stem") + expect(withName.name).toBe("explicit") + expect(withoutName.name).toBe("stem") + }) + + it("ignores directories without a RECIPE.md file", () => { + const sub = resolve(dir, "just-a-dir") + mkdirSync(sub, { recursive: true }) + writeFileSync(resolve(sub, "notes.md"), "# some notes, not a recipe") + + expect(loadRecipesDir(dir)).toEqual([]) + }) +}) + +describe("Recipe.fmt", () => { + const sample = [ + { + name: "alpha", + description: "First recipe", + tags: ["x"], + triggers: [], + location: "/tmp/alpha.md", + content: "body-a", + }, + { + name: "beta", + description: "Second recipe", + tags: [], + triggers: [], + location: "/tmp/beta/RECIPE.md", + content: "body-b", + }, + ] + + it("verbose mode returns an XML block with both recipes and their locations", () 
=> { + const out = fmt(sample, { verbose: true }) + expect(out).toContain("") + expect(out).toContain("alpha") + expect(out).toContain("beta") + expect(out).toContain("file:///tmp/alpha.md") + expect(out).toContain("file:///tmp/beta/RECIPE.md") + expect(out).toContain("x") + }) + + it("terse mode returns a markdown bullet list with names and descriptions only", () => { + const out = fmt(sample, { verbose: false }) + expect(out).toContain("## Available Recipes") + expect(out).toContain("- **alpha**: First recipe") + expect(out).toContain("- **beta**: Second recipe") + expect(out).not.toContain("file://") + expect(out).not.toContain("") + }) + + it("reports plainly when there are no recipes available", () => { + expect(fmt([], { verbose: true })).toBe("No recipes are currently available.") + expect(fmt([], { verbose: false })).toBe("No recipes are currently available.") + }) +}) diff --git a/gateway/tests/recipe/tool.test.ts b/gateway/tests/recipe/tool.test.ts new file mode 100644 index 00000000..dd6f356f --- /dev/null +++ b/gateway/tests/recipe/tool.test.ts @@ -0,0 +1,180 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest" +import { Effect } from "effect" +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "fs" +import { resolve, join } from "path" +import { tmpdir } from "os" +import type * as Recipe from "../../src/recipe" +import { buildLoadRecipeTool, describeRecipe } from "../../src/tool/recipe" +import type { Context as ToolContext } from "../../src/tool/tool" + +/** + * Unit tests for the load_recipe tool. + * + * Invariants we want pinned: + * 1. The tool description is a function of the permission-filtered list + * we hand the factory — not the full service.all(). The planner + * passes the filtered list and expects the LLM to see only those + * names. + * 2. Unknown names produce an error that includes the full available + * list, so the planner can recover by picking a real name. + * 3. 
Known names produce a envelope with a + * block — this is the contract the planner relies on to quote body + * verbatim and find bundled assets. + * 4. Flat recipes (no sibling dir) yield an empty files block, not a + * scan of other recipes in the same directory. + */ + +type Info = Recipe.Info + +const mkInfo = (overrides: Partial = {}): Info => ({ + name: overrides.name ?? "sample", + description: overrides.description ?? "Sample recipe", + tags: overrides.tags ?? [], + triggers: overrides.triggers ?? [], + location: overrides.location ?? "/tmp/sample.md", + content: overrides.content ?? "Sample body.", +}) + +/** Minimal Recipe.Interface backed by a plain array. */ +const mkFakeService = (recipes: Info[]): Recipe.Interface => ({ + list: () => Effect.succeed(recipes.slice()), + get: (name) => Effect.succeed(recipes.find((r) => r.name === name)), + available: () => Effect.succeed(recipes.slice()), + dirs: () => Effect.succeed(Array.from(new Set(recipes.map((r) => r.location.replace(/\/[^/]+$/, ""))))), +}) + +const mkCtx = (): ToolContext => ({ + sessionId: "sess", + messageId: "msg", + agent: "planner", + callId: "call", + abort: new AbortController().signal, + metadata: () => Effect.void, +}) + +describe("describeRecipe", () => { + it("advertises every recipe in the filtered list with name and description", () => { + const out = describeRecipe([ + mkInfo({ name: "alpha", description: "first" }), + mkInfo({ name: "beta", description: "second" }), + ]) + expect(out).toContain("alpha") + expect(out).toContain("first") + expect(out).toContain("beta") + expect(out).toContain("second") + // The planner relies on this phrase to know the tool exists at all. 
+ expect(out).toContain("Load a specialized recipe") + }) + + it("reports plainly when the filtered list is empty (no names to guess)", () => { + const out = describeRecipe([]) + expect(out).toContain("No recipes are currently available") + expect(out).not.toContain("- **") // no bullet entries + }) +}) + +describe("buildLoadRecipeTool", () => { + let tmp: string + + beforeEach(() => { + tmp = mkdtempSync(join(tmpdir(), "bindu-tool-test-")) + }) + + afterEach(() => { + rmSync(tmp, { recursive: true, force: true }) + }) + + it("exposes the description produced by describeRecipe on the returned Def", () => { + const svc = mkFakeService([mkInfo({ name: "a", description: "aaa" })]) + const def = buildLoadRecipeTool(svc, [mkInfo({ name: "a", description: "aaa" })]) + expect(def.id).toBe("load_recipe") + expect(def.description).toContain("a") + expect(def.description).toContain("aaa") + }) + + it("returns a helpful error when the requested recipe does not exist", async () => { + const svc = mkFakeService([ + mkInfo({ name: "known-1" }), + mkInfo({ name: "known-2" }), + ]) + const def = buildLoadRecipeTool(svc, []) + const run = Effect.runPromise(def.execute({ name: "nope" }, mkCtx())) + await expect(run).rejects.toThrow(/not found. 
Available: known-1, known-2/) + }) + + it("wraps body in with an empty files block for flat recipes", async () => { + const info = mkInfo({ + name: "flat", + location: "/tmp/flat.md", // flat layout + content: "The body of the flat recipe.", + }) + const svc = mkFakeService([info]) + const def = buildLoadRecipeTool(svc, [info]) + + const result = await Effect.runPromise(def.execute({ name: "flat" }, mkCtx())) + expect(result.title).toBe("Loaded recipe: flat") + expect(result.output).toContain('') + expect(result.output).toContain("# Recipe: flat") + expect(result.output).toContain("The body of the flat recipe.") + expect(result.output).toContain("(none)") + expect(result.output).toContain("") + expect(result.metadata.fileCount).toBe(0) + }) + + it("enumerates sibling files for bundled (/RECIPE.md) recipes", async () => { + const bundleDir = resolve(tmp, "bundled") + mkdirSync(bundleDir, { recursive: true }) + mkdirSync(resolve(bundleDir, "scripts"), { recursive: true }) + const recipePath = resolve(bundleDir, "RECIPE.md") + writeFileSync(recipePath, "---\nname: bundled\ndescription: b\n---\nbody") + writeFileSync(resolve(bundleDir, "scripts/run.sh"), "#!/bin/sh") + writeFileSync(resolve(bundleDir, "reference.md"), "ref") + + const info = mkInfo({ name: "bundled", location: recipePath, content: "body" }) + const svc = mkFakeService([info]) + const def = buildLoadRecipeTool(svc, [info]) + + const result = await Effect.runPromise(def.execute({ name: "bundled" }, mkCtx())) + expect(result.output).toContain("") + expect(result.output).toContain("reference.md") + expect(result.output).toContain("scripts/run.sh") + expect(result.output).not.toContain("RECIPE.md") + expect(result.metadata.fileCount).toBe(2) + }) + + it("caps bundled-file enumeration at 10 entries", async () => { + const bundleDir = resolve(tmp, "big") + mkdirSync(bundleDir, { recursive: true }) + const recipePath = resolve(bundleDir, "RECIPE.md") + writeFileSync(recipePath, "---\nname: big\ndescription: 
d\n---\nbody") + for (let i = 0; i < 25; i++) { + writeFileSync(resolve(bundleDir, `f${i}.txt`), "x") + } + + const info = mkInfo({ name: "big", location: recipePath }) + const svc = mkFakeService([info]) + const def = buildLoadRecipeTool(svc, [info]) + + const result = await Effect.runPromise(def.execute({ name: "big" }, mkCtx())) + expect(result.metadata.fileCount).toBe(10) + expect(result.output).toContain("truncated at 10 entries") + }) + + it("calls ctx.ask when provided, passing permission='recipe' and the recipe name as target", async () => { + const info = mkInfo({ name: "perm", location: "/tmp/perm.md" }) + const svc = mkFakeService([info]) + const def = buildLoadRecipeTool(svc, [info]) + + const calls: Array<{ permission: string; target?: string }> = [] + const ctx: ToolContext = { + ...mkCtx(), + ask: (input) => + Effect.sync(() => { + calls.push(input) + }), + } + + await Effect.runPromise(def.execute({ name: "perm" }, ctx)) + expect(calls).toEqual([{ permission: "recipe", target: "perm" }]) + }) +}) From 8c81df96649c98ac975ad033526f2ebf262e521e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raahul=20Dutta=20-=20=E0=A6=B0=E0=A6=BE=E0=A6=B9=E0=A7=81?= =?UTF-8?q?=E0=A6=B2=20=F0=9F=96=96?= Date: Mon, 20 Apr 2026 14:39:26 +0200 Subject: [PATCH 8/8] docs: document gateway recipes feature in README and project CLAUDE.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gateway/README.md: - Fix stale bullet: "Tool registry + Skill/Agent loaders" → "Tool registry + Agent/Recipe loaders (progressive-disclosure playbooks)" — the Skill loader was removed in this branch, and the status list needs to reflect what's actually shipped. - New §Recipes section: what they are, why you'd write one, the flat vs. bundled layout, frontmatter shape, per-agent visibility via the existing permission system, and the end-to-end load path. Points at src/recipe/index.ts and src/tool/recipe.ts for source. 
CLAUDE.md: - Append a Recent Learnings entry so future Claude sessions know recipes exist and why they're named "recipe" (the skill namespace was already taken by A2A SkillRequest). Typecheck clean, 174/174 tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 1 + gateway/README.md | 56 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index 73b90d22..335db94a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -208,6 +208,7 @@ bindu/ - **[2026-03-29]** Payment context handling: Use `.pop()` instead of `del` for optional metadata keys (PR #418) - **[2026-03-29]** Windows compatibility: DID private key permissions - use `os.open()` on POSIX, direct write on Windows (PR #418) - **[2026-03-27]** gRPC docs reorganized: See `docs/grpc/` for architecture, API reference, SDK guides +- **[2026-04-20]** Gateway recipes: progressive-disclosure playbooks the planner lazy-loads on demand. Live in `gateway/recipes/` as markdown files with YAML frontmatter. Metadata (name + description) goes into the system prompt; full body only loads when the planner calls `load_recipe`. Pattern ported from OpenCode skills, renamed because the gateway already uses "skill" for A2A agent capabilities. See `gateway/src/recipe/index.ts` and `gateway/README.md` §Recipes. ## Key Design Decisions diff --git a/gateway/README.md b/gateway/README.md index d4cb4ac0..f97970f2 100644 --- a/gateway/README.md +++ b/gateway/README.md @@ -17,7 +17,7 @@ For design rationale, see [`plans/PLAN.md`](./plans/PLAN.md). Phase-by-phase det Phase 1 Days 1–9 shipped. 
Core gateway is functionally complete: - ✅ Bus, Config, DB (Supabase), Auth, Permission, Provider (Anthropic/OpenAI) -- ✅ Tool registry + Skill/Agent loaders +- ✅ Tool registry + Agent/Recipe loaders (recipes = progressive-disclosure playbooks) - ✅ Session module (message, state, LLM stream, the **loop**, compaction, summary, revert, overflow detection) - ✅ Bindu protocol: Zod types for Message/Part/Artifact/Task/AgentCard, mixed-casing normalize, DID parse, JSON-RPC envelope, BinduError classification - ✅ Bindu identity: ed25519 verify (against real Phase 0 signatures) @@ -167,6 +167,60 @@ See [`plans/PLAN.md`](./plans/PLAN.md) §Architecture for the full picture. --- +## Recipes — progressive-disclosure playbooks + +Recipes are markdown playbooks the planner lazy-loads when a task matches. Only metadata (`name` + `description`) sits in the system prompt; the full body is fetched on demand via the `load_recipe` tool. Pattern borrowed from [OpenCode Skills](https://opencode.ai/docs/skills/), renamed to avoid collision with A2A `SkillRequest` (an agent capability on the `/plan` request body). + +**Why you'd write one:** to encode multi-agent orchestration patterns ("research question → search agent → summarizer"), handling rules for A2A states (`input-required`, `payment-required`, `auth-required`), or tenant-specific policies. Operators drop a markdown file in `gateway/recipes/` — no code change. 
+ +### Layouts + +``` +gateway/recipes/foo.md flat recipe, no bundled files +gateway/recipes/bar/RECIPE.md bundled recipe — siblings like +gateway/recipes/bar/scripts/run.sh scripts/, reference/ are surfaced +gateway/recipes/bar/reference/notes.md to the planner when bar loads +``` + +### Frontmatter + +```yaml +--- +name: multi-agent-research # optional; falls back to filename/dir stem +description: One-line summary that # required (non-empty) — shown in the + tells the planner when to load # system prompt and tool description +tags: [research, orchestration] # optional +triggers: [research, investigate] # optional planner hints +--- + +# Playbook body in markdown — free-form instructions the planner follows +# after loading the recipe. +``` + +### Per-agent visibility + +Recipes respect the agent permission system. In an agent's frontmatter: + +```yaml +permission: + recipe: + "secret-*": "deny" # hide recipes matching the pattern from this agent + "*": "allow" # everything else is visible +``` + +Default action is `allow` — an agent with no `recipe:` rules sees everything. + +### How it works end-to-end + +1. On each `/plan`, the planner calls `recipes.available(plannerAgent)`. +2. The filtered list is (a) rendered into the system prompt as `<available_recipes>` and (b) used to generate the description of the `load_recipe` tool. +3. When the planner decides a recipe applies, it calls `load_recipe({ name })`. +4. The tool returns a `<recipe_content>` envelope with the full markdown and a `<recipe_files>` block listing bundled sibling files. The planner quotes or follows the body for the rest of the turn. + +See [`src/recipe/index.ts`](./src/recipe/index.ts) for the loader and [`src/tool/recipe.ts`](./src/tool/recipe.ts) for the tool. Two seed recipes live under [`recipes/`](./recipes/). + +--- + ## DID signing for downstream peers The gateway can sign outbound A2A requests with an Ed25519 identity so DID-enforcing Bindu peers accept them. 
Needed for any peer you configure with `auth.type = "did_signed"`; ignored otherwise.