diff --git a/docs/USAGE.md b/docs/USAGE.md index 9d09e1e..e4bd0dc 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -343,6 +343,7 @@ tier: primary # optional, primary|secondary|ollama, default = SOLRAC max_turns: 1 # optional, integer in [1,10], default 1. Model-turn budget for the skill body. Pure text-transforms want 1; agentic skills that chain tool calls (e.g. `notion_search` → `notion_create_page`) need headroom. Doubles as `maxIterations` for the Ollama tool loop. tool: false # optional, default false. When true, also expose this skill as a callable MCP tool to the Ollama agent (Phase 1: requires tier: ollama). requires: notion # optional, integration deps. Bare string OR array (`requires: [notion, gmail]`). When any name is missing from the loaded integrations at boot, the skill is skipped with a `skills.load_error` warn — it never appears in `/help` or Telegram autocomplete. Omit for unconditional load. +auto_allow: false # optional, default false. When true, every `confirm`-tier tool the skill body calls bypasses the Telegram prompt and runs directly. The skill's purpose IS the operation (e.g. `/log` → Notion write) — re-prompting on every call hurts UX. Loop detector, hard-deny classifier, and cost cap still apply. --- You are a concise summarizer. Produce exactly 3 bullets, no preamble. @@ -350,7 +351,7 @@ Input: {{args}} ``` -Two required fields (`name`, `description`), four optional (`tier`, `max_turns`, `tool`, `requires`), plus a body. The body is a prompt template — `{{args}}` is the only placeholder and gets replaced with whatever the user typed after the command (e.g. `/summarize ` → `args = ""`). The substitution is literal text-for-text; no escaping, no nested templating. +Two required fields (`name`, `description`), five optional (`tier`, `max_turns`, `tool`, `requires`, `auto_allow`), plus a body. The body is a prompt template — `{{args}}` is the only placeholder and gets replaced with whatever the user typed after the command (e.g. `/summarize ` → `args = ""`). The substitution is literal text-for-text; no escaping, no nested templating. The frontmatter parser supports a YAML *subset*: `key: scalar`, `key: [a, b, c]` string arrays, single- or double-quoted strings, integers, booleans. Multi-line strings, anchors, and nested maps are NOT supported and produce a clear error pointing at the offending line. @@ -361,7 +362,7 @@ Skills run with the full tool surface their tier provides, bounded by `max_turns - **Claude tiers (`primary` / `secondary`)** — the body sees the same Claude Code tool preset a normal turn does (`Bash`, `Read`, `Edit`, `Write`, `WebFetch`, `WebSearch`, plus every `mcp__solrac__*` integration tool). `Agent` and `Task` stay denied at the SDK + policy layers — no sub-agents from inside a skill. - **Ollama tier** — when the deploy has integrations + Ollama tools enabled, the body routes through the same `runToolLoop` driver as a regular Ollama turn and sees the full MCP catalog (minus its own `skills__` entry — see "Skills as tools" below). Without integrations / tools, the path falls back to a single-shot `/api/chat` round trip. -Every tool call (both tiers) flows through the same three-tier policy (auto-allow / auto-deny / Telegram-confirm), the same `PreToolUse` cost-cap + loop-detector hooks, and the same `canUseTool` interactive confirm UX as a normal turn. A skill body that calls `Bash(rm -rf /)` gets denied identically — there's no skill-specific bypass. +Every tool call (both tiers) flows through the same three-tier policy (auto-allow / auto-deny / Telegram-confirm), the same `PreToolUse` cost-cap + loop-detector hooks, and the same `canUseTool` interactive confirm UX as a normal turn. A skill body that calls `Bash(rm -rf /)` gets denied identically — there's no skill-specific bypass *except* `auto_allow: true`, which suppresses ONLY the interactive Telegram-confirm prompt (the loop detector, hard-deny classifier, and cost cap all still gate). Reach for `auto_allow` on skills whose entire purpose is a known operation — `/log` writing to Notion, an Ollama-tier skill appending to a Google Drive doc — where re-prompting on every call costs more than it protects. `max_turns` is the per-skill model-turn budget. A pure text-transform (summarize, translate) wants `max_turns: 1`. An agentic skill that chains tool calls (e.g. `/log` doing `notion_search` → `notion_create_page` → return URL) needs a few more; the bound caps runaway behavior the same way the SDK's `maxTurns` does for a regular turn. Hard ceiling is 10; the cost cap is the ultimate backstop on Claude tiers, `OLLAMA_MAX_TOOL_ITERATIONS` on Ollama. diff --git a/src/commands.ts b/src/commands.ts index 8fbb80d..f7a77be 100644 --- a/src/commands.ts +++ b/src/commands.ts @@ -1315,12 +1315,18 @@ async function runSkill( const pendingHandles = new Map(); const policyDeny: { event: PolicyDenyEvent | null } = { event: null }; - const canUseTool: CanUseTool = - deps.createCanUseTool?.({ - chatId: msg.chat.id, - auditId, - pendingHandles, - }) ?? skillDefaultAllowAll; + // `auto_allow: true` skills bypass the interactive confirm UX entirely — + // the PreToolUse hook (cost cap + loop detector) and SDK `disallowedTools` + // still run, but every tool the SDK routes to `canUseTool` is allowed + // without prompting the operator. Useful for skills whose entire purpose + // IS a known write (e.g. /log → notion). + const canUseTool: CanUseTool = skill.autoAllow + ? skillDefaultAllowAll + : (deps.createCanUseTool?.({ + chatId: msg.chat.id, + auditId, + pendingHandles, + }) ?? skillDefaultAllowAll); const preToolUseHook = createPreToolUseHook({ chatId: msg.chat.id, @@ -1650,6 +1656,20 @@ async function runSkillBareWithTools( const toolTiers = ollama.toolTiers!; const broker = ollama.broker!; + // The broker uses `chatId` to send the Telegram inline-keyboard confirm + // prompt; without the real id, sends fail-close to a denial and the + // operator never sees a prompt. Both production callers (`runOllamaSkill` + // and `skill-tools.ts` dispatch) wrap us in `skillToolCtx.run({chatId, + // parentAuditId, ...})`, so we read context here. Missing context means + // a misconfigured test harness — log loudly and fall through with 0 (the + // broker's fail-closed path will surface the bug as a denied tool call). + const cx = skillToolCtx.getStore(); + if (!cx) { + log.warn("skill.no_context_in_bare_with_tools", { skill: skill.name }); + } + const chatId = cx?.chatId ?? 0; + const auditId = cx?.parentAuditId ?? 0; + // Recursion guard. The skill's body must not see its own MCP entry. The // model can still call OTHER skills tools; cycles longer than 1 are bounded // by `maxIterations` and the loop detector. @@ -1688,15 +1708,13 @@ async function runSkillBareWithTools( broker, loopDetector, maxIterations: skill.maxTurns, - // Skill audit row is owned by the caller (runOllamaSkill or - // skill-tools.ts::buildOneSkillTool); pass 0 here. The audit - // correlation in `log.info("ollama.tool_loop_start", ...)` is - // best-effort for skills. - auditId: 0, - // chatId is only used for log correlation inside runToolLoop. The - // ALS context (skillToolCtx) carries the real chatId for any - // nested skill calls. - chatId: 0, + // chatId is required by the broker to address the Telegram confirm + // prompt — sourced from the ALS context the caller set up. auditId + // is best-effort log correlation; we forward the caller's audit row + // so loop entries pin to the right turn. + auditId, + chatId, + autoAllow: skill.autoAllow, }, { initialMessages }, ); diff --git a/src/ollama-tools.test.ts b/src/ollama-tools.test.ts index d865d65..f959a42 100644 --- a/src/ollama-tools.test.ts +++ b/src/ollama-tools.test.ts @@ -357,6 +357,32 @@ describe("executeToolCall", () => { expect(r.content).toContain("network down"); }); + test("autoAllow: confirm-tier tool bypasses broker, handler invoked", async () => { + let requested = false; + const def = textTool("write_thing", "wrote ok"); + const deps = buildDeps([{ def, tier: "confirm" }], { + broker: makeBroker("deny", { onRequest: () => (requested = true) }), + autoAllow: true, + }); + const r = await executeToolCall(deps, { + name: "write_thing", + arguments: {}, + }); + + expect(requested).toBe(false); + expect(r.disposition).toBe("ok"); + expect(r.content).toBe("wrote ok"); + }); + + test("autoAllow: auto-tier tool still works (no change)", async () => { + const def = textTool("time_now", "12:00"); + const deps = buildDeps([{ def, tier: "auto" }], { autoAllow: true }); + const r = await executeToolCall(deps, { name: "time_now", arguments: {} }); + + expect(r.disposition).toBe("ok"); + expect(r.content).toBe("12:00"); + }); + test("malformed args: zod validation fails, handler not invoked", async () => { let invoked = false; const def = tool( diff --git a/src/ollama-tools.ts b/src/ollama-tools.ts index cf70ae4..51ef2b9 100644 --- a/src/ollama-tools.ts +++ b/src/ollama-tools.ts @@ -242,6 +242,14 @@ export interface ExecuteToolCallDeps { * tests so they exercise the unbounded path. */ readonly roundState?: { confirmsRemaining: number }; + /** + * When true, `confirm`-tier classifications fall through to invocation + * without dispatching the broker. Set per-skill via SKILL.md `auto_allow: + * true` for skills whose entire purpose IS a known write. Loop detector + * and `deny`-tier still gate as normal — only the interactive prompt is + * suppressed. + */ + readonly autoAllow?: boolean; } /** @@ -336,8 +344,18 @@ export async function executeToolCall( }; } - // Step 3: confirm UX for confirm-tier tools. - if (decision.kind === "confirm") { + // Step 3: confirm UX for confirm-tier tools. Per-skill `auto_allow: + // true` (SKILL.md) bypasses the broker entirely — the skill's purpose IS + // the operation, so re-prompting hurts UX. Loop detector + deny-tier above + // still ran. Logged so audit-greps can tell "operator approved" from + // "skill auto-allowed". + if (decision.kind === "confirm" && deps.autoAllow) { + log.info("ollama.tool_auto_allow", { + auditId: deps.auditId, + chatId: deps.chatId, + tool: shortName, + }); + } else if (decision.kind === "confirm") { // Per-round confirm-cap (PLAN §3 / Phase 3). When the model emits // multiple parallel `tool_calls` and ≥2 are confirm-tier, the FIRST // gets the broker; subsequent ones short-circuit to a deny that tells @@ -740,6 +758,12 @@ export interface RunToolLoopDeps { readonly denyTools?: ReadonlySet; /** Optional throttled progress hook for live UI. */ readonly renderer?: RunToolLoopRenderer; + /** + * When true, `confirm`-tier tool calls bypass the broker and run directly. + * Forwarded into every `executeToolCall` for this loop. Set by callers that + * already have a per-invocation trust signal (e.g. SKILL.md `auto_allow`). + */ + readonly autoAllow?: boolean; } export interface RunToolLoopInput { @@ -996,8 +1020,10 @@ export async function runToolLoop( } // Single-confirm-per-round: pre-classify confirm-tier; deny 2nd+. + // `autoAllow` skills bypass the broker entirely, so the cap (which + // exists to avoid stacking 60s prompts) doesn't apply to them. const tier = deps.toolTiers.get(call.name) ?? "confirm"; - const wouldConfirm = tier !== "auto"; + const wouldConfirm = tier !== "auto" && !deps.autoAllow; if (wouldConfirm && confirmsUsedThisRound > 0) { const msg = "denied: only one confirmable tool per round; retry separately"; @@ -1022,6 +1048,7 @@ export async function runToolLoop( toolTiers: deps.toolTiers, broker: deps.broker, loopDetector: deps.loopDetector, + autoAllow: deps.autoAllow, }, call, ); diff --git a/src/skill-tools.test.ts b/src/skill-tools.test.ts index 4621b83..8596152 100644 --- a/src/skill-tools.test.ts +++ b/src/skill-tools.test.ts @@ -86,6 +86,7 @@ function fakeSkill(overrides: Partial = {}): Skill { tool: true, maxTurns: 1, requires: [] as ReadonlyArray, + autoAllow: false, ...overrides, }); } diff --git a/src/skills.test.ts b/src/skills.test.ts index dc84ea3..cb7d980 100644 --- a/src/skills.test.ts +++ b/src/skills.test.ts @@ -513,6 +513,56 @@ Body.`; }); }); +// --------------------------------------------------------------------------- +// parseSkillFile — auto_allow field +// --------------------------------------------------------------------------- + +describe("parseSkillFile — auto_allow field", () => { + test("defaults to false when omitted", () => { + const c = `--- +name: example +description: x +--- +Body.`; + const skill = parseSkillFile(c, "/p", RESERVED); + expect(skill.autoAllow).toBe(false); + }); + + test("accepts true", () => { + const c = `--- +name: example +description: x +auto_allow: true +--- +Body.`; + const skill = parseSkillFile(c, "/p", RESERVED); + expect(skill.autoAllow).toBe(true); + }); + + test("accepts false explicitly", () => { + const c = `--- +name: example +description: x +auto_allow: false +--- +Body.`; + const skill = parseSkillFile(c, "/p", RESERVED); + expect(skill.autoAllow).toBe(false); + }); + + test("rejects non-boolean", () => { + const c = `--- +name: example +description: x +auto_allow: "yes" +--- +Body.`; + expect(() => parseSkillFile(c, "/p", RESERVED)).toThrow( + /"auto_allow" must be a boolean/, + ); + }); +}); + // --------------------------------------------------------------------------- // parseSkillFile — tier inheritance from defaultTier // --------------------------------------------------------------------------- @@ -799,6 +849,7 @@ describe("skillsToBotCommands", () => { tool: false, maxTurns: 1, requires: [] as ReadonlyArray, + autoAllow: false, }); } diff --git a/src/skills.ts b/src/skills.ts index ce756d0..4ae39ac 100644 --- a/src/skills.ts +++ b/src/skills.ts @@ -106,6 +106,12 @@ export interface Skill { // (unconditional load). Frontmatter key: `requires`; accepts a bare string // or a string array. readonly requires: ReadonlyArray; + // When true, every `confirm`-tier tool the skill body calls is auto-allowed + // without prompting the operator. Trades safety for ergonomics — appropriate + // for skills whose entire purpose IS a known write (e.g. /log → notion). + // Loop detector + hard-deny classifier still apply; cost cap still applies. + // Frontmatter key: `auto_allow`. Default false. + readonly autoAllow: boolean; } export interface SkillLoadError { @@ -360,13 +366,34 @@ export function parseSkillFile( requires = Object.freeze([...list]); } + // auto_allow — opt-in flag that auto-allows every confirm-tier tool the + // skill body calls. The skill's purpose IS the operation, so re-prompting on + // every call hurts UX. Loop detector + hard-deny classifier + cost cap still + // apply; only the interactive Telegram-confirm is bypassed. + let autoAllow = false; + if ("auto_allow" in f) { + const v = f.auto_allow; + if (typeof v !== "boolean") { + throw new Error(`${sourcePath}: "auto_allow" must be a boolean (got "${String(v)}")`); + } + autoAllow = v; + } + // body if (body.trim() === "") { throw new Error(`${sourcePath}: body must be non-empty (no prompt template)`); } // Reject unknown keys so typos don't silently skip. - const ALLOWED = new Set(["name", "description", "tier", "tool", "max_turns", "requires"]); + const ALLOWED = new Set([ + "name", + "description", + "tier", + "tool", + "max_turns", + "requires", + "auto_allow", + ]); for (const k of Object.keys(f)) { if (!ALLOWED.has(k)) { throw new Error( @@ -384,6 +411,7 @@ export function parseSkillFile( tool: toolFlag, maxTurns, requires, + autoAllow, }); }