From 86449aae380744708daa440bfc3a5400a97429b0 Mon Sep 17 00:00:00 2001 From: sajeerzeji Date: Sun, 7 Jun 2026 23:58:18 +0530 Subject: [PATCH 1/5] feat: add MCP server, McpChannel, eval primitives, and OTel tracing --- README.md | 56 +- package-lock.json | 1314 ++++++++++++++++- packages/toolpack-agents/README.md | 76 +- packages/toolpack-agents/docs/README.md | 2 +- packages/toolpack-agents/docs/channels.md | 53 + packages/toolpack-agents/docs/testing.md | 117 ++ packages/toolpack-agents/package.json | 21 +- .../toolpack-agents/src/channels/index.ts | 2 + .../src/channels/mcp-channel.test.ts | 124 ++ .../src/channels/mcp-channel.ts | 124 ++ packages/toolpack-agents/src/index.ts | 34 + .../interceptors/builtins/builtins.test.ts | 172 +++ .../src/interceptors/builtins/index.ts | 10 + .../src/interceptors/builtins/otel-tracer.ts | 186 +++ .../toolpack-agents/src/interceptors/index.ts | 8 + .../src/testing/eval-dataset.ts | 121 ++ .../src/testing/eval-report.ts | 116 ++ .../src/testing/eval-runner.ts | 89 ++ .../src/testing/eval-scorer.ts | 248 ++++ .../toolpack-agents/src/testing/eval-types.ts | 160 ++ .../toolpack-agents/src/testing/eval.test.ts | 406 +++++ packages/toolpack-agents/src/testing/index.ts | 24 + packages/toolpack-knowledge/package.json | 2 +- packages/toolpack-sdk/README.md | 8 +- .../docs/examples/mcp-server-example.ts | 121 ++ packages/toolpack-sdk/package.json | 26 +- packages/toolpack-sdk/src/client/index.ts | 12 +- packages/toolpack-sdk/src/mcp/index.ts | 26 +- packages/toolpack-sdk/src/mcp/server-auth.ts | 134 ++ packages/toolpack-sdk/src/mcp/server-types.ts | 226 +++ packages/toolpack-sdk/src/mcp/server.ts | 378 +++++ .../src/providers/anthropic/index.ts | 24 +- .../src/providers/gemini/index.ts | 31 +- .../src/providers/ollama/adapter.ts | 10 + .../src/providers/openai/index.ts | 14 +- .../src/providers/vertexai/index.ts | 24 +- packages/toolpack-sdk/src/toolpack.ts | 102 ++ .../coding-tools/parsers/babel-parser.ts | 2 +- 
.../src/tools/exec-tools/index.test.ts | 4 +- .../src/tools/exec-tools/index.ts | 12 +- .../exec-tools/tools/run-background/index.ts | 2 +- .../tools/run-blocking/index.test.ts | 70 + .../exec-tools/tools/run-blocking/index.ts | 81 + .../exec-tools/tools/run-blocking/schema.ts | 24 + .../tools/tail-output/index.test.ts | 79 + .../exec-tools/tools/tail-output/index.ts | 49 + .../exec-tools/tools/tail-output/schema.ts | 25 + packages/toolpack-sdk/src/tools/index.ts | 4 +- packages/toolpack-sdk/src/tools/registry.ts | 5 +- packages/toolpack-sdk/src/tools/types.ts | 55 +- packages/toolpack-sdk/src/types/index.ts | 29 +- .../tests/integration/mcp-server.test.ts | 316 ++++ .../tests/unit/mcp-server-auth.test.ts | 249 ++++ .../tests/unit/mcp-server.test.ts | 609 ++++++++ 54 files changed, 6135 insertions(+), 81 deletions(-) create mode 100644 packages/toolpack-agents/src/channels/mcp-channel.test.ts create mode 100644 packages/toolpack-agents/src/channels/mcp-channel.ts create mode 100644 packages/toolpack-agents/src/interceptors/builtins/otel-tracer.ts create mode 100644 packages/toolpack-agents/src/testing/eval-dataset.ts create mode 100644 packages/toolpack-agents/src/testing/eval-report.ts create mode 100644 packages/toolpack-agents/src/testing/eval-runner.ts create mode 100644 packages/toolpack-agents/src/testing/eval-scorer.ts create mode 100644 packages/toolpack-agents/src/testing/eval-types.ts create mode 100644 packages/toolpack-agents/src/testing/eval.test.ts create mode 100644 packages/toolpack-sdk/docs/examples/mcp-server-example.ts create mode 100644 packages/toolpack-sdk/src/mcp/server-auth.ts create mode 100644 packages/toolpack-sdk/src/mcp/server-types.ts create mode 100644 packages/toolpack-sdk/src/mcp/server.ts create mode 100644 packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts create mode 100644 packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.ts create mode 100644 
packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/schema.ts create mode 100644 packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.test.ts create mode 100644 packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.ts create mode 100644 packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/schema.ts create mode 100644 packages/toolpack-sdk/tests/integration/mcp-server.test.ts create mode 100644 packages/toolpack-sdk/tests/unit/mcp-server-auth.test.ts create mode 100644 packages/toolpack-sdk/tests/unit/mcp-server.test.ts diff --git a/README.md b/README.md index 433139c..8af4071 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Toolpack SDK -A unified TypeScript/Node.js SDK for building AI-powered applications with multiple providers, 100+ built-in tools, a workflow engine, and a flexible mode system — all through a single API. +The TypeScript SDK for building production AI agents — 100+ built-in tools, 8 channel integrations, a persistent cognitive layer, and full Knowledge/RAG, all in one package. 
[![npm version](https://img.shields.io/npm/v/toolpack-sdk.svg)](https://www.npmjs.com/package/toolpack-sdk) [![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) @@ -17,8 +17,8 @@ A unified TypeScript/Node.js SDK for building AI-powered applications with multi - **Workflow Engine** — AI-driven planning with plan-direct execution and parallel tool orchestration - **Mode System** — Built-in Agent and Chat modes, plus `createMode()` for custom modes with tool filtering - **HITL Confirmation** — Human-in-the-loop approval for high-risk operations with configurable bypass rules -- **Custom Providers** — Bring your own provider by implementing the `ProviderAdapter` interface -- **100+ Built-in Tools** across 14 categories: +- **Extensible at Every Layer** — Every built-in component is a plug-in point: custom tools (`ToolDefinition`), custom channels (`BaseChannel`), custom provider adapters (`ProviderAdapter`), custom agents (`BaseAgent`), custom modes (`createMode()`), and custom interceptors — all using the same interfaces as the built-ins +- **100+ Built-in Tools** across 12 categories: - **MCP Tool Server Integration** — dynamically bridge external Model Context Protocol servers into Toolpack as first-class tools via `createMcpToolProject()` and `disconnectMcpToolProject()`. | Category | Tools | Description | @@ -163,15 +163,14 @@ See `packages/toolpack-sdk/docs/examples/kubernetes-usage.ts` for a complete exa - **OpenAI**: Supports `reasoningTier` and `costTier` on model info for GPT-5.x reasoning models. API key read from `OPENAI_API_KEY` or `TOOLPACK_OPENAI_KEY`. - **Anthropic**: Does not support embeddings. Tool results are converted to `tool_result` content blocks automatically. `tool_choice: none` is handled by omitting tools from the request. `max_tokens` defaults to `4096` if not specified. API key read from `ANTHROPIC_API_KEY` or `TOOLPACK_ANTHROPIC_KEY`. 
-## MCP Tool Server Support +## MCP Support -Toolpack now includes first-class support for Model Context Protocol (MCP) adapters and server tool discovery. +Toolpack has first-class MCP support in both directions: as a **client** (consume external MCP servers) and as a **server** (expose Toolpack tools + agents to any MCP client). -### Quick MCP Setup +### MCP Client — consume external MCP servers ```typescript -import { Toolpack } from 'toolpack-sdk'; -import { createMcpToolProject } from './tools/mcp-tools'; +import { Toolpack, createMcpToolProject } from 'toolpack-sdk'; const mcpToolProject = await createMcpToolProject({ servers: [ @@ -182,13 +181,6 @@ const mcpToolProject = await createMcpToolProject({ args: ['-y', '@modelcontextprotocol/server-filesystem', '/workspace'], autoConnect: true, }, - { - name: 'custom', - displayName: 'Custom MCP', - command: 'npx', - args: ['-y', '@modelcontextprotocol/server-tools'], - autoConnect: true, - }, ], }); @@ -198,11 +190,41 @@ const sdk = await Toolpack.init({ customTools: [mcpToolProject], }); -// On shutdown/cold path: +// On shutdown: // await disconnectMcpToolProject(mcpToolProject); ``` -See `docs/MCP_INTEGRATION.md` and `docs/examples/mcp-integration-example.ts` for full instructions and best practices. +See `docs/MCP_INTEGRATION.md` for full client configuration options. + +### MCP Server — expose Toolpack as an MCP server + +Expose all 100+ built-in tools (or a filtered subset) to any MCP client — Claude Desktop, Cursor, custom agents: + +```typescript +const handle = await sdk.startMcpServer({ + transport: 'http', // or 'stdio' for Claude Desktop / Cursor + port: 3000, + + // Optional: restrict exposed tools + expose: { categories: ['filesystem', 'version-control'] }, + + // Optional: static bearer token auth + auth: { mode: 'static', tokens: [process.env.MCP_TOKEN!] 
}, + + // Optional: expose Toolpack agents as MCP tools + agents: [mcpChannel.asAgentDefinition(myAgent)], + + // Optional: search mode — expose only tool.search, clients discover tools on demand + searchMode: true, +}); + +console.log(`MCP server running on port ${handle.port}`); +// handle.stop() to shut down +``` + +**Auth modes:** `static` (pre-shared tokens), `jwt` (JWKS/Auth0/Supabase), `custom` (your own verifier). + +**Search mode** reduces context token usage for large tool sets — clients call `tool.search` to discover tools on demand instead of receiving all 100+ upfront. - **Gemini**: Uses synthetic tool call IDs (`gemini__`) since the Gemini API doesn't return tool call IDs natively. Tool results are converted to `functionResponse` parts in chat history automatically. API key read from `GOOGLE_GENERATIVE_AI_KEY` or `TOOLPACK_GEMINI_KEY`. - **Ollama**: Auto-discovers all locally pulled models when registered as `{ ollama: {} }`. Uses `/api/show` and tool probing to detect capabilities (tool calling, vision, embeddings) per model. Models without tool support are automatically stripped of tools and given a system instruction to prevent hallucinated tool usage. Uses synthetic tool call IDs (`ollama__`). Embeddings use the modern `/api/embed` batch endpoint. Legacy per-model registration (`{ 'ollama-llama3': {} }`) is also supported. - **OpenRouter**: Routes requests to any of the 300+ models available on [openrouter.ai](https://openrouter.ai) via an OpenAI-compatible API. Models are discovered dynamically from the `/models` endpoint. Tool calling is fully supported; models that reject `tool_choice: 'none'` have tools stripped gracefully instead. No embeddings support. Optional `siteUrl` and `siteName` config for OpenRouter's attribution leaderboard. API key read from `OPENROUTER_API_KEY` or `TOOLPACK_OPENROUTER_KEY`. 
diff --git a/package-lock.json b/package-lock.json index f9c9d91..eec42fd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -959,6 +959,19 @@ "node": ">=18.0.0" } }, + "node_modules/@hono/node-server": { + "version": "1.19.14", + "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.14.tgz", + "integrity": "sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18.14.1" + }, + "peerDependencies": { + "hono": "^4" + } + }, "node_modules/@humanfs/core": { "version": "0.19.1", "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", @@ -1164,6 +1177,71 @@ "integrity": "sha512-GaHYm+c0O9MjZRu0ongGBRbinu8gVAMd2UZjji6jVmqKtZluZnptXGWhz1E8j8D2HJ3f/yMxKAUC0b+57wncIw==", "license": "MIT" }, + "node_modules/@modelcontextprotocol/sdk": { + "version": "1.29.0", + "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.29.0.tgz", + "integrity": "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@hono/node-server": "^1.19.9", + "ajv": "^8.17.1", + "ajv-formats": "^3.0.1", + "content-type": "^1.0.5", + "cors": "^2.8.5", + "cross-spawn": "^7.0.5", + "eventsource": "^3.0.2", + "eventsource-parser": "^3.0.0", + "express": "^5.2.1", + "express-rate-limit": "^8.2.1", + "hono": "^4.11.4", + "jose": "^6.1.3", + "json-schema-typed": "^8.0.2", + "pkce-challenge": "^5.0.0", + "raw-body": "^3.0.0", + "zod": "^3.25 || ^4.0", + "zod-to-json-schema": "^3.25.1" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@cfworker/json-schema": "^4.1.1", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "@cfworker/json-schema": { + "optional": true + }, + "zod": { + "optional": false + } + } + }, + "node_modules/@modelcontextprotocol/sdk/node_modules/ajv": { + "version": "8.20.0", + "resolved": 
"https://registry.npmjs.org/ajv/-/ajv-8.20.0.tgz", + "integrity": "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==", + "dev": true, + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/@modelcontextprotocol/sdk/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "dev": true, + "license": "MIT" + }, "node_modules/@napi-rs/wasm-runtime": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.2.tgz", @@ -2655,6 +2733,47 @@ "npm": ">=7.0.0" } }, + "node_modules/accepts": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-2.0.0.tgz", + "integrity": "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==", + "dev": true, + "license": "MIT", + "dependencies": { + "mime-types": "^3.0.0", + "negotiator": "^1.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/accepts/node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/accepts/node_modules/mime-types": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz", + "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==", + "dev": true, 
+ "license": "MIT", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/acorn": { "version": "8.16.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz", @@ -2717,6 +2836,48 @@ "url": "https://github.com/sponsors/epoberezkin" } }, + "node_modules/ajv-formats": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-3.0.1.tgz", + "integrity": "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ajv": "^8.0.0" + }, + "peerDependencies": { + "ajv": "^8.0.0" + }, + "peerDependenciesMeta": { + "ajv": { + "optional": true + } + } + }, + "node_modules/ajv-formats/node_modules/ajv": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.20.0.tgz", + "integrity": "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==", + "dev": true, + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ajv-formats/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "dev": true, + "license": "MIT" + }, "node_modules/ansi-regex": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", @@ -3024,6 +3185,48 @@ "readable-stream": "^3.4.0" } }, + "node_modules/body-parser": { + "version": "2.2.2", + "resolved": 
"https://registry.npmjs.org/body-parser/-/body-parser-2.2.2.tgz", + "integrity": "sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==", + "dev": true, + "license": "MIT", + "dependencies": { + "bytes": "^3.1.2", + "content-type": "^1.0.5", + "debug": "^4.4.3", + "http-errors": "^2.0.0", + "iconv-lite": "^0.7.0", + "on-finished": "^2.4.1", + "qs": "^6.14.1", + "raw-body": "^3.0.1", + "type-is": "^2.0.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/body-parser/node_modules/iconv-lite": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz", + "integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==", + "dev": true, + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/boolbase": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", @@ -3108,6 +3311,16 @@ "esbuild": ">=0.18" } }, + "node_modules/bytes": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", + "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/cac": { "version": "6.7.14", "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz", @@ -3341,6 +3554,30 @@ "node": "^14.18.0 || >=16.10.0" } }, + "node_modules/content-disposition": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.1.0.tgz", + "integrity": 
"sha512-5jRCH9Z/+DRP7rkvY83B+yGIGX96OYdJmzngqnw2SBSxqCFPd0w2km3s5iawpGX8krnwSGmF0FW5Nhr0Hfai3g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/content-type": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/convert-source-map": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", @@ -3348,6 +3585,44 @@ "dev": true, "license": "MIT" }, + "node_modules/cookie": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie-signature": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.2.2.tgz", + "integrity": "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.6.0" + } + }, + "node_modules/cors": { + "version": "2.8.6", + "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.6.tgz", + "integrity": "sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==", + "dev": true, + "license": "MIT", + "dependencies": { + "object-assign": "^4", + "vary": "^1" + }, + "engines": { + "node": ">= 0.10" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/cosmiconfig": { "version": "9.0.1", "resolved": 
"https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.1.tgz", @@ -3533,6 +3808,16 @@ "node": ">=0.10" } }, + "node_modules/depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/detect-libc": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", @@ -3691,12 +3976,29 @@ "safe-buffer": "^5.0.1" } }, + "node_modules/ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==", + "dev": true, + "license": "MIT" + }, "node_modules/emoji-regex": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", "license": "MIT" }, + "node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/encoding-sniffer": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz", @@ -3856,6 +4158,13 @@ "node": ">=6" } }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==", + "dev": true, + "license": "MIT" + }, "node_modules/escape-string-regexp": { "version": "4.0.0", "resolved": 
"https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", @@ -4065,6 +4374,16 @@ "node": ">=0.10.0" } }, + "node_modules/etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/events-universal": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/events-universal/-/events-universal-1.0.1.tgz", @@ -4074,6 +4393,29 @@ "bare-events": "^2.7.0" } }, + "node_modules/eventsource": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/eventsource/-/eventsource-3.0.7.tgz", + "integrity": "sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==", + "dev": true, + "license": "MIT", + "dependencies": { + "eventsource-parser": "^3.0.1" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/eventsource-parser": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.1.0.tgz", + "integrity": "sha512-kJezFj9YFAMLeORyi7aCLxLbD5/qWMQnoMVlVPyHIll7lgRJCc3JVln9Vgl9nwQi0YkMnhdGTMNn7CkRRAptMg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/expand-template": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", @@ -4093,6 +4435,96 @@ "node": ">=12.0.0" } }, + "node_modules/express": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", + "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", + "dev": true, + "license": "MIT", + "dependencies": { + "accepts": "^2.0.0", + "body-parser": "^2.2.1", + "content-disposition": "^1.0.0", + "content-type": "^1.0.5", + "cookie": "^0.7.1", + 
"cookie-signature": "^1.2.1", + "debug": "^4.4.0", + "depd": "^2.0.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "finalhandler": "^2.1.0", + "fresh": "^2.0.0", + "http-errors": "^2.0.0", + "merge-descriptors": "^2.0.0", + "mime-types": "^3.0.0", + "on-finished": "^2.4.1", + "once": "^1.4.0", + "parseurl": "^1.3.3", + "proxy-addr": "^2.0.7", + "qs": "^6.14.0", + "range-parser": "^1.2.1", + "router": "^2.2.0", + "send": "^1.1.0", + "serve-static": "^2.2.0", + "statuses": "^2.0.1", + "type-is": "^2.0.1", + "vary": "^1.1.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/express-rate-limit": { + "version": "8.5.2", + "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.5.2.tgz", + "integrity": "sha512-5Kb34ipNX694DH48vN9irak1Qx30nb0PLYHXfJgw4YEjiC3ZEmZJhwOp+VfiCYwFzvFTdB9QkArYS5kXa2cx2A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ip-address": "^10.2.0" + }, + "engines": { + "node": ">= 16" + }, + "funding": { + "url": "https://github.com/sponsors/express-rate-limit" + }, + "peerDependencies": { + "express": ">= 4.11" + } + }, + "node_modules/express/node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/express/node_modules/mime-types": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz", + "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + 
"url": "https://opencollective.com/express" + } + }, "node_modules/extend": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", @@ -4174,6 +4606,23 @@ "dev": true, "license": "MIT" }, + "node_modules/fast-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.2.tgz", + "integrity": "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, "node_modules/fastq": { "version": "1.20.1", "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.20.1.tgz", @@ -4264,6 +4713,28 @@ "node": ">=8" } }, + "node_modules/finalhandler": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-2.1.1.tgz", + "integrity": "sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA==", + "dev": true, + "license": "MIT", + "dependencies": { + "debug": "^4.4.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "on-finished": "^2.4.1", + "parseurl": "^1.3.3", + "statuses": "^2.0.1" + }, + "engines": { + "node": ">= 18.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/find-up": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", @@ -4381,6 +4852,26 @@ "node": ">=12.20.0" } }, + "node_modules/forwarded": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fresh": { + "version": "2.0.0", + "resolved": 
"https://registry.npmjs.org/fresh/-/fresh-2.0.0.tgz", + "integrity": "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/fs-constants": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", @@ -4712,6 +5203,16 @@ "node": ">= 0.4" } }, + "node_modules/hono": { + "version": "4.12.23", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.23.tgz", + "integrity": "sha512-eIaZ9qDgu7XV0pxOCrg7/WhnQ6Ivm22UcxhXx/A3dcbqbbYgBEkc6e/J/s7j2tS96zoB0S9VBdLwQNCWwUo4LA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=16.9.0" + } + }, "node_modules/html-escaper": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz", @@ -4750,6 +5251,27 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, + "node_modules/http-errors": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", + "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "depd": "~2.0.0", + "inherits": "~2.0.4", + "setprototypeof": "~1.2.0", + "statuses": "~2.0.2", + "toidentifier": "~1.0.1" + }, + "engines": { + "node": ">= 0.8" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/http-proxy-agent": { "version": "7.0.2", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", @@ -4857,14 +5379,24 @@ "license": "ISC" }, "node_modules/ip-address": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz", - "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==", + "version": "10.2.0", + "resolved": 
"https://registry.npmjs.org/ip-address/-/ip-address-10.2.0.tgz", + "integrity": "sha512-/+S6j4E9AHvW9SWMSEY9Xfy66O5PWvVEJ08O0y5JGyEKQpojb0K0GKpz/v5HJ/G0vi3D2sjGK78119oXZeE0qA==", "license": "MIT", "engines": { "node": ">= 12" } }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.10" + } + }, "node_modules/is-arrayish": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", @@ -4910,6 +5442,13 @@ "node": ">=0.12.0" } }, + "node_modules/is-promise": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz", + "integrity": "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==", + "dev": true, + "license": "MIT" + }, "node_modules/is-property": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/is-property/-/is-property-1.0.2.tgz", @@ -4990,6 +5529,16 @@ "@pkgjs/parseargs": "^0.11.0" } }, + "node_modules/jose": { + "version": "6.2.3", + "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.3.tgz", + "integrity": "sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/panva" + } + }, "node_modules/joycon": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/joycon/-/joycon-3.1.1.tgz", @@ -5072,6 +5621,13 @@ "dev": true, "license": "MIT" }, + "node_modules/json-schema-typed": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/json-schema-typed/-/json-schema-typed-8.0.2.tgz", + "integrity": "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==", + "dev": true, + "license": 
"BSD-2-Clause" + }, "node_modules/json-stable-stringify-without-jsonify": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", @@ -5624,6 +6180,29 @@ "node": ">= 0.4" } }, + "node_modules/media-typer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-1.1.0.tgz", + "integrity": "sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/merge-descriptors": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-2.0.0.tgz", + "integrity": "sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/merge2": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", @@ -5850,6 +6429,16 @@ "dev": true, "license": "MIT" }, + "node_modules/negotiator": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-1.0.0.tgz", + "integrity": "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/netlify": { "version": "24.9.0", "resolved": "https://registry.npmjs.org/netlify/-/netlify-24.9.0.tgz", @@ -10685,6 +11274,331 @@ } } }, + "node_modules/netlify/node_modules/@rollup/rollup-android-arm-eabi": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.59.0.tgz", + "integrity": "sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==", + "cpu": [ + "arm" + 
], + "extraneous": true, + "license": "MIT", + "os": [ + "android" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-android-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.59.0.tgz", + "integrity": "sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "android" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-darwin-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.59.0.tgz", + "integrity": "sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "darwin" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-darwin-x64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.59.0.tgz", + "integrity": "sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w==", + "cpu": [ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "darwin" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-freebsd-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.59.0.tgz", + "integrity": "sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "freebsd" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-freebsd-x64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.59.0.tgz", + "integrity": 
"sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg==", + "cpu": [ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "freebsd" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-arm-gnueabihf": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.59.0.tgz", + "integrity": "sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw==", + "cpu": [ + "arm" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-arm-musleabihf": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.59.0.tgz", + "integrity": "sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA==", + "cpu": [ + "arm" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-arm64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.59.0.tgz", + "integrity": "sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-arm64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.59.0.tgz", + "integrity": "sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-loong64-gnu": { + "version": "4.59.0", + 
"resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.59.0.tgz", + "integrity": "sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg==", + "cpu": [ + "loong64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-loong64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.59.0.tgz", + "integrity": "sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q==", + "cpu": [ + "loong64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-ppc64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.59.0.tgz", + "integrity": "sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA==", + "cpu": [ + "ppc64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-ppc64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.59.0.tgz", + "integrity": "sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA==", + "cpu": [ + "ppc64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-riscv64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.59.0.tgz", + "integrity": "sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg==", + "cpu": [ + "riscv64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + 
"linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-riscv64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.59.0.tgz", + "integrity": "sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg==", + "cpu": [ + "riscv64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-s390x-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.59.0.tgz", + "integrity": "sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w==", + "cpu": [ + "s390x" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-x64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.59.0.tgz", + "integrity": "sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg==", + "cpu": [ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-linux-x64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.59.0.tgz", + "integrity": "sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg==", + "cpu": [ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "linux" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-openbsd-x64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.59.0.tgz", + "integrity": "sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ==", + "cpu": 
[ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "openbsd" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-openharmony-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.59.0.tgz", + "integrity": "sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "openharmony" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-win32-arm64-msvc": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.59.0.tgz", + "integrity": "sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A==", + "cpu": [ + "arm64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "win32" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-win32-ia32-msvc": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.59.0.tgz", + "integrity": "sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA==", + "cpu": [ + "ia32" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "win32" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-win32-x64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.59.0.tgz", + "integrity": "sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA==", + "cpu": [ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "win32" + ] + }, + "node_modules/netlify/node_modules/@rollup/rollup-win32-x64-msvc": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.59.0.tgz", + "integrity": 
"sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==", + "cpu": [ + "x64" + ], + "extraneous": true, + "license": "MIT", + "os": [ + "win32" + ] + }, "node_modules/netlify/node_modules/@sec-ant/readable-stream": { "version": "0.4.1", "license": "MIT" @@ -13629,6 +14543,17 @@ "safe-buffer": "~5.1.0" } }, + "node_modules/netlify/node_modules/fsevents": { + "version": "2.3.3", + "extraneous": true, + "license": "MIT", + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, "node_modules/netlify/node_modules/function-bind": { "version": "1.1.2", "license": "MIT", @@ -17066,6 +17991,51 @@ "version": "1.4.1", "license": "MIT" }, + "node_modules/netlify/node_modules/rollup": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.59.0.tgz", + "integrity": "sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg==", + "extraneous": true, + "license": "MIT", + "dependencies": { + "@types/estree": "1.0.8" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=18.0.0", + "npm": ">=8.0.0" + }, + "optionalDependencies": { + "@rollup/rollup-android-arm-eabi": "4.59.0", + "@rollup/rollup-android-arm64": "4.59.0", + "@rollup/rollup-darwin-arm64": "4.59.0", + "@rollup/rollup-darwin-x64": "4.59.0", + "@rollup/rollup-freebsd-arm64": "4.59.0", + "@rollup/rollup-freebsd-x64": "4.59.0", + "@rollup/rollup-linux-arm-gnueabihf": "4.59.0", + "@rollup/rollup-linux-arm-musleabihf": "4.59.0", + "@rollup/rollup-linux-arm64-gnu": "4.59.0", + "@rollup/rollup-linux-arm64-musl": "4.59.0", + "@rollup/rollup-linux-loong64-gnu": "4.59.0", + "@rollup/rollup-linux-loong64-musl": "4.59.0", + "@rollup/rollup-linux-ppc64-gnu": "4.59.0", + "@rollup/rollup-linux-ppc64-musl": "4.59.0", + "@rollup/rollup-linux-riscv64-gnu": "4.59.0", + "@rollup/rollup-linux-riscv64-musl": "4.59.0", + "@rollup/rollup-linux-s390x-gnu": "4.59.0", + 
"@rollup/rollup-linux-x64-gnu": "4.59.0", + "@rollup/rollup-linux-x64-musl": "4.59.0", + "@rollup/rollup-openbsd-x64": "4.59.0", + "@rollup/rollup-openharmony-arm64": "4.59.0", + "@rollup/rollup-win32-arm64-msvc": "4.59.0", + "@rollup/rollup-win32-ia32-msvc": "4.59.0", + "@rollup/rollup-win32-x64-gnu": "4.59.0", + "@rollup/rollup-win32-x64-msvc": "4.59.0", + "fsevents": "~2.3.2" + } + }, "node_modules/netlify/node_modules/router": { "version": "2.2.0", "license": "MIT", @@ -18924,6 +19894,19 @@ ], "license": "MIT" }, + "node_modules/on-finished": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "dev": true, + "license": "MIT", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -19135,6 +20118,16 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -19179,6 +20172,17 @@ "dev": true, "license": "ISC" }, + "node_modules/path-to-regexp": { + "version": "8.4.2", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.4.2.tgz", + "integrity": "sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA==", + "dev": true, + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/pathe": 
{ "version": "2.0.3", "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz", @@ -19310,6 +20314,16 @@ "node": ">= 6" } }, + "node_modules/pkce-challenge": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.1.tgz", + "integrity": "sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=16.20.0" + } + }, "node_modules/pkg-types": { "version": "1.3.1", "resolved": "https://registry.npmjs.org/pkg-types/-/pkg-types-1.3.1.tgz", @@ -19503,6 +20517,20 @@ "node": ">=12.0.0" } }, + "node_modules/proxy-addr": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "dev": true, + "license": "MIT", + "dependencies": { + "forwarded": "0.2.0", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, "node_modules/proxy-agent": { "version": "6.5.0", "resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.5.0.tgz", @@ -19623,6 +20651,49 @@ ], "license": "MIT" }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.2.tgz", + "integrity": "sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==", + "dev": true, + "license": "MIT", + "dependencies": { + "bytes": "~3.1.2", + "http-errors": "~2.0.1", + "iconv-lite": "~0.7.0", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.10" + } + }, + 
"node_modules/raw-body/node_modules/iconv-lite": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz", + "integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==", + "dev": true, + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/rc": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", @@ -19684,6 +20755,16 @@ "node": ">=0.10.0" } }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/resolve-from": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", @@ -19807,6 +20888,23 @@ "fsevents": "~2.3.2" } }, + "node_modules/router": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/router/-/router-2.2.0.tgz", + "integrity": "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "debug": "^4.4.0", + "depd": "^2.0.0", + "is-promise": "^4.0.0", + "parseurl": "^1.3.3", + "path-to-regexp": "^8.0.0" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/rss-parser": { "version": "3.13.0", "resolved": "https://registry.npmjs.org/rss-parser/-/rss-parser-3.13.0.tgz", @@ -19904,6 +21002,87 @@ "node": ">=10" } }, + "node_modules/send": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/send/-/send-1.2.1.tgz", + "integrity": 
"sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "debug": "^4.4.3", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "fresh": "^2.0.0", + "http-errors": "^2.0.1", + "mime-types": "^3.0.2", + "ms": "^2.1.3", + "on-finished": "^2.4.1", + "range-parser": "^1.2.1", + "statuses": "^2.0.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/send/node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/send/node_modules/mime-types": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz", + "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/serve-static": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-2.2.1.tgz", + "integrity": "sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw==", + "dev": true, + "license": "MIT", + "dependencies": { + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "parseurl": "^1.3.3", + "send": "^1.2.0" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": 
"https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", + "dev": true, + "license": "ISC" + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -20172,6 +21351,16 @@ "dev": true, "license": "MIT" }, + "node_modules/statuses": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", + "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/std-env": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/std-env/-/std-env-4.0.0.tgz", @@ -20429,6 +21618,16 @@ "node": ">=8.0" } }, + "node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.6" + } + }, "node_modules/toolpack-sdk": { "resolved": "packages/toolpack-sdk", "link": true @@ -20713,6 +21912,66 @@ "node": ">= 0.8.0" } }, + "node_modules/type-is": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-2.1.0.tgz", + "integrity": "sha512-faYHw0anBbc/kWF3zFTEnxSFOAGUX9GFbOBthvDdLsIlEoWOFOtS0zgCiQYwIskL9iGXZL3kAXD8OoZ4GmMATA==", + "dev": true, + "license": "MIT", + "dependencies": { + "content-type": "^2.0.0", + "media-typer": "^1.1.0", + "mime-types": "^3.0.0" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/type-is/node_modules/content-type": { + "version": "2.0.0", + "resolved": 
"https://registry.npmjs.org/content-type/-/content-type-2.0.0.tgz", + "integrity": "sha512-j/O/d7GcZCyNl7/hwZAb606rzqkyvaDctLmckbxLzHvFBzTJHuGEdodATcP3yIRoDrLHkIATJuvzbFlp/ki2cQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/type-is/node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/type-is/node_modules/mime-types": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz", + "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/typed-query-selector": { "version": "2.12.1", "resolved": "https://registry.npmjs.org/typed-query-selector/-/typed-query-selector-2.12.1.tgz", @@ -20779,6 +22038,16 @@ "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", "license": "MIT" }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/uri-js": { "version": "4.4.1", "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", @@ -20816,6 +22085,16 @@ "dev": true, "license": "MIT" }, + "node_modules/vary": { + 
"version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/vite": { "version": "8.0.3", "resolved": "https://registry.npmjs.org/vite/-/vite-8.0.3.tgz", @@ -21250,9 +22529,19 @@ "url": "https://github.com/sponsors/colinhacks" } }, + "node_modules/zod-to-json-schema": { + "version": "3.25.2", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.2.tgz", + "integrity": "sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA==", + "dev": true, + "license": "ISC", + "peerDependencies": { + "zod": "^3.25.28 || ^4" + } + }, "packages/toolpack-agents": { "name": "@toolpack-sdk/agents", - "version": "2.1.0", + "version": "2.1.1", "license": "Apache-2.0", "dependencies": { "cron-parser": "^5.5.0" @@ -21269,11 +22558,11 @@ "node": ">=20" }, "peerDependencies": { - "@toolpack-sdk/knowledge": "^2.0.0", + "@toolpack-sdk/knowledge": "^2.1.1", "better-sqlite3": "^12.6.2", "discord.js": "^14.x", "nodemailer": "^6.x", - "toolpack-sdk": "^2.0.0", + "toolpack-sdk": "^2.1.1", "twilio": "^5.x" }, "peerDependenciesMeta": { @@ -21296,7 +22585,7 @@ }, "packages/toolpack-knowledge": { "name": "@toolpack-sdk/knowledge", - "version": "2.1.0", + "version": "2.1.1", "license": "Apache-2.0", "dependencies": { "better-sqlite3": "^12.6.2", @@ -21317,7 +22606,7 @@ } }, "packages/toolpack-sdk": { - "version": "2.1.0", + "version": "2.1.1", "license": "Apache-2.0", "dependencies": { "@anthropic-ai/sdk": "^0.73.0", @@ -21341,6 +22630,7 @@ }, "devDependencies": { "@eslint/js": "^9.39.2", + "@modelcontextprotocol/sdk": "^1.29.0", "@types/babel__core": "^7.20.5", "@types/babel__traverse": "^7.28.0", "@types/better-sqlite3": "^7.6.13", @@ -21361,6 +22651,14 @@ }, "engines": { "node": ">=20" + }, + 
"peerDependencies": { + "@modelcontextprotocol/sdk": "^1.29.0" + }, + "peerDependenciesMeta": { + "@modelcontextprotocol/sdk": { + "optional": true + } } }, "packages/toolpack-sdk/node_modules/@google/genai": { diff --git a/packages/toolpack-agents/README.md b/packages/toolpack-agents/README.md index d92d2fa..08afa8c 100644 --- a/packages/toolpack-agents/README.md +++ b/packages/toolpack-agents/README.md @@ -8,12 +8,14 @@ Build production-ready AI agents with channels, workflows, and event-driven arch ## Features - **4 Built-in Agents** — Research, Coding, Data, Browser -- **7 Channel Types** — Slack, Telegram, Discord, Email, SMS, Webhook, Scheduled +- **8 Channel Types** — Slack, Telegram, Discord, Email, SMS, Webhook, Scheduled, MCP - **Event-Driven** — Full lifecycle hooks and events - **Human-in-the-Loop** — `ask()` support for two-way channels - **Knowledge Integration** — Built-in RAG support with knowledge bases +- **Agent Mind** — Persistent cognitive layer: goals, beliefs, reflections, cross-run recall +- **Evals** — `EvalDataset`, `EvalRunner`, 4 scorer types, regression reports +- **OTel Tracing** — OpenTelemetry interceptor for distributed traces - **Type-Safe** — Full TypeScript support -- **Production-Ready** — 573 tests passing ## Installation @@ -239,6 +241,27 @@ const smsOutbound = new SMSChannel({ }); ``` +### McpChannel (Two-way) + +Exposes a Toolpack agent as a tool in an MCP server. The agent appears in `tools/list` as `agent.<name>` and is callable by any MCP client. 
+ +```typescript +import { McpChannel } from '@toolpack-sdk/agents'; +import { Toolpack } from 'toolpack-sdk'; + +const ch = new McpChannel({ name: 'mcp' }); +const agent = new PrReviewerAgent({ channels: [ch] }); +await agent.start(); + +const sdk = await Toolpack.init({ provider: 'anthropic', tools: true }); +await sdk.startMcpServer({ + transport: 'stdio', // or 'http' with port + agents: [ch.asAgentDefinition(agent)], +}); +``` + +`ch.asAgentDefinition(agent)` produces the entry that `startMcpServer` registers in `tools/list`. Each MCP `tools/call` for `agent.<name>` is routed through the channel to `agent.invokeAgent()` and the output is returned as the tool result. + ## Creating Custom Agents Extend `BaseAgent` to create custom agents: @@ -763,6 +786,7 @@ class MyAgent extends BaseAgent { | `createCaptureInterceptor` | Persist inbound and outbound messages to conversation history (auto-registered) | | `createDepthGuardInterceptor` | Reject delegation chains that exceed a configured depth | | `createTracerInterceptor` | Structured logging of each chain hop for debugging | +| `createOTelTracerInterceptor` | OpenTelemetry span per invocation — compatible with any OTel-compliant backend | ## Capabilities @@ -818,14 +842,58 @@ const result = await summarizer.invokeAgent({ const summary = JSON.parse(result.output) as SummarizerOutput; ``` +## Evals — LLM Quality Evaluation + +Unit tests verify wiring; evals verify agent **quality**. Use the eval primitives to build regression suites and track answer quality over time. 
+ +```typescript +import { + EvalDataset, + EvalRunner, + ContainsScorer, + LLMJudgeScorer, + compareEvalRuns, + formatEvalReport, +} from '@toolpack-sdk/agents'; + +const dataset = new EvalDataset([ + { id: 'q1', input: 'What is 2+2?', expectedOutput: '4' }, + { id: 'q2', input: 'Capital of France?', expectedOutput: 'Paris' }, +]); + +const runner = new EvalRunner({ + agent: myAgent, + dataset, + scorers: [new ContainsScorer()], +}); + +const run = await runner.run(); +console.log(`Average score: ${(run.averageScore * 100).toFixed(1)}%`); +``` + +**Four built-in scorers:** + +| Scorer | When to use | +|---|---| +| `ExactMatchScorer` | Deterministic outputs — exact string match | +| `ContainsScorer` | Output must contain the expected string | +| `LLMJudgeScorer` | Open-ended answers — ask an LLM to grade on 0–1 | +| `CustomScorer` | Any custom scoring logic | + +**Regression detection:** + +```typescript +const report = compareEvalRuns(baselineRun, currentRun); +console.log(formatEvalReport(report)); +expect(report.regressions).toHaveLength(0); // CI gate +``` + ## Testing ```bash npm test ``` -**Test Coverage:** 573 tests passing across 29 test files. 
- ## License Apache 2.0 © Toolpack SDK diff --git a/packages/toolpack-agents/docs/README.md b/packages/toolpack-agents/docs/README.md index 34cf672..c9f4c97 100644 --- a/packages/toolpack-agents/docs/README.md +++ b/packages/toolpack-agents/docs/README.md @@ -54,7 +54,7 @@ |---|---| | [agents.md](agents.md) | Creating agents — `BaseAgent` API, built-in agents, lifecycle | | [registry.md](registry.md) | `AgentRegistry` — multi-agent coordination | -| [channels.md](channels.md) | All 7 channel integrations (Slack, Discord, Telegram, Webhook, Scheduled, Email, SMS) | +| [channels.md](channels.md) | All 8 channel integrations (Slack, Discord, Telegram, Webhook, Scheduled, Email, SMS, MCP) | | [scheduler.md](scheduler.md) | `SchedulerStore` and `createSchedulerTools` — persistent job scheduling reference | | [mind.md](mind.md) | `AgentMind` — persistent cognitive layer: goals, beliefs, reflections | | [conversation-history.md](conversation-history.md) | Conversation storage, `assemblePrompt`, addressed-only mode | diff --git a/packages/toolpack-agents/docs/channels.md b/packages/toolpack-agents/docs/channels.md index df87027..260e043 100644 --- a/packages/toolpack-agents/docs/channels.md +++ b/packages/toolpack-agents/docs/channels.md @@ -434,6 +434,59 @@ const sms = new SMSChannel({ --- +## McpChannel + +`McpChannel` exposes a Toolpack agent as a tool in an MCP server. When an MCP client calls `agent.<name>`, the channel delivers the input to the agent and returns its output as the tool result. + +`isTriggerChannel = false` — the MCP client drives the conversation, so `ask()` works normally. 
+ +### Configuration + +```typescript +import { McpChannel } from '@toolpack-sdk/agents'; + +const ch = new McpChannel({ + // Optional: max milliseconds to wait for the agent to respond (default: 120_000) + timeout: 120_000, +}); +``` + +### Wiring to an agent and MCP server + +```typescript +import { McpChannel } from '@toolpack-sdk/agents'; +import { Toolpack } from 'toolpack-sdk'; + +const ch = new McpChannel(); +const agent = new PrReviewerAgent({ channels: [ch] }); +await agent.start(); + +const sdk = await Toolpack.init({ provider: 'anthropic', tools: true }); + +await sdk.startMcpServer({ + transport: 'stdio', // or 'http' + agents: [ch.asAgentDefinition(agent)], +}); +``` + +`ch.asAgentDefinition(agent)` produces the `McpAgentDefinition` object that `startMcpServer` uses to register the agent in `tools/list` as `agent.<name>`. + +### `McpChannelConfig` + +| Option | Type | Default | Description | +|---|---|---|---| +| `timeout` | `number` | `120_000` | Maximum milliseconds to wait for the agent to respond. | + +### Flow + +1. MCP client calls `tools/call` with `name: 'agent.<name>'` +2. `startMcpServer` routes the call to `ch.asAgentDefinition(agent).invoke(args)` +3. `McpChannel` wraps args into an `AgentInput` and calls `agent.invokeAgent()` +4. Agent runs, returns `AgentResult` +5. Output is returned to the MCP client as a text tool result + +--- + ## Custom channels Implement `ChannelInterface` (or extend `BaseChannel`) to connect any data source: diff --git a/packages/toolpack-agents/docs/testing.md b/packages/toolpack-agents/docs/testing.md index 0edb8bc..a9ce89f 100644 --- a/packages/toolpack-agents/docs/testing.md +++ b/packages/toolpack-agents/docs/testing.md @@ -563,3 +563,120 @@ it('emits agent:error on failure', async () => { events.stop(); }); ``` + +--- + +## Evals — LLM quality evaluation + +Unit tests verify agent wiring; evals verify agent **quality** — does the agent give correct, helpful answers on real inputs? The eval primitives let you build regression suites and track quality over time. 
+ +### Import path + +```typescript +import { + EvalDataset, + EvalRunner, + ExactMatchScorer, + ContainsScorer, + LLMJudgeScorer, + CustomScorer, + compareEvalRuns, + formatEvalReport, +} from '@toolpack-sdk/agents'; +``` + +### Quick start + +```typescript +import { EvalDataset, EvalRunner, ContainsScorer } from '@toolpack-sdk/agents'; + +const dataset = new EvalDataset([ + { + id: 'greet-1', + input: 'Say hello', + expectedOutput: 'hello', + }, + { + id: 'summarise-1', + input: 'Summarise: The sky is blue.', + expectedOutput: 'blue', + }, +]); + +const runner = new EvalRunner({ + agent: myAgent, + dataset, + scorers: [new ContainsScorer()], +}); + +const run = await runner.run(); +console.log(`Score: ${run.averageScore * 100}%`); +``` + +### `EvalDataset` + +Holds a list of `EvalCase` objects. + +```typescript +interface EvalCase { + id: string; // unique identifier + input: string; // message sent to the agent + expectedOutput: string; // used by scorers + metadata?: Record; +} +``` + +```typescript +const dataset = new EvalDataset(cases); +dataset.add({ id: 'c3', input: 'test', expectedOutput: 'expected' }); +const subset = dataset.filter(c => c.id.startsWith('greet')); +``` + +### `EvalRunner` + +```typescript +const runner = new EvalRunner({ + agent, // BaseAgent instance + dataset, // EvalDataset + scorers, // EvalScorer[] + concurrency?: 1, // parallel cases (default: 1) +}); + +const run: EvalRun = await runner.run(); +``` + +### Scorers + +| Scorer | Description | +|---|---| +| `ExactMatchScorer` | Score 1.0 if output === expectedOutput (trimmed, case-insensitive by default) | +| `ContainsScorer` | Score 1.0 if output contains expectedOutput | +| `LLMJudgeScorer` | Ask an LLM to score the output on a 0–1 scale | +| `CustomScorer` | Your own scoring function | + +```typescript +// LLM judge +const judge = new LLMJudgeScorer({ + sdk: myToolpack, + prompt: 'Is this response factually correct and helpful? 
Score 0-1.', +}); + +// Custom scorer +const lengthScorer = new CustomScorer({ + name: 'brevity', + score: async ({ output, expectedOutput }) => + output.length <= expectedOutput.length ? 1.0 : 0.0, +}); +``` + +### Regression reports + +```typescript +import { compareEvalRuns, formatEvalReport } from '@toolpack-sdk/agents'; + +const report = compareEvalRuns(baselineRun, currentRun); +console.log(formatEvalReport(report)); + +// CI gate +expect(report.regressions).toHaveLength(0); +``` diff --git a/packages/toolpack-agents/package.json b/packages/toolpack-agents/package.json index e705232..d54b331 100644 --- a/packages/toolpack-agents/package.json +++ b/packages/toolpack-agents/package.json @@ -1,7 +1,7 @@ { "name": "@toolpack-sdk/agents", "version": "2.1.1", - "description": "Agent layer for the Toolpack SDK - build, compose, and deploy AI agents with a consistent, extensible pattern", + "description": "Production AI agents for Toolpack SDK — 8 channel integrations (Slack, Discord, Telegram, SMS, Email, Webhook, Scheduled, MCP), AgentMind persistent cognitive layer (goals, beliefs, reflections), interceptors, evals, and multi-agent coordination", "engines": { "node": ">=20" }, @@ -59,15 +59,20 @@ "publish:npm": "npm run build && npm run test && npm publish" }, "keywords": [ + "ai-agent", + "production-ai", + "agent-mind", + "cognitive-layer", "ai", "llm", - "agent", - "ai-agent", - "slack", - "telegram", + "slack-bot", + "discord-bot", + "telegram-bot", + "sms", + "email-agent", "webhook", - "cron", "scheduler", + "multi-agent", "typescript", "sdk", "toolpack" @@ -83,6 +88,7 @@ "url": "https://github.com/toolpack-ai/toolpack-sdk/issues" }, "peerDependencies": { + "@opentelemetry/api": "^1.x", "@toolpack-sdk/knowledge": "^2.1.1", "better-sqlite3": "^12.6.2", "discord.js": "^14.x", @@ -91,6 +97,9 @@ "twilio": "^5.x" }, "peerDependenciesMeta": { + "@opentelemetry/api": { + "optional": true + }, "@toolpack-sdk/knowledge": { "optional": true }, diff --git 
a/packages/toolpack-agents/src/channels/index.ts b/packages/toolpack-agents/src/channels/index.ts index 0aec180..868c436 100644 --- a/packages/toolpack-agents/src/channels/index.ts +++ b/packages/toolpack-agents/src/channels/index.ts @@ -6,3 +6,5 @@ export { TelegramChannel, TelegramChannelConfig } from './telegram-channel.js'; export { DiscordChannel, DiscordChannelConfig } from './discord-channel.js'; export { EmailChannel, EmailChannelConfig } from './email-channel.js'; export { SMSChannel, SMSChannelConfig } from './sms-channel.js'; +export { McpChannel } from './mcp-channel.js'; +export type { McpChannelConfig } from './mcp-channel.js'; diff --git a/packages/toolpack-agents/src/channels/mcp-channel.test.ts b/packages/toolpack-agents/src/channels/mcp-channel.test.ts new file mode 100644 index 0000000..69816f3 --- /dev/null +++ b/packages/toolpack-agents/src/channels/mcp-channel.test.ts @@ -0,0 +1,124 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { McpChannel } from './mcp-channel.js'; +import type { AgentInput, AgentOutput } from '../agent/types.js'; + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +function makeChannel(timeout?: number) { + return new McpChannel({ timeout }); +} + +/** Wire a handler that immediately calls send() with the given output. 
*/ +function wireHandler(ch: McpChannel, output: string) { + ch.onMessage(async (_input: AgentInput) => { + await ch.send({ output }); + }); +} + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +describe('McpChannel', () => { + describe('listen()', () => { + it('is a no-op and does not throw', () => { + const ch = makeChannel(); + expect(() => ch.listen()).not.toThrow(); + }); + }); + + describe('normalize()', () => { + it('uses a string message field directly', () => { + const ch = makeChannel(); + const input = ch.normalize({ message: 'review this PR' }); + expect(input.message).toBe('review this PR'); + }); + + it('JSON-stringifies non-string args as message', () => { + const ch = makeChannel(); + const input = ch.normalize({ pr_url: 'https://github.com/...' }); + expect(input.message).toBe(JSON.stringify({ pr_url: 'https://github.com/...' })); + }); + + it('sets data to the raw args', () => { + const ch = makeChannel(); + const args = { pr_url: 'https://github.com/...', depth: 3 }; + const input = ch.normalize(args); + expect(input.data).toEqual(args); + }); + + it('generates a unique conversationId per call', () => { + const ch = makeChannel(); + const a = ch.normalize({}); + const b = ch.normalize({}); + expect(a.conversationId).not.toBe(b.conversationId); + }); + }); + + describe('trigger()', () => { + it('resolves with agent output when send() is called', async () => { + const ch = makeChannel(); + wireHandler(ch, 'LGTM — no issues found'); + const result = await ch.trigger({ pr_url: 'https://github.com/...' 
}); + expect(result).toBe('LGTM — no issues found'); + }); + + it('passes normalized input to the handler', async () => { + const ch = makeChannel(); + let received: AgentInput | undefined; + ch.onMessage(async (input) => { + received = input; + await ch.send({ output: 'ok' }); + }); + await ch.trigger({ message: 'hello' }); + expect(received?.message).toBe('hello'); + expect(received?.data).toEqual({ message: 'hello' }); + }); + + it('rejects when no handler is registered (timeout fires)', async () => { + const ch = makeChannel(50); // short timeout so test completes fast + // No handler registered — handleMessage is a no-op, send() never called + await expect(ch.trigger({})).rejects.toThrow(/50ms/); + }); + + it('rejects after timeout when agent never calls send()', async () => { + const ch = makeChannel(50); // 50ms timeout for fast test + ch.onMessage(async () => { /* never calls send */ }); + await expect(ch.trigger({})).rejects.toThrow(/50ms/); + }); + + it('rejects when handleMessage throws', async () => { + const ch = makeChannel(); + ch.onMessage(async () => { throw new Error('agent crashed'); }); + await expect(ch.trigger({})).rejects.toThrow('agent crashed'); + }); + }); + + describe('asAgentDefinition()', () => { + it('returns correct name and description', () => { + const ch = makeChannel(); + const def = ch.asAgentDefinition({ name: 'pr_reviewer', description: 'Reviews PRs' }); + expect(def.name).toBe('pr_reviewer'); + expect(def.description).toBe('Reviews PRs'); + }); + + it('includes inputSchema when provided', () => { + const ch = makeChannel(); + const schema = { type: 'object', properties: { pr_url: { type: 'string' } } }; + const def = ch.asAgentDefinition({ name: 'x', description: 'y' }, schema); + expect(def.inputSchema).toEqual(schema); + }); + + it('omits inputSchema when not provided', () => { + const ch = makeChannel(); + const def = ch.asAgentDefinition({ name: 'x', description: 'y' }); + expect(def.inputSchema).toBeUndefined(); + }); + + 
it('invoke() delegates to trigger()', async () => { + const ch = makeChannel(); + wireHandler(ch, 'done'); + const def = ch.asAgentDefinition({ name: 'x', description: 'y' }); + const result = await def.invoke({ task: 'test' }); + expect(result).toBe('done'); + }); + }); +}); diff --git a/packages/toolpack-agents/src/channels/mcp-channel.ts b/packages/toolpack-agents/src/channels/mcp-channel.ts new file mode 100644 index 0000000..9763119 --- /dev/null +++ b/packages/toolpack-agents/src/channels/mcp-channel.ts @@ -0,0 +1,124 @@ +import { BaseChannel } from './base-channel.js'; +import type { AgentInput, AgentOutput } from '../agent/types.js'; + +export interface McpChannelConfig { + /** + * Maximum milliseconds to wait for the agent to respond. + * Default: 120_000 (2 minutes). + */ + timeout?: number; +} + +/** + * Channel that connects a Toolpack agent to an MCP server as a tool. + * + * Unlike other channels (Slack, Webhook) this channel does not own a server or + * socket. Instead it exposes a `trigger()` method that the MCP tools/call handler + * calls directly. The agent runs and sends its output back through `send()`, which + * resolves the Promise that `trigger()` is waiting on. + * + * Usage: + * ```typescript + * const ch = new McpChannel(); + * const agent = new PrReviewerAgent({ channels: [ch] }); + * await agent.start(); + * + * await sdk.startMcpServer({ + * transport: 'stdio', + * agents: [ch.asAgentDefinition(agent)], + * }); + * ``` + * + * ⚠ One McpChannel handles one concurrent call at a time. If two tools/call + * requests arrive for the same channel simultaneously, the second call's + * pendingResolve overwrites the first and the first call's result is lost. + * Create one McpChannel per agent instance and do not share channels. 
+ */ +export class McpChannel extends BaseChannel { + readonly isTriggerChannel = false; + + private readonly _timeout: number; + private _pendingResolve?: (output: AgentOutput) => void; + + constructor(config: McpChannelConfig = {}) { + super(); + this._timeout = config.timeout ?? 120_000; + } + + /** + * No-op — McpChannel is driven by trigger(), not a background listener. + */ + listen(): void { /* intentional no-op */ } + + /** + * Resolves the pending trigger() Promise with the agent's output. + */ + async send(output: AgentOutput): Promise { + this._pendingResolve?.(output); + this._pendingResolve = undefined; + } + + /** + * Convert raw MCP arguments into AgentInput. + * If args contains a string 'message' field it is used as the message; + * otherwise the entire args object is JSON-stringified as the message. + */ + normalize(incoming: unknown): AgentInput { + const args = incoming as Record; + const message = typeof args['message'] === 'string' + ? args['message'] + : JSON.stringify(args); + return { + message, + data: args, + conversationId: `mcp-${Date.now()}-${Math.random().toString(36).slice(2, 7)}`, + }; + } + + /** + * Called by the MCP tools/call handler. + * Triggers the agent and waits for it to respond via send(). + * Rejects if the agent does not respond within the configured timeout. + */ + async trigger(args: Record): Promise { + const input = this.normalize(args); + + return new Promise((resolve, reject) => { + const timer = setTimeout(() => { + this._pendingResolve = undefined; + reject(new Error(`McpChannel: agent did not respond within ${this._timeout}ms`)); + }, this._timeout); + + this._pendingResolve = (output: AgentOutput) => { + clearTimeout(timer); + resolve(output.output); + }; + + // Fire-and-forget — the agent will call send() when done, + // which resolves the Promise above. + this.handleMessage(input).catch(err => { + clearTimeout(timer); + this._pendingResolve = undefined; + reject(err instanceof Error ? 
err : new Error(String(err))); + }); + }); + } + + /** + * Produce an McpAgentDefinition suitable for startMcpServer({ agents: [...] }). + * + * @param agent Object with name and description (typically a BaseAgent instance). + * @param inputSchema Optional JSON Schema for the tool's input parameters. + */ + asAgentDefinition( + agent: { name: string; description: string }, + inputSchema?: Record, + ) { + return { + name: agent.name, + description: agent.description, + ...(inputSchema !== undefined && { inputSchema }), + invoke: (args: Record) => this.trigger(args), + }; + } +} diff --git a/packages/toolpack-agents/src/index.ts b/packages/toolpack-agents/src/index.ts index f2d9410..dd9ae8a 100644 --- a/packages/toolpack-agents/src/index.ts +++ b/packages/toolpack-agents/src/index.ts @@ -36,6 +36,8 @@ export { TelegramChannel, TelegramChannelConfig } from './channels/telegram-chan export { DiscordChannel, DiscordChannelConfig } from './channels/discord-channel.js'; export { EmailChannel, EmailChannelConfig } from './channels/email-channel.js'; export { SMSChannel, SMSChannelConfig } from './channels/sms-channel.js'; +export { McpChannel } from './channels/mcp-channel.js'; +export type { McpChannelConfig } from './channels/mcp-channel.js'; // Transport layer for agent-to-agent communication export { @@ -130,6 +132,14 @@ export { DepthExceededError, createTracerInterceptor, type TracerConfig, + createOTelTracerInterceptor, + OTelSpanStatusCode, + type OTelTracerConfig, + type OTelTracerProvider, + type OTelTracer, + type OTelSpan, + type OTelSpanOptions, + type OTelSpanStatus, } from './interceptors/index.js'; // Scheduler — persistent job store and LLM-callable tools @@ -141,3 +151,27 @@ export { type CreateJobResult, type JobStatus, } from './scheduler/index.js'; + +// Eval primitives — dataset management, runner, scoring, and regression reports +export { + EvalDataset, + EvalRunner, + ExactMatchScorer, + ContainsScorer, + LLMJudgeScorer, + CustomScorer, + 
compareEvalRuns, + formatEvalReport, + type EvalRunnerOptions, + type EvalScorer, + type LLMJudgeScorerOptions, + type EvalCase, + type EvalCaseResult, + type EvalRun, + type EvalVerdict, + type EvalScoredResult, + type EvalScoredRun, + type EvalRegression, + type EvalImprovement, + type EvalReport, +} from './testing/index.js'; diff --git a/packages/toolpack-agents/src/interceptors/builtins/builtins.test.ts b/packages/toolpack-agents/src/interceptors/builtins/builtins.test.ts index da0af11..8faf753 100644 --- a/packages/toolpack-agents/src/interceptors/builtins/builtins.test.ts +++ b/packages/toolpack-agents/src/interceptors/builtins/builtins.test.ts @@ -11,6 +11,7 @@ import { createParticipantResolverInterceptor } from './participant-resolver.js' import { createAddressCheckInterceptor, isAgentNameOnlyInCodeBlocks, type AddressCheckResult } from './address-check.js'; import { createDepthGuardInterceptor, DepthExceededError } from './depth-guard.js'; import { createTracerInterceptor } from './tracer.js'; +import { createOTelTracerInterceptor, OTelSpanStatusCode, type OTelSpan, type OTelTracerProvider } from './otel-tracer.js'; import { createIntentClassifierInterceptor } from './intent-classifier.js'; // ---------- Test helpers ---------- @@ -1094,3 +1095,174 @@ describe('skip sentinel integration', () => { expect(isSkipSentinel({ output: 'x' })).toBe(false); }); }); + +// ---------- otel-tracer ---------- + +function createMockSpan(): OTelSpan & { + _attributes: Record; + _status: { code: OTelSpanStatusCode; message?: string } | null; + _exceptions: unknown[]; + _ended: boolean; +} { + const span = { + _attributes: {} as Record, + _status: null as { code: OTelSpanStatusCode; message?: string } | null, + _exceptions: [] as unknown[], + _ended: false, + setAttribute(key: string, value: string | number | boolean) { this._attributes[key] = value; }, + setStatus(status: { code: OTelSpanStatusCode; message?: string }) { this._status = status; }, + recordException(err: 
Error | string) { this._exceptions.push(err); }, + end() { this._ended = true; }, + }; + return span; +} + +function createMockProvider(span = createMockSpan()): OTelTracerProvider & { span: ReturnType } { + return { + span, + getTracer: () => ({ + startSpan: () => span, + }), + }; +} + +describe('createOTelTracerInterceptor', () => { + it('is a transparent pass-through when no tracerProvider is supplied', async () => { + const interceptor = createOTelTracerInterceptor(); + const { result, agent } = await runInterceptor(interceptor, { + message: 'hi', + conversationId: 'c1', + }); + expect(result).not.toBeNull(); + expect(agent.invokeAgent).toHaveBeenCalledTimes(1); + }); + + it('starts and ends a span on successful invocation', async () => { + const { span, ...provider } = createMockProvider(); + const interceptor = createOTelTracerInterceptor({ tracerProvider: provider }); + const { result } = await runInterceptor(interceptor, { + message: 'hi', + conversationId: 'c1', + }); + + expect(result).not.toBeNull(); + expect(span._ended).toBe(true); + expect(span._status?.code).toBe(OTelSpanStatusCode.OK); + expect(span._attributes['agent.name']).toBe('test-agent'); + expect(span._attributes['channel.name']).toBe('test-channel'); + expect(typeof span._attributes['duration.ms']).toBe('number'); + }); + + it('records conversation.id and agent.intent as span attributes when present', async () => { + const { span, ...provider } = createMockProvider(); + const interceptor = createOTelTracerInterceptor({ tracerProvider: provider }); + await runInterceptor(interceptor, { + message: 'hi', + conversationId: 'conv-42', + intent: 'support', + }); + + expect(span._attributes['conversation.id']).toBe('conv-42'); + expect(span._attributes['agent.intent']).toBe('support'); + }); + + it('records workflow step attributes when result.steps are present', async () => { + const { span, ...provider } = createMockProvider(); + const interceptor = createOTelTracerInterceptor({ tracerProvider: 
provider }); + const agentResult: AgentResult = { + output: 'done', + steps: [ + { + number: 1, + description: 'fetch data', + status: 'completed', + result: { success: true, duration: 120, toolsUsed: ['http.get'] }, + }, + { + number: 2, + description: 'summarize', + status: 'failed', + result: { success: false, error: 'timeout' }, + }, + ], + }; + + const agent = createMockAgent('test-agent', agentResult); + const chain = composeChain([interceptor], agent, createMockChannel(), createMockRegistry()); + await executeChain(chain, { message: 'go', conversationId: 'c1' }); + + expect(span._attributes['steps.total']).toBe(2); + expect(span._attributes['steps.failed']).toBe(1); + expect(span._attributes['step.0.description']).toBe('fetch data'); + expect(span._attributes['step.0.status']).toBe('completed'); + expect(span._attributes['step.0.duration.ms']).toBe(120); + expect(span._attributes['step.0.tools']).toBe('http.get'); + expect(span._attributes['step.1.status']).toBe('failed'); + }); + + it('does not record step attributes when recordSteps is false', async () => { + const { span, ...provider } = createMockProvider(); + const interceptor = createOTelTracerInterceptor({ tracerProvider: provider, recordSteps: false }); + const agentResult: AgentResult = { + output: 'done', + steps: [{ number: 1, description: 'step', status: 'completed' }], + }; + const agent = createMockAgent('test-agent', agentResult); + const chain = composeChain([interceptor], agent, createMockChannel(), createMockRegistry()); + await executeChain(chain, { message: 'go', conversationId: 'c1' }); + + expect(span._attributes['steps.total']).toBeUndefined(); + }); + + it('sets ERROR status and records exception on downstream throw', async () => { + const { span, ...provider } = createMockProvider(); + const interceptor = createOTelTracerInterceptor({ tracerProvider: provider }); + const thrower: Interceptor = async () => { throw new Error('oops'); }; + + const agent = createMockAgent('test-agent'); 
+ const chain = composeChain([interceptor, thrower], agent, createMockChannel(), createMockRegistry()); + + await expect(executeChain(chain, { message: 'hi', conversationId: 'c1' })).rejects.toThrow('oops'); + + expect(span._ended).toBe(true); + expect(span._status?.code).toBe(OTelSpanStatusCode.ERROR); + expect(span._status?.message).toBe('oops'); + expect(span._exceptions).toHaveLength(1); + }); + + it('marks span OK and sets result.skipped=true for skip sentinel', async () => { + const { span, ...provider } = createMockProvider(); + const interceptor = createOTelTracerInterceptor({ tracerProvider: provider }); + const skipper: Interceptor = async (_input, ctx) => ctx.skip(); + + const agent = createMockAgent('test-agent'); + const chain = composeChain([interceptor, skipper], agent, createMockChannel(), createMockRegistry()); + const result = await executeChain(chain, { message: 'hi', conversationId: 'c1' }); + + expect(result).toBeNull(); + expect(span._ended).toBe(true); + expect(span._status?.code).toBe(OTelSpanStatusCode.OK); + expect(span._attributes['result.skipped']).toBe(true); + }); + + it('skips tracing when shouldTrace returns false', async () => { + const { span, ...provider } = createMockProvider(); + const shouldTrace = vi.fn(() => false); + const interceptor = createOTelTracerInterceptor({ tracerProvider: provider, shouldTrace }); + const { result, agent } = await runInterceptor(interceptor, { message: 'hi', conversationId: 'c1' }); + + expect(shouldTrace).toHaveBeenCalled(); + expect(result).not.toBeNull(); + expect(agent.invokeAgent).toHaveBeenCalledTimes(1); + expect(span._ended).toBe(false); + }); + + it('uses custom tracerName when building the tracer', async () => { + const getTracerSpy = vi.fn().mockReturnValue({ startSpan: () => createMockSpan() }); + const provider: OTelTracerProvider = { getTracer: getTracerSpy }; + const interceptor = createOTelTracerInterceptor({ tracerProvider: provider, tracerName: 'my-service', tracerVersion: '3.0.0' 
}); + await runInterceptor(interceptor, { message: 'hi', conversationId: 'c1' }); + + expect(getTracerSpy).toHaveBeenCalledWith('my-service', '3.0.0'); + }); +}); diff --git a/packages/toolpack-agents/src/interceptors/builtins/index.ts b/packages/toolpack-agents/src/interceptors/builtins/index.ts index de38d4a..42a0738 100644 --- a/packages/toolpack-agents/src/interceptors/builtins/index.ts +++ b/packages/toolpack-agents/src/interceptors/builtins/index.ts @@ -11,3 +11,13 @@ export { createAddressCheckInterceptor, type AddressCheckConfig, type AddressChe export { createIntentClassifierInterceptor, type IntentClassifierInterceptorConfig } from './intent-classifier.js'; export { createDepthGuardInterceptor, type DepthGuardConfig, DepthExceededError } from './depth-guard.js'; export { createTracerInterceptor, type TracerConfig } from './tracer.js'; +export { + createOTelTracerInterceptor, + OTelSpanStatusCode, + type OTelTracerConfig, + type OTelTracerProvider, + type OTelTracer, + type OTelSpan, + type OTelSpanOptions, + type OTelSpanStatus, +} from './otel-tracer.js'; diff --git a/packages/toolpack-agents/src/interceptors/builtins/otel-tracer.ts b/packages/toolpack-agents/src/interceptors/builtins/otel-tracer.ts new file mode 100644 index 0000000..10fe35b --- /dev/null +++ b/packages/toolpack-agents/src/interceptors/builtins/otel-tracer.ts @@ -0,0 +1,186 @@ +import type { AgentInput } from '../../agent/types.js'; +import type { Interceptor, InterceptorResult } from '../types.js'; +import { isSkipSentinel } from '../types.js'; + +/** + * OTel TracerProvider interface — mirrors @opentelemetry/api's TracerProvider + * without requiring the package as a hard dependency. 
+ */ +export interface OTelTracerProvider { + getTracer(name: string, version?: string): OTelTracer; +} + +export interface OTelTracer { + startSpan(name: string, options?: OTelSpanOptions): OTelSpan; +} + +export interface OTelSpanOptions { + attributes?: Record; +} + +export interface OTelSpan { + setAttribute(key: string, value: string | number | boolean): void; + setStatus(status: OTelSpanStatus): void; + recordException(error: Error | string): void; + end(): void; +} + +export interface OTelSpanStatus { + code: OTelSpanStatusCode; + message?: string; +} + +export enum OTelSpanStatusCode { + UNSET = 0, + OK = 1, + ERROR = 2, +} + +/** + * Configuration for the OTel tracer interceptor. + */ +export interface OTelTracerConfig { + /** + * An OTel-compatible TracerProvider (e.g. from @opentelemetry/sdk-node or any OTel-compatible backend). + * When omitted, the interceptor is a transparent no-op and adds zero overhead. + */ + tracerProvider?: OTelTracerProvider; + + /** + * Name used to identify the tracer in OTel (default: 'toolpack-agents'). + */ + tracerName?: string; + + /** + * Version string attached to the tracer. + * When omitted, no version is passed to the OTel TracerProvider. + */ + tracerVersion?: string; + + /** + * Whether to record workflow step durations as span attributes (default: true). + */ + recordSteps?: boolean; + + /** + * Optional: filter which inputs to trace. + * Return false to skip tracing for a specific input. + */ + shouldTrace?: (input: AgentInput) => boolean; +} + +/** + * Creates an OTel-compatible tracer interceptor. + * + * Emits spans for: + * - Agent invocation (wraps the entire chain below it) + * - Each workflow step in the result (if recordSteps is true) + * - Errors thrown downstream + * + * Works with any OTel-compatible backend: Jaeger, Honeycomb, Datadog, OTLP, etc. + * When no tracerProvider is supplied it is a zero-cost transparent pass-through. 
+ * + * @example + * ```ts + * import { NodeTracerProvider } from '@opentelemetry/sdk-node'; + * + * const provider = new NodeTracerProvider(); + * provider.register(); + * + * const registry = new AgentRegistry([ + * { + * agent: MyAgent, + * channels: [slackChannel], + * interceptors: [ + * createOTelTracerInterceptor({ tracerProvider: provider }), + * ], + * }, + * ]); + * ``` + */ +export function createOTelTracerInterceptor(config: OTelTracerConfig = {}): Interceptor { + const { + tracerProvider, + tracerName = 'toolpack-agents', + tracerVersion, + recordSteps = true, + shouldTrace, + } = config; + + // Acquire the tracer once at construction time, not per-invocation. + const tracer = tracerProvider?.getTracer(tracerName, tracerVersion); + + return async (input, ctx, next): Promise => { + // No-op path — cheapest guard first + if (!tracer) { + return await next(); + } + + if (shouldTrace && !shouldTrace(input)) { + return await next(); + } + + const span = tracer.startSpan('agent.invocation'); + + span.setAttribute('agent.name', ctx.agent.name); + span.setAttribute('channel.name', ctx.channel.name ?? 
'unknown'); + span.setAttribute('invocation.depth', ctx.invocationDepth); + if (input.conversationId) span.setAttribute('conversation.id', input.conversationId); + if (input.intent) span.setAttribute('agent.intent', input.intent); + + const startTime = performance.now(); + + try { + const result = await next(); + const durationMs = performance.now() - startTime; + + span.setAttribute('duration.ms', Math.round(durationMs)); + + if (isSkipSentinel(result)) { + span.setAttribute('result.skipped', true); + span.setStatus({ code: OTelSpanStatusCode.OK }); + } else { + span.setAttribute('result.output.length', result.output.length); + + if (recordSteps && result.steps && result.steps.length > 0) { + span.setAttribute('steps.total', result.steps.length); + + const failedSteps = result.steps.filter(s => s.status === 'failed'); + if (failedSteps.length > 0) { + span.setAttribute('steps.failed', failedSteps.length); + } + + result.steps.forEach((step, index) => { + const prefix = `step.${index}`; + span.setAttribute(`${prefix}.description`, step.description); + span.setAttribute(`${prefix}.status`, step.status); + if (step.result?.duration !== undefined) { + span.setAttribute(`${prefix}.duration.ms`, step.result.duration); + } + if (step.result?.toolsUsed && step.result.toolsUsed.length > 0) { + span.setAttribute(`${prefix}.tools`, step.result.toolsUsed.join(',')); + } + }); + } + + span.setStatus({ code: OTelSpanStatusCode.OK }); + } + + return result; + } catch (error) { + const durationMs = performance.now() - startTime; + + span.setAttribute('duration.ms', Math.round(durationMs)); + const exception = error instanceof Error ? error : String(error); + span.recordException(exception); + span.setStatus({ + code: OTelSpanStatusCode.ERROR, + message: error instanceof Error ? 
/**
 * A collection of eval cases that can be loaded from / saved to JSON.
 *
 * Case IDs added through `add()` are guaranteed unique; lookups (`get`) and
 * removals (`remove`) are keyed on that ID.
 *
 * @example
 * ```ts
 * const dataset = new EvalDataset([
 *   {
 *     id: 'q1',
 *     input: { message: 'What is 2 + 2?' },
 *     expectedOutput: '4',
 *   },
 * ]);
 *
 * dataset.save('./evals/math.json');
 *
 * const loaded = EvalDataset.load('./evals/math.json');
 * ```
 */
export class EvalDataset {
  private _cases: EvalCase[];

  /**
   * @param cases Initial cases. The array is copied, so later mutation of the
   *              caller's array does not affect the dataset.
   */
  constructor(cases: EvalCase[] = []) {
    this._cases = [...cases];
  }

  // ── Read ──────────────────────────────────────────────────────────────────

  /** All cases in the dataset (a fresh array on every access). */
  get cases(): EvalCase[] {
    return [...this._cases];
  }

  /** Number of cases. */
  get size(): number {
    return this._cases.length;
  }

  /**
   * Get a case by ID.
   * Returns `undefined` if not found.
   */
  get(id: string): EvalCase | undefined {
    return this._cases.find(c => c.id === id);
  }

  // ── Write ─────────────────────────────────────────────────────────────────

  /**
   * Add one or more cases.
   *
   * @throws Error if any case's ID already exists in the dataset or is
   *         repeated within the same call. Nothing is added in that situation.
   */
  add(...cases: EvalCase[]): this {
    // Validate all before mutating — prevents partial add on duplicate within the batch
    const seen = new Set(this._cases.map(c => c.id));
    for (const c of cases) {
      if (seen.has(c.id)) {
        throw new Error(`EvalDataset: case with id "${c.id}" already exists.`);
      }
      seen.add(c.id);
    }
    this._cases.push(...cases);
    return this;
  }

  /**
   * Remove a case by ID.
   * Returns `true` if removed, `false` if not found.
   */
  remove(id: string): boolean {
    const before = this._cases.length;
    this._cases = this._cases.filter(c => c.id !== id);
    return this._cases.length < before;
  }

  /**
   * Filter cases by a predicate. Returns a new EvalDataset; this one is unchanged.
   */
  filter(predicate: (c: EvalCase) => boolean): EvalDataset {
    return new EvalDataset(this._cases.filter(predicate));
  }

  // ── Persistence ───────────────────────────────────────────────────────────

  /**
   * Serialize to a plain array (suitable for `JSON.stringify`).
   */
  toJSON(): EvalCase[] {
    return [...this._cases];
  }

  /**
   * Save cases to a JSON file (pretty-printed, UTF-8).
   *
   * @param filePath Absolute or relative path to the output file.
   */
  save(filePath: string): void {
    writeFileSync(filePath, JSON.stringify(this._cases, null, 2), 'utf-8');
  }

  /**
   * Load cases from a JSON file.
   * The file must contain a JSON array of `EvalCase` objects.
   *
   * @param filePath Absolute or relative path to the JSON file.
   * @throws SyntaxError if the file is not valid JSON.
   * @throws Error if the top-level value is not an array, or an entry is not
   *         an object carrying a string `id`.
   */
  static load(filePath: string): EvalDataset {
    const raw = readFileSync(filePath, 'utf-8');
    const parsed: unknown = JSON.parse(raw);

    // A bare cast would let a JSON string be spread into one "case" per
    // character and would surface objects/numbers as an opaque
    // "not iterable" TypeError — reject anything that is not an array up front.
    if (!Array.isArray(parsed)) {
      throw new Error(`EvalDataset: "${filePath}" must contain a JSON array of eval cases.`);
    }

    parsed.forEach((entry: unknown, index: number) => {
      const hasStringId =
        typeof entry === 'object' &&
        entry !== null &&
        typeof (entry as { id?: unknown }).id === 'string';
      if (!hasStringId) {
        throw new Error(
          `EvalDataset: entry ${index} in "${filePath}" is not an eval case (expected an object with a string "id").`,
        );
      }
    });

    return new EvalDataset(parsed as EvalCase[]);
  }

  /**
   * Create an `EvalDataset` from a plain array (e.g. from a database query).
   */
  static from(cases: EvalCase[]): EvalDataset {
    return new EvalDataset(cases);
  }
}
/**
 * Render an `EvalReport` as a multi-line, human-readable summary.
 *
 * Layout: a header naming both runs, the pass-rate transition with its delta,
 * an optional "Regressions" section, an optional "Improvements" section, and
 * a closing line with the stable pass/fail counts. Sections with no entries
 * are omitted entirely.
 *
 * @example
 * ```ts
 * console.log(formatEvalReport(report));
 * ```
 */
export function formatEvalReport(report: EvalReport): string {
  const asPercent = (fraction: number): string => `${(fraction * 100).toFixed(1)}%`;
  // Negative deltas already carry their own "-" from toFixed().
  const sign = report.delta >= 0 ? '+' : '';

  // One titled section per change list; an empty list contributes no lines.
  const section = (
    title: string,
    marker: string,
    entries: ReadonlyArray<EvalRegression | EvalImprovement>,
  ): string[] => {
    if (entries.length === 0) {
      return [];
    }
    const body = entries.flatMap(entry => [
      ` ${marker} ${entry.caseId}`,
      ` baseline: ${truncate(entry.baselineOutput)}`,
      ` candidate: ${truncate(entry.candidateOutput)}`,
    ]);
    return [`${title} (${entries.length}):`, ...body, ''];
  };

  const output: string[] = [
    `Eval Report: ${report.baselineRunId} → ${report.candidateRunId}`,
    `Pass rate: ${asPercent(report.baselinePassRate)} → ${asPercent(report.candidatePassRate)} (Δ${sign}${asPercent(report.delta)})`,
    '',
    ...section('Regressions', '✗', report.regressions),
    ...section('Improvements', '✓', report.improvements),
    `Stable passes: ${report.stablePasses.length} | Stable fails: ${report.stableFails.length}`,
  ];

  return output.join('\n');
}
/**
 * Runs an agent against every case in an `EvalDataset` and collects the
 * results into an `EvalRun`.
 *
 * Cases are executed in consecutive batches of `concurrency` cases; a batch
 * must finish before the next one starts. A case whose agent invocation
 * throws is recorded with `error` set and an empty `actualOutput` — the run
 * itself never rejects because of an agent failure.
 *
 * @example
 * ```ts
 * const runner = new EvalRunner(agent);
 * const run = await runner.run(dataset, { runId: 'v1.2' });
 *
 * console.log(`${run.results.length} cases run in ${run.totalDurationMs}ms`);
 * ```
 */
export class EvalRunner {
  private agent: BaseAgent;

  constructor(agent: BaseAgent) {
    this.agent = agent;
  }

  /**
   * Run all cases in the dataset and return an `EvalRun`.
   *
   * @param dataset The cases to execute (read via `dataset.cases`).
   * @param options Optional `runId` (defaults to the current ISO timestamp)
   *                and `concurrency` (defaults to 1; values below 1,
   *                fractional values and `NaN` are normalised to a positive
   *                integer).
   * @returns Per-case results in dataset order plus run-level timing.
   */
  async run(dataset: EvalDataset, options: EvalRunnerOptions = {}): Promise<EvalRun> {
    const runId = options.runId ?? new Date().toISOString();
    const startedAt = new Date().toISOString();
    const runStart = Date.now();

    // Math.max(1, NaN) is NaN, which would turn the batch cursor into NaN
    // after the first (empty) slice and end the loop with zero cases run.
    // Flooring keeps every batch the same size for fractional inputs.
    const requested = options.concurrency ?? 1;
    const concurrency = Number.isNaN(requested) ? 1 : Math.max(1, Math.floor(requested));

    const cases = dataset.cases;
    const results: EvalCaseResult[] = [];

    // Process in batches of `concurrency`
    for (let offset = 0; offset < cases.length; offset += concurrency) {
      const batch = cases.slice(offset, offset + concurrency);
      const batchResults = await Promise.all(batch.map(evalCase => this.runCase(evalCase)));
      results.push(...batchResults);
    }

    return {
      runId,
      startedAt,
      completedAt: new Date().toISOString(),
      totalDurationMs: Date.now() - runStart,
      results,
    };
  }

  /** Invoke the agent for one case, converting a thrown error into a result entry. */
  private async runCase(evalCase: EvalCaseResult['evalCase']): Promise<EvalCaseResult> {
    const caseStart = Date.now();
    try {
      const result = await this.agent.invokeAgent({
        message: evalCase.input.message,
        intent: evalCase.input.intent,
        conversationId: evalCase.input.conversationId,
        context: evalCase.input.context,
      });
      return {
        evalCase,
        actualOutput: result.output,
        durationMs: Date.now() - caseStart,
      } satisfies EvalCaseResult;
    } catch (err) {
      return {
        evalCase,
        actualOutput: '',
        durationMs: Date.now() - caseStart,
        error: err instanceof Error ? err.message : String(err),
      } satisfies EvalCaseResult;
    }
  }
}
/**
 * Wrap per-case verdicts into an `EvalScoredRun` with aggregate counts.
 *
 * Every result whose verdict is not `'pass'` counts as a failure. An empty
 * result list yields a pass rate of 0 rather than dividing by zero.
 */
function buildScoredRun(run: EvalRun, scoredResults: EvalScoredResult[]): EvalScoredRun {
  const total = scoredResults.length;

  let passCount = 0;
  for (const { verdict } of scoredResults) {
    if (verdict === 'pass') {
      passCount += 1;
    }
  }

  const passRate = total === 0 ? 0 : passCount / total;

  return {
    run,
    scoredResults,
    passCount,
    failCount: total - passCount,
    passRate,
  };
}
/**
 * Scores a case as passing when the agent's `actualOutput` includes the
 * case's `expectedOutput` as a substring. Comparison ignores letter case
 * unless `caseInsensitive: false` is passed. Cases whose run recorded an
 * error always fail, with the error echoed in the explanation.
 *
 * @example
 * ```ts
 * const scorer = new ContainsScorer({ caseInsensitive: true });
 * const scored = await scorer.score(run);
 * ```
 */
export class ContainsScorer implements EvalScorer {
  private caseInsensitive: boolean;

  constructor(options: { caseInsensitive?: boolean } = {}) {
    this.caseInsensitive = options.caseInsensitive ?? true;
  }

  async score(run: EvalRun): Promise<EvalScoredRun> {
    // Lower-case both sides only when case-insensitive matching is enabled.
    const normalize = (text: string): string => (this.caseInsensitive ? text.toLowerCase() : text);

    const scoredResults = run.results.map(result => {
      if (result.error) {
        return scoreResult(result, 'fail', `Agent threw: ${result.error}`);
      }

      const haystack = normalize(result.actualOutput);
      const needle = normalize(result.evalCase.expectedOutput);
      return scoreResult(result, haystack.includes(needle) ? 'pass' : 'fail');
    });

    return buildScoredRun(run, scoredResults);
  }
}
/**
 * Uses an LLM agent as a judge to score each case.
 * The judge is prompted with the question, expected answer, and actual answer.
 *
 * Cases are judged one at a time, in run order. A case passes only when the
 * first line of the judge's reply starts with "pass" (case-insensitive);
 * any remaining lines become the explanation. Cases that errored during the
 * run, and cases where the judge itself throws, are scored as failures.
 *
 * @example
 * ```ts
 * const judgeAgent = new MyAgent({ toolpack });
 * const scorer = new LLMJudgeScorer(judgeAgent);
 * const scored = await scorer.score(run);
 * ```
 */
export class LLMJudgeScorer implements EvalScorer {
  private judgeAgent: BaseAgent;
  private promptTemplate: string;

  /**
   * @param judgeAgent Agent whose `invokeAgent()` is called with the judge prompt.
   * @param options    Optional custom `promptTemplate`; defaults to `DEFAULT_JUDGE_PROMPT`.
   */
  constructor(judgeAgent: BaseAgent, options: LLMJudgeScorerOptions = {}) {
    this.judgeAgent = judgeAgent;
    this.promptTemplate = options.promptTemplate ?? DEFAULT_JUDGE_PROMPT;
  }

  async score(run: EvalRun): Promise<EvalScoredRun> {
    const scoredResults: EvalScoredResult[] = [];

    for (const result of run.results) {
      if (result.error) {
        scoredResults.push(scoreResult(result, 'fail', `Agent threw: ${result.error}`));
        continue;
      }

      const prompt = this.renderPrompt(result);

      try {
        const judgeResult = await this.judgeAgent.invokeAgent({ message: prompt });
        const lines = judgeResult.output.trim().split('\n');
        const verdict: EvalVerdict = lines[0].toLowerCase().startsWith('pass') ? 'pass' : 'fail';
        const explanation = lines.slice(1).join(' ').trim() || undefined;
        scoredResults.push(scoreResult(result, verdict, explanation));
      } catch (err) {
        scoredResults.push(
          scoreResult(result, 'fail', `Judge threw: ${err instanceof Error ? err.message : String(err)}`),
        );
      }
    }

    return buildScoredRun(run, scoredResults);
  }

  /**
   * Fill `{{question}}`, `{{expected}}` and `{{actual}}` in the template.
   *
   * Substitution happens in a single pass with a function replacer so that:
   *  - `$&`, `$'`, `$1`, `$$` etc. inside agent text are inserted literally
   *    instead of being treated as replacement patterns;
   *  - placeholder-looking text inside an inserted value is never re-expanded;
   *  - every occurrence of a placeholder is filled, not just the first.
   */
  private renderPrompt(result: EvalCaseResult): string {
    const values: Record<string, string> = {
      question: result.evalCase.input.message,
      expected: result.evalCase.expectedOutput,
      actual: result.actualOutput,
    };
    return this.promptTemplate.replace(
      /\{\{(question|expected|actual)\}\}/g,
      (_placeholder: string, key: string) => values[key],
    );
  }
}
/**
 * Adapts a user-supplied async function into an `EvalScorer`.
 *
 * The function is awaited once per case, in run order. Cases that already
 * carry a run error are failed without calling the function; if the function
 * throws or rejects, the case is failed and the message is kept as the
 * explanation.
 *
 * @example
 * ```ts
 * const scorer = new CustomScorer(async (result) => {
 *   const pass = result.actualOutput.includes('Paris');
 *   return { verdict: pass ? 'pass' : 'fail' };
 * });
 * ```
 */
export class CustomScorer implements EvalScorer {
  private fn: (result: EvalCaseResult) => Promise<{ verdict: EvalVerdict; explanation?: string }>;

  constructor(fn: (result: EvalCaseResult) => Promise<{ verdict: EvalVerdict; explanation?: string }>) {
    this.fn = fn;
  }

  async score(run: EvalRun): Promise<EvalScoredRun> {
    const scoredResults: EvalScoredResult[] = [];

    // Sequential on purpose: each case is scored before the next one starts.
    for (const caseResult of run.results) {
      scoredResults.push(await this.scoreOne(caseResult));
    }

    return buildScoredRun(run, scoredResults);
  }

  /** Score a single case, mapping run errors and scorer failures to a 'fail' verdict. */
  private async scoreOne(caseResult: EvalCaseResult): Promise<EvalScoredResult> {
    if (caseResult.error) {
      return scoreResult(caseResult, 'fail', `Agent threw: ${caseResult.error}`);
    }

    try {
      const { verdict, explanation } = await this.fn(caseResult);
      return scoreResult(caseResult, verdict, explanation);
    } catch (failure) {
      const reason = failure instanceof Error ? failure.message : String(failure);
      return scoreResult(caseResult, 'fail', `Scorer threw: ${reason}`);
    }
  }
}
+ * LLM-judge scorers use it as the reference answer. + */ + expectedOutput: string; + + /** Optional free-form metadata (e.g. tags, difficulty, source). */ + metadata?: Record; +} + +// ─── Runner ─────────────────────────────────────────────────────────────────── + +/** + * The actual output produced by running a single eval case against an agent. + */ +export interface EvalCaseResult { + /** The eval case that was run. */ + evalCase: EvalCase; + + /** The output produced by the agent. */ + actualOutput: string; + + /** Wall-clock duration in milliseconds. */ + durationMs: number; + + /** Error message if the agent threw, otherwise undefined. */ + error?: string; +} + +/** + * The result of running an entire dataset through an agent. + */ +export interface EvalRun { + /** Identifier for this run (e.g. "v1.2", "pr-456"). */ + runId: string; + + /** ISO timestamp of when the run started. */ + startedAt: string; + + /** ISO timestamp of when the run completed. */ + completedAt: string; + + /** Total wall-clock duration in milliseconds. */ + totalDurationMs: number; + + /** Per-case results, in dataset order. */ + results: EvalCaseResult[]; +} + +// ─── Scorer ─────────────────────────────────────────────────────────────────── + +/** The verdict for a single scored case. */ +export type EvalVerdict = 'pass' | 'fail'; + +/** + * A scored result — wraps an EvalCaseResult with a pass/fail verdict and + * an optional explanation. + */ +export interface EvalScoredResult { + /** The underlying case result. */ + caseResult: EvalCaseResult; + + /** Pass or fail. */ + verdict: EvalVerdict; + + /** + * Optional human-readable explanation of the verdict. + * Populated by LLMJudgeScorer; optional for other scorers. + */ + explanation?: string; +} + +/** + * A fully scored run — an EvalRun annotated with per-case verdicts and + * aggregate pass/fail counts. + */ +export interface EvalScoredRun { + /** The original run. */ + run: EvalRun; + + /** Scored results, in run order. 
*/ + scoredResults: EvalScoredResult[]; + + /** Number of passing cases. */ + passCount: number; + + /** Number of failing cases. */ + failCount: number; + + /** Pass rate as a fraction between 0 and 1. */ + passRate: number; +} + +// ─── Report ─────────────────────────────────────────────────────────────────── + +/** + * A regression entry — a case that passed in the baseline but fails in the + * candidate. + */ +export interface EvalRegression { + caseId: string; + baselineOutput: string; + candidateOutput: string; +} + +/** + * An improvement entry — a case that failed in the baseline but passes in the + * candidate. + */ +export interface EvalImprovement { + caseId: string; + baselineOutput: string; + candidateOutput: string; +} + +/** + * Comparison report between a baseline scored run and a candidate scored run. + */ +export interface EvalReport { + baselineRunId: string; + candidateRunId: string; + + baselinePassRate: number; + candidatePassRate: number; + + /** Δ pass rate (candidate − baseline). Positive = improvement. */ + delta: number; + + regressions: EvalRegression[]; + improvements: EvalImprovement[]; + + /** Cases that passed in both runs. */ + stablePasses: string[]; + + /** Cases that failed in both runs. 
*/ + stableFails: string[]; +} diff --git a/packages/toolpack-agents/src/testing/eval.test.ts b/packages/toolpack-agents/src/testing/eval.test.ts new file mode 100644 index 0000000..5bda5e4 --- /dev/null +++ b/packages/toolpack-agents/src/testing/eval.test.ts @@ -0,0 +1,406 @@ +import { describe, it, expect, vi } from 'vitest'; +import { EvalDataset } from './eval-dataset.js'; +import { EvalRunner } from './eval-runner.js'; +import { + ExactMatchScorer, + ContainsScorer, + LLMJudgeScorer, + CustomScorer, +} from './eval-scorer.js'; +import { compareEvalRuns, formatEvalReport } from './eval-report.js'; +import type { EvalRun, EvalCase } from './eval-types.js'; +import type { BaseAgent } from '../agent/base-agent.js'; + +// ─── Fixtures ───────────────────────────────────────────────────────────────── + +const cases: EvalCase[] = [ + { id: 'q1', input: { message: 'What is 2+2?' }, expectedOutput: '4' }, + { id: 'q2', input: { message: 'Capital of France?' }, expectedOutput: 'Paris' }, + { id: 'q3', input: { message: 'Colour of the sky?' }, expectedOutput: 'blue' }, +]; + +function makeRun(outputs: string[], runId = 'test-run'): EvalRun { + return { + runId, + startedAt: new Date().toISOString(), + completedAt: new Date().toISOString(), + totalDurationMs: 100, + results: cases.map((c, i) => ({ + evalCase: c, + actualOutput: outputs[i] ?? 
'', + durationMs: 10, + })), + }; +} + +// ─── EvalDataset ────────────────────────────────────────────────────────────── + +describe('EvalDataset', () => { + it('stores cases passed to constructor', () => { + const dataset = new EvalDataset(cases); + expect(dataset.size).toBe(3); + expect(dataset.cases).toHaveLength(3); + }); + + it('get() returns case by id', () => { + const dataset = new EvalDataset(cases); + expect(dataset.get('q2')?.expectedOutput).toBe('Paris'); + }); + + it('get() returns undefined for unknown id', () => { + const dataset = new EvalDataset(cases); + expect(dataset.get('nope')).toBeUndefined(); + }); + + it('add() appends cases', () => { + const dataset = new EvalDataset(cases); + dataset.add({ id: 'q4', input: { message: 'Hi' }, expectedOutput: 'Hello' }); + expect(dataset.size).toBe(4); + }); + + it('add() throws on duplicate id vs existing', () => { + const dataset = new EvalDataset(cases); + expect(() => dataset.add({ id: 'q1', input: { message: 'x' }, expectedOutput: 'x' })) + .toThrow('already exists'); + }); + + it('add() does not partially mutate when duplicate is within the batch', () => { + const dataset = new EvalDataset([]); + const newCase = { id: 'n1', input: { message: 'x' }, expectedOutput: 'x' }; + expect(() => dataset.add(newCase, { ...newCase })).toThrow('already exists'); + expect(dataset.size).toBe(0); // no partial add + }); + + it('remove() deletes a case and returns true', () => { + const dataset = new EvalDataset(cases); + expect(dataset.remove('q1')).toBe(true); + expect(dataset.size).toBe(2); + expect(dataset.get('q1')).toBeUndefined(); + }); + + it('remove() returns false for unknown id', () => { + const dataset = new EvalDataset(cases); + expect(dataset.remove('nope')).toBe(false); + }); + + it('filter() returns a new dataset matching the predicate', () => { + const dataset = new EvalDataset(cases); + const filtered = dataset.filter(c => c.id !== 'q1'); + expect(filtered.size).toBe(2); + 
expect(dataset.size).toBe(3); // original unchanged + }); + + it('toJSON() returns a plain array', () => { + const dataset = new EvalDataset(cases); + const json = dataset.toJSON(); + expect(Array.isArray(json)).toBe(true); + expect(json).toHaveLength(3); + }); + + it('EvalDataset.from() creates from an array', () => { + const dataset = EvalDataset.from(cases); + expect(dataset.size).toBe(3); + }); + + it('cases getter returns a defensive copy', () => { + const dataset = new EvalDataset(cases); + const first = dataset.cases; + first.push({ id: 'injected', input: { message: 'x' }, expectedOutput: 'x' }); + expect(dataset.size).toBe(3); + }); +}); + +// ─── EvalRunner ─────────────────────────────────────────────────────────────── + +describe('EvalRunner', () => { + function makeAgent(responses: string[]): BaseAgent { + let callIdx = 0; + return { + invokeAgent: vi.fn(async () => ({ + output: responses[callIdx++] ?? '', + steps: undefined, + metadata: undefined, + })), + } as unknown as BaseAgent; + } + + it('runs all cases and returns an EvalRun', async () => { + const agent = makeAgent(['4', 'Paris', 'blue']); + const dataset = new EvalDataset(cases); + const runner = new EvalRunner(agent); + const run = await runner.run(dataset, { runId: 'v1' }); + + expect(run.runId).toBe('v1'); + expect(run.results).toHaveLength(3); + expect(run.results[0].actualOutput).toBe('4'); + expect(run.results[1].actualOutput).toBe('Paris'); + expect(run.results[2].actualOutput).toBe('blue'); + }); + + it('captures errors without throwing', async () => { + const agent = { + invokeAgent: vi.fn().mockRejectedValue(new Error('network error')), + } as unknown as BaseAgent; + const dataset = new EvalDataset([cases[0]]); + const runner = new EvalRunner(agent); + const run = await runner.run(dataset); + + expect(run.results[0].error).toBe('network error'); + expect(run.results[0].actualOutput).toBe(''); + }); + + it('records durationMs per case', async () => { + const agent = makeAgent(['4']); 
+ const dataset = new EvalDataset([cases[0]]); + const runner = new EvalRunner(agent); + const run = await runner.run(dataset); + + expect(run.results[0].durationMs).toBeGreaterThanOrEqual(0); + }); + + it('defaults runId to an ISO timestamp', async () => { + const agent = makeAgent(['4']); + const dataset = new EvalDataset([cases[0]]); + const runner = new EvalRunner(agent); + const run = await runner.run(dataset); + + expect(run.runId).toMatch(/^\d{4}-\d{2}-\d{2}T/); + }); +}); + +// ─── ExactMatchScorer ───────────────────────────────────────────────────────── + +describe('ExactMatchScorer', () => { + it('passes when actual equals expected (trimmed)', async () => { + const scorer = new ExactMatchScorer(); + const run = makeRun(['4', 'Paris', 'blue']); + const result = await scorer.score(run); + + expect(result.passCount).toBe(3); + expect(result.failCount).toBe(0); + expect(result.passRate).toBe(1); + }); + + it('fails when actual differs', async () => { + const scorer = new ExactMatchScorer(); + const run = makeRun(['5', 'Paris', 'blue']); + const result = await scorer.score(run); + + expect(result.passCount).toBe(2); + expect(result.failCount).toBe(1); + expect(result.scoredResults[0].verdict).toBe('fail'); + }); + + it('trims whitespace by default', async () => { + const scorer = new ExactMatchScorer({ trim: true }); + const run = makeRun([' 4 ', 'Paris', 'blue']); + const result = await scorer.score(run); + expect(result.scoredResults[0].verdict).toBe('pass'); + }); + + it('is case-sensitive by default', async () => { + const scorer = new ExactMatchScorer(); + const run = makeRun(['4', 'paris', 'blue']); // lowercase paris + const result = await scorer.score(run); + expect(result.scoredResults[1].verdict).toBe('fail'); + }); + + it('caseInsensitive option ignores case', async () => { + const scorer = new ExactMatchScorer({ caseInsensitive: true }); + const run = makeRun(['4', 'PARIS', 'BLUE']); + const result = await scorer.score(run); + 
expect(result.passCount).toBe(3); + }); + + it('fails cases that errored', async () => { + const scorer = new ExactMatchScorer(); + const run: EvalRun = { + runId: 'x', startedAt: '', completedAt: '', totalDurationMs: 0, + results: [{ evalCase: cases[0], actualOutput: '', durationMs: 0, error: 'boom' }], + }; + const result = await scorer.score(run); + expect(result.scoredResults[0].verdict).toBe('fail'); + expect(result.scoredResults[0].explanation).toContain('boom'); + }); +}); + +// ─── ContainsScorer ─────────────────────────────────────────────────────────── + +describe('ContainsScorer', () => { + it('passes when actual contains expected', async () => { + const scorer = new ContainsScorer(); + const run = makeRun(['The answer is 4.', 'The capital is Paris!', 'The sky is blue.']); + const result = await scorer.score(run); + expect(result.passCount).toBe(3); + }); + + it('fails when actual does not contain expected', async () => { + const scorer = new ContainsScorer(); + const run = makeRun(['The answer is 5.', 'Paris', 'blue']); + const result = await scorer.score(run); + expect(result.scoredResults[0].verdict).toBe('fail'); + }); + + it('is case-insensitive by default', async () => { + const scorer = new ContainsScorer(); + const run = makeRun(['4', 'PARIS IS THE CAPITAL', 'blue']); + const result = await scorer.score(run); + expect(result.scoredResults[1].verdict).toBe('pass'); + }); + + it('can be made case-sensitive', async () => { + const scorer = new ContainsScorer({ caseInsensitive: false }); + const run = makeRun(['4', 'paris', 'blue']); // lowercase but expected is 'Paris' + const result = await scorer.score(run); + expect(result.scoredResults[1].verdict).toBe('fail'); + }); +}); + +// ─── CustomScorer ───────────────────────────────────────────────────────────── + +describe('CustomScorer', () => { + it('uses the provided function', async () => { + const scorer = new CustomScorer(async (result) => ({ + verdict: result.actualOutput.length > 0 ? 
'pass' : 'fail', + })); + const run = makeRun(['4', '', 'blue']); + const scored = await scorer.score(run); + expect(scored.scoredResults[0].verdict).toBe('pass'); + expect(scored.scoredResults[1].verdict).toBe('fail'); + }); + + it('catches scorer errors and marks as fail', async () => { + const scorer = new CustomScorer(async () => { throw new Error('scorer crash'); }); + const run = makeRun(['4']); + const scored = await scorer.score(run); + expect(scored.scoredResults[0].verdict).toBe('fail'); + expect(scored.scoredResults[0].explanation).toContain('scorer crash'); + }); +}); + +// ─── LLMJudgeScorer ─────────────────────────────────────────────────────────── + +describe('LLMJudgeScorer', () => { + function makeJudgeAgent(verdict: 'pass' | 'fail', explanation = ''): BaseAgent { + return { + invokeAgent: vi.fn(async () => ({ + output: explanation ? `${verdict}\n${explanation}` : verdict, + })), + } as unknown as BaseAgent; + } + + it('passes when judge returns "pass"', async () => { + const scorer = new LLMJudgeScorer(makeJudgeAgent('pass')); + const run = makeRun(['4', 'Paris', 'blue']); + const scored = await scorer.score(run); + expect(scored.passCount).toBe(3); + }); + + it('fails when judge returns "fail"', async () => { + const scorer = new LLMJudgeScorer(makeJudgeAgent('fail')); + const run = makeRun(['5', 'London', 'red']); + const scored = await scorer.score(run); + expect(scored.failCount).toBe(3); + }); + + it('captures explanation from second line', async () => { + const scorer = new LLMJudgeScorer(makeJudgeAgent('pass', 'The answer is correct.')); + const scored = await scorer.score(makeRun(['4'])); + expect(scored.scoredResults[0].explanation).toBe('The answer is correct.'); + }); + + it('handles judge throwing', async () => { + const judgeAgent = { + invokeAgent: vi.fn().mockRejectedValue(new Error('judge exploded')), + } as unknown as BaseAgent; + const scorer = new LLMJudgeScorer(judgeAgent); + const run = makeRun(['4']); + const scored = await 
scorer.score(run); + expect(scored.scoredResults[0].verdict).toBe('fail'); + expect(scored.scoredResults[0].explanation).toContain('judge exploded'); + }); +}); + +// ─── compareEvalRuns ────────────────────────────────────────────────────────── + +describe('compareEvalRuns', () => { + async function scoredRun(outputs: string[], runId: string) { + const scorer = new ExactMatchScorer(); + return scorer.score(makeRun(outputs, runId)); + } + + it('detects regressions (pass → fail)', async () => { + const baseline = await scoredRun(['4', 'Paris', 'blue'], 'v1'); + const candidate = await scoredRun(['5', 'Paris', 'blue'], 'v2'); + const report = compareEvalRuns(baseline, candidate); + + expect(report.regressions).toHaveLength(1); + expect(report.regressions[0].caseId).toBe('q1'); + }); + + it('detects improvements (fail → pass)', async () => { + const baseline = await scoredRun(['5', 'Paris', 'blue'], 'v1'); + const candidate = await scoredRun(['4', 'Paris', 'blue'], 'v2'); + const report = compareEvalRuns(baseline, candidate); + + expect(report.improvements).toHaveLength(1); + expect(report.improvements[0].caseId).toBe('q1'); + }); + + it('computes stable passes and fails', async () => { + const baseline = await scoredRun(['4', 'Paris', 'WRONG'], 'v1'); + const candidate = await scoredRun(['4', 'Paris', 'WRONG'], 'v2'); + const report = compareEvalRuns(baseline, candidate); + + expect(report.stablePasses).toContain('q1'); + expect(report.stablePasses).toContain('q2'); + expect(report.stableFails).toContain('q3'); + expect(report.regressions).toHaveLength(0); + expect(report.improvements).toHaveLength(0); + }); + + it('computes delta correctly', async () => { + const baseline = await scoredRun(['4', 'WRONG', 'WRONG'], 'v1'); // 1/3 pass + const candidate = await scoredRun(['4', 'Paris', 'WRONG'], 'v2'); // 2/3 pass + const report = compareEvalRuns(baseline, candidate); + + expect(report.baselinePassRate).toBeCloseTo(1 / 3); + 
expect(report.candidatePassRate).toBeCloseTo(2 / 3); + expect(report.delta).toBeCloseTo(1 / 3); + }); + + it('sets correct run IDs', async () => { + const baseline = await scoredRun(['4', 'Paris', 'blue'], 'baseline-v1'); + const candidate = await scoredRun(['4', 'Paris', 'blue'], 'candidate-v2'); + const report = compareEvalRuns(baseline, candidate); + + expect(report.baselineRunId).toBe('baseline-v1'); + expect(report.candidateRunId).toBe('candidate-v2'); + }); +}); + +// ─── formatEvalReport ───────────────────────────────────────────────────────── + +describe('formatEvalReport', () => { + it('includes run IDs and pass rates', async () => { + const scorer = new ExactMatchScorer(); + const baseline = await scorer.score(makeRun(['4', 'Paris', 'blue'], 'v1')); + const candidate = await scorer.score(makeRun(['5', 'Paris', 'blue'], 'v2')); + const report = compareEvalRuns(baseline, candidate); + const formatted = formatEvalReport(report); + + expect(formatted).toContain('v1'); + expect(formatted).toContain('v2'); + expect(formatted).toContain('Regressions'); + }); + + it('does not include Regressions section when there are none', async () => { + const scorer = new ExactMatchScorer(); + const baseline = await scorer.score(makeRun(['4', 'Paris', 'blue'], 'v1')); + const candidate = await scorer.score(makeRun(['4', 'Paris', 'blue'], 'v2')); + const report = compareEvalRuns(baseline, candidate); + const formatted = formatEvalReport(report); + + expect(formatted).not.toContain('Regressions'); + }); +}); diff --git a/packages/toolpack-agents/src/testing/index.ts b/packages/toolpack-agents/src/testing/index.ts index ca66673..5d5b93b 100644 --- a/packages/toolpack-agents/src/testing/index.ts +++ b/packages/toolpack-agents/src/testing/index.ts @@ -20,6 +20,30 @@ export type { TestAgentResult, } from './create-test-agent.js'; +// Eval primitives +export { EvalDataset } from './eval-dataset.js'; +export { EvalRunner } from './eval-runner.js'; +export type { EvalRunnerOptions } 
from './eval-runner.js'; +export { + ExactMatchScorer, + ContainsScorer, + LLMJudgeScorer, + CustomScorer, +} from './eval-scorer.js'; +export type { EvalScorer, LLMJudgeScorerOptions } from './eval-scorer.js'; +export { compareEvalRuns, formatEvalReport } from './eval-report.js'; +export type { + EvalCase, + EvalCaseResult, + EvalRun, + EvalVerdict, + EvalScoredResult, + EvalScoredRun, + EvalRegression, + EvalImprovement, + EvalReport, +} from './eval-types.js'; + // Event Capture export { captureEvents, registerEventMatchers } from './capture-events.js'; export type { diff --git a/packages/toolpack-knowledge/package.json b/packages/toolpack-knowledge/package.json index 704f894..e2a6e31 100644 --- a/packages/toolpack-knowledge/package.json +++ b/packages/toolpack-knowledge/package.json @@ -1,7 +1,7 @@ { "name": "@toolpack-sdk/knowledge", "version": "2.1.1", - "description": "RAG (Retrieval-Augmented Generation) package for Toolpack SDK", + "description": "Knowledge/RAG package for Toolpack SDK — web crawling, REST API ingestion, hybrid semantic + keyword search, and streaming indexing across 6 source types (Markdown, Web, API, JSON, SQLite, PostgreSQL)", "type": "module", "main": "dist/index.cjs", "module": "dist/index.js", diff --git a/packages/toolpack-sdk/README.md b/packages/toolpack-sdk/README.md index dbf225b..7bcfdb9 100644 --- a/packages/toolpack-sdk/README.md +++ b/packages/toolpack-sdk/README.md @@ -1,6 +1,6 @@ # Toolpack SDK -A unified TypeScript/Node.js SDK for building AI-powered applications with multiple providers, 100+ built-in tools, a workflow engine, and a flexible mode system — all through a single API. +The TypeScript SDK for building production AI agents — 100+ built-in tools, 8 channel integrations, a persistent cognitive layer, and full Knowledge/RAG, all in one package. 
[![npm version](https://img.shields.io/npm/v/toolpack-sdk.svg)](https://www.npmjs.com/package/toolpack-sdk) [![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) @@ -17,9 +17,9 @@ A unified TypeScript/Node.js SDK for building AI-powered applications with multi - **Workflow Engine** — AI-driven planning with plan-direct execution and parallel tool orchestration - **Mode System** — Built-in Agent and Chat modes, plus `createMode()` for custom modes with tool filtering - **HITL Confirmation** — Human-in-the-loop approval for high-risk operations with configurable bypass rules -- **Custom Providers** — Bring your own provider by implementing the `ProviderAdapter` interface -- **101 Built-in Tools** across 14 categories (including 4 skill-tools and 2 mcp-tools): -- **MCP Tool Server Integration** — dynamically bridge external Model Context Protocol servers into Toolpack as first-class tools via `createMcpToolProject()` and `disconnectMcpToolProject()`. +- **Extensible at Every Layer** — Every built-in component is a plug-in point: custom tools (`ToolDefinition`), custom channels (`BaseChannel`), custom provider adapters (`ProviderAdapter`), custom agents (`BaseAgent`), custom modes (`createMode()`), and custom interceptors — all using the same interfaces as the built-ins +- **100+ Built-in Tools** across 12 categories: +- **MCP Client & Server** — consume external MCP servers via `createMcpToolProject()`, or expose Toolpack as an MCP server via `sdk.startMcpServer()` with static/JWT/custom auth, search mode, and agent exposure. 
| Category | Tools | Description | |----------|-------|-------------| diff --git a/packages/toolpack-sdk/docs/examples/mcp-server-example.ts b/packages/toolpack-sdk/docs/examples/mcp-server-example.ts new file mode 100644 index 0000000..06b1db0 --- /dev/null +++ b/packages/toolpack-sdk/docs/examples/mcp-server-example.ts @@ -0,0 +1,121 @@ +/** + * Toolpack MCP Server — entry point example + * + * Exposes Toolpack's 110+ built-in tools as an MCP server so any MCP-compatible + * client (Claude Desktop, Cursor, Windsurf, custom agents) can use them. + * + * Prerequisites: + * npm install toolpack-sdk @modelcontextprotocol/sdk + * + * ─── stdio transport (Claude Desktop / Cursor) ──────────────────────────────── + * + * 1. Run this file: node mcp-server-example.js + * 2. Add to ~/Library/Application Support/Claude/claude_desktop_config.json: + * + * { + * "mcpServers": { + * "toolpack": { + * "command": "node", + * "args": ["/absolute/path/to/mcp-server-example.js"] + * } + * } + * } + * + * ─── HTTP transport (remote / hosted) ──────────────────────────────────────── + * + * Set TOOLPACK_MCP_TRANSPORT=http and TOOLPACK_MCP_PORT=3000 to run as an HTTP server. + * MCP clients connect to http://localhost:3000. + * + * Always set MCP_AUTH_MODE when using HTTP outside of localhost. + * Supported values: 'static', 'jwt', 'none' (localhost only, not recommended in production) + */ + +import { Toolpack } from 'toolpack-sdk'; +import type { McpAuthConfig } from 'toolpack-sdk'; + +const transport = (process.env.TOOLPACK_MCP_TRANSPORT ?? 'stdio') as 'stdio' | 'http'; +const port = Number(process.env.TOOLPACK_MCP_PORT ?? 3000); + +const sdk = await Toolpack.init({ + provider: 'anthropic', + tools: true, + apiKey: process.env.ANTHROPIC_API_KEY, +}); + +// ─── Auth configuration ─────────────────────────────────────────────────────── +// Auth is only enforced on the HTTP transport. stdio is process-isolated. 
+// When transport is 'http' and no auth is set, a warning is logged and all +// requests are accepted — safe for localhost only. + +function buildAuth(): McpAuthConfig | undefined { + const mode = process.env.MCP_AUTH_MODE; + + if (mode === 'static') { + // Pre-shared bearer token — suitable for self-hosted / dev deployments. + // Generate: node -e "console.log(require('crypto').randomBytes(32).toString('hex'))" + const token = process.env.MCP_TOKEN; + if (!token) throw new Error('MCP_TOKEN env var required for static auth mode'); + return { mode: 'static', tokens: [token] }; + } + + if (mode === 'jwt') { + // JWT verification via JWKS — works with Auth0, Supabase, Clerk, Keycloak, etc. + const jwksUrl = process.env.MCP_JWKS_URL; + if (!jwksUrl) throw new Error('MCP_JWKS_URL env var required for jwt auth mode'); + return { + mode: 'jwt', + jwksUrl, + audience: process.env.MCP_JWT_AUDIENCE, // e.g. 'https://your-mcp-server.example.com' + issuer: process.env.MCP_JWT_ISSUER, // e.g. 'https://your-tenant.auth0.com/' + }; + } + + // No auth — open server. Only safe on localhost. + return undefined; +} + +// ─── Start server ───────────────────────────────────────────────────────────── + +const handle = await sdk.startMcpServer({ + transport, + port, + auth: buildAuth(), + + // Optional: advertise which OAuth server issues tokens for this server. + // Only used with jwt auth mode — enables MCP client auto-discovery. + serverUrl: process.env.MCP_SERVER_URL, // e.g. 'https://your-mcp-server.example.com' + + // Optional: search mode — dramatically reduces context token usage for 110+ tools. + // tools/list returns only `tool.search` + always-loaded tools. + // Clients call tool.search to discover tools on-demand instead of loading all upfront. + // + // Requires this addition to your system prompt: + // "You have access to a large library of tools via tool.search. 
+ // Before calling any tool that is not already listed, call tool.search + // with a short description of what you want to do." + // + // searchMode: true, + + // Optional: expose only specific tool categories instead of all 110+ tools + // expose: { categories: ['filesystem', 'github', 'slack', 'database'] }, + + // Optional: expose specific tools by name + // expose: { tools: ['fs.read_file', 'fs.write_file', 'slack.chat.postMessage'] }, +}); + +console.error( + `Toolpack MCP server started — ${handle.toolCount} tools exposed over ${transport}` + + (transport === 'http' ? ` on port ${port}` : ''), +); + +// ─── Graceful shutdown ──────────────────────────────────────────────────────── + +process.on('SIGINT', async () => { + await handle.stop(); + process.exit(0); +}); + +process.on('SIGTERM', async () => { + await handle.stop(); + process.exit(0); +}); diff --git a/packages/toolpack-sdk/package.json b/packages/toolpack-sdk/package.json index 8cfff73..7af84bc 100644 --- a/packages/toolpack-sdk/package.json +++ b/packages/toolpack-sdk/package.json @@ -1,7 +1,7 @@ { "name": "toolpack-sdk", "version": "2.1.1", - "description": "Unified TypeScript SDK for AI providers (OpenAI, Anthropic, Gemini, Ollama) with 100+ built-in tools, workflow engine, and mode system for building AI-powered applications", + "description": "TypeScript SDK for production AI agents — 110+ built-in tools across 14 categories, multi-provider support (OpenAI, Anthropic, Gemini, Ollama), workflow engine, and mode system", "engines": { "node": ">=20" }, @@ -55,18 +55,22 @@ "publish:npm": "npm run build && npm run test && npm publish --access public" }, "keywords": [ + "ai-agent", + "production-ai", "ai", "llm", + "typescript", + "sdk", "openai", "anthropic", "claude", "gemini", - "ai-agent", - "coding-agent", + "ollama", "tools", "function-calling", - "typescript", - "sdk" + "rag", + "knowledge-base", + "multi-provider" ], "author": "Sajeer (https://sajeerzeji.com)", "license": "Apache-2.0", @@ 
-80,6 +84,7 @@ }, "devDependencies": { "@eslint/js": "^9.39.2", + "@modelcontextprotocol/sdk": "^1.29.0", "@types/babel__core": "^7.20.5", "@types/babel__traverse": "^7.28.0", "@types/better-sqlite3": "^7.6.13", @@ -100,6 +105,9 @@ }, "dependencies": { "@anthropic-ai/sdk": "^0.73.0", + "jose": "^6.2.3", + "zod": "^3.25.0", + "zod-to-json-schema": "^3.24.5", "@babel/parser": "^7.29.0", "@babel/traverse": "^7.29.0", "@babel/types": "^7.29.0", @@ -118,6 +126,14 @@ "simple-git": "^3.32.3", "web-tree-sitter": "^0.22.6" }, + "peerDependencies": { + "@modelcontextprotocol/sdk": "^1.29.0" + }, + "peerDependenciesMeta": { + "@modelcontextprotocol/sdk": { + "optional": true + } + }, "directories": { "doc": "docs", "test": "tests" diff --git a/packages/toolpack-sdk/src/client/index.ts b/packages/toolpack-sdk/src/client/index.ts index 21e3ba6..a71a544 100644 --- a/packages/toolpack-sdk/src/client/index.ts +++ b/packages/toolpack-sdk/src/client/index.ts @@ -494,7 +494,7 @@ export class AIClient extends EventEmitter { * When tools are enabled and autoExecute is true, handles the full * tool call → execute → send result → get final answer loop. */ - async generate(request: CompletionRequest, providerName?: string): Promise { + async generate(request: CompletionRequest, providerName?: string): Promise> { const provider = this.getProvider(providerName); try { const requestId = newRequestId(); @@ -728,7 +728,13 @@ export class AIClient extends EventEmitter { } } - return response; + if (response.data === undefined && response.content && request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format) { + response.data = (request.response_format as import('zod').ZodType).parse( + JSON.parse(response.content), + ); + } + + return response as CompletionResponse; } catch (error) { throw this.wrapError(error); } @@ -1806,7 +1812,7 @@ NEVER guess or hallucinate tool names. 
ALWAYS use tool.search to discover tools /** * Execute tool.search using BM25 engine. */ - private executeToolSearch(args: Record): string { + executeToolSearch(args: Record): string { const { query, category } = args; const limit = this.toolsConfig.toolSearch?.searchResultLimit ?? 5; const requestedCategory = typeof category === 'string' && category.length > 0 ? category : undefined; diff --git a/packages/toolpack-sdk/src/mcp/index.ts b/packages/toolpack-sdk/src/mcp/index.ts index 337266e..b6b879e 100644 --- a/packages/toolpack-sdk/src/mcp/index.ts +++ b/packages/toolpack-sdk/src/mcp/index.ts @@ -1,10 +1,30 @@ -// MCP Client Module -// Provides JSON-RPC transport for communicating with MCP servers (e.g., chrome-devtools-mcp) +// MCP Module +// Client: JSON-RPC transport for consuming external MCP servers +// Server: expose Toolpack's built-in tools as an MCP server +// ─── Client ─────────────────────────────────────────────────────────────────── export { McpClient, McpClientConfig, McpTimeoutError, McpConnectionError } from './client.js'; export { JsonRpcRequest, JsonRpcResponse, McpTool, McpServerCapabilities, -} from "./types.js"; +} from './types.js'; + +// ─── Server ─────────────────────────────────────────────────────────────────── +// startMcpServer() is intentionally NOT re-exported here. server.ts has static +// imports of @modelcontextprotocol/sdk (an optional peer dep). A static re-export +// would eagerly load those imports and break users who haven't installed the SDK. +// Use Toolpack.startMcpServer() instead — it gates the load behind a dynamic import. 
+export type { + ToolpackMcpServerConfig, + McpServerHandle, + McpTransport, + McpServerExposeConfig, + McpAgentDefinition, + McpAuthConfig, + McpStaticAuthConfig, + McpJwtAuthConfig, + McpCustomAuthConfig, + AuthInfo, +} from './server-types.js'; diff --git a/packages/toolpack-sdk/src/mcp/server-auth.ts b/packages/toolpack-sdk/src/mcp/server-auth.ts new file mode 100644 index 0000000..a0d63dd --- /dev/null +++ b/packages/toolpack-sdk/src/mcp/server-auth.ts @@ -0,0 +1,134 @@ +import { createRemoteJWKSet, jwtVerify } from 'jose'; +import type { IncomingMessage, ServerResponse } from 'node:http'; +import type { AuthInfo } from '@modelcontextprotocol/sdk/server/auth/types.js'; +import type { OAuthTokenVerifier } from '@modelcontextprotocol/sdk/server/auth/provider.js'; +import type { McpAuthConfig } from './server-types.js'; + +// ─── Public helpers ─────────────────────────────────────────────────────────── + +/** + * Build an OAuthTokenVerifier from the given auth config. + * The returned verifier is stateful (JwtVerifier caches the JWKS) — + * create once per server lifetime, not once per request. + */ +export function buildVerifier(auth: McpAuthConfig): OAuthTokenVerifier { + switch (auth.mode) { + case 'static': return new StaticBearerVerifier(auth.tokens); + case 'jwt': return new JwtVerifier(auth); + case 'custom': return { verifyAccessToken: auth.verifyAccessToken }; + } +} + +/** + * Extract and verify a bearer token from an incoming HTTP request. + * + * On success: attaches AuthInfo to req.auth and returns true. + * On failure: writes a 401 or 403 response and returns false. + * The caller must stop processing the request when false is returned. 
+ */ +export async function applyBearerAuth( + req: IncomingMessage & { auth?: AuthInfo }, + res: ServerResponse, + auth: McpAuthConfig, + verifier: OAuthTokenVerifier, +): Promise { + const authHeader = req.headers['authorization']; + const token = typeof authHeader === 'string' && authHeader.startsWith('Bearer ') + ? authHeader.slice(7) + : null; + + if (!token) { + res.writeHead(401, { 'WWW-Authenticate': 'Bearer' }).end(); + return false; + } + + let authInfo: AuthInfo; + try { + authInfo = await verifier.verifyAccessToken(token); + } catch { + res.writeHead(401, { 'WWW-Authenticate': 'Bearer' }).end(); + return false; + } + + // Scope enforcement — only when requiredScopes is explicitly provided. + const required = 'requiredScopes' in auth ? auth.requiredScopes : undefined; + if (required?.length) { + const granted = new Set(authInfo.scopes); + const missing = required.filter(s => !granted.has(s)); + if (missing.length > 0) { + res.writeHead(403).end(`Missing required scopes: ${missing.join(', ')}`); + return false; + } + } + + req.auth = authInfo; + return true; +} + +// ─── StaticBearerVerifier ───────────────────────────────────────────────────── + +class StaticBearerVerifier implements OAuthTokenVerifier { + // Set for O(1) lookup. Timing is not perfectly constant across the Set.has() + // call, but acceptable for static tokens — they are opaque random strings, + // not secrets where a timing-safe compare is strictly required. + private readonly tokenSet: Set; + + constructor(tokens: string[]) { + if (tokens.length === 0) { + throw new Error( + 'McpAuthConfig static mode: tokens array must not be empty. 
' + + 'Generate a token with: crypto.randomBytes(32).toString("hex")', + ); + } + this.tokenSet = new Set(tokens); + } + + async verifyAccessToken(token: string): Promise { + if (!this.tokenSet.has(token)) { + throw new Error('Invalid bearer token.'); + } + return { token, clientId: 'static-client', scopes: [] }; + } +} + +// ─── JwtVerifier ────────────────────────────────────────────────────────────── + +class JwtVerifier implements OAuthTokenVerifier { + // createRemoteJWKSet returns a cached, auto-rotating key set. + // One instance per server lifetime is the correct usage. + private readonly JWKS: ReturnType; + private readonly audience?: string; + private readonly issuer?: string; + + constructor(config: { jwksUrl: string; audience?: string; issuer?: string }) { + this.JWKS = createRemoteJWKSet(new URL(config.jwksUrl)); + this.audience = config.audience; + this.issuer = config.issuer; + } + + async verifyAccessToken(token: string): Promise { + const { payload } = await jwtVerify(token, this.JWKS, { + audience: this.audience, + issuer: this.issuer, + }); + + // Scope extraction: + // - RFC 9068: `scope` claim — space-separated string + // - Okta / Azure AD: `scp` claim — array of strings + const scopeRaw = payload['scope'] ?? payload['scp']; + const scopes: string[] = Array.isArray(scopeRaw) + ? scopeRaw.filter((s): s is string => typeof s === 'string') + : typeof scopeRaw === 'string' + ? scopeRaw.split(' ').filter(Boolean) + : []; + + // clientId: prefer explicit `client_id` claim, fall back to `sub` + const clientId = typeof payload['client_id'] === 'string' + ? payload['client_id'] + : typeof payload.sub === 'string' + ? 
payload.sub + : 'unknown'; + + return { token, clientId, scopes, expiresAt: payload.exp }; + } +} diff --git a/packages/toolpack-sdk/src/mcp/server-types.ts b/packages/toolpack-sdk/src/mcp/server-types.ts new file mode 100644 index 0000000..f6181b0 --- /dev/null +++ b/packages/toolpack-sdk/src/mcp/server-types.ts @@ -0,0 +1,226 @@ +import type { AuthInfo } from '@modelcontextprotocol/sdk/server/auth/types.js'; + +export type McpTransport = 'stdio' | 'http'; + +// Re-export AuthInfo so users implementing custom verifiers don't need to +// import from the SDK's internal path directly. +export type { AuthInfo }; + +// ─── Auth config ────────────────────────────────────────────────────────────── + +export interface McpServerExposeConfig { + /** Expose only tools in these categories. Mutually exclusive with `tools`. */ + categories?: string[]; + /** Expose only these exact tool names. Mutually exclusive with `categories`. */ + tools?: string[]; +} + +export interface McpStaticAuthConfig { + mode: 'static'; + /** + * One or more pre-shared bearer tokens that grant access. + * Generate with: crypto.randomBytes(32).toString('hex') + * All tokens in the array are valid — useful for token rotation. + */ + tokens: string[]; +} + +export interface McpJwtAuthConfig { + mode: 'jwt'; + /** + * JWKS endpoint URL for JWT signature verification. + * @example 'https://your-tenant.auth0.com/.well-known/jwks.json' + * @example 'https://your-project.supabase.co/auth/v1/jwks' + */ + jwksUrl: string; + /** + * Expected `aud` claim in the JWT. + * Required for most OIDC providers — omitting may accept tokens intended for other services. + */ + audience?: string; + /** + * Expected `iss` claim in the JWT. Recommended. + * Also used to populate the `authorization_servers` field in + * /.well-known/oauth-protected-resource when serverUrl is set. + */ + issuer?: string; + /** JWT must have all of these scopes. Checked after signature verification. 
*/ + requiredScopes?: string[]; +} + +export interface McpCustomAuthConfig { + mode: 'custom'; + /** + * Your own token verification logic. + * Throw any error to reject the token — the caller receives a 401. + * Return a valid AuthInfo on success. + * + * @example + * ```typescript + * verifyAccessToken: async (token) => { + * const user = await db.findByToken(token); + * if (!user) throw new Error('Invalid token'); + * return { token, clientId: user.id, scopes: user.scopes }; + * } + * ``` + */ + verifyAccessToken(token: string): Promise; + /** Token must have all of these scopes. Checked after verifyAccessToken resolves. */ + requiredScopes?: string[]; +} + +export type McpAuthConfig = McpStaticAuthConfig | McpJwtAuthConfig | McpCustomAuthConfig; + +// ─── Agent definition ───────────────────────────────────────────────────────── + +/** + * Minimal contract for exposing an agent as an MCP tool. + * Satisfied by McpChannel.asAgentDefinition() from toolpack-agents, + * or by any plain object with these four fields. + */ +export interface McpAgentDefinition { + /** Exposed as "agent.<name>" in tools/list. Must be unique across all agents. */ + name: string; + /** Shown to the MCP client as the tool description. */ + description: string; + /** JSON Schema for the arguments the agent accepts. Defaults to empty object schema. */ + inputSchema?: Record; + /** + * Called when tools/call arrives for this agent. + * Must return the agent's output as a string. + * Throw to signal an error — the MCP client receives isError: true. + */ + invoke(args: Record): Promise; +} + +// ─── Server config ──────────────────────────────────────────────────────────── + +export interface ToolpackMcpServerConfig { + /** Transport type. 'stdio' for Claude Desktop / Cursor. 'http' for remote use. */ + transport: McpTransport; + /** Port for HTTP transport. Default: 3000. Only used when transport is 'http'. */ + port?: number; + /** Filter which tools to expose. 
Exposes all enabled tools when omitted. */ + expose?: McpServerExposeConfig; + /** Server name shown to MCP clients. Default: 'Toolpack SDK'. */ + serverName?: string; + /** Server version shown to MCP clients. Default: '2.0.0'. */ + serverVersion?: string; + + /** + * Authentication for the HTTP transport. Ignored when transport is 'stdio'. + * + * When omitted, the HTTP server accepts all requests — safe for localhost only. + * When set, every request must carry a valid Bearer token; missing or invalid + * tokens are rejected with 401. Scope violations are rejected with 403. + * + * @example Static tokens (dev / self-hosted) + * ```typescript + * auth: { mode: 'static', tokens: [process.env.MCP_TOKEN!] } + * ``` + * + * @example JWT with Auth0 / Supabase / Clerk + * ```typescript + * auth: { + * mode: 'jwt', + * jwksUrl: 'https://your-tenant.auth0.com/.well-known/jwks.json', + * audience: 'https://your-mcp-server.example.com', + * issuer: 'https://your-tenant.auth0.com/', + * } + * ``` + * + * @example Custom verification + * ```typescript + * auth: { + * mode: 'custom', + * verifyAccessToken: async (token) => { + * const user = await db.findByToken(token); + * if (!user) throw new Error('invalid'); + * return { token, clientId: user.id, scopes: user.scopes }; + * } + * } + * ``` + */ + auth?: McpAuthConfig; + + /** + * Agents to expose as MCP tools alongside regular tools. + * Each agent appears in tools/list as "agent.<name>". + * + * Agents run to completion before returning — synchronous from the MCP + * client's perspective. For long-running agents, ensure the MCP client's + * timeout is set appropriately. + * + * The easiest way to produce an entry is via McpChannel.asAgentDefinition() + * from toolpack-agents. A plain object with { name, description, invoke } + * also works — no import from toolpack-agents required. 
+ * + * @example using McpChannel (toolpack-agents) + * ```typescript + * const ch = new McpChannel(); + * const agent = new PrReviewerAgent({ channels: [ch] }); + * await agent.start(); + * await sdk.startMcpServer({ + * transport: 'stdio', + * agents: [ch.asAgentDefinition(agent)], + * }); + * ``` + * + * @example plain object (no extra dependency) + * ```typescript + * await sdk.startMcpServer({ + * transport: 'stdio', + * agents: [{ + * name: 'pr_reviewer', + * description: 'Reviews a pull request end-to-end.', + * inputSchema: { type: 'object', properties: { pr_url: { type: 'string' } }, required: ['pr_url'] }, + * invoke: async (args) => { + * const result = await prReviewer.invokeAgent({ data: args }); + * return result.output; + * }, + * }], + * }); + * ``` + */ + agents?: McpAgentDefinition[]; + + /** + * Enable tool search mode. + * + * When true, tools/list returns only `tool.search` plus any always-loaded tools + * configured in ToolSearchConfig (alwaysLoadedTools / alwaysLoadedCategories). + * MCP clients call `tool.search` first to discover tools on-demand, dramatically + * reducing context token usage for registries with 110+ tools. + * + * Requires the system prompt to instruct the client to use tool.search. + * See docs/examples/mcp-server-example.ts for the recommended prompt snippet. + * + * Default: false — all enabled tools sent upfront. + */ + searchMode?: boolean; + + /** + * Public base URL of this MCP server (e.g. 'https://mcp.example.com'). + * Only used when auth.mode is 'jwt'. + * + * When provided alongside jwt auth, the server mounts + * /.well-known/oauth-protected-resource so MCP clients can + * auto-discover which OAuth server issues tokens for this server. + * + * Ignored for static and custom auth modes. + */ + serverUrl?: string; +} + +export interface McpServerHandle { + /** Stop the MCP server and release all resources. */ + stop(): Promise; + /** Number of tools currently exposed. 
*/ + toolCount: number; + /** + * Actual bound port (HTTP transport only). Useful when port:0 is passed and + * the OS assigns a free port — integration tests read this to know where to connect. + * Always 0 for stdio transport. + */ + port: number; +} diff --git a/packages/toolpack-sdk/src/mcp/server.ts b/packages/toolpack-sdk/src/mcp/server.ts new file mode 100644 index 0000000..e329377 --- /dev/null +++ b/packages/toolpack-sdk/src/mcp/server.ts @@ -0,0 +1,378 @@ +import { createServer, type IncomingMessage } from 'node:http'; +import { randomUUID } from 'node:crypto'; +import { Server } from '@modelcontextprotocol/sdk/server/index.js'; +import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; +import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js'; +import { + ListToolsRequestSchema, + CallToolRequestSchema, +} from '@modelcontextprotocol/sdk/types.js'; +import type { AuthInfo } from '@modelcontextprotocol/sdk/server/auth/types.js'; +import type { ToolRegistry } from '../tools/registry.js'; +import type { ToolContext, ToolDefinition } from '../tools/types.js'; +import type { ToolpackMcpServerConfig, McpServerHandle } from './server-types.js'; +import { logInfo } from '../providers/provider-logger.js'; +import { buildVerifier, applyBearerAuth } from './server-auth.js'; +import { getToolSearchSchema, isToolSearchTool, TOOL_SEARCH_NAME } from '../tools/search/index.js'; + +// ─── Public API ─────────────────────────────────────────────────────────────── + +/** + * Start an MCP server exposing Toolpack's built-in tools. + * + * Uses the low-level Server class (not McpServer) because Toolpack tools use + * plain JSON Schema and McpServer.tool() only accepts Zod schemas. + */ +export async function startMcpServer( + registry: ToolRegistry, + config: ToolpackMcpServerConfig, + searchFn?: (args: Record) => string, +): Promise { + + // 1. 
Create the low-level MCP Server with tools capability declared + const server = new Server( + { + name: config.serverName ?? 'Toolpack SDK', + version: config.serverVersion ?? '2.0.0', + }, + { + capabilities: { tools: {} }, + }, + ); + + // 2. Handle tools/list — resolve fresh on every request so tools added + // via loadToolProject() after startMcpServer() are always included. + server.setRequestHandler(ListToolsRequestSchema, async () => { + if (config.searchMode) { + // Search mode: expose tool.search + always-loaded tools only. + // All other tools are deferred — clients call tool.search to discover them. + const alwaysLoaded = resolveAlwaysLoadedTools(registry, config); + const searchToolSchema = getToolSearchSchema(); + const alwaysLoadedEntries = alwaysLoaded.map(tool => { + const annotations = deriveAnnotations(tool); + return { + name: tool.name, + description: tool.description ?? '', + inputSchema: (tool.parameters ?? { type: 'object', properties: {} }) as unknown as Record, + ...(annotations !== undefined && { annotations }), + }; + }); + // Agents are always listed even in search mode — they are not in the + // ToolRegistry so tool.search cannot discover them. Omitting them here + // would make them completely invisible and uncallable. + return { + tools: [ + { + name: searchToolSchema.name, + description: searchToolSchema.description ?? '', + inputSchema: searchToolSchema.parameters as unknown as Record, + annotations: { readOnlyHint: true }, + }, + ...alwaysLoadedEntries, + ...buildAgentEntries(config), + ], + }; + } + + const tools = resolveTools(registry, config.expose); + const toolEntries = tools.map(tool => { + const annotations = deriveAnnotations(tool); + return { + name: tool.name, + description: tool.description ?? '', + inputSchema: (tool.parameters ?? 
{ type: 'object', properties: {} }) as unknown as Record, + // Only include annotations when there is actual signal — omitting lets + // MCP spec defaults apply (destructiveHint: true, openWorldHint: true), + // which are the correct conservative defaults for uncategorised tools. + ...(annotations !== undefined && { annotations }), + }; + }); + + return { tools: [...toolEntries, ...buildAgentEntries(config)] }; + }); + + // 3. Handle tools/call — intercept tool.search in search mode, then normal lookup + server.setRequestHandler(CallToolRequestSchema, async (request: { params: { name: string; arguments?: Record } }) => { + const { name, arguments: args } = request.params; + + // Intercept tool.search when search mode is enabled. + if (config.searchMode && isToolSearchTool(name)) { + if (!searchFn) { + return toMcpResult('tool.search is not available: searchFn was not provided to startMcpServer.', true); + } + try { + const result = searchFn(args ?? {}); + return toMcpResult(result); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return toMcpResult(`tool.search error: ${message}`, true); + } + } + + // Intercept agent.* calls before the normal tool lookup. + if (name.startsWith('agent.')) { + const agentName = name.slice('agent.'.length); + const agentDef = (config.agents ?? []).find(a => a.name === agentName); + if (!agentDef) { + return toMcpResult(`Agent not found: ${agentName}`, true); + } + try { + const output = await agentDef.invoke(args ?? {}); + return toMcpResult(output); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return toMcpResult(`Agent error (${agentName}): ${message}`, true); + } + } + + const tool = resolveToolByName(registry, name, config.expose); + + if (!tool) { + return toMcpResult(`Tool not found: ${name}`, true); + } + + try { + const ctx: ToolContext = { + workspaceRoot: process.cwd(), + config: registry.getConfig().additionalConfigurations ?? 
{}, + log: (msg) => logInfo(`[MCP Tool] ${msg}`), + }; + const result = await tool.execute(args ?? {}, ctx); + return toMcpResult(result); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return toMcpResult(`Error executing ${name}: ${message}`, true); + } + }); + + // 4. Connect the appropriate transport + if (config.transport === 'stdio') { + const transport = new StdioServerTransport(); + await server.connect(transport); + + return { + get toolCount() { return resolveTools(registry, config.expose).length; }, + port: 0, + stop: async () => { await server.close(); }, + }; + } + + if (config.transport === 'http') { + const port = config.port ?? 3000; + + // Warn when running without auth — open HTTP server is unsafe beyond localhost. + if (!config.auth) { + logInfo( + '[MCP Server] Warning: HTTP transport started without authentication. ' + + 'Safe for localhost only. Set `auth` in startMcpServer() before ' + + 'exposing this server to a network.', + ); + } + + // StreamableHTTPServerTransport is middleware — it does NOT bind to a port. + // We create a Node.js http.Server and route all requests through it. + const transport = new StreamableHTTPServerTransport({ + sessionIdGenerator: () => randomUUID(), + }); + + // Build the verifier once — JwtVerifier caches the JWKS key set internally, + // so creating it per-request would defeat the caching and cause unnecessary + // network fetches. + const verifier = config.auth ? buildVerifier(config.auth) : null; + + const httpServer = createServer(async (req, res) => { + // ── OAuth Protected Resource Metadata (RFC 9728) ────────────────── + // Allows MCP clients to discover which OAuth server issues tokens for + // this server. Only mounted for jwt mode — static/custom auth has no + // external OAuth server to advertise. 
+ if ( + config.auth?.mode === 'jwt' && + config.serverUrl && + req.url === '/.well-known/oauth-protected-resource' + ) { + const metadata: Record = { resource: config.serverUrl }; + if (config.auth.issuer) { + metadata['authorization_servers'] = [config.auth.issuer]; + } + res.writeHead(200, { 'Content-Type': 'application/json' }) + .end(JSON.stringify(metadata)); + return; + } + + // ── Bearer auth ─────────────────────────────────────────────────── + // When auth is configured, every request must carry a valid Bearer + // token. applyBearerAuth writes 401/403 and returns false on failure. + if (verifier) { + const ok = await applyBearerAuth( + req as IncomingMessage & { auth?: AuthInfo }, + res, + config.auth!, + verifier, + ).catch(err => { + // Unexpected error in the verifier itself (e.g. JWKS fetch crash). + logInfo(`[MCP Server] Auth error: ${err instanceof Error ? err.message : String(err)}`); + if (!res.headersSent) res.writeHead(500).end('Internal Server Error'); + return false; + }); + if (!ok) return; + } + + // ── MCP transport ───────────────────────────────────────────────── + transport.handleRequest(req, res).catch(err => { + logInfo(`[MCP Server] HTTP request handler error: ${err instanceof Error ? err.message : String(err)}`); + if (!res.headersSent) { + res.writeHead(500).end('Internal Server Error'); + } + }); + }); + + await server.connect(transport); + + try { + await new Promise((resolve, reject) => { + const onError = (err: NodeJS.ErrnoException) => { + reject(err.code === 'EADDRINUSE' + ? new Error(`MCP HTTP server failed to start: port ${port} is already in use.`) + : err, + ); + }; + httpServer.once('error', onError); + httpServer.listen(port, () => { + httpServer.off('error', onError); + resolve(); + }); + }); + } catch (listenErr) { + // httpServer failed to start — close the already-connected server + // and transport so they don't leak. 
+ await server.close().catch(() => { /* ignore close errors during cleanup */ }); + throw listenErr; + } + + const boundPort = (httpServer.address() as { port: number }).port; + + return { + get toolCount() { return resolveTools(registry, config.expose).length; }, + port: boundPort, + stop: async () => { + try { + await server.close(); + } finally { + await new Promise((resolve, reject) => { + httpServer.close(err => (err ? reject(err) : resolve())); + }); + } + }, + }; + } + + throw new Error(`Unknown MCP transport: "${config.transport}". Use 'stdio' or 'http'.`); +} + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +function resolveTools(registry: ToolRegistry, expose?: ToolpackMcpServerConfig['expose']) { + if (!expose) return registry.getEnabled(); + // Treat empty arrays as "not specified" — expose all rather than zero tools silently + if (expose.categories?.length) return registry.getByCategories(expose.categories); + if (expose.tools?.length) return registry.getByNames(expose.tools); + return registry.getEnabled(); +} + +// O(1) variant used by tools/call — avoids iterating the full list just to find one tool. +// Must stay consistent with resolveTools: a tool that doesn't appear in tools/list +// must not be callable via tools/call. +function resolveToolByName( + registry: ToolRegistry, + name: string, + expose?: ToolpackMcpServerConfig['expose'], +): ToolDefinition | undefined { + const tool = registry.get(name); + if (!tool) return undefined; + + if (expose?.categories?.length) return new Set(expose.categories).has(tool.category) ? tool : undefined; + if (expose?.tools?.length) return expose.tools.includes(name) ? tool : undefined; + + // No active MCP-level filter (expose is undefined or has empty arrays) — + // fall back to the registry's own enabled filter, matching resolveTools behaviour. + return isEnabledInRegistry(registry, tool, name) ? 
tool : undefined; +} + +// Returns true when the tool passes the registry's enabledTools / enabledToolCategories +// config. Fast path (default config): both arrays are empty → all registered tools enabled. +function isEnabledInRegistry(registry: ToolRegistry, tool: ToolDefinition, name: string): boolean { + const { enabledTools, enabledToolCategories } = registry.getConfig(); + if (enabledTools.length === 0 && enabledToolCategories.length === 0) return true; + return enabledTools.includes(name) || enabledToolCategories.includes(tool.category); +} + +/** Build the tools/list entries for all configured agents. */ +function buildAgentEntries(config: ToolpackMcpServerConfig) { + return (config.agents ?? []).map(agent => ({ + name: `agent.${agent.name}`, + description: agent.description, + inputSchema: (agent.inputSchema ?? { type: 'object', properties: {} }) as Record, + })); +} + +/** + * Resolve the always-loaded tools for search mode. + * These appear in tools/list alongside tool.search — clients can call them directly + * without searching first. Respects the expose filter if set. + */ +function resolveAlwaysLoadedTools( + registry: ToolRegistry, + config: ToolpackMcpServerConfig, +): ToolDefinition[] { + const searchConfig = registry.getConfig().toolSearch; + if (!searchConfig) return []; + + const byName = registry.getByNames(searchConfig.alwaysLoadedTools); + const byCategory = registry.getByCategories(searchConfig.alwaysLoadedCategories); + + // Deduplicate and apply expose filter so always-loaded tools are also + // restricted to what's actually callable. 
+ const seen = new Set([TOOL_SEARCH_NAME]); // exclude tool.search itself + const candidates = [...byName, ...byCategory].filter(t => { + if (seen.has(t.name)) return false; + seen.add(t.name); + return true; + }); + + // Intersect with expose filter when active + if (config.expose?.categories?.length) { + const cats = new Set(config.expose.categories); + return candidates.filter(t => cats.has(t.category)); + } + if (config.expose?.tools?.length) { + const names = new Set(config.expose.tools); + return candidates.filter(t => names.has(t.name)); + } + return candidates; +} + +/** + * Derive MCP tool annotations from a ToolDefinition. + * + * Priority: + * 1. Explicit tool.annotations — used as-is. + * 2. tool.confirmation present — { destructiveHint: true } (tool modifies state). + * 3. Neither — undefined (omit annotations; MCP spec defaults apply). + * + * MCP spec defaults when annotations are absent: + * readOnlyHint: false, destructiveHint: true, openWorldHint: true + * + * These conservative defaults are correct for tools we have no signal about + * (e.g. slack.post, gh.create_pr, create-dir — not read-only but no confirmation set). + * Returning readOnlyHint: true for those tools would be a semantic lie. + */ +function deriveAnnotations(tool: ToolDefinition): Record | undefined { + if (tool.annotations) return tool.annotations as Record; + if (tool.confirmation) return { destructiveHint: true }; + return undefined; +} + +function toMcpResult(result: unknown, isError = false) { + const text = typeof result === 'string' ? 
result : JSON.stringify(result, null, 2); + return { content: [{ type: 'text' as const, text }], isError }; +} diff --git a/packages/toolpack-sdk/src/providers/anthropic/index.ts b/packages/toolpack-sdk/src/providers/anthropic/index.ts index a2acecc..1eb67b6 100644 --- a/packages/toolpack-sdk/src/providers/anthropic/index.ts +++ b/packages/toolpack-sdk/src/providers/anthropic/index.ts @@ -1,4 +1,6 @@ import Anthropic from '@anthropic-ai/sdk'; +import { zodOutputFormat } from '@anthropic-ai/sdk/helpers/zod'; +import { type ZodType } from 'zod'; import { ProviderAdapter } from "../base/index.js"; import { CompletionRequest, CompletionResponse, CompletionChunk, ToolCallResult, Message, EmbeddingRequest, EmbeddingResponse, ProviderModelInfo, FileUploadRequest, FileUploadResponse } from "../../types/index.js"; import { AuthenticationError, RateLimitError, InvalidRequestError, ProviderError } from "../../errors/index.js"; @@ -160,10 +162,16 @@ export class AnthropicAdapter extends ProviderAdapter { logDebug(`[Anthropic][${requestId}] generate() request: model=${params.model}, messages=${params.messages.length}, tools=${params.tools?.length || 0}, tool_choice=${params.tool_choice?.type ?? 'unset'}`); logMessagePreview(requestId, 'Anthropic', params.messages); - const response = await this.client.messages.create( - params, - request.signal ? { signal: request.signal } : undefined, - ); + const isZodSchema = request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format; + + if (isZodSchema) { + params.output_config = { format: zodOutputFormat(request.response_format as ZodType) }; + } + + const rawResponse = isZodSchema + ? await (this.client.messages as any).parse(params, request.signal ? { signal: request.signal } : undefined) + : await this.client.messages.create(params, request.signal ? 
{ signal: request.signal } : undefined); + const response = rawResponse as any; const textParts: string[] = []; const toolCalls: ToolCallResult[] = []; @@ -182,7 +190,7 @@ export class AnthropicAdapter extends ProviderAdapter { logDebug(`[Anthropic][${requestId}] Response finish_reason=${response.stop_reason} tool_calls=${toolCalls.length} content_preview=${safePreview(textParts.join(''), 200)}`); - return { + const completionResponse: CompletionResponse = { content: textParts.length > 0 ? textParts.join('') : null, usage: { prompt_tokens: response.usage.input_tokens, @@ -193,6 +201,12 @@ export class AnthropicAdapter extends ProviderAdapter { tool_calls: toolCalls.length > 0 ? toolCalls : undefined, raw: response, }; + + if (isZodSchema && response.parsed_output !== undefined) { + completionResponse.data = response.parsed_output; + } + + return completionResponse; } catch (error) { throw this.handleError(error); } diff --git a/packages/toolpack-sdk/src/providers/gemini/index.ts b/packages/toolpack-sdk/src/providers/gemini/index.ts index fbe75f5..c4de5dc 100644 --- a/packages/toolpack-sdk/src/providers/gemini/index.ts +++ b/packages/toolpack-sdk/src/providers/gemini/index.ts @@ -1,4 +1,6 @@ import { GoogleGenerativeAI } from '@google/generative-ai'; +import { zodToJsonSchema } from 'zod-to-json-schema'; +import { type ZodType } from 'zod'; import { ProviderAdapter } from "../base/index.js"; import { CompletionRequest, CompletionResponse, CompletionChunk, ToolCallResult, Message, EmbeddingRequest, EmbeddingResponse, ProviderModelInfo, FileUploadRequest, FileUploadResponse } from "../../types/index.js"; import { AuthenticationError, RateLimitError, InvalidRequestError, ProviderError } from "../../errors/index.js"; @@ -208,9 +210,12 @@ export class GeminiAdapter extends ProviderAdapter { topP: request.top_p, // responseMimeType must not be 'application/json' when tools are present — // Gemini does not support both simultaneously and will truncate responses. 
- responseMimeType: (request.response_format === 'json_object' && !(request.tools?.length)) + responseMimeType: ((request.response_format === 'json_object' || (request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format)) && !(request.tools?.length)) ? 'application/json' : 'text/plain', + ...(request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format && !(request.tools?.length) + ? { responseSchema: this.sanitizeSchema(zodToJsonSchema(request.response_format as ZodType)) } + : {}), }, }); @@ -291,9 +296,12 @@ export class GeminiAdapter extends ProviderAdapter { topP: request.top_p, // responseMimeType must not be 'application/json' when tools are present — // Gemini does not support both simultaneously and will truncate responses. - responseMimeType: (request.response_format === 'json_object' && !(request.tools?.length)) + responseMimeType: ((request.response_format === 'json_object' || (request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format)) && !(request.tools?.length)) ? 'application/json' : 'text/plain', + ...(request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format && !(request.tools?.length) + ? { responseSchema: this.sanitizeSchema(zodToJsonSchema(request.response_format as ZodType)) } + : {}), }, }); @@ -409,7 +417,7 @@ export class GeminiAdapter extends ProviderAdapter { return parts.filter(Boolean); }; - const history = await Promise.all(historyMsgs.map(async m => { + const rawHistory = await Promise.all(historyMsgs.map(async m => { if (m.role === 'tool' && m.tool_call_id) { return { role: 'function', @@ -455,8 +463,21 @@ export class GeminiAdapter extends ProviderAdapter { }; })); - const lastUserContent = typeof lastMsg.content === 'string' - ? 
lastMsg.content + // Gemini requires consecutive tool responses from the same multi-call turn + // to be grouped into a single role:'function' entry with multiple parts. + // Merge consecutive function entries to satisfy this requirement. + const history: any[] = []; + for (const entry of rawHistory) { + const prev = history[history.length - 1]; + if (entry.role === 'function' && prev?.role === 'function') { + prev.parts.push(...entry.parts); + } else { + history.push(entry); + } + } + + const lastUserContent = typeof lastMsg.content === 'string' + ? lastMsg.content : await mapContentParts(lastMsg.content); return { history, lastUserMessage: lastUserContent }; diff --git a/packages/toolpack-sdk/src/providers/ollama/adapter.ts b/packages/toolpack-sdk/src/providers/ollama/adapter.ts index 1ed827e..2bfdf51 100644 --- a/packages/toolpack-sdk/src/providers/ollama/adapter.ts +++ b/packages/toolpack-sdk/src/providers/ollama/adapter.ts @@ -6,6 +6,8 @@ * Framework-agnostic — usable from CLI, web servers, Electron, etc. 
*/ +import { zodToJsonSchema } from 'zod-to-json-schema'; +import { type ZodType } from 'zod'; import { ProviderAdapter } from "../base/index.js"; import { CompletionRequest, @@ -192,6 +194,10 @@ export class OllamaAdapter extends ProviderAdapter { }, }; + if (request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format) { + payload.format = zodToJsonSchema(request.response_format as ZodType); + } + if (request.tools && request.tools.length > 0 && request.tool_choice !== 'none') { payload.tools = request.tools.map(t => ({ type: 'function', @@ -276,6 +282,10 @@ export class OllamaAdapter extends ProviderAdapter { }, }; + if (request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format) { + payload.format = zodToJsonSchema(request.response_format as ZodType); + } + if (request.tools && request.tools.length > 0 && request.tool_choice !== 'none') { payload.tools = request.tools.map(t => ({ type: 'function', diff --git a/packages/toolpack-sdk/src/providers/openai/index.ts b/packages/toolpack-sdk/src/providers/openai/index.ts index 949a609..abe7f18 100644 --- a/packages/toolpack-sdk/src/providers/openai/index.ts +++ b/packages/toolpack-sdk/src/providers/openai/index.ts @@ -1,4 +1,6 @@ import OpenAI from 'openai'; +import { zodResponseFormat } from 'openai/helpers/zod'; +import { type ZodType } from 'zod'; import { ProviderAdapter } from "../base/index.js"; import { CompletionRequest, CompletionResponse, CompletionChunk, ToolCallResult, Message, EmbeddingRequest, EmbeddingResponse, ProviderModelInfo, FileUploadRequest, FileUploadResponse } from "../../types/index.js"; import { AuthenticationError, RateLimitError, InvalidRequestError, ProviderError } from "../../errors/index.js"; @@ -140,7 +142,11 @@ export class OpenAIAdapter extends ProviderAdapter { temperature: request.temperature, max_tokens: request.max_tokens, top_p: request.top_p, - response_format: 
request.response_format === 'json_object' ? { type: 'json_object' } : undefined, + response_format: request.response_format === 'json_object' + ? { type: 'json_object' } + : (request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format) + ? zodResponseFormat(request.response_format as ZodType, 'structured_output') + : undefined, stream: false, }; @@ -224,7 +230,11 @@ export class OpenAIAdapter extends ProviderAdapter { temperature: request.temperature, max_tokens: request.max_tokens, top_p: request.top_p, - response_format: request.response_format === 'json_object' ? { type: 'json_object' } : undefined, + response_format: request.response_format === 'json_object' + ? { type: 'json_object' } + : (request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format) + ? zodResponseFormat(request.response_format as ZodType, 'structured_output') + : undefined, stream: true, }; diff --git a/packages/toolpack-sdk/src/providers/vertexai/index.ts b/packages/toolpack-sdk/src/providers/vertexai/index.ts index 0694cd9..fed4f4e 100644 --- a/packages/toolpack-sdk/src/providers/vertexai/index.ts +++ b/packages/toolpack-sdk/src/providers/vertexai/index.ts @@ -1,4 +1,6 @@ import { GoogleGenAI } from '@google/genai'; +import { zodToJsonSchema } from 'zod-to-json-schema'; +import { type ZodType } from 'zod'; import type { Content, Part } from '@google/genai'; import { ProviderAdapter } from '../base/index.js'; import type { @@ -209,6 +211,7 @@ export class VertexAIAdapter extends ProviderAdapter { // ─── Private helpers ──────────────────────────────────────────────────────── private buildRequestParams(request: CompletionRequest): { model: string; config: any } { + const isZodSchema = request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format; const config: any = { systemInstruction: this.extractSystemInstruction(request.messages), 
maxOutputTokens: request.max_tokens, @@ -217,9 +220,12 @@ export class VertexAIAdapter extends ProviderAdapter { // responseMimeType must not be set to 'application/json' when function declarations // are present — Vertex AI / Gemini does not support both simultaneously and will // truncate the response to a single token. JSON mode is honoured only for tool-free requests. - responseMimeType: (request.response_format === 'json_object' && !(request.tools?.length)) + responseMimeType: ((request.response_format === 'json_object' || isZodSchema) && !(request.tools?.length)) ? 'application/json' : 'text/plain', + ...(isZodSchema && !(request.tools?.length) + ? { responseSchema: this.sanitizeSchema(zodToJsonSchema(request.response_format as ZodType)) } + : {}), }; if (request.tools && request.tools.length > 0) { @@ -245,7 +251,7 @@ export class VertexAIAdapter extends ProviderAdapter { const historyMsgs = conversation.slice(0, -1); const lastMsg = conversation[conversation.length - 1]; - const history: Content[] = historyMsgs.map(m => { + const rawHistory: Content[] = historyMsgs.map(m => { if (m.role === 'tool' && m.tool_call_id) { return { role: 'function', @@ -283,6 +289,20 @@ export class VertexAIAdapter extends ProviderAdapter { }; }); + // Vertex AI requires that consecutive tool responses belonging to the same + // multi-call turn are grouped into a single role:'function' Content with + // multiple functionResponse parts — not emitted as separate Content entries. 
+ const history: Content[] = []; + for (const entry of rawHistory) { + const prev = history[history.length - 1]; + if (entry.role === 'function' && prev?.role === 'function') { + // Merge into the previous function Content + (prev.parts as Part[]).push(...(entry.parts as Part[])); + } else { + history.push(entry); + } + } + return { history, lastUserMessage: this.contentToParts(lastMsg.content), diff --git a/packages/toolpack-sdk/src/toolpack.ts b/packages/toolpack-sdk/src/toolpack.ts index f9793f4..6a5d4ed 100644 --- a/packages/toolpack-sdk/src/toolpack.ts +++ b/packages/toolpack-sdk/src/toolpack.ts @@ -26,6 +26,7 @@ import { WorkflowExecutor } from './workflows/workflow-executor.js'; import { DEFAULT_WORKFLOW_CONFIG } from './workflows/workflow-types.js'; import { createMcpToolProject, disconnectMcpToolProject, McpToolsConfig } from './tools/index.js'; import type { ToolpackInterceptor, ToolpackNextFunction } from './interceptors/index.js'; +import type { ToolpackMcpServerConfig, McpServerHandle } from './mcp/server-types.js'; export interface ProviderOptions { /** @@ -868,6 +869,107 @@ export class Toolpack extends EventEmitter { } } + /** + * Expose Toolpack's built-in tools as an MCP server. + * + * Any MCP-compatible client (Claude Desktop, Cursor, Windsurf, custom agents) + * can connect and use the full tool catalog without importing this SDK. 
+ * + * Requires `@modelcontextprotocol/sdk` to be installed: + * npm install @modelcontextprotocol/sdk + * + * @example stdio — Claude Desktop / Cursor + * ```typescript + * const sdk = await Toolpack.init({ provider: 'anthropic', tools: true }); + * await sdk.startMcpServer({ transport: 'stdio' }); + * ``` + * + * @example HTTP — open (localhost only) + * ```typescript + * await sdk.startMcpServer({ transport: 'http', port: 3000 }); + * ``` + * + * @example HTTP — with static bearer token auth (dev / self-hosted) + * ```typescript + * await sdk.startMcpServer({ + * transport: 'http', + * port: 3000, + * auth: { mode: 'static', tokens: [process.env.MCP_TOKEN!] }, + * }); + * ``` + * + * @example HTTP — with JWT auth (Auth0 / Supabase / Clerk / any OIDC provider) + * ```typescript + * await sdk.startMcpServer({ + * transport: 'http', + * port: 3000, + * auth: { + * mode: 'jwt', + * jwksUrl: 'https://your-tenant.auth0.com/.well-known/jwks.json', + * audience: 'https://your-mcp-server.example.com', + * issuer: 'https://your-tenant.auth0.com/', + * }, + * serverUrl: 'https://your-mcp-server.example.com', + * }); + * ``` + * + * @example expose only specific categories + * ```typescript + * await sdk.startMcpServer({ + * transport: 'stdio', + * expose: { categories: ['filesystem', 'github', 'slack'] }, + * }); + * ``` + * + * @example search mode — reduces context token usage for 110+ tools + * ```typescript + * // tools/list returns only tool.search; clients discover tools on-demand. + * // Add this to your system prompt: + * // "Use tool.search to discover tools before calling them." + * await sdk.startMcpServer({ transport: 'stdio', searchMode: true }); + * ``` + */ + async startMcpServer(config: ToolpackMcpServerConfig): Promise { + const registry = this.client.getToolRegistry(); + if (!registry) { + throw new Error( + 'No tool registry configured. 
Initialize Toolpack with tools enabled: Toolpack.init({ tools: true })', + ); + } + + // Dynamic import — @modelcontextprotocol/sdk is an optional peer dependency. + // Only loaded when startMcpServer() is actually called. + // Users who don't use MCP server pay zero overhead. + let startMcpServerFn: typeof import('./mcp/server.js').startMcpServer; + try { + const mod = await import('./mcp/server.js'); + startMcpServerFn = mod.startMcpServer; + } catch (err) { + // Only rewrite the error message when the failure is specifically + // a missing @modelcontextprotocol/sdk module. Other errors (e.g. + // runtime bugs in server.ts) should propagate as-is. + const isMissingDep = err instanceof Error && + (err as NodeJS.ErrnoException).code === 'MODULE_NOT_FOUND' && + err.message.includes('@modelcontextprotocol'); + if (isMissingDep) { + throw new Error( + 'MCP server requires @modelcontextprotocol/sdk. Install it with:\n' + + ' npm install @modelcontextprotocol/sdk', + ); + } + throw err; + } + + // When search mode is enabled, pass the AIClient's search function so the + // MCP server can execute tool.search without creating a separate BM25 instance. + // This reuses the already-indexed engine in AIClient instead of re-indexing. + const searchFn = config.searchMode + ? (args: Record) => this.client.executeToolSearch(args) + : undefined; + + return startMcpServerFn(registry, config, searchFn); + } + /** * Convenience method to get a flat list of all models across all providers. 
*/ diff --git a/packages/toolpack-sdk/src/tools/coding-tools/parsers/babel-parser.ts b/packages/toolpack-sdk/src/tools/coding-tools/parsers/babel-parser.ts index 6fbbd08..2b8da9f 100644 --- a/packages/toolpack-sdk/src/tools/coding-tools/parsers/babel-parser.ts +++ b/packages/toolpack-sdk/src/tools/coding-tools/parsers/babel-parser.ts @@ -542,7 +542,7 @@ export class BabelParser implements LanguageParser { }); traverse(ast, { - CallExpression(callPath) { + CallExpression(callPath: NodePath) { const callee = callPath.node.callee; let match = false; if (callee.type === 'Identifier' && callee.name === targetName) { diff --git a/packages/toolpack-sdk/src/tools/exec-tools/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/index.test.ts index 70d5db8..ac63454 100644 --- a/packages/toolpack-sdk/src/tools/exec-tools/index.test.ts +++ b/packages/toolpack-sdk/src/tools/exec-tools/index.test.ts @@ -10,9 +10,9 @@ describe('exec-tools project', () => { expect(execToolsProject.manifest.author).toBe('Sajeer'); }); - it('should export 6 tools matching the manifest', () => { + it('should export 8 tools matching the manifest', () => { expect(execToolsProject.tools).toHaveLength(execToolsProject.manifest.tools.length); - expect(execToolsProject.tools).toHaveLength(6); + expect(execToolsProject.tools).toHaveLength(8); }); it('should have tool names matching the manifest list', () => { diff --git a/packages/toolpack-sdk/src/tools/exec-tools/index.ts b/packages/toolpack-sdk/src/tools/exec-tools/index.ts index da142b6..f610948 100644 --- a/packages/toolpack-sdk/src/tools/exec-tools/index.ts +++ b/packages/toolpack-sdk/src/tools/exec-tools/index.ts @@ -2,14 +2,18 @@ import type { ToolProject } from "../types.js"; import { execRunTool } from './tools/run/index.js'; import { execRunShellTool } from './tools/run-shell/index.js'; import { execRunBackgroundTool } from './tools/run-background/index.js'; +import { execRunBlockingTool } from './tools/run-blocking/index.js'; import { 
execReadOutputTool } from './tools/read-output/index.js'; +import { execTailOutputTool } from './tools/tail-output/index.js'; import { execKillTool } from './tools/kill/index.js'; import { execListProcessesTool } from './tools/list-processes/index.js'; export { execRunTool } from './tools/run/index.js'; export { execRunShellTool } from './tools/run-shell/index.js'; export { execRunBackgroundTool } from './tools/run-background/index.js'; +export { execRunBlockingTool } from './tools/run-blocking/index.js'; export { execReadOutputTool } from './tools/read-output/index.js'; +export { execTailOutputTool } from './tools/tail-output/index.js'; export { execKillTool } from './tools/kill/index.js'; export { execListProcessesTool } from './tools/list-processes/index.js'; @@ -22,14 +26,14 @@ export const execToolsProject: ToolProject = { description: 'Code execution tools for running commands, managing background processes, and automation.', author: 'Sajeer', tools: [ - 'exec.run', 'exec.run_shell', 'exec.run_background', - 'exec.read_output', 'exec.kill', 'exec.list_processes', + 'exec.run', 'exec.run_shell', 'exec.run_background', 'exec.run_blocking', + 'exec.read_output', 'exec.tail_output', 'exec.kill', 'exec.list_processes', ], category: 'execution', }, tools: [ - execRunTool, execRunShellTool, execRunBackgroundTool, - execReadOutputTool, execKillTool, execListProcessesTool, + execRunTool, execRunShellTool, execRunBackgroundTool, execRunBlockingTool, + execReadOutputTool, execTailOutputTool, execKillTool, execListProcessesTool, ], dependencies: {}, }; diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-background/index.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-background/index.ts index c69cd8a..4ddf2c1 100644 --- a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-background/index.ts +++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-background/index.ts @@ -42,7 +42,7 @@ export const execRunBackgroundTool: ToolDefinition = { 
category, execute, confirmation: { - level: 'high', + level: 'medium', reason: 'This will spawn a background process that runs unsupervised.', showArgs: ['command'], }, diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts new file mode 100644 index 0000000..db5ff33 --- /dev/null +++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts @@ -0,0 +1,70 @@ +import { describe, it, expect } from 'vitest'; +import { execRunBlockingTool } from './index.js'; + +describe('exec.run_blocking tool', () => { + it('should have correct metadata', () => { + expect(execRunBlockingTool.name).toBe('exec.run_blocking'); + expect(execRunBlockingTool.category).toBe('execution'); + expect(execRunBlockingTool.confirmation?.level).toBe('medium'); + }); + + it('should execute a command and return structured result', async () => { + const result = JSON.parse(await execRunBlockingTool.execute({ command: 'echo hello' })); + expect(result.exitCode).toBe(0); + expect(result.success).toBe(true); + expect(result.stdout.trim()).toBe('hello'); + expect(result.stderr).toBe(''); + }); + + it('should support pipes and shell features', async () => { + const isWindows = process.platform === 'win32'; + const command = isWindows + ? 
'echo "hello world" | ForEach-Object { $_ -replace " ", "_" }' + : 'echo "hello world" | tr " " "_"'; + const result = JSON.parse(await execRunBlockingTool.execute({ command })); + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toBe('hello_world'); + }); + + it('should wait for slow commands to complete naturally', async () => { + const start = Date.now(); + const result = JSON.parse(await execRunBlockingTool.execute({ + command: 'sleep 1 && echo done', + })); + const elapsed = Date.now() - start; + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toBe('done'); + expect(elapsed).toBeGreaterThanOrEqual(1000); + }, 10000); + + it('should return non-zero exitCode and success=false for failing commands', async () => { + const result = JSON.parse(await execRunBlockingTool.execute({ + command: 'ls /nonexistent-path-xyz 2>&1', + })); + expect(result.exitCode).not.toBe(0); + expect(result.success).toBe(false); + }); + + it('should capture stderr separately', async () => { + const isWindows = process.platform === 'win32'; + if (isWindows) return; // skip on Windows + const result = JSON.parse(await execRunBlockingTool.execute({ + command: 'echo out && echo err >&2', + })); + expect(result.stdout.trim()).toBe('out'); + expect(result.stderr.trim()).toBe('err'); + }); + + it('should throw if command is missing', async () => { + await expect(execRunBlockingTool.execute({})).rejects.toThrow('command is required'); + }); + + it('should accept a cwd argument', async () => { + const result = JSON.parse(await execRunBlockingTool.execute({ + command: 'pwd', + cwd: '/tmp', + })); + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toContain('tmp'); + }); +}); diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.ts new file mode 100644 index 0000000..900eee0 --- /dev/null +++ 
b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.ts @@ -0,0 +1,81 @@ +import { spawn } from 'child_process'; +import { ToolDefinition } from '../../../types.js'; +import { name, displayName, description, parameters, category } from './schema.js'; +import { logDebug } from '../../../../providers/provider-logger.js'; + +function getDefaultShell(): string { + if (process.platform === 'win32') return 'powershell.exe'; + return process.env.SHELL || '/bin/sh'; +} + +async function execute(args: Record): Promise { + const command = args.command as string; + const cwd = args.cwd as string | undefined; + + if (!command) { + throw new Error('command is required'); + } + + logDebug(`[exec.run-blocking] execute command="${command.substring(0, 100)}" cwd=${cwd ?? 'default'} (no timeout)`); + + return new Promise((resolve) => { + let stdout = ''; + let stderr = ''; + + const proc = spawn(command, [], { + cwd, + shell: getDefaultShell(), + stdio: ['ignore', 'pipe', 'pipe'], + }); + + proc.stdout?.on('data', (data: Buffer) => { + stdout += data.toString(); + // Cap buffer at 2MB + if (stdout.length > 2_000_000) { + stdout = stdout.slice(-1_000_000); + } + }); + + proc.stderr?.on('data', (data: Buffer) => { + stderr += data.toString(); + if (stderr.length > 2_000_000) { + stderr = stderr.slice(-1_000_000); + } + }); + + proc.on('close', (code) => { + const exitCode = code ?? 
0; + logDebug(`[exec.run-blocking] finished exitCode=${exitCode} stdout_len=${stdout.length} stderr_len=${stderr.length}`); + resolve(JSON.stringify({ + exitCode, + stdout: stdout || '(no output)', + stderr: stderr || '', + success: exitCode === 0, + })); + }); + + proc.on('error', (err) => { + logDebug(`[exec.run-blocking] spawn error: ${err.message}`); + resolve(JSON.stringify({ + exitCode: 1, + stdout: '', + stderr: err.message, + success: false, + })); + }); + }); +} + +export const execRunBlockingTool: ToolDefinition = { + name, + displayName, + description, + parameters, + category, + execute, + confirmation: { + level: 'medium', + reason: 'This will execute a shell command and block until it completes.', + showArgs: ['command'], + }, +}; diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/schema.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/schema.ts new file mode 100644 index 0000000..a62b8fc --- /dev/null +++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/schema.ts @@ -0,0 +1,24 @@ +import { ToolParameters } from '../../../types.js'; + +export const name = 'exec.run_blocking'; +export const displayName = 'Run Blocking'; +export const description = 'Execute a shell command and wait for it to finish naturally — no timeout. ' + + 'Use this for commands that take variable or unknown time (e.g. npm install, builds, tests). ' + + 'Returns exit code, stdout, and stderr when the process exits. ' + + 'For processes that never exit (servers, watchers), use exec.run_background instead.'; +export const category = 'execution'; + +export const parameters: ToolParameters = { + type: 'object', + properties: { + command: { + type: 'string', + description: 'The shell command to execute. Supports pipes, redirects, and shell features.', + }, + cwd: { + type: 'string', + description: 'Working directory for the command (optional). 
Defaults to the current working directory.', + }, + }, + required: ['command'], +}; diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.test.ts new file mode 100644 index 0000000..fc668de --- /dev/null +++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.test.ts @@ -0,0 +1,79 @@ +import { describe, it, expect, afterEach } from 'vitest'; +import { execTailOutputTool } from './index.js'; +import { execRunBackgroundTool } from '../run-background/index.js'; +import { killProcess } from '../../process-registry.js'; + +describe('exec.tail_output tool', () => { + const startedIds: string[] = []; + + afterEach(() => { + for (const id of startedIds) { + killProcess(id); + } + startedIds.length = 0; + }); + + it('should have correct metadata', () => { + expect(execTailOutputTool.name).toBe('exec.tail_output'); + expect(execTailOutputTool.category).toBe('execution'); + }); + + it('should return error for unknown process id', async () => { + const result = JSON.parse(await execTailOutputTool.execute({ process_id: 'proc_unknown_xyz' })); + expect(result.error).toBeDefined(); + expect(result.hint).toBeDefined(); + }); + + it('should return alive=true while process is running', async () => { + const bg = JSON.parse(await execRunBackgroundTool.execute({ command: 'sleep 5' })); + startedIds.push(bg.id); + + const tail = JSON.parse(await execTailOutputTool.execute({ process_id: bg.id })); + expect(tail.alive).toBe(true); + expect(tail.exitCode).toBeNull(); + }); + + it('should return last N lines of stdout', async () => { + const isWindows = process.platform === 'win32'; + const command = isWindows + ? 
'for ($i=1; $i -le 10; $i++) { Write-Output "line $i" }' + : 'for i in $(seq 1 10); do echo "line $i"; done'; + + const bg = JSON.parse(await execRunBackgroundTool.execute({ command })); + startedIds.push(bg.id); + + // Wait for the process to produce output + await new Promise(r => setTimeout(r, 500)); + + const tail = JSON.parse(await execTailOutputTool.execute({ process_id: bg.id, lines: 3 })); + const lines = tail.lastLines.split('\n').filter((l: string) => l.trim()); + expect(lines.length).toBeLessThanOrEqual(3); + }, 10000); + + it('should return alive=false and exitCode after process exits', async () => { + const bg = JSON.parse(await execRunBackgroundTool.execute({ command: 'echo done' })); + startedIds.push(bg.id); + + // Wait for the process to finish + await new Promise(r => setTimeout(r, 300)); + + const tail = JSON.parse(await execTailOutputTool.execute({ process_id: bg.id })); + expect(tail.alive).toBe(false); + expect(tail.exitCode).toBe(0); + expect(tail.lastLines).toContain('done'); + }, 10000); + + it('should default to 20 lines when lines not specified', async () => { + const bg = JSON.parse(await execRunBackgroundTool.execute({ command: 'echo hello' })); + startedIds.push(bg.id); + await new Promise(r => setTimeout(r, 300)); + + const tail = JSON.parse(await execTailOutputTool.execute({ process_id: bg.id })); + expect(tail).toHaveProperty('lastLines'); + expect(tail).toHaveProperty('totalStdoutLines'); + }, 10000); + + it('should throw if process_id is missing', async () => { + await expect(execTailOutputTool.execute({})).rejects.toThrow('process_id is required'); + }); +}); diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.ts new file mode 100644 index 0000000..bd10367 --- /dev/null +++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.ts @@ -0,0 +1,49 @@ +import { ToolDefinition } from '../../../types.js'; +import { 
getProcess } from '../../process-registry.js'; +import { name, displayName, description, parameters, category } from './schema.js'; + +async function execute(args: Record): Promise { + const processId = args.process_id as string; + const numLines = typeof args.lines === 'number' ? Math.max(1, Math.floor(args.lines)) : 20; + + if (!processId) { + throw new Error('process_id is required'); + } + + const managed = getProcess(processId); + if (!managed) { + return JSON.stringify({ + error: `Process not found: ${processId}`, + hint: 'Use exec.run_background to start a process first, then pass its id here.', + }); + } + + const alive = managed.process.exitCode === null; + const exitCode = managed.process.exitCode; + + // Tail stdout + const stdoutLines = managed.stdout.split('\n'); + const tailLines = stdoutLines.slice(-numLines).join('\n').trim(); + + // Last stderr line (useful for error detection) + const stderrLines = managed.stderr.split('\n').filter(l => l.trim()); + const lastStderr = stderrLines.slice(-3).join('\n').trim(); + + return JSON.stringify({ + id: processId, + alive, + exitCode, + lastLines: tailLines || '(no output yet)', + lastStderr: lastStderr || '', + totalStdoutLines: stdoutLines.length, + }); +} + +export const execTailOutputTool: ToolDefinition = { + name, + displayName, + description, + parameters, + category, + execute, +}; diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/schema.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/schema.ts new file mode 100644 index 0000000..5caf304 --- /dev/null +++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/schema.ts @@ -0,0 +1,25 @@ +import { ToolParameters } from '../../../types.js'; + +export const name = 'exec.tail_output'; +export const displayName = 'Tail Process Output'; +export const description = 'Read the last N lines of output from a background process started with exec.run_background. 
' + + 'Use this to monitor long-running or non-exiting processes (e.g. dev servers, watchers) ' + + 'to detect ready signals, errors, or progress without reading all accumulated output. ' + + 'Returns alive status, exit code, and the most recent lines of stdout and stderr.'; +export const category = 'execution'; + +export const parameters: ToolParameters = { + type: 'object', + properties: { + process_id: { + type: 'string', + description: 'The process ID returned by exec.run_background.', + }, + lines: { + type: 'integer', + description: 'Number of lines to return from the end of stdout (default: 20).', + default: 20, + }, + }, + required: ['process_id'], +}; diff --git a/packages/toolpack-sdk/src/tools/index.ts b/packages/toolpack-sdk/src/tools/index.ts index a6c984b..9172487 100644 --- a/packages/toolpack-sdk/src/tools/index.ts +++ b/packages/toolpack-sdk/src/tools/index.ts @@ -20,8 +20,8 @@ export { // exec-tools export { execToolsProject, - execRunTool, execRunShellTool, execRunBackgroundTool, - execReadOutputTool, execKillTool, execListProcessesTool, + execRunTool, execRunShellTool, execRunBackgroundTool, execRunBlockingTool, + execReadOutputTool, execTailOutputTool, execKillTool, execListProcessesTool, } from './exec-tools/index.js'; // system-tools diff --git a/packages/toolpack-sdk/src/tools/registry.ts b/packages/toolpack-sdk/src/tools/registry.ts index 6c8f786..61efbe5 100644 --- a/packages/toolpack-sdk/src/tools/registry.ts +++ b/packages/toolpack-sdk/src/tools/registry.ts @@ -95,6 +95,8 @@ export class ToolRegistry { description: t.description, parameters: t.parameters, category: t.category, + ...(t.cacheable !== undefined && { cacheable: t.cacheable }), + ...(t.annotations !== undefined && { annotations: t.annotations }), })); } @@ -236,6 +238,7 @@ export class ToolRegistry { const { dbToolsProject } = await import('./db-tools/index.js'); const { cloudToolsProject } = await import('./cloud-tools/index.js'); const { slackToolsProject } = await 
import('./slack-tools/index.js'); - await this.loadProjects([fsToolsProject, execToolsProject, systemToolsProject, httpToolsProject, githubToolsProject, webToolsProject, codingToolsProject, gitToolsProject, diffToolsProject, dbToolsProject, cloudToolsProject, slackToolsProject]); + const { k8sToolsProject } = await import('./k8s-tools/index.js'); + await this.loadProjects([fsToolsProject, execToolsProject, systemToolsProject, httpToolsProject, githubToolsProject, webToolsProject, codingToolsProject, gitToolsProject, diffToolsProject, dbToolsProject, cloudToolsProject, slackToolsProject, k8sToolsProject]); } } diff --git a/packages/toolpack-sdk/src/tools/types.ts b/packages/toolpack-sdk/src/tools/types.ts index 6d7dbac..e884e48 100644 --- a/packages/toolpack-sdk/src/tools/types.ts +++ b/packages/toolpack-sdk/src/tools/types.ts @@ -30,6 +30,48 @@ export interface ToolContext { log: (message: string) => void; } +// ── Tool Annotations (MCP) ──────────────────────────────────── + +/** + * Hints about tool behaviour sent to MCP clients in tools/list. + * All fields are optional — clients use them for safety UX (e.g. confirmation + * dialogs before destructive actions) but must not rely on them for security. + * + * MCP spec defaults when annotations are omitted entirely: + * readOnlyHint: false, destructiveHint: true, openWorldHint: true, idempotentHint: false + * + * The MCP server auto-derives annotations when this field is not set: + * - confirmation present → { destructiveHint: true } + * - neither set → annotations omitted (MCP spec defaults apply) + * Set explicitly to override. + */ +export interface ToolAnnotations { + /** + * Tool only reads data — never writes, calls APIs, or modifies state. + * MCP spec default (when absent): false. + * Set to true for pure read tools: fs.read_file, search, list-dir. + */ + readOnlyHint?: boolean; + /** + * Tool may cause irreversible side-effects (delete, overwrite, deploy, send). 
+ * MCP spec default (when absent): true — clients assume worst case. + * Set to false for safe write operations (e.g. create-if-not-exists). + */ + destructiveHint?: boolean; + /** + * Calling the tool multiple times with the same args has no additional effect. + * MCP spec default (when absent): false. + * Set to true for idempotent operations. + */ + idempotentHint?: boolean; + /** + * Tool may interact with external systems: web, APIs, databases, shell, filesystem. + * MCP spec default (when absent): true. + * Set to false only for purely in-process, local tools with no side-effects. + */ + openWorldHint?: boolean; +} + // ── Tool Confirmation (HITL) ───────────────────────────────── export type ConfirmationLevel = 'high' | 'medium'; @@ -59,6 +101,15 @@ export interface ToolDefinition { * Note: Only effective when onToolConfirm callback is provided to AIClient. */ confirmation?: ToolConfirmation; + /** + * MCP annotation hints describing tool behaviour to clients. + * When omitted, the MCP server auto-derives from `confirmation`: + * - confirmation set → { destructiveHint: true } + * - no confirmation → annotations omitted (MCP spec defaults apply) + * Set explicitly to override — particularly useful for marking read-only tools + * (readOnlyHint: true) or idempotent tools (idempotentHint: true). + */ + annotations?: ToolAnnotations; } /** @@ -71,12 +122,14 @@ export interface ToolSchema { description: string; parameters: ToolParameters; category: string; - /** + /** * Whether this tool should be cached after discovery via tool.search. * If false, the tool must be re-discovered each time it's needed. * Default: true */ cacheable?: boolean; + /** MCP annotation hints. See ToolAnnotations for details. 
*/ + annotations?: ToolAnnotations; } // ── Tool Project ────────────────────────────────────────────── diff --git a/packages/toolpack-sdk/src/types/index.ts b/packages/toolpack-sdk/src/types/index.ts index 989a5e6..3f4d43b 100644 --- a/packages/toolpack-sdk/src/types/index.ts +++ b/packages/toolpack-sdk/src/types/index.ts @@ -109,13 +109,30 @@ export interface ToolCallResult { duration?: number; } -export interface CompletionRequest { +export interface CompletionRequest { messages: Message[]; model: string; temperature?: number; max_tokens?: number; top_p?: number; - response_format?: 'text' | 'json_object'; + /** + * Controls the output format: + * - `'text'` — plain text (default) + * - `'json_object'` — unstructured JSON; you parse `response.content` yourself + * - `ZodType` — structured JSON matching the schema; parsed and validated result + * available in `response.data` as fully typed `T` + * + * @example structured output + * ```typescript + * import { z } from 'zod' + * const result = await sdk.generate({ + * messages, + * response_format: z.object({ sentiment: z.string(), score: z.number() }), + * }) + * result.data.sentiment // typed as string + * ``` + */ + response_format?: 'text' | 'json_object' | import('zod').ZodType; stream?: boolean; tools?: ToolCallRequest[]; requestTools?: RequestToolDefinition[]; @@ -139,8 +156,14 @@ export interface Usage { total_tokens: number; } -export interface CompletionResponse { +export interface CompletionResponse { content: string | null; // null if only tool calls + /** + * Parsed and validated structured output. + * Only present when `response_format` is a ZodType. + * TypeScript type is inferred from the schema via the generic on `generate()`. 
+ */ + data?: T; usage?: Usage; /** Detailed breakdown of token usage when executed in agent/workflow mode */ usage_details?: { diff --git a/packages/toolpack-sdk/tests/integration/mcp-server.test.ts b/packages/toolpack-sdk/tests/integration/mcp-server.test.ts new file mode 100644 index 0000000..b0e4bc0 --- /dev/null +++ b/packages/toolpack-sdk/tests/integration/mcp-server.test.ts @@ -0,0 +1,316 @@ +/** + * MCP Server — HTTP integration tests + * + * Spins up a real HTTP MCP server on port 0 (OS-assigned), sends real JSON-RPC + * requests via fetch, and asserts on the responses. No mocking. + * + * Run with: npx vitest run tests/integration/mcp-server.test.ts + * + * Requires ANTHROPIC_API_KEY (or any provider key) — the server only routes + * tool *definitions* (not LLM calls) for these tests, so the key just needs + * to be non-empty for Toolpack.init() to succeed. + */ + +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import { Toolpack } from '../../src/index.js'; +import type { McpServerHandle } from '../../src/mcp/server-types.js'; + + +// ─── helpers ────────────────────────────────────────────────────────────────── + +/** + * MCP client session. 
The Streamable HTTP transport is stateful: + * - First call must be `initialize` to get a session ID + * - All subsequent calls include the `mcp-session-id` header + */ +class McpSession { + private sessionId?: string; + constructor(private url: string, private authHeaders: Record = {}) {} + + async initialize() { + const res = await this.raw('initialize', { + protocolVersion: '2024-11-05', + capabilities: {}, + clientInfo: { name: 'test-client', version: '1.0' }, + }); + this.sessionId = res.sessionId; + // Send initialized notification + await fetch(this.url, { + method: 'POST', + headers: this.headers(), + body: JSON.stringify({ jsonrpc: '2.0', method: 'notifications/initialized' }), + }); + return res; + } + + async call(method: string, params: Record = {}) { + return this.raw(method, params); + } + + private headers(): Record { + return { + 'Content-Type': 'application/json', + 'Accept': 'application/json, text/event-stream', + ...(this.sessionId ? { 'mcp-session-id': this.sessionId } : {}), + ...this.authHeaders, + }; + } + + private async raw(method: string, params: Record) { + const res = await fetch(this.url, { + method: 'POST', + headers: this.headers(), + body: JSON.stringify({ jsonrpc: '2.0', id: 1, method, params }), + }); + + if (res.status === 401 || res.status === 403) { + return { status: res.status, body: {}, sessionId: undefined }; + } + + const contentType = res.headers.get('content-type') ?? ''; + const sessionId = res.headers.get('mcp-session-id') ?? undefined; + let body: Record; + + if (contentType.includes('text/event-stream')) { + const text = await res.text(); + const dataLine = text.split('\n').find(l => l.startsWith('data:')); + body = dataLine ? JSON.parse(dataLine.slice(5).trim()) as Record : {}; + } else { + body = await res.json() as Record; + } + + return { status: res.status, body, sessionId }; + } +} + +/** Open a session and return it ready for tool calls. 
*/ +async function openSession(url: string, authHeaders: Record = {}) { + const session = new McpSession(url, authHeaders); + await session.initialize(); + return session; +} + +/** Raw fetch without session — for testing auth rejection before initialize. */ +async function rawPost(url: string, headers: Record = {}) { + const res = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json', 'Accept': 'application/json, text/event-stream', ...headers }, + body: JSON.stringify({ jsonrpc: '2.0', id: 1, method: 'tools/list', params: {} }), + }); + return { status: res.status }; +} + +async function startServer(overrides: Partial[0]> = {}) { + const searchMode = (overrides as Record).searchMode === true; + const sdk = await Toolpack.init({ + provider: 'anthropic', + tools: true, + apiKey: process.env.ANTHROPIC_API_KEY ?? 'test-key', + // Enable tool search in the default mode when MCP server is in search mode + ...(searchMode ? { modeOverrides: { default: { toolSearch: { enabled: true } } } } : {}), + }); + const handle = await sdk.startMcpServer({ + transport: 'http', + port: 0, + ...overrides, + } as Parameters[0]); + const url = `http://localhost:${handle.port}`; + return { handle, url, sdk }; +} + +// ─── tests ──────────────────────────────────────────────────────────────────── + +// Cross-platform path to a file that always exists and contains 'localhost' +const HOSTS_FILE = process.platform === 'win32' + ? 'C:\\Windows\\System32\\drivers\\etc\\hosts' + : '/etc/hosts'; + +// Opt-in: set RUN_INTEGRATION_TESTS=1 to run. Skipped in CI by default. 
+describe.runIf(process.env.RUN_INTEGRATION_TESTS === '1')('MCP Server — HTTP integration', () => { + describe('tools/list', () => { + let handle: McpServerHandle; + let session: McpSession; + + beforeAll(async () => { + const s = await startServer(); + handle = s.handle; + session = await openSession(s.url); + }); + afterAll(() => handle.stop()); + + it('returns 100+ tools', async () => { + const { status, body } = await session.call('tools/list'); + expect(status).toBe(200); + const result = body.result as { tools: unknown[] }; + expect(result.tools.length).toBeGreaterThan(100); + }); + + it('each tool has name, description, inputSchema', async () => { + const { body } = await session.call('tools/list'); + const result = body.result as { tools: Record[] }; + for (const tool of result.tools.slice(0, 5)) { + expect(typeof tool.name).toBe('string'); + expect(typeof tool.description).toBe('string'); + expect(tool.inputSchema).toBeDefined(); + } + }); + + it('toolCount matches tools/list length', async () => { + const { body } = await session.call('tools/list'); + const result = body.result as { tools: unknown[] }; + expect(handle.toolCount).toBe(result.tools.length); + }); + }); + + describe('tools/call', () => { + let handle: McpServerHandle; + let session: McpSession; + + beforeAll(async () => { + const s = await startServer(); + handle = s.handle; + session = await openSession(s.url); + }); + afterAll(() => handle.stop()); + + it('executes fs.read_file and returns real file content', async () => { + const { status, body } = await session.call('tools/call', { + name: 'fs.read_file', + arguments: { path: HOSTS_FILE }, + }); + expect(status).toBe(200); + const result = body.result as { content: Array<{ type: string; text: string }> }; + expect(result.content[0]?.type).toBe('text'); + expect(result.content[0]?.text).toContain('localhost'); + }); + + it('returns isError:true for unknown tool', async () => { + const { body } = await session.call('tools/call', { + name: 
'does.not.exist', + arguments: {}, + }); + const result = body.result as { isError: boolean }; + expect(result.isError).toBe(true); + }); + + it('returns isError:true for tool execution error', async () => { + const { body } = await session.call('tools/call', { + name: 'fs.read_file', + arguments: { path: '/this/path/does/not/exist/ever' }, + }); + const result = body.result as { isError: boolean }; + expect(result.isError).toBe(true); + }); + }); + + describe('static auth', () => { + const TOKEN = 'integration-test-secret-token'; + let handle: McpServerHandle; + let url: string; + + beforeAll(async () => { + ({ handle, url } = await startServer({ + auth: { mode: 'static', tokens: [TOKEN] }, + })); + }); + afterAll(() => handle.stop()); + + it('rejects request with no token — HTTP 401', async () => { + const { status } = await rawPost(url); + expect(status).toBe(401); + }); + + it('rejects request with wrong token — HTTP 401', async () => { + const { status } = await rawPost(url, { Authorization: 'Bearer wrong-token' }); + expect(status).toBe(401); + }); + + it('accepts request with correct token and lists tools', async () => { + const session = await openSession(url, { Authorization: `Bearer ${TOKEN}` }); + const { status, body } = await session.call('tools/list'); + expect(status).toBe(200); + const result = body.result as { tools: unknown[] }; + expect(result.tools.length).toBeGreaterThan(0); + }); + }); + + describe('search mode', () => { + let handle: McpServerHandle; + let session: McpSession; + + beforeAll(async () => { + const s = await startServer({ searchMode: true }); + handle = s.handle; + session = await openSession(s.url); + }); + afterAll(() => handle.stop()); + + it('tools/list returns only tool.search (+ always-loaded)', async () => { + const { body } = await session.call('tools/list'); + const result = body.result as { tools: Array<{ name: string }> }; + const names = result.tools.map(t => t.name); + expect(names).toContain('tool.search'); + 
expect(names).not.toContain('fs.read_file'); + }); + + it('tool.search returns a JSON response with found and tools fields', async () => { + const { status, body } = await session.call('tools/call', { + name: 'tool.search', + arguments: { query: 'git commit log' }, + }); + expect(status).toBe(200); + const result = body.result as { content: Array<{ text: string }> }; + const text = result.content[0]?.text ?? ''; + const parsed = JSON.parse(text); + expect(parsed).toHaveProperty('query'); + expect(parsed).toHaveProperty('found'); + expect(parsed).toHaveProperty('tools'); + }); + + it('tool.search for git returns git tools', async () => { + const { body } = await session.call('tools/call', { + name: 'tool.search', + arguments: { query: 'git commit log' }, + }); + const result = body.result as { content: Array<{ text: string }> }; + const text = result.content[0]?.text.toLowerCase() ?? ''; + expect(text).toContain('git'); + }); + }); + + describe('expose config', () => { + let handle: McpServerHandle; + let session: McpSession; + + beforeAll(async () => { + const s = await startServer({ expose: { categories: ['filesystem'] } }); + handle = s.handle; + session = await openSession(s.url); + }); + afterAll(() => handle.stop()); + + it('only exposes tools from the specified category', async () => { + const { body } = await session.call('tools/list'); + const result = body.result as { tools: Array<{ name: string }> }; + const names = result.tools.map(t => t.name); + expect(names.every(n => n.startsWith('fs.'))).toBe(true); + expect(names).not.toContain('git.commit'); + expect(names).not.toContain('slack.chat.postMessage'); + }); + }); + + describe('port: 0 (OS-assigned port)', () => { + it('handle.port is a non-zero number', async () => { + const { handle } = await startServer(); + expect(handle.port).toBeGreaterThan(0); + await handle.stop(); + }); + + it('two servers on port:0 get different ports', async () => { + const a = await startServer(); + const b = await 
startServer(); + expect(a.handle.port).not.toBe(b.handle.port); + await Promise.all([a.handle.stop(), b.handle.stop()]); + }); + }); +}); diff --git a/packages/toolpack-sdk/tests/unit/mcp-server-auth.test.ts b/packages/toolpack-sdk/tests/unit/mcp-server-auth.test.ts new file mode 100644 index 0000000..526735c --- /dev/null +++ b/packages/toolpack-sdk/tests/unit/mcp-server-auth.test.ts @@ -0,0 +1,249 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import type { IncomingMessage, ServerResponse } from 'node:http'; +import type { AuthInfo } from '@modelcontextprotocol/sdk/server/auth/types.js'; + +// ─── jose mock ──────────────────────────────────────────────────────────────── +// We mock jose so JwtVerifier tests never make real network requests. + +vi.mock('jose', () => ({ + createRemoteJWKSet: vi.fn().mockReturnValue('mock-jwks'), + jwtVerify: vi.fn(), +})); + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +function makeReq(authHeader?: string): IncomingMessage & { auth?: AuthInfo } { + return { + headers: authHeader ? { authorization: authHeader } : {}, + } as unknown as IncomingMessage & { auth?: AuthInfo }; +} + +function makeRes() { + const written: { statusCode: number; headers: Record; body: string } = { + statusCode: 200, + headers: {}, + body: '', + }; + const res = { + writeHead: vi.fn((code: number, headers?: Record) => { + written.statusCode = code; + if (headers) Object.assign(written.headers, headers); + return res; + }), + end: vi.fn((body?: string) => { + written.body = body ?? 
''; + return res; + }), + get headersSent() { return written.statusCode !== 200 || written.body !== ''; }, + _written: written, + }; + return res as unknown as ServerResponse & { _written: typeof written }; +} + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +describe('buildVerifier', () => { + it('returns a verifier for each mode without throwing', async () => { + const { buildVerifier } = await import('../../src/mcp/server-auth.js'); + expect(buildVerifier({ mode: 'static', tokens: ['tok'] })).toBeDefined(); + expect(buildVerifier({ mode: 'jwt', jwksUrl: 'https://example.com/.well-known/jwks.json' })).toBeDefined(); + expect(buildVerifier({ mode: 'custom', verifyAccessToken: async () => ({ token: 't', clientId: 'c', scopes: [] }) })).toBeDefined(); + }); +}); + +describe('StaticBearerVerifier', () => { + beforeEach(() => vi.resetModules()); + + it('resolves with AuthInfo for a valid token', async () => { + const { buildVerifier } = await import('../../src/mcp/server-auth.js'); + const verifier = buildVerifier({ mode: 'static', tokens: ['secret-token', 'other-token'] }); + const info = await verifier.verifyAccessToken('secret-token'); + expect(info.token).toBe('secret-token'); + expect(info.clientId).toBe('static-client'); + expect(info.scopes).toEqual([]); + }); + + it('throws for an invalid token', async () => { + const { buildVerifier } = await import('../../src/mcp/server-auth.js'); + const verifier = buildVerifier({ mode: 'static', tokens: ['correct'] }); + await expect(verifier.verifyAccessToken('wrong')).rejects.toThrow(); + }); + + it('throws at construction when tokens array is empty', async () => { + const { buildVerifier } = await import('../../src/mcp/server-auth.js'); + expect(() => buildVerifier({ mode: 'static', tokens: [] })).toThrow(/empty/i); + }); + + it('accepts any token from the allowlist', async () => { + const { buildVerifier } = await import('../../src/mcp/server-auth.js'); + const verifier = 
buildVerifier({ mode: 'static', tokens: ['a', 'b', 'c'] }); + await expect(verifier.verifyAccessToken('a')).resolves.toBeDefined(); + await expect(verifier.verifyAccessToken('b')).resolves.toBeDefined(); + await expect(verifier.verifyAccessToken('c')).resolves.toBeDefined(); + }); +}); + +describe('JwtVerifier', () => { + beforeEach(() => vi.resetModules()); + + async function getJwtVerifier(config = {}) { + const { buildVerifier } = await import('../../src/mcp/server-auth.js'); + return buildVerifier({ mode: 'jwt', jwksUrl: 'https://example.com/.well-known/jwks.json', ...config }); + } + + it('resolves with AuthInfo for a valid JWT — scope string', async () => { + const { jwtVerify } = await import('jose'); + vi.mocked(jwtVerify).mockResolvedValueOnce({ + payload: { sub: 'user-123', scope: 'read write', exp: 9999999999 }, + protectedHeader: { alg: 'RS256' }, + } as never); + const verifier = await getJwtVerifier(); + const info = await verifier.verifyAccessToken('jwt-token'); + expect(info.clientId).toBe('user-123'); + expect(info.scopes).toEqual(['read', 'write']); + expect(info.expiresAt).toBe(9999999999); + }); + + it('handles scp array claim (Okta / Azure AD)', async () => { + const { jwtVerify } = await import('jose'); + vi.mocked(jwtVerify).mockResolvedValueOnce({ + payload: { sub: 'user-456', scp: ['api:read', 'api:write'] }, + protectedHeader: { alg: 'RS256' }, + } as never); + const verifier = await getJwtVerifier(); + const info = await verifier.verifyAccessToken('jwt-token'); + expect(info.scopes).toEqual(['api:read', 'api:write']); + }); + + it('prefers client_id claim over sub for clientId', async () => { + const { jwtVerify } = await import('jose'); + vi.mocked(jwtVerify).mockResolvedValueOnce({ + payload: { sub: 'user-123', client_id: 'my-app', scope: '' }, + protectedHeader: { alg: 'RS256' }, + } as never); + const verifier = await getJwtVerifier(); + const info = await verifier.verifyAccessToken('jwt-token'); + 
expect(info.clientId).toBe('my-app'); + }); + + it('falls back to "unknown" clientId when neither client_id nor sub present', async () => { + const { jwtVerify } = await import('jose'); + vi.mocked(jwtVerify).mockResolvedValueOnce({ + payload: { scope: '' }, + protectedHeader: { alg: 'RS256' }, + } as never); + const verifier = await getJwtVerifier(); + const info = await verifier.verifyAccessToken('jwt-token'); + expect(info.clientId).toBe('unknown'); + }); + + it('propagates errors from jwtVerify (expired, invalid signature, etc.)', async () => { + const { jwtVerify } = await import('jose'); + vi.mocked(jwtVerify).mockRejectedValueOnce(new Error('JWTExpired')); + const verifier = await getJwtVerifier(); + await expect(verifier.verifyAccessToken('expired-jwt')).rejects.toThrow('JWTExpired'); + }); + + it('returns empty scopes when no scope claim present', async () => { + const { jwtVerify } = await import('jose'); + vi.mocked(jwtVerify).mockResolvedValueOnce({ + payload: { sub: 'u', client_id: 'c' }, + protectedHeader: { alg: 'RS256' }, + } as never); + const verifier = await getJwtVerifier(); + const info = await verifier.verifyAccessToken('jwt'); + expect(info.scopes).toEqual([]); + }); +}); + +describe('applyBearerAuth', () => { + beforeEach(() => vi.resetModules()); + + const mockVerifier = (result: 'ok' | 'throw') => ({ + verifyAccessToken: result === 'ok' + ? 
vi.fn().mockResolvedValue({ token: 'tok', clientId: 'c', scopes: ['read'] }) + : vi.fn().mockRejectedValue(new Error('bad token')), + }); + + it('returns false and writes 401 when Authorization header is missing', async () => { + const { applyBearerAuth } = await import('../../src/mcp/server-auth.js'); + const req = makeReq(); + const res = makeRes(); + const ok = await applyBearerAuth(req, res as unknown as ServerResponse, { mode: 'static', tokens: ['t'] }, mockVerifier('ok')); + expect(ok).toBe(false); + expect(res._written.statusCode).toBe(401); + expect(res._written.headers['WWW-Authenticate']).toBe('Bearer'); + }); + + it('returns false and writes 401 when Authorization header is not Bearer', async () => { + const { applyBearerAuth } = await import('../../src/mcp/server-auth.js'); + const req = makeReq('Basic dXNlcjpwYXNz'); + const res = makeRes(); + const ok = await applyBearerAuth(req, res as unknown as ServerResponse, { mode: 'static', tokens: ['t'] }, mockVerifier('ok')); + expect(ok).toBe(false); + expect(res._written.statusCode).toBe(401); + }); + + it('returns false and writes 401 when verifier throws', async () => { + const { applyBearerAuth } = await import('../../src/mcp/server-auth.js'); + const req = makeReq('Bearer invalid-token'); + const res = makeRes(); + const ok = await applyBearerAuth(req, res as unknown as ServerResponse, { mode: 'static', tokens: ['correct'] }, mockVerifier('throw')); + expect(ok).toBe(false); + expect(res._written.statusCode).toBe(401); + }); + + it('returns true and sets req.auth when token is valid', async () => { + const { applyBearerAuth } = await import('../../src/mcp/server-auth.js'); + const req = makeReq('Bearer valid-token'); + const res = makeRes(); + const ok = await applyBearerAuth(req, res as unknown as ServerResponse, { mode: 'static', tokens: ['valid-token'] }, mockVerifier('ok')); + expect(ok).toBe(true); + expect(req.auth).toBeDefined(); + expect(req.auth?.clientId).toBe('c'); + }); + + it('returns false 
and writes 403 when required scope is missing', async () => { + const { applyBearerAuth } = await import('../../src/mcp/server-auth.js'); + const req = makeReq('Bearer tok'); + const res = makeRes(); + // verifier returns scopes: ['read'], but we require 'write' + // Use mode: 'custom' — it properly declares requiredScopes in its type + const ok = await applyBearerAuth( + req, + res as unknown as ServerResponse, + { mode: 'custom', verifyAccessToken: async () => ({ token: 'tok', clientId: 'c', scopes: ['read'] }), requiredScopes: ['write'] }, + mockVerifier('ok'), + ); + expect(ok).toBe(false); + expect(res._written.statusCode).toBe(403); + expect(res._written.body).toContain('write'); + }); + + it('passes when token has all required scopes', async () => { + const { applyBearerAuth } = await import('../../src/mcp/server-auth.js'); + const req = makeReq('Bearer tok'); + const res = makeRes(); + // verifier returns scopes: ['read'], requiring only 'read' + const ok = await applyBearerAuth( + req, + res as unknown as ServerResponse, + { mode: 'custom', verifyAccessToken: async () => ({ token: 'tok', clientId: 'c', scopes: ['read'] }), requiredScopes: ['read'] }, + mockVerifier('ok'), + ); + expect(ok).toBe(true); + }); + + it('passes when requiredScopes is empty', async () => { + const { applyBearerAuth } = await import('../../src/mcp/server-auth.js'); + const req = makeReq('Bearer tok'); + const res = makeRes(); + const ok = await applyBearerAuth( + req, + res as unknown as ServerResponse, + { mode: 'custom', verifyAccessToken: async () => ({ token: 'tok', clientId: 'c', scopes: [] }), requiredScopes: [] }, + mockVerifier('ok'), + ); + expect(ok).toBe(true); + }); +}); diff --git a/packages/toolpack-sdk/tests/unit/mcp-server.test.ts b/packages/toolpack-sdk/tests/unit/mcp-server.test.ts new file mode 100644 index 0000000..be49ce8 --- /dev/null +++ b/packages/toolpack-sdk/tests/unit/mcp-server.test.ts @@ -0,0 +1,609 @@ +import { describe, it, expect, vi, beforeEach } 
from 'vitest'; +import { ToolRegistry } from '../../src/tools/registry.js'; +import type { ToolDefinition } from '../../src/tools/types.js'; +import type { ToolpackMcpServerConfig } from '../../src/mcp/server-types.js'; + +// ─── MCP SDK mocks ──────────────────────────────────────────────────────────── +// We mock the entire SDK so no real transport (stdin/stdout, HTTP) is created. +// The fake Server captures setRequestHandler calls so we can invoke them directly. + +type HandlerFn = (req: { params: Record }) => Promise; + +// Captured state, reset per test +let capturedHandlers: Map; + +vi.mock('@modelcontextprotocol/sdk/server/index.js', () => { + return { + Server: class FakeServer { + connect: ReturnType; + close: ReturnType; + constructor() { + capturedHandlers = new Map(); + this.connect = vi.fn().mockResolvedValue(undefined); + this.close = vi.fn().mockResolvedValue(undefined); + } + setRequestHandler(schema: { shape: { method: { _def: { value: string } } } }, handler: HandlerFn) { + const method = schema?.shape?.method?._def?.value ?? 
String(schema); + capturedHandlers.set(method, handler); + } + }, + }; +}); + +vi.mock('@modelcontextprotocol/sdk/server/stdio.js', () => ({ + StdioServerTransport: class FakeStdio {}, +})); + +vi.mock('@modelcontextprotocol/sdk/server/streamableHttp.js', () => ({ + StreamableHTTPServerTransport: class FakeHttp { + handleRequest = vi.fn(); + }, +})); + +vi.mock('@modelcontextprotocol/sdk/types.js', () => ({ + ListToolsRequestSchema: { shape: { method: { _def: { value: 'tools/list' } } } }, + CallToolRequestSchema: { shape: { method: { _def: { value: 'tools/call' } } } }, +})); + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +function makeTool(overrides: Partial = {}): ToolDefinition { + return { + name: 'test.tool', + displayName: 'Test Tool', + description: 'A test tool', + category: 'test', + parameters: { + type: 'object', + properties: { path: { type: 'string' } }, + required: ['path'], + }, + execute: vi.fn().mockResolvedValue('ok'), + ...overrides, + }; +} + +function makeRegistry(tools: ToolDefinition[] = []): ToolRegistry { + const r = new ToolRegistry(); + for (const t of tools) r.register(t); + return r; +} + +async function callList() { + const handler = capturedHandlers.get('tools/list'); + if (!handler) throw new Error('tools/list handler not registered'); + return handler({ params: {} }) as Promise<{ tools: { name: string; description: string; inputSchema: unknown; annotations?: Record }[] }>; +} + +async function callTool(name: string, args: Record = {}) { + const handler = capturedHandlers.get('tools/call'); + if (!handler) throw new Error('tools/call handler not registered'); + return handler({ params: { name, arguments: args } }) as Promise<{ + content: { type: string; text: string }[]; + isError: boolean; + }>; +} + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +describe('startMcpServer — unit', () => { + let startMcpServer: ( + registry: ToolRegistry, + config: 
ToolpackMcpServerConfig, + searchFn?: (args: Record) => string, + ) => Promise; + + beforeEach(async () => { + vi.resetModules(); + ({ startMcpServer } = await import('../../src/mcp/server.js')); + }); + + // ── Search mode ─────────────────────────────────────────────────────────── + + describe('search mode', () => { + const threeTools = () => [ + makeTool({ name: 'fs.read', category: 'filesystem' }), + makeTool({ name: 'slack.post', category: 'slack' }), + makeTool({ name: 'gh.pr', category: 'github' }), + ]; + + it('tools/list returns tool.search as first entry when searchMode is true', async () => { + const registry = makeRegistry(threeTools()); + await startMcpServer(registry, { transport: 'stdio', searchMode: true }); + const result = await callList(); + expect(result.tools[0].name).toBe('tool.search'); + }); + + it('tools/list does NOT include non-always-loaded tools in search mode', async () => { + const registry = makeRegistry(threeTools()); + await startMcpServer(registry, { transport: 'stdio', searchMode: true }); + const result = await callList(); + const names = result.tools.map(t => t.name); + expect(names).not.toContain('fs.read'); + expect(names).not.toContain('slack.post'); + expect(names).not.toContain('gh.pr'); + }); + + it('tools/list includes always-loaded tools alongside tool.search', async () => { + const registry = makeRegistry(threeTools()); + registry.setConfig({ + enabled: true, + autoExecute: true, + maxToolRounds: 5, + toolChoicePolicy: 'auto', + resultMaxChars: 20_000, + enabledTools: [], + enabledToolCategories: [], + toolSearch: { + enabled: true, + alwaysLoadedTools: ['fs.read'], + alwaysLoadedCategories: [], + searchResultLimit: 5, + cacheDiscoveredTools: true, + }, + }); + await startMcpServer(registry, { transport: 'stdio', searchMode: true }); + const result = await callList(); + const names = result.tools.map(t => t.name); + expect(names).toContain('tool.search'); + expect(names).toContain('fs.read'); + 
expect(names).not.toContain('slack.post'); + }); + + it('tool.search entry has readOnlyHint annotation', async () => { + const registry = makeRegistry(threeTools()); + await startMcpServer(registry, { transport: 'stdio', searchMode: true }); + const result = await callList(); + expect(result.tools[0].annotations).toEqual({ readOnlyHint: true }); + }); + + it('tools/list returns all tools when searchMode is false (default)', async () => { + const registry = makeRegistry(threeTools()); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callList(); + const names = result.tools.map(t => t.name); + expect(names).not.toContain('tool.search'); + expect(names).toContain('fs.read'); + expect(names).toContain('slack.post'); + expect(names).toContain('gh.pr'); + }); + + it('tools/call for tool.search invokes searchFn and returns result', async () => { + const registry = makeRegistry(threeTools()); + const searchFn = vi.fn().mockReturnValue(JSON.stringify({ found: 1, tools: [{ name: 'fs.read' }] })); + await startMcpServer(registry, { transport: 'stdio', searchMode: true }, searchFn); + const result = await callTool('tool.search', { query: 'read file' }); + expect(searchFn).toHaveBeenCalledWith({ query: 'read file' }); + expect(result.isError).toBe(false); + expect(result.content[0].text).toContain('fs.read'); + }); + + it('tools/call for tool.search returns isError when searchFn throws', async () => { + const registry = makeRegistry(threeTools()); + const searchFn = vi.fn().mockImplementation(() => { throw new Error('search failed'); }); + await startMcpServer(registry, { transport: 'stdio', searchMode: true }, searchFn); + const result = await callTool('tool.search', { query: 'whatever' }); + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('search failed'); + }); + + it('tools/call for tool.search returns isError when searchMode is true but searchFn missing', async () => { + const registry = 
makeRegistry(threeTools()); + await startMcpServer(registry, { transport: 'stdio', searchMode: true }); // no searchFn + const result = await callTool('tool.search', { query: 'test' }); + expect(result.isError).toBe(true); + }); + + it('tools/call for tool.search falls through to "not found" when searchMode is false', async () => { + const registry = makeRegistry(threeTools()); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callTool('tool.search', { query: 'test' }); + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('tool.search'); + }); + }); + + // ── Tool annotations ────────────────────────────────────────────────────── + + describe('tool annotations', () => { + it('uses explicit annotations when set on the tool', async () => { + const registry = makeRegistry([makeTool({ + name: 'x', + annotations: { readOnlyHint: true, openWorldHint: false }, + })]); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callList(); + expect(result.tools[0].annotations).toEqual({ readOnlyHint: true, openWorldHint: false }); + }); + + it('derives { destructiveHint: true } when confirmation is set and no explicit annotations', async () => { + const registry = makeRegistry([makeTool({ + name: 'x', + confirmation: { level: 'high', reason: 'This will delete.' }, + })]); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callList(); + expect(result.tools[0].annotations).toEqual({ destructiveHint: true }); + }); + + it('derives { destructiveHint: true } for confirmation.level medium as well', async () => { + const registry = makeRegistry([makeTool({ + name: 'x', + confirmation: { level: 'medium', reason: 'This will modify.' 
}, + })]); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callList(); + expect(result.tools[0].annotations).toEqual({ destructiveHint: true }); + }); + + it('omits annotations entirely when neither annotations nor confirmation is set', async () => { + // MCP spec defaults apply: destructiveHint=true, openWorldHint=true, readOnlyHint=false. + // We must NOT claim readOnlyHint:true for tools we have no signal about + // (e.g. slack.post, create-dir — not read-only but no confirmation set). + const registry = makeRegistry([makeTool({ name: 'x' })]); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callList(); + expect(result.tools[0].annotations).toBeUndefined(); + }); + + it('explicit annotations take priority over confirmation', async () => { + // Tool has both — explicit annotations must win + const registry = makeRegistry([makeTool({ + name: 'x', + confirmation: { level: 'high', reason: 'danger' }, + annotations: { destructiveHint: false, idempotentHint: true }, + })]); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callList(); + expect(result.tools[0].annotations).toEqual({ destructiveHint: false, idempotentHint: true }); + }); + }); + + // ── Schema translation ──────────────────────────────────────────────────── + + describe('schema translation', () => { + it('maps parameters → inputSchema in tools/list response', async () => { + const params = { type: 'object', properties: { path: { type: 'string' } }, required: ['path'] }; + const registry = makeRegistry([makeTool({ name: 'fs.read_file', parameters: params })]); + + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callList(); + + expect(result.tools).toHaveLength(1); + expect(result.tools[0].name).toBe('fs.read_file'); + expect(result.tools[0].inputSchema).toEqual(params); + }); + + it('falls back to empty-object inputSchema when parameters is undefined', async () => { + const tool 
= makeTool({ name: 'no.params' }); + // @ts-expect-error intentional: testing undefined parameters path + delete tool.parameters; + const registry = makeRegistry([tool]); + + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callList(); + + expect(result.tools[0].inputSchema).toEqual({ type: 'object', properties: {} }); + }); + + it('preserves tool description in tools/list', async () => { + const registry = makeRegistry([makeTool({ name: 'x', description: 'does something' })]); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callList(); + expect(result.tools[0].description).toBe('does something'); + }); + }); + + // ── Agent exposure ──────────────────────────────────────────────────────── + + describe('agent exposure', () => { + const makeAgentDef = (name: string, overrides: Partial<{ description: string; inputSchema: Record; invoke: () => Promise }> = {}) => ({ + name, + description: overrides.description ?? `${name} agent`, + ...(overrides.inputSchema !== undefined && { inputSchema: overrides.inputSchema }), + invoke: overrides.invoke ?? 
vi.fn().mockResolvedValue(`${name} result`), + }); + + it('tools/list includes agent entries as agent.', async () => { + const registry = makeRegistry([makeTool({ name: 'fs.read' })]); + await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('pr_reviewer')] }); + const result = await callList(); + const names = result.tools.map(t => t.name); + expect(names).toContain('agent.pr_reviewer'); + }); + + it('agents coexist with regular tools in tools/list', async () => { + const registry = makeRegistry([makeTool({ name: 'fs.read' })]); + await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('pr_reviewer')] }); + const result = await callList(); + const names = result.tools.map(t => t.name); + expect(names).toContain('fs.read'); + expect(names).toContain('agent.pr_reviewer'); + }); + + it('agent entry uses provided inputSchema', async () => { + const schema = { type: 'object', properties: { pr_url: { type: 'string' } }, required: ['pr_url'] }; + const registry = makeRegistry([]); + await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('x', { inputSchema: schema })] }); + const result = await callList(); + expect(result.tools[0].inputSchema).toEqual(schema); + }); + + it('agent entry defaults to empty-object inputSchema when not provided', async () => { + const registry = makeRegistry([]); + await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('x')] }); + const result = await callList(); + expect(result.tools[0].inputSchema).toEqual({ type: 'object', properties: {} }); + }); + + it('tools/call invokes the agent and returns its output', async () => { + const invoke = vi.fn().mockResolvedValue('LGTM!'); + const registry = makeRegistry([]); + await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('pr_reviewer', { invoke })] }); + const result = await callTool('agent.pr_reviewer', { pr_url: 'https://github.com/...' 
}); + expect(invoke).toHaveBeenCalledWith({ pr_url: 'https://github.com/...' }); + expect(result.isError).toBe(false); + expect(result.content[0].text).toBe('LGTM!'); + }); + + it('tools/call returns isError when invoke() throws', async () => { + const invoke = vi.fn().mockRejectedValue(new Error('agent failed')); + const registry = makeRegistry([]); + await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('x', { invoke })] }); + const result = await callTool('agent.x', {}); + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('agent failed'); + }); + + it('tools/call returns isError for unknown agent name', async () => { + const registry = makeRegistry([]); + await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('pr_reviewer')] }); + const result = await callTool('agent.unknown', {}); + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('unknown'); + }); + + it('agents appear in tools/list even when searchMode is true', async () => { + // Agents are not in the ToolRegistry — tool.search cannot find them. + // They must always be listed explicitly. 
+ const registry = makeRegistry([makeTool({ name: 'fs.read' })]); + await startMcpServer(registry, { + transport: 'stdio', + searchMode: true, + agents: [makeAgentDef('pr_reviewer')], + }); + const result = await callList(); + const names = result.tools.map(t => t.name); + expect(names).toContain('tool.search'); + expect(names).toContain('agent.pr_reviewer'); + expect(names).not.toContain('fs.read'); // regular tools deferred + }); + + it('tools/list has no agent entries when agents array is empty', async () => { + const registry = makeRegistry([makeTool({ name: 'fs.read' })]); + await startMcpServer(registry, { transport: 'stdio', agents: [] }); + const result = await callList(); + expect(result.tools.every(t => !t.name.startsWith('agent.'))).toBe(true); + }); + + it('multiple agents all appear in tools/list', async () => { + const registry = makeRegistry([]); + await startMcpServer(registry, { + transport: 'stdio', + agents: [makeAgentDef('pr_reviewer'), makeAgentDef('code_analyst')], + }); + const result = await callList(); + const names = result.tools.map(t => t.name); + expect(names).toContain('agent.pr_reviewer'); + expect(names).toContain('agent.code_analyst'); + }); + }); + + // ── Tool filtering ──────────────────────────────────────────────────────── + + describe('tool filtering', () => { + const threeTools = () => [ + makeTool({ name: 'fs.read', category: 'filesystem' }), + makeTool({ name: 'slack.post', category: 'slack' }), + makeTool({ name: 'gh.pr', category: 'github' }), + ]; + + it('exposes all enabled tools when expose is omitted', async () => { + const registry = makeRegistry(threeTools()); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callList(); + expect(result.tools).toHaveLength(3); + }); + + it('filters by categories when expose.categories is set', async () => { + const registry = makeRegistry(threeTools()); + await startMcpServer(registry, { transport: 'stdio', expose: { categories: ['filesystem', 'slack'] 
} }); + const result = await callList(); + expect(result.tools.map(t => t.name).sort()).toEqual(['fs.read', 'slack.post']); + }); + + it('filters by exact names when expose.tools is set', async () => { + const registry = makeRegistry(threeTools()); + await startMcpServer(registry, { transport: 'stdio', expose: { tools: ['fs.read', 'gh.pr'] } }); + const result = await callList(); + expect(result.tools.map(t => t.name).sort()).toEqual(['fs.read', 'gh.pr']); + }); + + it('falls back to all enabled tools when expose.categories is an empty array', async () => { + const registry = makeRegistry(threeTools()); + await startMcpServer(registry, { transport: 'stdio', expose: { categories: [] } }); + const result = await callList(); + expect(result.tools).toHaveLength(3); + }); + + it('falls back to all enabled tools when expose.tools is an empty array', async () => { + const registry = makeRegistry(threeTools()); + await startMcpServer(registry, { transport: 'stdio', expose: { tools: [] } }); + const result = await callList(); + expect(result.tools).toHaveLength(3); + }); + }); + + // ── Result translation ──────────────────────────────────────────────────── + + describe('result translation', () => { + it('wraps a string result in MCP content', async () => { + const registry = makeRegistry([makeTool({ execute: vi.fn().mockResolvedValue('hello world') })]); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callTool('test.tool'); + expect(result.content).toEqual([{ type: 'text', text: 'hello world' }]); + expect(result.isError).toBe(false); + }); + + it('JSON-stringifies an object result', async () => { + const registry = makeRegistry([makeTool({ execute: vi.fn().mockResolvedValue({ files: ['a.ts', 'b.ts'] }) })]); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callTool('test.tool'); + expect(result.content[0].type).toBe('text'); + expect(JSON.parse(result.content[0].text)).toEqual({ files: ['a.ts', 'b.ts'] }); + 
expect(result.isError).toBe(false); + }); + + it('returns isError: true when execute() throws', async () => { + const registry = makeRegistry([makeTool({ execute: vi.fn().mockRejectedValue(new Error('disk full')) })]); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callTool('test.tool'); + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('disk full'); + }); + + it('includes the tool name in the error message when execute() throws', async () => { + const registry = makeRegistry([makeTool({ name: 'my.tool', execute: vi.fn().mockRejectedValue(new Error('boom')) })]); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callTool('my.tool'); + expect(result.content[0].text).toContain('my.tool'); + }); + }); + + // ── Tool not found ──────────────────────────────────────────────────────── + + describe('tool not found', () => { + it('returns isError: true for an unknown tool name without throwing', async () => { + const registry = makeRegistry([makeTool({ name: 'real.tool' })]); + await startMcpServer(registry, { transport: 'stdio' }); + const result = await callTool('ghost.tool'); + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('ghost.tool'); + }); + + it('returns isError: true for a tool registered but excluded by expose.categories', async () => { + const tools = [ + makeTool({ name: 'fs.read', category: 'filesystem' }), + makeTool({ name: 'slack.post', category: 'slack' }), + ]; + const registry = makeRegistry(tools); + await startMcpServer(registry, { transport: 'stdio', expose: { categories: ['filesystem'] } }); + + // slack.post is registered but not in the exposed category + const result = await callTool('slack.post'); + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('slack.post'); + }); + + it('returns isError: true for a tool disabled in registry config (no expose filter set)', async () => { + // Simulates: 
toolpack.config.json has enabledToolCategories: ['filesystem'] + // but the MCP server is started with no expose filter. + // tools/call must not execute a tool outside the enabled set. + const tools = [ + makeTool({ name: 'fs.read', category: 'filesystem' }), + makeTool({ name: 'slack.post', category: 'slack' }), + ]; + const registry = makeRegistry(tools); + // Restrict the registry to only the 'filesystem' category + registry.setConfig({ + enabled: true, + autoExecute: true, + maxToolRounds: 5, + toolChoicePolicy: 'auto', + resultMaxChars: 20_000, + enabledTools: [], + enabledToolCategories: ['filesystem'], + }); + + await startMcpServer(registry, { transport: 'stdio' }); // no expose filter + const result = await callTool('slack.post'); + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('slack.post'); + }); + + it('returns isError: true for a tool disabled in registry config when expose arrays are empty', async () => { + // expose = { categories: [] } falls back to getEnabled() in resolveTools. + // resolveToolByName must do the same — not bypass the registry filter. 
+ const tools = [ + makeTool({ name: 'fs.read', category: 'filesystem' }), + makeTool({ name: 'slack.post', category: 'slack' }), + ]; + const registry = makeRegistry(tools); + registry.setConfig({ + enabled: true, + autoExecute: true, + maxToolRounds: 5, + toolChoicePolicy: 'auto', + resultMaxChars: 20_000, + enabledTools: [], + enabledToolCategories: ['filesystem'], + }); + + // Empty array → falls back to registry enabled filter + await startMcpServer(registry, { transport: 'stdio', expose: { categories: [] } }); + const result = await callTool('slack.post'); + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('slack.post'); + }); + + it('returns isError: true for a tool registered but excluded by expose.tools', async () => { + const tools = [ + makeTool({ name: 'fs.read', category: 'filesystem' }), + makeTool({ name: 'fs.write', category: 'filesystem' }), + ]; + const registry = makeRegistry(tools); + await startMcpServer(registry, { transport: 'stdio', expose: { tools: ['fs.read'] } }); + + // fs.write is registered but not in the explicit allow-list + const result = await callTool('fs.write'); + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('fs.write'); + }); + }); + + // ── Unknown transport ───────────────────────────────────────────────────── + + describe('unknown transport', () => { + it('throws a descriptive error for an unsupported transport value', async () => { + const registry = makeRegistry([makeTool()]); + const config = { transport: 'grpc' } as unknown as ToolpackMcpServerConfig; + await expect(startMcpServer(registry, config)).rejects.toThrow(/grpc/); + }); + }); + + // ── McpServerHandle ─────────────────────────────────────────────────────── + + describe('McpServerHandle', () => { + it('toolCount reflects the number of tools currently exposed', async () => { + const tools = [ + makeTool({ name: 'a', category: 'x' }), + makeTool({ name: 'b', category: 'x' }), + makeTool({ name: 'c', 
category: 'y' }), + ]; + const registry = makeRegistry(tools); + const handle = await startMcpServer(registry, { + transport: 'stdio', + expose: { categories: ['x'] }, + }) as { toolCount: number; stop(): Promise }; + expect(handle.toolCount).toBe(2); + }); + + it('uses custom serverName and serverVersion when provided', async () => { + // Just verify startMcpServer resolves without throwing. + const registry = makeRegistry([makeTool()]); + await expect( + startMcpServer(registry, { transport: 'stdio', serverName: 'My Server', serverVersion: '3.0.0' }) + ).resolves.toBeDefined(); + }); + }); +}); From 426a452d89bbf2434100f3f3236bf20851ee40f4 Mon Sep 17 00:00:00 2001 From: sajeerzeji Date: Mon, 8 Jun 2026 00:05:04 +0530 Subject: [PATCH 2/5] Windows pipeline issues fixed --- .../tools/run-blocking/index.test.ts | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts index db5ff33..4816e0e 100644 --- a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts +++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts @@ -27,10 +27,12 @@ describe('exec.run_blocking tool', () => { }); it('should wait for slow commands to complete naturally', async () => { + const isWindows = process.platform === 'win32'; + const command = isWindows + ? 
'ping -n 2 127.0.0.1 > nul && echo done' + : 'sleep 1 && echo done'; const start = Date.now(); - const result = JSON.parse(await execRunBlockingTool.execute({ - command: 'sleep 1 && echo done', - })); + const result = JSON.parse(await execRunBlockingTool.execute({ command })); const elapsed = Date.now() - start; expect(result.exitCode).toBe(0); expect(result.stdout.trim()).toBe('done'); @@ -60,11 +62,12 @@ describe('exec.run_blocking tool', () => { }); it('should accept a cwd argument', async () => { - const result = JSON.parse(await execRunBlockingTool.execute({ - command: 'pwd', - cwd: '/tmp', - })); + const isWindows = process.platform === 'win32'; + const cwd = isWindows ? process.env.TEMP ?? 'C:\\Windows\\Temp' : '/tmp'; + const command = isWindows ? 'cd' : 'pwd'; + const expectedSubstring = isWindows ? 'temp' : 'tmp'; + const result = JSON.parse(await execRunBlockingTool.execute({ command, cwd })); expect(result.exitCode).toBe(0); - expect(result.stdout.trim()).toContain('tmp'); + expect(result.stdout.trim().toLowerCase()).toContain(expectedSubstring); }); }); From 40920c2ffbd7962a08d50b9190025ae3059e7e0c Mon Sep 17 00:00:00 2001 From: sajeerzeji Date: Mon, 8 Jun 2026 00:11:56 +0530 Subject: [PATCH 3/5] Windows pipeline issues fixed --- .../tools/run-blocking/index.test.ts | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts index 4816e0e..b336797 100644 --- a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts +++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts @@ -1,4 +1,5 @@ import { describe, it, expect } from 'vitest'; +import { tmpdir } from 'node:os'; import { execRunBlockingTool } from './index.js'; describe('exec.run_blocking tool', () => { @@ -27,12 +28,10 @@ describe('exec.run_blocking tool', () => { }); 
it('should wait for slow commands to complete naturally', async () => { - const isWindows = process.platform === 'win32'; - const command = isWindows - ? 'ping -n 2 127.0.0.1 > nul && echo done' - : 'sleep 1 && echo done'; const start = Date.now(); - const result = JSON.parse(await execRunBlockingTool.execute({ command })); + const result = JSON.parse(await execRunBlockingTool.execute({ + command: `node -e "setTimeout(() => { process.stdout.write('done\\n'); }, 1000)"`, + })); const elapsed = Date.now() - start; expect(result.exitCode).toBe(0); expect(result.stdout.trim()).toBe('done'); @@ -62,12 +61,12 @@ describe('exec.run_blocking tool', () => { }); it('should accept a cwd argument', async () => { - const isWindows = process.platform === 'win32'; - const cwd = isWindows ? process.env.TEMP ?? 'C:\\Windows\\Temp' : '/tmp'; - const command = isWindows ? 'cd' : 'pwd'; - const expectedSubstring = isWindows ? 'temp' : 'tmp'; - const result = JSON.parse(await execRunBlockingTool.execute({ command, cwd })); + const cwd = tmpdir(); + const result = JSON.parse(await execRunBlockingTool.execute({ + command: `node -e "process.stdout.write(process.cwd())"`, + cwd, + })); expect(result.exitCode).toBe(0); - expect(result.stdout.trim().toLowerCase()).toContain(expectedSubstring); + expect(result.stdout.trim()).toBe(cwd); }); }); From f1217a2a49b164b9f482597c7d20b1c979709944 Mon Sep 17 00:00:00 2001 From: sajeerzeji Date: Mon, 8 Jun 2026 00:17:05 +0530 Subject: [PATCH 4/5] Github pipeline issues fixed --- .../src/tools/exec-tools/tools/run-blocking/index.test.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts index b336797..9e90bfe 100644 --- a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts +++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts @@ -1,5 
+1,6 @@ import { describe, it, expect } from 'vitest'; import { tmpdir } from 'node:os'; +import { realpathSync } from 'node:fs'; import { execRunBlockingTool } from './index.js'; describe('exec.run_blocking tool', () => { @@ -61,7 +62,7 @@ describe('exec.run_blocking tool', () => { }); it('should accept a cwd argument', async () => { - const cwd = tmpdir(); + const cwd = realpathSync(tmpdir()); const result = JSON.parse(await execRunBlockingTool.execute({ command: `node -e "process.stdout.write(process.cwd())"`, cwd, From a1ec8ddcea6606f220c00da353e4a27db9f2f02e Mon Sep 17 00:00:00 2001 From: sajeerzeji Date: Mon, 8 Jun 2026 00:24:55 +0530 Subject: [PATCH 5/5] Github pipeline issues fixed --- .../src/tools/exec-tools/tools/run-blocking/index.test.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts index 9e90bfe..9e0f3a8 100644 --- a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts +++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts @@ -62,12 +62,14 @@ describe('exec.run_blocking tool', () => { }); it('should accept a cwd argument', async () => { - const cwd = realpathSync(tmpdir()); + const cwd = tmpdir(); const result = JSON.parse(await execRunBlockingTool.execute({ command: `node -e "process.stdout.write(process.cwd())"`, cwd, })); expect(result.exitCode).toBe(0); - expect(result.stdout.trim()).toBe(cwd); + // Canonicalise both sides: macOS symlinks (/var → /private/var) and + // Windows 8.3 short paths (RUNNER~1 → runneradmin) differ in raw form. + expect(realpathSync.native(result.stdout.trim())).toBe(realpathSync.native(cwd)); }); });