From 86449aae380744708daa440bfc3a5400a97429b0 Mon Sep 17 00:00:00 2001
From: sajeerzeji <sajeerzeji44@gmail.com>
Date: Sun, 7 Jun 2026 23:58:18 +0530
Subject: [PATCH 1/5] feat: add MCP server, McpChannel, eval primitives, and
 OTel tracing

---
 README.md                                     |   56 +-
 package-lock.json                             | 1314 ++++++++++++++++-
 packages/toolpack-agents/README.md            |   76 +-
 packages/toolpack-agents/docs/README.md       |    2 +-
 packages/toolpack-agents/docs/channels.md     |   53 +
 packages/toolpack-agents/docs/testing.md      |  117 ++
 packages/toolpack-agents/package.json         |   21 +-
 .../toolpack-agents/src/channels/index.ts     |    2 +
 .../src/channels/mcp-channel.test.ts          |  124 ++
 .../src/channels/mcp-channel.ts               |  124 ++
 packages/toolpack-agents/src/index.ts         |   34 +
 .../interceptors/builtins/builtins.test.ts    |  172 +++
 .../src/interceptors/builtins/index.ts        |   10 +
 .../src/interceptors/builtins/otel-tracer.ts  |  186 +++
 .../toolpack-agents/src/interceptors/index.ts |    8 +
 .../src/testing/eval-dataset.ts               |  121 ++
 .../src/testing/eval-report.ts                |  116 ++
 .../src/testing/eval-runner.ts                |   89 ++
 .../src/testing/eval-scorer.ts                |  248 ++++
 .../toolpack-agents/src/testing/eval-types.ts |  160 ++
 .../toolpack-agents/src/testing/eval.test.ts  |  406 +++++
 packages/toolpack-agents/src/testing/index.ts |   24 +
 packages/toolpack-knowledge/package.json      |    2 +-
 packages/toolpack-sdk/README.md               |    8 +-
 .../docs/examples/mcp-server-example.ts       |  121 ++
 packages/toolpack-sdk/package.json            |   26 +-
 packages/toolpack-sdk/src/client/index.ts     |   12 +-
 packages/toolpack-sdk/src/mcp/index.ts        |   26 +-
 packages/toolpack-sdk/src/mcp/server-auth.ts  |  134 ++
 packages/toolpack-sdk/src/mcp/server-types.ts |  226 +++
 packages/toolpack-sdk/src/mcp/server.ts       |  378 +++++
 .../src/providers/anthropic/index.ts          |   24 +-
 .../src/providers/gemini/index.ts             |   31 +-
 .../src/providers/ollama/adapter.ts           |   10 +
 .../src/providers/openai/index.ts             |   14 +-
 .../src/providers/vertexai/index.ts           |   24 +-
 packages/toolpack-sdk/src/toolpack.ts         |  102 ++
 .../coding-tools/parsers/babel-parser.ts      |    2 +-
 .../src/tools/exec-tools/index.test.ts        |    4 +-
 .../src/tools/exec-tools/index.ts             |   12 +-
 .../exec-tools/tools/run-background/index.ts  |    2 +-
 .../tools/run-blocking/index.test.ts          |   70 +
 .../exec-tools/tools/run-blocking/index.ts    |   81 +
 .../exec-tools/tools/run-blocking/schema.ts   |   24 +
 .../tools/tail-output/index.test.ts           |   79 +
 .../exec-tools/tools/tail-output/index.ts     |   49 +
 .../exec-tools/tools/tail-output/schema.ts    |   25 +
 packages/toolpack-sdk/src/tools/index.ts      |    4 +-
 packages/toolpack-sdk/src/tools/registry.ts   |    5 +-
 packages/toolpack-sdk/src/tools/types.ts      |   55 +-
 packages/toolpack-sdk/src/types/index.ts      |   29 +-
 .../tests/integration/mcp-server.test.ts      |  316 ++++
 .../tests/unit/mcp-server-auth.test.ts        |  249 ++++
 .../tests/unit/mcp-server.test.ts             |  609 ++++++++
 54 files changed, 6135 insertions(+), 81 deletions(-)
 create mode 100644 packages/toolpack-agents/src/channels/mcp-channel.test.ts
 create mode 100644 packages/toolpack-agents/src/channels/mcp-channel.ts
 create mode 100644 packages/toolpack-agents/src/interceptors/builtins/otel-tracer.ts
 create mode 100644 packages/toolpack-agents/src/testing/eval-dataset.ts
 create mode 100644 packages/toolpack-agents/src/testing/eval-report.ts
 create mode 100644 packages/toolpack-agents/src/testing/eval-runner.ts
 create mode 100644 packages/toolpack-agents/src/testing/eval-scorer.ts
 create mode 100644 packages/toolpack-agents/src/testing/eval-types.ts
 create mode 100644 packages/toolpack-agents/src/testing/eval.test.ts
 create mode 100644 packages/toolpack-sdk/docs/examples/mcp-server-example.ts
 create mode 100644 packages/toolpack-sdk/src/mcp/server-auth.ts
 create mode 100644 packages/toolpack-sdk/src/mcp/server-types.ts
 create mode 100644 packages/toolpack-sdk/src/mcp/server.ts
 create mode 100644 packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
 create mode 100644 packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.ts
 create mode 100644 packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/schema.ts
 create mode 100644 packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.test.ts
 create mode 100644 packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.ts
 create mode 100644 packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/schema.ts
 create mode 100644 packages/toolpack-sdk/tests/integration/mcp-server.test.ts
 create mode 100644 packages/toolpack-sdk/tests/unit/mcp-server-auth.test.ts
 create mode 100644 packages/toolpack-sdk/tests/unit/mcp-server.test.ts

diff --git a/README.md b/README.md
index 433139c..8af4071 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Toolpack SDK
 
-A unified TypeScript/Node.js SDK for building AI-powered applications with multiple providers, 100+ built-in tools, a workflow engine, and a flexible mode system — all through a single API.
+The TypeScript SDK for building production AI agents — 100+ built-in tools, 8 channel integrations, a persistent cognitive layer, and full Knowledge/RAG, all in one package.
 
 [![npm version](https://img.shields.io/npm/v/toolpack-sdk.svg)](https://www.npmjs.com/package/toolpack-sdk)
 [![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
@@ -17,8 +17,8 @@ A unified TypeScript/Node.js SDK for building AI-powered applications with multi
 - **Workflow Engine** — AI-driven planning with plan-direct execution and parallel tool orchestration
 - **Mode System** — Built-in Agent and Chat modes, plus `createMode()` for custom modes with tool filtering
 - **HITL Confirmation** — Human-in-the-loop approval for high-risk operations with configurable bypass rules
-- **Custom Providers** — Bring your own provider by implementing the `ProviderAdapter` interface
-- **100+ Built-in Tools** across 14 categories:
+- **Extensible at Every Layer** — Every built-in component is a plug-in point: custom tools (`ToolDefinition`), custom channels (`BaseChannel`), custom provider adapters (`ProviderAdapter`), custom agents (`BaseAgent`), custom modes (`createMode()`), and custom interceptors — all using the same interfaces as the built-ins
+- **100+ Built-in Tools** across 12 categories:
 - **MCP Tool Server Integration** — dynamically bridge external Model Context Protocol servers into Toolpack as first-class tools via `createMcpToolProject()` and `disconnectMcpToolProject()`.
 
 | Category | Tools | Description |
@@ -163,15 +163,14 @@ See `packages/toolpack-sdk/docs/examples/kubernetes-usage.ts` for a complete exa
 - **OpenAI**: Supports `reasoningTier` and `costTier` on model info for GPT-5.x reasoning models. API key read from `OPENAI_API_KEY` or `TOOLPACK_OPENAI_KEY`.
 - **Anthropic**: Does not support embeddings. Tool results are converted to `tool_result` content blocks automatically. `tool_choice: none` is handled by omitting tools from the request. `max_tokens` defaults to `4096` if not specified. API key read from `ANTHROPIC_API_KEY` or `TOOLPACK_ANTHROPIC_KEY`.
 
-## MCP Tool Server Support
+## MCP Support
 
-Toolpack now includes first-class support for Model Context Protocol (MCP) adapters and server tool discovery.
+Toolpack has first-class MCP support in both directions: as a **client** (consuming external MCP servers) and as a **server** (exposing Toolpack tools and agents to any MCP client).
 
-### Quick MCP Setup
+### MCP Client — consume external MCP servers
 
 ```typescript
-import { Toolpack } from 'toolpack-sdk';
-import { createMcpToolProject } from './tools/mcp-tools';
+import { Toolpack, createMcpToolProject } from 'toolpack-sdk';
 
 const mcpToolProject = await createMcpToolProject({
   servers: [
@@ -182,13 +181,6 @@ const mcpToolProject = await createMcpToolProject({
       args: ['-y', '@modelcontextprotocol/server-filesystem', '/workspace'],
       autoConnect: true,
     },
-    {
-      name: 'custom',
-      displayName: 'Custom MCP',
-      command: 'npx',
-      args: ['-y', '@modelcontextprotocol/server-tools'],
-      autoConnect: true,
-    },
   ],
 });
 
@@ -198,11 +190,41 @@ const sdk = await Toolpack.init({
   customTools: [mcpToolProject],
 });
 
-// On shutdown/cold path:
+// On shutdown:
 // await disconnectMcpToolProject(mcpToolProject);
 ```
 
-See `docs/MCP_INTEGRATION.md` and `docs/examples/mcp-integration-example.ts` for full instructions and best practices.
+See `docs/MCP_INTEGRATION.md` for full client configuration options.
+
+### MCP Server — expose Toolpack as an MCP server
+
+Expose all 100+ built-in tools (or a filtered subset) to any MCP client — Claude Desktop, Cursor, custom agents:
+
+```typescript
+const handle = await sdk.startMcpServer({
+  transport: 'http',   // or 'stdio' for Claude Desktop / Cursor
+  port: 3000,
+
+  // Optional: restrict exposed tools
+  expose: { categories: ['filesystem', 'version-control'] },
+
+  // Optional: static bearer token auth
+  auth: { mode: 'static', tokens: [process.env.MCP_TOKEN!] },
+
+  // Optional: expose Toolpack agents as MCP tools
+  agents: [mcpChannel.asAgentDefinition(myAgent)],
+
+  // Optional: search mode — expose only tool.search; clients discover tools on demand
+  searchMode: true,
+});
+
+console.log(`MCP server running on port ${handle.port}`);
+// Call handle.stop() to shut down
+```
+
+**Auth modes:** `static` (pre-shared tokens), `jwt` (JWKS/Auth0/Supabase), `custom` (your own verifier).
+
+**Search mode** reduces context token usage for large tool sets — clients call `tool.search` to discover tools on demand instead of receiving all 100+ upfront.
 - **Gemini**: Uses synthetic tool call IDs (`gemini_<timestamp>_<random>`) since the Gemini API doesn't return tool call IDs natively. Tool results are converted to `functionResponse` parts in chat history automatically. API key read from `GOOGLE_GENERATIVE_AI_KEY` or `TOOLPACK_GEMINI_KEY`.
 - **Ollama**: Auto-discovers all locally pulled models when registered as `{ ollama: {} }`. Uses `/api/show` and tool probing to detect capabilities (tool calling, vision, embeddings) per model. Models without tool support are automatically stripped of tools and given a system instruction to prevent hallucinated tool usage. Uses synthetic tool call IDs (`ollama_<timestamp>_<random>`). Embeddings use the modern `/api/embed` batch endpoint. Legacy per-model registration (`{ 'ollama-llama3': {} }`) is also supported.
 - **OpenRouter**: Routes requests to any of the 300+ models available on [openrouter.ai](https://openrouter.ai) via an OpenAI-compatible API. Models are discovered dynamically from the `/models` endpoint. Tool calling is fully supported; models that reject `tool_choice: 'none'` have tools stripped gracefully instead. No embeddings support. Optional `siteUrl` and `siteName` config for OpenRouter's attribution leaderboard. API key read from `OPENROUTER_API_KEY` or `TOOLPACK_OPENROUTER_KEY`.
diff --git a/package-lock.json b/package-lock.json
index f9c9d91..eec42fd 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -959,6 +959,19 @@
         "node": ">=18.0.0"
       }
     },
+    "node_modules/@hono/node-server": {
+      "version": "1.19.14",
+      "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.14.tgz",
+      "integrity": "sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=18.14.1"
+      },
+      "peerDependencies": {
+        "hono": "^4"
+      }
+    },
     "node_modules/@humanfs/core": {
       "version": "0.19.1",
       "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz",
@@ -1164,6 +1177,71 @@
       "integrity": "sha512-GaHYm+c0O9MjZRu0ongGBRbinu8gVAMd2UZjji6jVmqKtZluZnptXGWhz1E8j8D2HJ3f/yMxKAUC0b+57wncIw==",
       "license": "MIT"
     },
+    "node_modules/@modelcontextprotocol/sdk": {
+      "version": "1.29.0",
+      "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.29.0.tgz",
+      "integrity": "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "@hono/node-server": "^1.19.9",
+        "ajv": "^8.17.1",
+        "ajv-formats": "^3.0.1",
+        "content-type": "^1.0.5",
+        "cors": "^2.8.5",
+        "cross-spawn": "^7.0.5",
+        "eventsource": "^3.0.2",
+        "eventsource-parser": "^3.0.0",
+        "express": "^5.2.1",
+        "express-rate-limit": "^8.2.1",
+        "hono": "^4.11.4",
+        "jose": "^6.1.3",
+        "json-schema-typed": "^8.0.2",
+        "pkce-challenge": "^5.0.0",
+        "raw-body": "^3.0.0",
+        "zod": "^3.25 || ^4.0",
+        "zod-to-json-schema": "^3.25.1"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "peerDependencies": {
+        "@cfworker/json-schema": "^4.1.1",
+        "zod": "^3.25 || ^4.0"
+      },
+      "peerDependenciesMeta": {
+        "@cfworker/json-schema": {
+          "optional": true
+        },
+        "zod": {
+          "optional": false
+        }
+      }
+    },
+    "node_modules/@modelcontextprotocol/sdk/node_modules/ajv": {
+      "version": "8.20.0",
+      "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.20.0.tgz",
+      "integrity": "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "fast-deep-equal": "^3.1.3",
+        "fast-uri": "^3.0.1",
+        "json-schema-traverse": "^1.0.0",
+        "require-from-string": "^2.0.2"
+      },
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/epoberezkin"
+      }
+    },
+    "node_modules/@modelcontextprotocol/sdk/node_modules/json-schema-traverse": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz",
+      "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/@napi-rs/wasm-runtime": {
       "version": "1.1.2",
       "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.2.tgz",
@@ -2655,6 +2733,47 @@
         "npm": ">=7.0.0"
       }
     },
+    "node_modules/accepts": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/accepts/-/accepts-2.0.0.tgz",
+      "integrity": "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "mime-types": "^3.0.0",
+        "negotiator": "^1.0.0"
+      },
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/accepts/node_modules/mime-db": {
+      "version": "1.54.0",
+      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz",
+      "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/accepts/node_modules/mime-types": {
+      "version": "3.0.2",
+      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz",
+      "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "mime-db": "^1.54.0"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
     "node_modules/acorn": {
       "version": "8.16.0",
       "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz",
@@ -2717,6 +2836,48 @@
         "url": "https://github.com/sponsors/epoberezkin"
       }
     },
+    "node_modules/ajv-formats": {
+      "version": "3.0.1",
+      "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-3.0.1.tgz",
+      "integrity": "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "ajv": "^8.0.0"
+      },
+      "peerDependencies": {
+        "ajv": "^8.0.0"
+      },
+      "peerDependenciesMeta": {
+        "ajv": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/ajv-formats/node_modules/ajv": {
+      "version": "8.20.0",
+      "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.20.0.tgz",
+      "integrity": "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "fast-deep-equal": "^3.1.3",
+        "fast-uri": "^3.0.1",
+        "json-schema-traverse": "^1.0.0",
+        "require-from-string": "^2.0.2"
+      },
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/epoberezkin"
+      }
+    },
+    "node_modules/ajv-formats/node_modules/json-schema-traverse": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz",
+      "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/ansi-regex": {
       "version": "5.0.1",
       "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
@@ -3024,6 +3185,48 @@
         "readable-stream": "^3.4.0"
       }
     },
+    "node_modules/body-parser": {
+      "version": "2.2.2",
+      "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-2.2.2.tgz",
+      "integrity": "sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "bytes": "^3.1.2",
+        "content-type": "^1.0.5",
+        "debug": "^4.4.3",
+        "http-errors": "^2.0.0",
+        "iconv-lite": "^0.7.0",
+        "on-finished": "^2.4.1",
+        "qs": "^6.14.1",
+        "raw-body": "^3.0.1",
+        "type-is": "^2.0.1"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/body-parser/node_modules/iconv-lite": {
+      "version": "0.7.2",
+      "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz",
+      "integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "safer-buffer": ">= 2.1.2 < 3.0.0"
+      },
+      "engines": {
+        "node": ">=0.10.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
     "node_modules/boolbase": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
@@ -3108,6 +3311,16 @@
         "esbuild": ">=0.18"
       }
     },
+    "node_modules/bytes": {
+      "version": "3.1.2",
+      "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz",
+      "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
     "node_modules/cac": {
       "version": "6.7.14",
       "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz",
@@ -3341,6 +3554,30 @@
         "node": "^14.18.0 || >=16.10.0"
       }
     },
+    "node_modules/content-disposition": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.1.0.tgz",
+      "integrity": "sha512-5jRCH9Z/+DRP7rkvY83B+yGIGX96OYdJmzngqnw2SBSxqCFPd0w2km3s5iawpGX8krnwSGmF0FW5Nhr0Hfai3g==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/content-type": {
+      "version": "1.0.5",
+      "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz",
+      "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
     "node_modules/convert-source-map": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz",
@@ -3348,6 +3585,44 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/cookie": {
+      "version": "0.7.2",
+      "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz",
+      "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/cookie-signature": {
+      "version": "1.2.2",
+      "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.2.2.tgz",
+      "integrity": "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=6.6.0"
+      }
+    },
+    "node_modules/cors": {
+      "version": "2.8.6",
+      "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.6.tgz",
+      "integrity": "sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "object-assign": "^4",
+        "vary": "^1"
+      },
+      "engines": {
+        "node": ">= 0.10"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
     "node_modules/cosmiconfig": {
       "version": "9.0.1",
       "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.1.tgz",
@@ -3533,6 +3808,16 @@
         "node": ">=0.10"
       }
     },
+    "node_modules/depd": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz",
+      "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
     "node_modules/detect-libc": {
       "version": "2.1.2",
       "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz",
@@ -3691,12 +3976,29 @@
         "safe-buffer": "^5.0.1"
       }
     },
+    "node_modules/ee-first": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
+      "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/emoji-regex": {
       "version": "8.0.0",
       "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
       "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
       "license": "MIT"
     },
+    "node_modules/encodeurl": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz",
+      "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
     "node_modules/encoding-sniffer": {
       "version": "0.2.1",
       "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz",
@@ -3856,6 +4158,13 @@
         "node": ">=6"
       }
     },
+    "node_modules/escape-html": {
+      "version": "1.0.3",
+      "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
+      "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/escape-string-regexp": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
@@ -4065,6 +4374,16 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/etag": {
+      "version": "1.8.1",
+      "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz",
+      "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
     "node_modules/events-universal": {
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/events-universal/-/events-universal-1.0.1.tgz",
@@ -4074,6 +4393,29 @@
         "bare-events": "^2.7.0"
       }
     },
+    "node_modules/eventsource": {
+      "version": "3.0.7",
+      "resolved": "https://registry.npmjs.org/eventsource/-/eventsource-3.0.7.tgz",
+      "integrity": "sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "eventsource-parser": "^3.0.1"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/eventsource-parser": {
+      "version": "3.1.0",
+      "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.1.0.tgz",
+      "integrity": "sha512-kJezFj9YFAMLeORyi7aCLxLbD5/qWMQnoMVlVPyHIll7lgRJCc3JVln9Vgl9nwQi0YkMnhdGTMNn7CkRRAptMg==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
     "node_modules/expand-template": {
       "version": "2.0.3",
       "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz",
@@ -4093,6 +4435,96 @@
         "node": ">=12.0.0"
       }
     },
+    "node_modules/express": {
+      "version": "5.2.1",
+      "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz",
+      "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "accepts": "^2.0.0",
+        "body-parser": "^2.2.1",
+        "content-disposition": "^1.0.0",
+        "content-type": "^1.0.5",
+        "cookie": "^0.7.1",
+        "cookie-signature": "^1.2.1",
+        "debug": "^4.4.0",
+        "depd": "^2.0.0",
+        "encodeurl": "^2.0.0",
+        "escape-html": "^1.0.3",
+        "etag": "^1.8.1",
+        "finalhandler": "^2.1.0",
+        "fresh": "^2.0.0",
+        "http-errors": "^2.0.0",
+        "merge-descriptors": "^2.0.0",
+        "mime-types": "^3.0.0",
+        "on-finished": "^2.4.1",
+        "once": "^1.4.0",
+        "parseurl": "^1.3.3",
+        "proxy-addr": "^2.0.7",
+        "qs": "^6.14.0",
+        "range-parser": "^1.2.1",
+        "router": "^2.2.0",
+        "send": "^1.1.0",
+        "serve-static": "^2.2.0",
+        "statuses": "^2.0.1",
+        "type-is": "^2.0.1",
+        "vary": "^1.1.2"
+      },
+      "engines": {
+        "node": ">= 18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/express-rate-limit": {
+      "version": "8.5.2",
+      "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.5.2.tgz",
+      "integrity": "sha512-5Kb34ipNX694DH48vN9irak1Qx30nb0PLYHXfJgw4YEjiC3ZEmZJhwOp+VfiCYwFzvFTdB9QkArYS5kXa2cx2A==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "ip-address": "^10.2.0"
+      },
+      "engines": {
+        "node": ">= 16"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/express-rate-limit"
+      },
+      "peerDependencies": {
+        "express": ">= 4.11"
+      }
+    },
+    "node_modules/express/node_modules/mime-db": {
+      "version": "1.54.0",
+      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz",
+      "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/express/node_modules/mime-types": {
+      "version": "3.0.2",
+      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz",
+      "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "mime-db": "^1.54.0"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
     "node_modules/extend": {
       "version": "3.0.2",
       "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz",
@@ -4174,6 +4606,23 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/fast-uri": {
+      "version": "3.1.2",
+      "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.2.tgz",
+      "integrity": "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/fastify"
+        },
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/fastify"
+        }
+      ],
+      "license": "BSD-3-Clause"
+    },
     "node_modules/fastq": {
       "version": "1.20.1",
       "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.20.1.tgz",
@@ -4264,6 +4713,28 @@
         "node": ">=8"
       }
     },
+    "node_modules/finalhandler": {
+      "version": "2.1.1",
+      "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-2.1.1.tgz",
+      "integrity": "sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "debug": "^4.4.0",
+        "encodeurl": "^2.0.0",
+        "escape-html": "^1.0.3",
+        "on-finished": "^2.4.1",
+        "parseurl": "^1.3.3",
+        "statuses": "^2.0.1"
+      },
+      "engines": {
+        "node": ">= 18.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
     "node_modules/find-up": {
       "version": "5.0.0",
       "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz",
@@ -4381,6 +4852,26 @@
         "node": ">=12.20.0"
       }
     },
+    "node_modules/forwarded": {
+      "version": "0.2.0",
+      "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
+      "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/fresh": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/fresh/-/fresh-2.0.0.tgz",
+      "integrity": "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
     "node_modules/fs-constants": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
@@ -4712,6 +5203,16 @@
         "node": ">= 0.4"
       }
     },
+    "node_modules/hono": {
+      "version": "4.12.23",
+      "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.23.tgz",
+      "integrity": "sha512-eIaZ9qDgu7XV0pxOCrg7/WhnQ6Ivm22UcxhXx/A3dcbqbbYgBEkc6e/J/s7j2tS96zoB0S9VBdLwQNCWwUo4LA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=16.9.0"
+      }
+    },
     "node_modules/html-escaper": {
       "version": "2.0.2",
       "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz",
@@ -4750,6 +5251,27 @@
         "url": "https://github.com/fb55/entities?sponsor=1"
       }
     },
+    "node_modules/http-errors": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz",
+      "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "depd": "~2.0.0",
+        "inherits": "~2.0.4",
+        "setprototypeof": "~1.2.0",
+        "statuses": "~2.0.2",
+        "toidentifier": "~1.0.1"
+      },
+      "engines": {
+        "node": ">= 0.8"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
     "node_modules/http-proxy-agent": {
       "version": "7.0.2",
       "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz",
@@ -4857,14 +5379,24 @@
       "license": "ISC"
     },
     "node_modules/ip-address": {
-      "version": "10.1.0",
-      "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz",
-      "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==",
+      "version": "10.2.0",
+      "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.2.0.tgz",
+      "integrity": "sha512-/+S6j4E9AHvW9SWMSEY9Xfy66O5PWvVEJ08O0y5JGyEKQpojb0K0GKpz/v5HJ/G0vi3D2sjGK78119oXZeE0qA==",
       "license": "MIT",
       "engines": {
         "node": ">= 12"
       }
     },
+    "node_modules/ipaddr.js": {
+      "version": "1.9.1",
+      "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
+      "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.10"
+      }
+    },
     "node_modules/is-arrayish": {
       "version": "0.2.1",
       "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz",
@@ -4910,6 +5442,13 @@
         "node": ">=0.12.0"
       }
     },
+    "node_modules/is-promise": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz",
+      "integrity": "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/is-property": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/is-property/-/is-property-1.0.2.tgz",
@@ -4990,6 +5529,16 @@
         "@pkgjs/parseargs": "^0.11.0"
       }
     },
+    "node_modules/jose": {
+      "version": "6.2.3",
+      "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.3.tgz",
+      "integrity": "sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw==",
+      "dev": true,
+      "license": "MIT",
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
     "node_modules/joycon": {
       "version": "3.1.1",
       "resolved": "https://registry.npmjs.org/joycon/-/joycon-3.1.1.tgz",
@@ -5072,6 +5621,13 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/json-schema-typed": {
+      "version": "8.0.2",
+      "resolved": "https://registry.npmjs.org/json-schema-typed/-/json-schema-typed-8.0.2.tgz",
+      "integrity": "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==",
+      "dev": true,
+      "license": "BSD-2-Clause"
+    },
     "node_modules/json-stable-stringify-without-jsonify": {
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz",
@@ -5624,6 +6180,29 @@
         "node": ">= 0.4"
       }
     },
+    "node_modules/media-typer": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-1.1.0.tgz",
+      "integrity": "sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/merge-descriptors": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-2.0.0.tgz",
+      "integrity": "sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
     "node_modules/merge2": {
       "version": "1.4.1",
       "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz",
@@ -5850,6 +6429,16 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/negotiator": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-1.0.0.tgz",
+      "integrity": "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
     "node_modules/netlify": {
       "version": "24.9.0",
       "resolved": "https://registry.npmjs.org/netlify/-/netlify-24.9.0.tgz",
@@ -10685,6 +11274,331 @@
         }
       }
     },
+    "node_modules/netlify/node_modules/@rollup/rollup-android-arm-eabi": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.59.0.tgz",
+      "integrity": "sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==",
+      "cpu": [
+        "arm"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "android"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-android-arm64": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.59.0.tgz",
+      "integrity": "sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q==",
+      "cpu": [
+        "arm64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "android"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-darwin-arm64": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.59.0.tgz",
+      "integrity": "sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg==",
+      "cpu": [
+        "arm64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "darwin"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-darwin-x64": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.59.0.tgz",
+      "integrity": "sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w==",
+      "cpu": [
+        "x64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "darwin"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-freebsd-arm64": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.59.0.tgz",
+      "integrity": "sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA==",
+      "cpu": [
+        "arm64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "freebsd"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-freebsd-x64": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.59.0.tgz",
+      "integrity": "sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg==",
+      "cpu": [
+        "x64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "freebsd"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-linux-arm-gnueabihf": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.59.0.tgz",
+      "integrity": "sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw==",
+      "cpu": [
+        "arm"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-linux-arm-musleabihf": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.59.0.tgz",
+      "integrity": "sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA==",
+      "cpu": [
+        "arm"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-linux-arm64-gnu": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.59.0.tgz",
+      "integrity": "sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA==",
+      "cpu": [
+        "arm64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-linux-arm64-musl": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.59.0.tgz",
+      "integrity": "sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA==",
+      "cpu": [
+        "arm64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-linux-loong64-gnu": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.59.0.tgz",
+      "integrity": "sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg==",
+      "cpu": [
+        "loong64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-linux-loong64-musl": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.59.0.tgz",
+      "integrity": "sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q==",
+      "cpu": [
+        "loong64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-linux-ppc64-gnu": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.59.0.tgz",
+      "integrity": "sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA==",
+      "cpu": [
+        "ppc64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-linux-ppc64-musl": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.59.0.tgz",
+      "integrity": "sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA==",
+      "cpu": [
+        "ppc64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-linux-riscv64-gnu": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.59.0.tgz",
+      "integrity": "sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg==",
+      "cpu": [
+        "riscv64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-linux-riscv64-musl": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.59.0.tgz",
+      "integrity": "sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg==",
+      "cpu": [
+        "riscv64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-linux-s390x-gnu": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.59.0.tgz",
+      "integrity": "sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w==",
+      "cpu": [
+        "s390x"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-linux-x64-gnu": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.59.0.tgz",
+      "integrity": "sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg==",
+      "cpu": [
+        "x64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-linux-x64-musl": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.59.0.tgz",
+      "integrity": "sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg==",
+      "cpu": [
+        "x64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-openbsd-x64": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.59.0.tgz",
+      "integrity": "sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ==",
+      "cpu": [
+        "x64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "openbsd"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-openharmony-arm64": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.59.0.tgz",
+      "integrity": "sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA==",
+      "cpu": [
+        "arm64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "openharmony"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-win32-arm64-msvc": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.59.0.tgz",
+      "integrity": "sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A==",
+      "cpu": [
+        "arm64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "win32"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-win32-ia32-msvc": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.59.0.tgz",
+      "integrity": "sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA==",
+      "cpu": [
+        "ia32"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "win32"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-win32-x64-gnu": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.59.0.tgz",
+      "integrity": "sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA==",
+      "cpu": [
+        "x64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "win32"
+      ]
+    },
+    "node_modules/netlify/node_modules/@rollup/rollup-win32-x64-msvc": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.59.0.tgz",
+      "integrity": "sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==",
+      "cpu": [
+        "x64"
+      ],
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "win32"
+      ]
+    },
     "node_modules/netlify/node_modules/@sec-ant/readable-stream": {
       "version": "0.4.1",
       "license": "MIT"
@@ -13629,6 +14543,17 @@
         "safe-buffer": "~5.1.0"
       }
     },
+    "node_modules/netlify/node_modules/fsevents": {
+      "version": "2.3.3",
+      "extraneous": true,
+      "license": "MIT",
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
+      }
+    },
     "node_modules/netlify/node_modules/function-bind": {
       "version": "1.1.2",
       "license": "MIT",
@@ -17066,6 +17991,51 @@
       "version": "1.4.1",
       "license": "MIT"
     },
+    "node_modules/netlify/node_modules/rollup": {
+      "version": "4.59.0",
+      "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.59.0.tgz",
+      "integrity": "sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg==",
+      "extraneous": true,
+      "license": "MIT",
+      "dependencies": {
+        "@types/estree": "1.0.8"
+      },
+      "bin": {
+        "rollup": "dist/bin/rollup"
+      },
+      "engines": {
+        "node": ">=18.0.0",
+        "npm": ">=8.0.0"
+      },
+      "optionalDependencies": {
+        "@rollup/rollup-android-arm-eabi": "4.59.0",
+        "@rollup/rollup-android-arm64": "4.59.0",
+        "@rollup/rollup-darwin-arm64": "4.59.0",
+        "@rollup/rollup-darwin-x64": "4.59.0",
+        "@rollup/rollup-freebsd-arm64": "4.59.0",
+        "@rollup/rollup-freebsd-x64": "4.59.0",
+        "@rollup/rollup-linux-arm-gnueabihf": "4.59.0",
+        "@rollup/rollup-linux-arm-musleabihf": "4.59.0",
+        "@rollup/rollup-linux-arm64-gnu": "4.59.0",
+        "@rollup/rollup-linux-arm64-musl": "4.59.0",
+        "@rollup/rollup-linux-loong64-gnu": "4.59.0",
+        "@rollup/rollup-linux-loong64-musl": "4.59.0",
+        "@rollup/rollup-linux-ppc64-gnu": "4.59.0",
+        "@rollup/rollup-linux-ppc64-musl": "4.59.0",
+        "@rollup/rollup-linux-riscv64-gnu": "4.59.0",
+        "@rollup/rollup-linux-riscv64-musl": "4.59.0",
+        "@rollup/rollup-linux-s390x-gnu": "4.59.0",
+        "@rollup/rollup-linux-x64-gnu": "4.59.0",
+        "@rollup/rollup-linux-x64-musl": "4.59.0",
+        "@rollup/rollup-openbsd-x64": "4.59.0",
+        "@rollup/rollup-openharmony-arm64": "4.59.0",
+        "@rollup/rollup-win32-arm64-msvc": "4.59.0",
+        "@rollup/rollup-win32-ia32-msvc": "4.59.0",
+        "@rollup/rollup-win32-x64-gnu": "4.59.0",
+        "@rollup/rollup-win32-x64-msvc": "4.59.0",
+        "fsevents": "~2.3.2"
+      }
+    },
     "node_modules/netlify/node_modules/router": {
       "version": "2.2.0",
       "license": "MIT",
@@ -18924,6 +19894,19 @@
       ],
       "license": "MIT"
     },
+    "node_modules/on-finished": {
+      "version": "2.4.1",
+      "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz",
+      "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "ee-first": "1.1.1"
+      },
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
     "node_modules/once": {
       "version": "1.4.0",
       "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
@@ -19135,6 +20118,16 @@
         "url": "https://github.com/fb55/entities?sponsor=1"
       }
     },
+    "node_modules/parseurl": {
+      "version": "1.3.3",
+      "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
+      "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
     "node_modules/path-exists": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@@ -19179,6 +20172,17 @@
       "dev": true,
       "license": "ISC"
     },
+    "node_modules/path-to-regexp": {
+      "version": "8.4.2",
+      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.4.2.tgz",
+      "integrity": "sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA==",
+      "dev": true,
+      "license": "MIT",
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
     "node_modules/pathe": {
       "version": "2.0.3",
       "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz",
@@ -19310,6 +20314,16 @@
         "node": ">= 6"
       }
     },
+    "node_modules/pkce-challenge": {
+      "version": "5.0.1",
+      "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.1.tgz",
+      "integrity": "sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=16.20.0"
+      }
+    },
     "node_modules/pkg-types": {
       "version": "1.3.1",
       "resolved": "https://registry.npmjs.org/pkg-types/-/pkg-types-1.3.1.tgz",
@@ -19503,6 +20517,20 @@
         "node": ">=12.0.0"
       }
     },
+    "node_modules/proxy-addr": {
+      "version": "2.0.7",
+      "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
+      "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "forwarded": "0.2.0",
+        "ipaddr.js": "1.9.1"
+      },
+      "engines": {
+        "node": ">= 0.10"
+      }
+    },
     "node_modules/proxy-agent": {
       "version": "6.5.0",
       "resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.5.0.tgz",
@@ -19623,6 +20651,49 @@
       ],
       "license": "MIT"
     },
+    "node_modules/range-parser": {
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz",
+      "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/raw-body": {
+      "version": "3.0.2",
+      "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.2.tgz",
+      "integrity": "sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "bytes": "~3.1.2",
+        "http-errors": "~2.0.1",
+        "iconv-lite": "~0.7.0",
+        "unpipe": "~1.0.0"
+      },
+      "engines": {
+        "node": ">= 0.10"
+      }
+    },
+    "node_modules/raw-body/node_modules/iconv-lite": {
+      "version": "0.7.2",
+      "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz",
+      "integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "safer-buffer": ">= 2.1.2 < 3.0.0"
+      },
+      "engines": {
+        "node": ">=0.10.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
     "node_modules/rc": {
       "version": "1.2.8",
       "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz",
@@ -19684,6 +20755,16 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/require-from-string": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz",
+      "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
     "node_modules/resolve-from": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz",
@@ -19807,6 +20888,23 @@
         "fsevents": "~2.3.2"
       }
     },
+    "node_modules/router": {
+      "version": "2.2.0",
+      "resolved": "https://registry.npmjs.org/router/-/router-2.2.0.tgz",
+      "integrity": "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "debug": "^4.4.0",
+        "depd": "^2.0.0",
+        "is-promise": "^4.0.0",
+        "parseurl": "^1.3.3",
+        "path-to-regexp": "^8.0.0"
+      },
+      "engines": {
+        "node": ">= 18"
+      }
+    },
     "node_modules/rss-parser": {
       "version": "3.13.0",
       "resolved": "https://registry.npmjs.org/rss-parser/-/rss-parser-3.13.0.tgz",
@@ -19904,6 +21002,87 @@
         "node": ">=10"
       }
     },
+    "node_modules/send": {
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/send/-/send-1.2.1.tgz",
+      "integrity": "sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "debug": "^4.4.3",
+        "encodeurl": "^2.0.0",
+        "escape-html": "^1.0.3",
+        "etag": "^1.8.1",
+        "fresh": "^2.0.0",
+        "http-errors": "^2.0.1",
+        "mime-types": "^3.0.2",
+        "ms": "^2.1.3",
+        "on-finished": "^2.4.1",
+        "range-parser": "^1.2.1",
+        "statuses": "^2.0.2"
+      },
+      "engines": {
+        "node": ">= 18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/send/node_modules/mime-db": {
+      "version": "1.54.0",
+      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz",
+      "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/send/node_modules/mime-types": {
+      "version": "3.0.2",
+      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz",
+      "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "mime-db": "^1.54.0"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/serve-static": {
+      "version": "2.2.1",
+      "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-2.2.1.tgz",
+      "integrity": "sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "encodeurl": "^2.0.0",
+        "escape-html": "^1.0.3",
+        "parseurl": "^1.3.3",
+        "send": "^1.2.0"
+      },
+      "engines": {
+        "node": ">= 18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/setprototypeof": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
+      "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==",
+      "dev": true,
+      "license": "ISC"
+    },
     "node_modules/shebang-command": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
@@ -20172,6 +21351,16 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/statuses": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz",
+      "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
     "node_modules/std-env": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/std-env/-/std-env-4.0.0.tgz",
@@ -20429,6 +21618,16 @@
         "node": ">=8.0"
       }
     },
+    "node_modules/toidentifier": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz",
+      "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.6"
+      }
+    },
     "node_modules/toolpack-sdk": {
       "resolved": "packages/toolpack-sdk",
       "link": true
@@ -20713,6 +21912,66 @@
         "node": ">= 0.8.0"
       }
     },
+    "node_modules/type-is": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/type-is/-/type-is-2.1.0.tgz",
+      "integrity": "sha512-faYHw0anBbc/kWF3zFTEnxSFOAGUX9GFbOBthvDdLsIlEoWOFOtS0zgCiQYwIskL9iGXZL3kAXD8OoZ4GmMATA==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "content-type": "^2.0.0",
+        "media-typer": "^1.1.0",
+        "mime-types": "^3.0.0"
+      },
+      "engines": {
+        "node": ">= 18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/type-is/node_modules/content-type": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/content-type/-/content-type-2.0.0.tgz",
+      "integrity": "sha512-j/O/d7GcZCyNl7/hwZAb606rzqkyvaDctLmckbxLzHvFBzTJHuGEdodATcP3yIRoDrLHkIATJuvzbFlp/ki2cQ==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/type-is/node_modules/mime-db": {
+      "version": "1.54.0",
+      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz",
+      "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/type-is/node_modules/mime-types": {
+      "version": "3.0.2",
+      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz",
+      "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "mime-db": "^1.54.0"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
     "node_modules/typed-query-selector": {
       "version": "2.12.1",
       "resolved": "https://registry.npmjs.org/typed-query-selector/-/typed-query-selector-2.12.1.tgz",
@@ -20779,6 +22038,16 @@
       "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==",
       "license": "MIT"
     },
+    "node_modules/unpipe": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
+      "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
     "node_modules/uri-js": {
       "version": "4.4.1",
       "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz",
@@ -20816,6 +22085,16 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/vary": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
+      "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
     "node_modules/vite": {
       "version": "8.0.3",
       "resolved": "https://registry.npmjs.org/vite/-/vite-8.0.3.tgz",
@@ -21250,9 +22529,19 @@
         "url": "https://github.com/sponsors/colinhacks"
       }
     },
+    "node_modules/zod-to-json-schema": {
+      "version": "3.25.2",
+      "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.2.tgz",
+      "integrity": "sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA==",
+      "dev": true,
+      "license": "ISC",
+      "peerDependencies": {
+        "zod": "^3.25.28 || ^4"
+      }
+    },
     "packages/toolpack-agents": {
       "name": "@toolpack-sdk/agents",
-      "version": "2.1.0",
+      "version": "2.1.1",
       "license": "Apache-2.0",
       "dependencies": {
         "cron-parser": "^5.5.0"
@@ -21269,11 +22558,11 @@
         "node": ">=20"
       },
       "peerDependencies": {
-        "@toolpack-sdk/knowledge": "^2.0.0",
+        "@toolpack-sdk/knowledge": "^2.1.1",
         "better-sqlite3": "^12.6.2",
         "discord.js": "^14.x",
         "nodemailer": "^6.x",
-        "toolpack-sdk": "^2.0.0",
+        "toolpack-sdk": "^2.1.1",
         "twilio": "^5.x"
       },
       "peerDependenciesMeta": {
@@ -21296,7 +22585,7 @@
     },
     "packages/toolpack-knowledge": {
       "name": "@toolpack-sdk/knowledge",
-      "version": "2.1.0",
+      "version": "2.1.1",
       "license": "Apache-2.0",
       "dependencies": {
         "better-sqlite3": "^12.6.2",
@@ -21317,7 +22606,7 @@
       }
     },
     "packages/toolpack-sdk": {
-      "version": "2.1.0",
+      "version": "2.1.1",
       "license": "Apache-2.0",
       "dependencies": {
         "@anthropic-ai/sdk": "^0.73.0",
@@ -21341,6 +22630,7 @@
       },
       "devDependencies": {
         "@eslint/js": "^9.39.2",
+        "@modelcontextprotocol/sdk": "^1.29.0",
         "@types/babel__core": "^7.20.5",
         "@types/babel__traverse": "^7.28.0",
         "@types/better-sqlite3": "^7.6.13",
@@ -21361,6 +22651,14 @@
       },
       "engines": {
         "node": ">=20"
+      },
+      "peerDependencies": {
+        "@modelcontextprotocol/sdk": "^1.29.0"
+      },
+      "peerDependenciesMeta": {
+        "@modelcontextprotocol/sdk": {
+          "optional": true
+        }
       }
     },
     "packages/toolpack-sdk/node_modules/@google/genai": {
diff --git a/packages/toolpack-agents/README.md b/packages/toolpack-agents/README.md
index d92d2fa..08afa8c 100644
--- a/packages/toolpack-agents/README.md
+++ b/packages/toolpack-agents/README.md
@@ -8,12 +8,14 @@ Build production-ready AI agents with channels, workflows, and event-driven arch
 ## Features
 
 - **4 Built-in Agents** — Research, Coding, Data, Browser
-- **7 Channel Types** — Slack, Telegram, Discord, Email, SMS, Webhook, Scheduled
+- **8 Channel Types** — Slack, Telegram, Discord, Email, SMS, Webhook, Scheduled, MCP
 - **Event-Driven** — Full lifecycle hooks and events
 - **Human-in-the-Loop** — `ask()` support for two-way channels
 - **Knowledge Integration** — Built-in RAG support with knowledge bases
+- **Agent Mind** — Persistent cognitive layer: goals, beliefs, reflections, cross-run recall
+- **Evals** — `EvalDataset`, `EvalRunner`, 4 scorer types, regression reports
+- **OTel Tracing** — OpenTelemetry interceptor for distributed traces
 - **Type-Safe** — Full TypeScript support
-- **Production-Ready** — 573 tests passing
 
 ## Installation
 
@@ -239,6 +241,27 @@ const smsOutbound = new SMSChannel({
 });
 ```
 
+### McpChannel (Two-way)
+
+Exposes a Toolpack agent as a tool in an MCP server. The agent appears in `tools/list` as `agent.<name>` and is callable by any MCP client.
+
+```typescript
+import { McpChannel } from '@toolpack-sdk/agents';
+import { Toolpack } from 'toolpack-sdk';
+
+const ch = new McpChannel();
+const agent = new PrReviewerAgent({ channels: [ch] });
+await agent.start();
+
+const sdk = await Toolpack.init({ provider: 'anthropic', tools: true });
+await sdk.startMcpServer({
+  transport: 'stdio',   // or 'http' with port
+  agents: [ch.asAgentDefinition(agent)],
+});
+```
+
+`ch.asAgentDefinition(agent)` produces the entry that `startMcpServer` registers in `tools/list`. Each MCP `tools/call` for `agent.<name>` is routed through the channel to `agent.invokeAgent()` and the output is returned as the tool result.
+
 ## Creating Custom Agents
 
 Extend `BaseAgent` to create custom agents:
@@ -763,6 +786,7 @@ class MyAgent extends BaseAgent {
 | `createCaptureInterceptor` | Persist inbound and outbound messages to conversation history (auto-registered) |
 | `createDepthGuardInterceptor` | Reject delegation chains that exceed a configured depth |
 | `createTracerInterceptor` | Structured logging of each chain hop for debugging |
+| `createOTelTracerInterceptor` | OpenTelemetry span per invocation — compatible with any OTel-compliant backend |
 
 ## Capabilities
 
@@ -818,14 +842,58 @@ const result = await summarizer.invokeAgent({
 const summary = JSON.parse(result.output) as SummarizerOutput;
 ```
 
+## Evals — LLM Quality Evaluation
+
+Unit tests verify wiring; evals verify agent **quality**. Use the eval primitives to build regression suites and track answer quality over time.
+
+```typescript
+import {
+  EvalDataset,
+  EvalRunner,
+  ContainsScorer,
+  LLMJudgeScorer,
+  compareEvalRuns,
+  formatEvalReport,
+} from '@toolpack-sdk/agents';
+
+const dataset = new EvalDataset([
+  { id: 'q1', input: 'What is 2+2?', expectedOutput: '4' },
+  { id: 'q2', input: 'Capital of France?', expectedOutput: 'Paris' },
+]);
+
+const runner = new EvalRunner({
+  agent: myAgent,
+  dataset,
+  scorers: [new ContainsScorer()],
+});
+
+const run = await runner.run();
+console.log(`Average score: ${(run.averageScore * 100).toFixed(1)}%`);
+```
+
+**Four built-in scorers:**
+
+| Scorer | When to use |
+|---|---|
+| `ExactMatchScorer` | Deterministic outputs — exact string match |
+| `ContainsScorer` | Output must contain the expected string |
+| `LLMJudgeScorer` | Open-ended answers — ask an LLM to grade on 0–1 |
+| `CustomScorer` | Any custom scoring logic |
+
+**Regression detection:**
+
+```typescript
+const report = compareEvalRuns(baselineRun, currentRun);
+console.log(formatEvalReport(report));
+expect(report.regressions).toHaveLength(0); // CI gate
+```
+
 ## Testing
 
 ```bash
 npm test
 ```
 
-**Test Coverage:** 573 tests passing across 29 test files.
-
 ## License
 
 Apache 2.0 © Toolpack SDK
diff --git a/packages/toolpack-agents/docs/README.md b/packages/toolpack-agents/docs/README.md
index 34cf672..c9f4c97 100644
--- a/packages/toolpack-agents/docs/README.md
+++ b/packages/toolpack-agents/docs/README.md
@@ -54,7 +54,7 @@
 |---|---|
 | [agents.md](agents.md) | Creating agents — `BaseAgent` API, built-in agents, lifecycle |
 | [registry.md](registry.md) | `AgentRegistry` — multi-agent coordination |
-| [channels.md](channels.md) | All 7 channel integrations (Slack, Discord, Telegram, Webhook, Scheduled, Email, SMS) |
+| [channels.md](channels.md) | All 8 channel integrations (Slack, Discord, Telegram, Webhook, Scheduled, Email, SMS, MCP) |
 | [scheduler.md](scheduler.md) | `SchedulerStore` and `createSchedulerTools` — persistent job scheduling reference |
 | [mind.md](mind.md) | `AgentMind` — persistent cognitive layer: goals, beliefs, reflections |
 | [conversation-history.md](conversation-history.md) | Conversation storage, `assemblePrompt`, addressed-only mode |
diff --git a/packages/toolpack-agents/docs/channels.md b/packages/toolpack-agents/docs/channels.md
index df87027..260e043 100644
--- a/packages/toolpack-agents/docs/channels.md
+++ b/packages/toolpack-agents/docs/channels.md
@@ -434,6 +434,59 @@ const sms = new SMSChannel({
 
 ---
 
+## McpChannel
+
+`McpChannel` exposes a Toolpack agent as a tool in an MCP server. When an MCP client calls `agent.<name>`, the channel delivers the input to the agent and returns its output as the tool result.
+
+`isTriggerChannel = false` — the MCP client drives the conversation, so `ask()` works normally.
+
+### Configuration
+
+```typescript
+import { McpChannel } from '@toolpack-sdk/agents';
+
+const ch = new McpChannel({
+  // Optional: maximum milliseconds to wait for the agent's reply (default: 120000)
+  timeout: 60_000,
+});
+```
+
+### Wiring to an agent and MCP server
+
+```typescript
+import { McpChannel } from '@toolpack-sdk/agents';
+import { Toolpack } from 'toolpack-sdk';
+
+const ch = new McpChannel();
+const agent = new PrReviewerAgent({ channels: [ch] });
+await agent.start();
+
+const sdk = await Toolpack.init({ provider: 'anthropic', tools: true });
+
+await sdk.startMcpServer({
+  transport: 'stdio',     // or 'http'
+  agents: [ch.asAgentDefinition(agent)],
+});
+```
+
+`ch.asAgentDefinition(agent)` produces the `McpAgentDefinition` object that `startMcpServer` uses to register the agent in `tools/list` as `agent.<agentName>`.
+
+### `McpChannelConfig`
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `timeout` | `number` | `120000` | Maximum milliseconds to wait for the agent to respond before the tool call rejects. |
+
+### Flow
+
+1. MCP client calls `tools/call` with `name: 'agent.<agentName>'`
+2. `startMcpServer` routes the call to `ch.asAgentDefinition(agent).invoke(args)`
+3. `McpChannel` wraps args into an `AgentInput` and calls `agent.invokeAgent()`
+4. Agent runs, returns `AgentResult`
+5. Output is returned to the MCP client as a text tool result
+
+---
+
 ## Custom channels
 
 Implement `ChannelInterface` (or extend `BaseChannel`) to connect any data source:
diff --git a/packages/toolpack-agents/docs/testing.md b/packages/toolpack-agents/docs/testing.md
index 0edb8bc..a9ce89f 100644
--- a/packages/toolpack-agents/docs/testing.md
+++ b/packages/toolpack-agents/docs/testing.md
@@ -563,3 +563,120 @@ it('emits agent:error on failure', async () => {
   events.stop();
 });
 ```
+
+---
+
+## Evals — LLM quality evaluation
+
+Unit tests verify agent wiring; evals verify agent **quality** — does the agent give correct, helpful answers on real inputs? The eval primitives let you build regression suites and track quality over time.
+
+### Import path
+
+```typescript
+import {
+  EvalDataset,
+  EvalRunner,
+  ExactMatchScorer,
+  ContainsScorer,
+  LLMJudgeScorer,
+  CustomScorer,
+  compareEvalRuns,
+  formatEvalReport,
+} from '@toolpack-sdk/agents';
+```
+
+### Quick start
+
+```typescript
+import { EvalDataset, EvalRunner, ContainsScorer } from '@toolpack-sdk/agents';
+
+const dataset = new EvalDataset([
+  {
+    id: 'greet-1',
+    input: 'Say hello',
+    expectedOutput: 'hello',
+  },
+  {
+    id: 'summarise-1',
+    input: 'Summarise: The sky is blue.',
+    expectedOutput: 'blue',
+  },
+]);
+
+const runner = new EvalRunner({
+  agent: myAgent,
+  dataset,
+  scorers: [new ContainsScorer()],
+});
+
+const run = await runner.run();
+console.log(`Score: ${(run.averageScore * 100).toFixed(1)}%`);
+```
+
+### `EvalDataset`
+
+Holds a list of `EvalCase` objects.
+
+```typescript
+interface EvalCase {
+  id: string;              // unique identifier
+  input: string;           // message sent to the agent
+  expectedOutput: string;  // used by scorers
+  metadata?: Record<string, unknown>;
+}
+```
+
+```typescript
+const dataset = new EvalDataset(cases);
+dataset.add({ id: 'c3', input: 'test', expectedOutput: 'expected' });
+const subset = dataset.filter(c => c.id.startsWith('greet'));
+```
+
+### `EvalRunner`
+
+```typescript
+const runner = new EvalRunner({
+  agent,                    // BaseAgent instance
+  dataset,                  // EvalDataset
+  scorers,                  // EvalScorer[]
+  concurrency: 1,           // optional — parallel cases (default: 1)
+});
+
+const run: EvalRun = await runner.run();
+```
+
+### Scorers
+
+| Scorer | Description |
+|---|---|
+| `ExactMatchScorer` | Score 1.0 if output === expectedOutput (trimmed, case-insensitive by default) |
+| `ContainsScorer` | Score 1.0 if output contains expectedOutput |
+| `LLMJudgeScorer` | Ask an LLM to score the output on a 0–1 scale |
+| `CustomScorer` | Your own scoring function |
+
+```typescript
+// LLM judge
+const judge = new LLMJudgeScorer({
+  sdk: myToolpack,
+  prompt: 'Is this response factually correct and helpful? Score 0-1.',
+});
+
+// Custom scorer
+const lengthScorer = new CustomScorer({
+  name: 'brevity',
+  score: async ({ output, expectedOutput }) =>
+    output.length <= expectedOutput.length ? 1.0 : 0.0,
+});
+```
+
+### Regression reports
+
+```typescript
+import { compareEvalRuns, formatEvalReport } from '@toolpack-sdk/agents';
+
+const report = compareEvalRuns(baselineRun, currentRun);
+console.log(formatEvalReport(report));
+
+// CI gate
+expect(report.regressions).toHaveLength(0);
+```
diff --git a/packages/toolpack-agents/package.json b/packages/toolpack-agents/package.json
index e705232..d54b331 100644
--- a/packages/toolpack-agents/package.json
+++ b/packages/toolpack-agents/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@toolpack-sdk/agents",
   "version": "2.1.1",
-  "description": "Agent layer for the Toolpack SDK - build, compose, and deploy AI agents with a consistent, extensible pattern",
+  "description": "Production AI agents for Toolpack SDK — 8 channel integrations (Slack, Discord, Telegram, SMS, Email, Webhook, Scheduled, MCP), AgentMind persistent cognitive layer (goals, beliefs, reflections), interceptors, evals, and multi-agent coordination",
   "engines": {
     "node": ">=20"
   },
@@ -59,15 +59,20 @@
     "publish:npm": "npm run build && npm run test && npm publish"
   },
   "keywords": [
+    "ai-agent",
+    "production-ai",
+    "agent-mind",
+    "cognitive-layer",
     "ai",
     "llm",
-    "agent",
-    "ai-agent",
-    "slack",
-    "telegram",
+    "slack-bot",
+    "discord-bot",
+    "telegram-bot",
+    "sms",
+    "email-agent",
     "webhook",
-    "cron",
     "scheduler",
+    "multi-agent",
     "typescript",
     "sdk",
     "toolpack"
@@ -83,6 +88,7 @@
     "url": "https://github.com/toolpack-ai/toolpack-sdk/issues"
   },
   "peerDependencies": {
+    "@opentelemetry/api": "^1.x",
     "@toolpack-sdk/knowledge": "^2.1.1",
     "better-sqlite3": "^12.6.2",
     "discord.js": "^14.x",
@@ -91,6 +97,9 @@
     "twilio": "^5.x"
   },
   "peerDependenciesMeta": {
+    "@opentelemetry/api": {
+      "optional": true
+    },
     "@toolpack-sdk/knowledge": {
       "optional": true
     },
diff --git a/packages/toolpack-agents/src/channels/index.ts b/packages/toolpack-agents/src/channels/index.ts
index 0aec180..868c436 100644
--- a/packages/toolpack-agents/src/channels/index.ts
+++ b/packages/toolpack-agents/src/channels/index.ts
@@ -6,3 +6,5 @@ export { TelegramChannel, TelegramChannelConfig } from './telegram-channel.js';
 export { DiscordChannel, DiscordChannelConfig } from './discord-channel.js';
 export { EmailChannel, EmailChannelConfig } from './email-channel.js';
 export { SMSChannel, SMSChannelConfig } from './sms-channel.js';
+export { McpChannel } from './mcp-channel.js';
+export type { McpChannelConfig } from './mcp-channel.js';
diff --git a/packages/toolpack-agents/src/channels/mcp-channel.test.ts b/packages/toolpack-agents/src/channels/mcp-channel.test.ts
new file mode 100644
index 0000000..69816f3
--- /dev/null
+++ b/packages/toolpack-agents/src/channels/mcp-channel.test.ts
@@ -0,0 +1,124 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { McpChannel } from './mcp-channel.js';
+import type { AgentInput, AgentOutput } from '../agent/types.js';
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+function makeChannel(timeout?: number) {
+    return new McpChannel({ timeout });
+}
+
+/** Wire a handler that immediately calls send() with the given output. */
+function wireHandler(ch: McpChannel, output: string) {
+    ch.onMessage(async (_input: AgentInput) => {
+        await ch.send({ output });
+    });
+}
+
+// ─── Tests ────────────────────────────────────────────────────────────────────
+
+describe('McpChannel', () => {
+    describe('listen()', () => {
+        it('is a no-op and does not throw', () => {
+            const ch = makeChannel();
+            expect(() => ch.listen()).not.toThrow();
+        });
+    });
+
+    describe('normalize()', () => {
+        it('uses a string message field directly', () => {
+            const ch = makeChannel();
+            const input = ch.normalize({ message: 'review this PR' });
+            expect(input.message).toBe('review this PR');
+        });
+
+        it('JSON-stringifies non-string args as message', () => {
+            const ch = makeChannel();
+            const input = ch.normalize({ pr_url: 'https://github.com/...' });
+            expect(input.message).toBe(JSON.stringify({ pr_url: 'https://github.com/...' }));
+        });
+
+        it('sets data to the raw args', () => {
+            const ch = makeChannel();
+            const args = { pr_url: 'https://github.com/...', depth: 3 };
+            const input = ch.normalize(args);
+            expect(input.data).toEqual(args);
+        });
+
+        it('generates a unique conversationId per call', () => {
+            const ch = makeChannel();
+            const a = ch.normalize({});
+            const b = ch.normalize({});
+            expect(a.conversationId).not.toBe(b.conversationId);
+        });
+    });
+
+    describe('trigger()', () => {
+        it('resolves with agent output when send() is called', async () => {
+            const ch = makeChannel();
+            wireHandler(ch, 'LGTM — no issues found');
+            const result = await ch.trigger({ pr_url: 'https://github.com/...' });
+            expect(result).toBe('LGTM — no issues found');
+        });
+
+        it('passes normalized input to the handler', async () => {
+            const ch = makeChannel();
+            let received: AgentInput | undefined;
+            ch.onMessage(async (input) => {
+                received = input;
+                await ch.send({ output: 'ok' });
+            });
+            await ch.trigger({ message: 'hello' });
+            expect(received?.message).toBe('hello');
+            expect(received?.data).toEqual({ message: 'hello' });
+        });
+
+        it('rejects when no handler is registered (timeout fires)', async () => {
+            const ch = makeChannel(50); // short timeout so test completes fast
+            // No handler registered — handleMessage is a no-op, send() never called
+            await expect(ch.trigger({})).rejects.toThrow(/50ms/);
+        });
+
+        it('rejects after timeout when agent never calls send()', async () => {
+            const ch = makeChannel(50); // 50ms timeout for fast test
+            ch.onMessage(async () => { /* never calls send */ });
+            await expect(ch.trigger({})).rejects.toThrow(/50ms/);
+        });
+
+        it('rejects when handleMessage throws', async () => {
+            const ch = makeChannel();
+            ch.onMessage(async () => { throw new Error('agent crashed'); });
+            await expect(ch.trigger({})).rejects.toThrow('agent crashed');
+        });
+    });
+
+    describe('asAgentDefinition()', () => {
+        it('returns correct name and description', () => {
+            const ch = makeChannel();
+            const def = ch.asAgentDefinition({ name: 'pr_reviewer', description: 'Reviews PRs' });
+            expect(def.name).toBe('pr_reviewer');
+            expect(def.description).toBe('Reviews PRs');
+        });
+
+        it('includes inputSchema when provided', () => {
+            const ch = makeChannel();
+            const schema = { type: 'object', properties: { pr_url: { type: 'string' } } };
+            const def = ch.asAgentDefinition({ name: 'x', description: 'y' }, schema);
+            expect(def.inputSchema).toEqual(schema);
+        });
+
+        it('omits inputSchema when not provided', () => {
+            const ch = makeChannel();
+            const def = ch.asAgentDefinition({ name: 'x', description: 'y' });
+            expect(def.inputSchema).toBeUndefined();
+        });
+
+        it('invoke() delegates to trigger()', async () => {
+            const ch = makeChannel();
+            wireHandler(ch, 'done');
+            const def = ch.asAgentDefinition({ name: 'x', description: 'y' });
+            const result = await def.invoke({ task: 'test' });
+            expect(result).toBe('done');
+        });
+    });
+});
diff --git a/packages/toolpack-agents/src/channels/mcp-channel.ts b/packages/toolpack-agents/src/channels/mcp-channel.ts
new file mode 100644
index 0000000..9763119
--- /dev/null
+++ b/packages/toolpack-agents/src/channels/mcp-channel.ts
@@ -0,0 +1,124 @@
+import { BaseChannel } from './base-channel.js';
+import type { AgentInput, AgentOutput } from '../agent/types.js';
+
+export interface McpChannelConfig {
+    /**
+     * Maximum milliseconds to wait for the agent to respond.
+     * Default: 120_000 (2 minutes).
+     */
+    timeout?: number;
+}
+
+/**
+ * Channel that connects a Toolpack agent to an MCP server as a tool.
+ *
+ * Unlike other channels (Slack, Webhook) this channel does not own a server or
+ * socket. Instead it exposes a `trigger()` method that the MCP tools/call handler
+ * calls directly. The agent runs and sends its output back through `send()`, which
+ * resolves the Promise that `trigger()` is waiting on.
+ *
+ * Usage:
+ * ```typescript
+ * const ch = new McpChannel();
+ * const agent = new PrReviewerAgent({ channels: [ch] });
+ * await agent.start();
+ *
+ * await sdk.startMcpServer({
+ *   transport: 'stdio',
+ *   agents: [ch.asAgentDefinition(agent)],
+ * });
+ * ```
+ *
+ * ⚠ One McpChannel handles one call at a time. If a second tools/call request
+ * arrives while another is still in flight, its pendingResolve overwrites the
+ * first one: the next agent reply resolves the second call, and the first call
+ * is left to reject on timeout. Use one McpChannel per agent and do not overlap calls.
+ */
+export class McpChannel extends BaseChannel {
+    readonly isTriggerChannel = false;
+
+    private readonly _timeout: number;
+    private _pendingResolve?: (output: AgentOutput) => void;
+
+    constructor(config: McpChannelConfig = {}) {
+        super();
+        this._timeout = config.timeout ?? 120_000;
+    }
+
+    /**
+     * No-op — McpChannel is driven by trigger(), not a background listener.
+     */
+    listen(): void { /* intentional no-op */ }
+
+    /**
+     * Resolves the pending trigger() Promise with the agent's output.
+     */
+    async send(output: AgentOutput): Promise<void> {
+        this._pendingResolve?.(output);
+        this._pendingResolve = undefined;
+    }
+
+    /**
+     * Convert raw MCP arguments into AgentInput.
+     * If args contains a string 'message' field it is used as the message;
+     * otherwise the entire args object is JSON-stringified as the message.
+     */
+    normalize(incoming: unknown): AgentInput {
+        const args = incoming as Record<string, unknown>;
+        const message = typeof args['message'] === 'string'
+            ? args['message']
+            : JSON.stringify(args);
+        return {
+            message,
+            data: args,
+            conversationId: `mcp-${Date.now()}-${Math.random().toString(36).slice(2, 7)}`,
+        };
+    }
+
+    /**
+     * Called by the MCP tools/call handler.
+     * Triggers the agent and waits for it to respond via send().
+     * Rejects if the agent does not respond within the configured timeout.
+     */
+    async trigger(args: Record<string, unknown>): Promise<string> {
+        const input = this.normalize(args);
+
+        return new Promise<string>((resolve, reject) => {
+            const timer = setTimeout(() => {
+                this._pendingResolve = undefined;
+                reject(new Error(`McpChannel: agent did not respond within ${this._timeout}ms`));
+            }, this._timeout);
+
+            this._pendingResolve = (output: AgentOutput) => {
+                clearTimeout(timer);
+                resolve(output.output);
+            };
+
+            // Fire-and-forget — the agent will call send() when done,
+            // which resolves the Promise above.
+            this.handleMessage(input).catch(err => {
+                clearTimeout(timer);
+                this._pendingResolve = undefined;
+                reject(err instanceof Error ? err : new Error(String(err)));
+            });
+        });
+    }
+
+    /**
+     * Produce an McpAgentDefinition suitable for startMcpServer({ agents: [...] }).
+     *
+     * @param agent  Object with name and description (typically a BaseAgent instance).
+     * @param inputSchema  Optional JSON Schema for the tool's input parameters.
+     */
+    asAgentDefinition(
+        agent: { name: string; description: string },
+        inputSchema?: Record<string, unknown>,
+    ) {
+        return {
+            name: agent.name,
+            description: agent.description,
+            ...(inputSchema !== undefined && { inputSchema }),
+            invoke: (args: Record<string, unknown>) => this.trigger(args),
+        };
+    }
+}
diff --git a/packages/toolpack-agents/src/index.ts b/packages/toolpack-agents/src/index.ts
index f2d9410..dd9ae8a 100644
--- a/packages/toolpack-agents/src/index.ts
+++ b/packages/toolpack-agents/src/index.ts
@@ -36,6 +36,8 @@ export { TelegramChannel, TelegramChannelConfig } from './channels/telegram-chan
 export { DiscordChannel, DiscordChannelConfig } from './channels/discord-channel.js';
 export { EmailChannel, EmailChannelConfig } from './channels/email-channel.js';
 export { SMSChannel, SMSChannelConfig } from './channels/sms-channel.js';
+export { McpChannel } from './channels/mcp-channel.js';
+export type { McpChannelConfig } from './channels/mcp-channel.js';
 
 // Transport layer for agent-to-agent communication
 export {
@@ -130,6 +132,14 @@ export {
   DepthExceededError,
   createTracerInterceptor,
   type TracerConfig,
+  createOTelTracerInterceptor,
+  OTelSpanStatusCode,
+  type OTelTracerConfig,
+  type OTelTracerProvider,
+  type OTelTracer,
+  type OTelSpan,
+  type OTelSpanOptions,
+  type OTelSpanStatus,
 } from './interceptors/index.js';
 
 // Scheduler — persistent job store and LLM-callable tools
@@ -141,3 +151,27 @@ export {
   type CreateJobResult,
   type JobStatus,
 } from './scheduler/index.js';
+
+// Eval primitives — dataset management, runner, scoring, and regression reports
+export {
+  EvalDataset,
+  EvalRunner,
+  ExactMatchScorer,
+  ContainsScorer,
+  LLMJudgeScorer,
+  CustomScorer,
+  compareEvalRuns,
+  formatEvalReport,
+  type EvalRunnerOptions,
+  type EvalScorer,
+  type LLMJudgeScorerOptions,
+  type EvalCase,
+  type EvalCaseResult,
+  type EvalRun,
+  type EvalVerdict,
+  type EvalScoredResult,
+  type EvalScoredRun,
+  type EvalRegression,
+  type EvalImprovement,
+  type EvalReport,
+} from './testing/index.js';
diff --git a/packages/toolpack-agents/src/interceptors/builtins/builtins.test.ts b/packages/toolpack-agents/src/interceptors/builtins/builtins.test.ts
index da0af11..8faf753 100644
--- a/packages/toolpack-agents/src/interceptors/builtins/builtins.test.ts
+++ b/packages/toolpack-agents/src/interceptors/builtins/builtins.test.ts
@@ -11,6 +11,7 @@ import { createParticipantResolverInterceptor } from './participant-resolver.js'
 import { createAddressCheckInterceptor, isAgentNameOnlyInCodeBlocks, type AddressCheckResult } from './address-check.js';
 import { createDepthGuardInterceptor, DepthExceededError } from './depth-guard.js';
 import { createTracerInterceptor } from './tracer.js';
+import { createOTelTracerInterceptor, OTelSpanStatusCode, type OTelSpan, type OTelTracerProvider } from './otel-tracer.js';
 import { createIntentClassifierInterceptor } from './intent-classifier.js';
 
 // ---------- Test helpers ----------
@@ -1094,3 +1095,174 @@ describe('skip sentinel integration', () => {
     expect(isSkipSentinel({ output: 'x' })).toBe(false);
   });
 });
+
+// ---------- otel-tracer ----------
+
+function createMockSpan(): OTelSpan & { // test double: an OTelSpan that also exposes what was recorded on it
+  _attributes: Record<string, string | number | boolean>;
+  _status: { code: OTelSpanStatusCode; message?: string } | null;
+  _exceptions: unknown[];
+  _ended: boolean;
+} {
+  const span = {
+    _attributes: {} as Record<string, string | number | boolean>, // last value written per key by setAttribute
+    _status: null as { code: OTelSpanStatusCode; message?: string } | null, // stays null until setStatus is called
+    _exceptions: [] as unknown[], // every value passed to recordException, in call order
+    _ended: false, // flipped to true by end()
+    setAttribute(key: string, value: string | number | boolean) { this._attributes[key] = value; },
+    setStatus(status: { code: OTelSpanStatusCode; message?: string }) { this._status = status; },
+    recordException(err: Error | string) { this._exceptions.push(err); },
+    end() { this._ended = true; },
+  };
+  return span;
+}
+
+function createMockProvider(span = createMockSpan()): OTelTracerProvider & { span: ReturnType<typeof createMockSpan> } {
+  return {
+    span, // exposed so tests can assert on what was recorded
+    getTracer: () => ({
+      startSpan: () => span, // every startSpan call returns the same shared mock span
+    }),
+  };
+}
+
+describe('createOTelTracerInterceptor', () => {
+  it('is a transparent pass-through when no tracerProvider is supplied', async () => {
+    const interceptor = createOTelTracerInterceptor();
+    const { result, agent } = await runInterceptor(interceptor, {
+      message: 'hi',
+      conversationId: 'c1',
+    });
+    expect(result).not.toBeNull();
+    expect(agent.invokeAgent).toHaveBeenCalledTimes(1);
+  });
+
+  it('starts and ends a span on successful invocation', async () => {
+    const { span, ...provider } = createMockProvider(); // split the shared mock span from the provider under test
+    const interceptor = createOTelTracerInterceptor({ tracerProvider: provider });
+    const { result } = await runInterceptor(interceptor, {
+      message: 'hi',
+      conversationId: 'c1',
+    });
+
+    expect(result).not.toBeNull();
+    expect(span._ended).toBe(true);
+    expect(span._status?.code).toBe(OTelSpanStatusCode.OK);
+    expect(span._attributes['agent.name']).toBe('test-agent');
+    expect(span._attributes['channel.name']).toBe('test-channel');
+    expect(typeof span._attributes['duration.ms']).toBe('number');
+  });
+
+  it('records conversation.id and agent.intent as span attributes when present', async () => {
+    const { span, ...provider } = createMockProvider();
+    const interceptor = createOTelTracerInterceptor({ tracerProvider: provider });
+    await runInterceptor(interceptor, {
+      message: 'hi',
+      conversationId: 'conv-42',
+      intent: 'support',
+    });
+
+    expect(span._attributes['conversation.id']).toBe('conv-42');
+    expect(span._attributes['agent.intent']).toBe('support');
+  });
+
+  it('records workflow step attributes when result.steps are present', async () => {
+    const { span, ...provider } = createMockProvider();
+    const interceptor = createOTelTracerInterceptor({ tracerProvider: provider });
+    const agentResult: AgentResult = {
+      output: 'done',
+      steps: [
+        {
+          number: 1,
+          description: 'fetch data',
+          status: 'completed',
+          result: { success: true, duration: 120, toolsUsed: ['http.get'] },
+        },
+        {
+          number: 2,
+          description: 'summarize',
+          status: 'failed',
+          result: { success: false, error: 'timeout' },
+        },
+      ],
+    };
+
+    const agent = createMockAgent('test-agent', agentResult);
+    const chain = composeChain([interceptor], agent, createMockChannel(), createMockRegistry());
+    await executeChain(chain, { message: 'go', conversationId: 'c1' });
+
+    // Step attributes are keyed by array index (step.0, step.1), not by the step's `number` field.
+    expect(span._attributes['steps.total']).toBe(2);
+    expect(span._attributes['steps.failed']).toBe(1);
+    expect(span._attributes['step.0.description']).toBe('fetch data');
+    expect(span._attributes['step.0.status']).toBe('completed');
+    expect(span._attributes['step.0.duration.ms']).toBe(120);
+    expect(span._attributes['step.0.tools']).toBe('http.get');
+    expect(span._attributes['step.1.status']).toBe('failed');
+  });
+
+  it('does not record step attributes when recordSteps is false', async () => {
+    const { span, ...provider } = createMockProvider();
+    const interceptor = createOTelTracerInterceptor({ tracerProvider: provider, recordSteps: false });
+    const agentResult: AgentResult = {
+      output: 'done',
+      steps: [{ number: 1, description: 'step', status: 'completed' }],
+    };
+    const agent = createMockAgent('test-agent', agentResult);
+    const chain = composeChain([interceptor], agent, createMockChannel(), createMockRegistry());
+    await executeChain(chain, { message: 'go', conversationId: 'c1' });
+
+    expect(span._attributes['steps.total']).toBeUndefined();
+  });
+
+  it('sets ERROR status and records exception on downstream throw', async () => {
+    const { span, ...provider } = createMockProvider();
+    const interceptor = createOTelTracerInterceptor({ tracerProvider: provider });
+    const thrower: Interceptor = async () => { throw new Error('oops'); }; // sits below the tracer in the chain
+
+    const agent = createMockAgent('test-agent');
+    const chain = composeChain([interceptor, thrower], agent, createMockChannel(), createMockRegistry());
+
+    await expect(executeChain(chain, { message: 'hi', conversationId: 'c1' })).rejects.toThrow('oops');
+
+    expect(span._ended).toBe(true);
+    expect(span._status?.code).toBe(OTelSpanStatusCode.ERROR);
+    expect(span._status?.message).toBe('oops');
+    expect(span._exceptions).toHaveLength(1);
+  });
+
+  it('marks span OK and sets result.skipped=true for skip sentinel', async () => {
+    const { span, ...provider } = createMockProvider();
+    const interceptor = createOTelTracerInterceptor({ tracerProvider: provider });
+    const skipper: Interceptor = async (_input, ctx) => ctx.skip();
+
+    const agent = createMockAgent('test-agent');
+    const chain = composeChain([interceptor, skipper], agent, createMockChannel(), createMockRegistry());
+    const result = await executeChain(chain, { message: 'hi', conversationId: 'c1' });
+
+    expect(result).toBeNull();
+    expect(span._ended).toBe(true);
+    expect(span._status?.code).toBe(OTelSpanStatusCode.OK);
+    expect(span._attributes['result.skipped']).toBe(true);
+  });
+
+  it('skips tracing when shouldTrace returns false', async () => {
+    const { span, ...provider } = createMockProvider();
+    const shouldTrace = vi.fn(() => false);
+    const interceptor = createOTelTracerInterceptor({ tracerProvider: provider, shouldTrace });
+    const { result, agent } = await runInterceptor(interceptor, { message: 'hi', conversationId: 'c1' });
+
+    expect(shouldTrace).toHaveBeenCalled();
+    expect(result).not.toBeNull();
+    expect(agent.invokeAgent).toHaveBeenCalledTimes(1);
+    expect(span._ended).toBe(false); // end() was never called on the mock span
+  });
+
+  it('uses custom tracerName when building the tracer', async () => {
+    const getTracerSpy = vi.fn().mockReturnValue({ startSpan: () => createMockSpan() });
+    const provider: OTelTracerProvider = { getTracer: getTracerSpy };
+    const interceptor = createOTelTracerInterceptor({ tracerProvider: provider, tracerName: 'my-service', tracerVersion: '3.0.0' });
+    await runInterceptor(interceptor, { message: 'hi', conversationId: 'c1' });
+
+    expect(getTracerSpy).toHaveBeenCalledWith('my-service', '3.0.0');
+  });
+});
diff --git a/packages/toolpack-agents/src/interceptors/builtins/index.ts b/packages/toolpack-agents/src/interceptors/builtins/index.ts
index de38d4a..42a0738 100644
--- a/packages/toolpack-agents/src/interceptors/builtins/index.ts
+++ b/packages/toolpack-agents/src/interceptors/builtins/index.ts
@@ -11,3 +11,13 @@ export { createAddressCheckInterceptor, type AddressCheckConfig, type AddressChe
 export { createIntentClassifierInterceptor, type IntentClassifierInterceptorConfig } from './intent-classifier.js';
 export { createDepthGuardInterceptor, type DepthGuardConfig, DepthExceededError } from './depth-guard.js';
 export { createTracerInterceptor, type TracerConfig } from './tracer.js';
+export {
+  createOTelTracerInterceptor,
+  OTelSpanStatusCode,
+  type OTelTracerConfig,
+  type OTelTracerProvider,
+  type OTelTracer,
+  type OTelSpan,
+  type OTelSpanOptions,
+  type OTelSpanStatus,
+} from './otel-tracer.js';
diff --git a/packages/toolpack-agents/src/interceptors/builtins/otel-tracer.ts b/packages/toolpack-agents/src/interceptors/builtins/otel-tracer.ts
new file mode 100644
index 0000000..10fe35b
--- /dev/null
+++ b/packages/toolpack-agents/src/interceptors/builtins/otel-tracer.ts
@@ -0,0 +1,186 @@
+import type { AgentInput } from '../../agent/types.js';
+import type { Interceptor, InterceptorResult } from '../types.js';
+import { isSkipSentinel } from '../types.js';
+
+/**
+ * OTel TracerProvider interface — mirrors @opentelemetry/api's TracerProvider
+ * without requiring the package as a hard dependency.
+ */
+export interface OTelTracerProvider {
+  getTracer(name: string, version?: string): OTelTracer; // the interceptor calls this once, when it is created
+}
+
+export interface OTelTracer {
+  startSpan(name: string, options?: OTelSpanOptions): OTelSpan; // the interceptor in this file passes only the name 'agent.invocation'
+}
+
+export interface OTelSpanOptions {
+  attributes?: Record<string, string | number | boolean>; // not used by the interceptor in this file, which sets attributes after startSpan
+}
+
+export interface OTelSpan {
+  setAttribute(key: string, value: string | number | boolean): void; // used for agent, channel, timing and step data
+  setStatus(status: OTelSpanStatus): void; // OK on success or skip, ERROR when the chain throws
+  recordException(error: Error | string): void; // non-Error throwables are passed as String(error)
+  end(): void; // called from a finally block, so it runs on both success and failure
+}
+
+export interface OTelSpanStatus {
+  code: OTelSpanStatusCode;
+  message?: string; // the interceptor sets this only for ERROR, to the thrown error's message
+}
+
+export enum OTelSpanStatusCode {
+  UNSET = 0, // never set explicitly by the interceptor in this file
+  OK = 1, // set after a successful invocation and after a skip
+  ERROR = 2, // set when the downstream chain throws
+}
+
+/**
+ * Configuration for the OTel tracer interceptor.
+ */
+export interface OTelTracerConfig {
+  /**
+   * An OTel-compatible TracerProvider (e.g. from @opentelemetry/sdk-node or any OTel-compatible backend).
+   * When omitted, the interceptor is a transparent no-op and adds zero overhead.
+   */
+  tracerProvider?: OTelTracerProvider;
+
+  /**
+   * Name used to identify the tracer in OTel (default: 'toolpack-agents').
+   */
+  tracerName?: string;
+
+  /**
+   * Version string attached to the tracer.
+   * When omitted, `undefined` is passed as the version argument of `getTracer`.
+   */
+  tracerVersion?: string;
+
+  /**
+   * Whether to record step totals and per-step description, status, duration and tools as span attributes (default: true).
+   */
+  recordSteps?: boolean;
+
+  /**
+   * Optional: filter which inputs to trace.
+   * Return false to skip tracing for a specific input; the chain still runs, just without a span.
+   */
+  shouldTrace?: (input: AgentInput) => boolean;
+}
+
+/**
+ * Creates an OTel-compatible tracer interceptor.
+ *
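+ * Emits one `agent.invocation` span per traced invocation. The span:
+ * - Wraps the entire chain below the interceptor
+ * - Records each workflow step in the result as span attributes (if recordSteps is true)
+ * - Records errors thrown downstream as an exception with ERROR status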
+ *
+ * Works with any OTel-compatible backend: Jaeger, Honeycomb, Datadog, OTLP, etc.
+ * When no tracerProvider is supplied it is a zero-cost transparent pass-through.
+ *
+ * @example
+ * ```ts
+ * import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
+ *
+ * const provider = new NodeTracerProvider();
+ * provider.register();
+ *
+ * const registry = new AgentRegistry([
+ *   {
+ *     agent: MyAgent,
+ *     channels: [slackChannel],
+ *     interceptors: [
+ *       createOTelTracerInterceptor({ tracerProvider: provider }),
+ *     ],
+ *   },
+ * ]);
+ * ```
+ */
+export function createOTelTracerInterceptor(config: OTelTracerConfig = {}): Interceptor {
+  const {
+    tracerProvider,
+    tracerName = 'toolpack-agents',
+    tracerVersion,
+    recordSteps = true,
+    shouldTrace,
+  } = config;
+
+  // Acquire the tracer once at construction time, not per-invocation.
+  const tracer = tracerProvider?.getTracer(tracerName, tracerVersion); // undefined when no provider was given
+
+  return async (input, ctx, next): Promise<InterceptorResult> => {
+    // No-op path — cheapest guard first
+    if (!tracer) {
+      return await next();
+    }
+
+    if (shouldTrace && !shouldTrace(input)) { // caller opted this input out: run the chain without a span
+      return await next();
+    }
+
+    const span = tracer.startSpan('agent.invocation');
+
+    span.setAttribute('agent.name', ctx.agent.name);
+    span.setAttribute('channel.name', ctx.channel.name ?? 'unknown');
+    span.setAttribute('invocation.depth', ctx.invocationDepth);
+    if (input.conversationId) span.setAttribute('conversation.id', input.conversationId); // optional inputs are only recorded when truthy
+    if (input.intent) span.setAttribute('agent.intent', input.intent);
+
+    const startTime = performance.now();
+
+    try {
+      const result = await next();
+      const durationMs = performance.now() - startTime;
+
+      span.setAttribute('duration.ms', Math.round(durationMs)); // whole milliseconds
+
+      if (isSkipSentinel(result)) {
+        span.setAttribute('result.skipped', true); // a skip is a normal outcome, so the status is still OK
+        span.setStatus({ code: OTelSpanStatusCode.OK });
+      } else {
+        span.setAttribute('result.output.length', result.output.length); // only the length is recorded, not the output text
+
+        if (recordSteps && result.steps && result.steps.length > 0) {
+          span.setAttribute('steps.total', result.steps.length);
+
+          const failedSteps = result.steps.filter(s => s.status === 'failed');
+          if (failedSteps.length > 0) { // 'steps.failed' is omitted entirely when nothing failed
+            span.setAttribute('steps.failed', failedSteps.length);
+          }
+
+          result.steps.forEach((step, index) => {
+            const prefix = `step.${index}`; // keyed by array position, not by any field of the step
+            span.setAttribute(`${prefix}.description`, step.description);
+            span.setAttribute(`${prefix}.status`, step.status);
+            if (step.result?.duration !== undefined) {
+              span.setAttribute(`${prefix}.duration.ms`, step.result.duration);
+            }
+            if (step.result?.toolsUsed && step.result.toolsUsed.length > 0) {
+              span.setAttribute(`${prefix}.tools`, step.result.toolsUsed.join(',')); // comma-joined into one string attribute
+            }
+          });
+        }
+
+        span.setStatus({ code: OTelSpanStatusCode.OK });
+      }
+
+      return result;
+    } catch (error) {
+      const durationMs = performance.now() - startTime;
+
+      span.setAttribute('duration.ms', Math.round(durationMs));
+      const exception = error instanceof Error ? error : String(error); // non-Error throwables are recorded as strings
+      span.recordException(exception);
+      span.setStatus({
+        code: OTelSpanStatusCode.ERROR,
+        message: error instanceof Error ? error.message : String(error),
+      });
+
+      throw error; // tracing never swallows the failure; the original error propagates
+    } finally {
+      span.end(); // runs on both the success and the error path
+    }
+  };
+}
diff --git a/packages/toolpack-agents/src/interceptors/index.ts b/packages/toolpack-agents/src/interceptors/index.ts
index babe15b..6ac6422 100644
--- a/packages/toolpack-agents/src/interceptors/index.ts
+++ b/packages/toolpack-agents/src/interceptors/index.ts
@@ -44,4 +44,12 @@ export {
   DepthExceededError,
   createTracerInterceptor,
   type TracerConfig,
+  createOTelTracerInterceptor,
+  OTelSpanStatusCode,
+  type OTelTracerConfig,
+  type OTelTracerProvider,
+  type OTelTracer,
+  type OTelSpan,
+  type OTelSpanOptions,
+  type OTelSpanStatus,
 } from './builtins/index.js';
diff --git a/packages/toolpack-agents/src/testing/eval-dataset.ts b/packages/toolpack-agents/src/testing/eval-dataset.ts
new file mode 100644
index 0000000..f4b0a58
--- /dev/null
+++ b/packages/toolpack-agents/src/testing/eval-dataset.ts
@@ -0,0 +1,121 @@
+import { readFileSync, writeFileSync } from 'node:fs';
+import type { EvalCase } from './eval-types.js';
+
+/**
+ * A collection of eval cases that can be loaded from / saved to JSON.
+ *
+ * @example
+ * ```ts
+ * const dataset = new EvalDataset([
+ *   {
+ *     id: 'q1',
+ *     input: { message: 'What is 2 + 2?' },
+ *     expectedOutput: '4',
+ *   },
+ * ]);
+ *
+ * dataset.save('./evals/math.json');
+ *
+ * const loaded = EvalDataset.load('./evals/math.json');
+ * ```
+ */
+export class EvalDataset {
+  private _cases: EvalCase[];
+
+  constructor(cases: EvalCase[] = []) {
+    this._cases = [...cases];
+  }
+
+  // ── Read ──────────────────────────────────────────────────────────────────
+
+  /** All cases in the dataset (a shallow copy of the internal list). */
+  get cases(): EvalCase[] {
+    return [...this._cases];
+  }
+
+  /** Number of cases. */
+  get size(): number {
+    return this._cases.length;
+  }
+
+  /** Get a case by ID, or `undefined` if not found. */
+  get(id: string): EvalCase | undefined {
+    return this._cases.find(c => c.id === id);
+  }
+
+  // ── Write ─────────────────────────────────────────────────────────────────
+
+  /**
+   * Add one or more cases.
+   * Throws if a case with the same ID already exists.
+   */
+  add(...cases: EvalCase[]): this {
+    // Validate all before mutating — prevents partial add on duplicate within the batch
+    const seen = new Set(this._cases.map(c => c.id));
+    for (const c of cases) {
+      if (seen.has(c.id)) {
+        throw new Error(`EvalDataset: case with id "${c.id}" already exists.`);
+      }
+      seen.add(c.id);
+    }
+    this._cases.push(...cases);
+    return this;
+  }
+
+  /**
+   * Remove a case by ID.
+   * Returns `true` if removed, `false` if not found.
+   */
+  remove(id: string): boolean {
+    const before = this._cases.length;
+    this._cases = this._cases.filter(c => c.id !== id);
+    return this._cases.length < before;
+  }
+
+  /**
+   * Filter cases by a predicate. Returns a new EvalDataset.
+   */
+  filter(predicate: (c: EvalCase) => boolean): EvalDataset {
+    return new EvalDataset(this._cases.filter(predicate));
+  }
+
+  // ── Persistence ───────────────────────────────────────────────────────────
+
+  /**
+   * Serialize to a plain array (suitable for `JSON.stringify`).
+   */
+  toJSON(): EvalCase[] {
+    return [...this._cases];
+  }
+
+  /**
+   * Save cases to a JSON file.
+   *
+   * @param filePath Absolute or relative path to the output file.
+   */
+  save(filePath: string): void {
+    writeFileSync(filePath, JSON.stringify(this._cases, null, 2), 'utf-8');
+  }
+
+  /**
+   * Load cases from a JSON file.
+   * The file must contain a JSON array of `EvalCase` objects.
+   *
+   * @param filePath Absolute or relative path to the JSON file.
+   * @throws Error if the file's top-level JSON value is not an array.
+   */
+  static load(filePath: string): EvalDataset {
+    const parsed: unknown = JSON.parse(readFileSync(filePath, 'utf-8'));
+    if (!Array.isArray(parsed)) { // a string would otherwise be spread into single characters
+      throw new Error(`EvalDataset: "${filePath}" must contain a JSON array of eval cases.`);
+    }
+    return new EvalDataset(parsed as EvalCase[]);
+  }
+
+  /**
+   * Create an `EvalDataset` from a plain array (e.g. from a database query).
+   */
+  static from(cases: EvalCase[]): EvalDataset {
+    return new EvalDataset(cases);
+  }
+}
diff --git a/packages/toolpack-agents/src/testing/eval-report.ts b/packages/toolpack-agents/src/testing/eval-report.ts
new file mode 100644
index 0000000..2748f4d
--- /dev/null
+++ b/packages/toolpack-agents/src/testing/eval-report.ts
@@ -0,0 +1,116 @@
+import type { EvalScoredRun, EvalReport, EvalRegression, EvalImprovement } from './eval-types.js';
+
+/**
+ * Compares two scored runs and produces a regression/improvement report.
+ *
+ * @example
+ * ```ts
+ * const report = compareEvalRuns(baselineScoredRun, candidateScoredRun);
+ *
+ * if (report.regressions.length > 0) {
+ *   console.error('Regressions detected:', report.regressions);
+ *   process.exit(1);
+ * }
+ *
+ * console.log(`Pass rate: ${report.baselinePassRate} → ${report.candidatePassRate} (Δ${report.delta > 0 ? '+' : ''}${report.delta.toFixed(2)})`);
+ * ```
+ */
+export function compareEvalRuns(baseline: EvalScoredRun, candidate: EvalScoredRun): EvalReport {
+  // Index both runs by case ID so each case can be paired across runs.
+  const indexById = (scoredRun: EvalScoredRun) =>
+    new Map(scoredRun.scoredResults.map(scored => [scored.caseResult.evalCase.id, scored]));
+  const before = indexById(baseline);
+  const after = indexById(candidate);
+
+  const regressions: EvalRegression[] = [];
+  const improvements: EvalImprovement[] = [];
+  const stablePasses: string[] = [];
+  const stableFails: string[] = [];
+
+  // Only cases present in BOTH runs are classified. Every such ID is a baseline key,
+  // so walking the baseline index visits them in baseline order.
+  for (const [id, base] of before) {
+    const cand = after.get(id);
+    if (!cand) continue;
+
+    const outputs = {
+      caseId: id,
+      baselineOutput: base.caseResult.actualOutput,
+      candidateOutput: cand.caseResult.actualOutput,
+    };
+
+    switch (`${base.verdict}->${cand.verdict}`) {
+      case 'pass->fail':
+        regressions.push(outputs);
+        break;
+      case 'fail->pass':
+        improvements.push(outputs);
+        break;
+      case 'pass->pass':
+        stablePasses.push(id);
+        break;
+      default:
+        // Every other combination (e.g. fail → fail) is reported as a stable fail.
+        stableFails.push(id);
+    }
+  }
+
+  return {
+    baselineRunId: baseline.run.runId,
+    candidateRunId: candidate.run.runId,
+    baselinePassRate: baseline.passRate,
+    candidatePassRate: candidate.passRate,
+    delta: candidate.passRate - baseline.passRate,
+    regressions,
+    improvements,
+    stablePasses,
+    stableFails,
+  };
+}
+
+/**
+ * Format an `EvalReport` as a human-readable summary string.
+ *
+ * @example
+ * ```ts
+ * console.log(formatEvalReport(report));
+ * ```
+ */
+export function formatEvalReport(report: EvalReport): string {
+  const asPercent = (ratio: number) => `${(ratio * 100).toFixed(1)}%`;
+  const sign = report.delta >= 0 ? '+' : '';
+
+  const out: string[] = [
+    `Eval Report: ${report.baselineRunId} → ${report.candidateRunId}`,
+    `Pass rate:   ${asPercent(report.baselinePassRate)} → ${asPercent(report.candidatePassRate)} (Δ${sign}${asPercent(report.delta)})`,
+    '',
+  ];
+
+  // Regressions and improvements share one layout; a section is omitted when it has no entries.
+  const appendSection = (
+    heading: string,
+    marker: string,
+    entries: ReadonlyArray<EvalRegression | EvalImprovement>,
+  ): void => {
+    if (entries.length === 0) return;
+    out.push(`${heading} (${entries.length}):`);
+    for (const entry of entries) {
+      out.push(`  ${marker} ${entry.caseId}`);
+      out.push(`    baseline:  ${truncate(entry.baselineOutput)}`);
+      out.push(`    candidate: ${truncate(entry.candidateOutput)}`);
+    }
+    out.push('');
+  };
+
+  appendSection('Regressions', '✗', report.regressions);
+  appendSection('Improvements', '✓', report.improvements);
+
+  out.push(`Stable passes: ${report.stablePasses.length}  |  Stable fails: ${report.stableFails.length}`);
+
+  return out.join('\n');
+}
+
+function truncate(s: string, max = 80): string { // flatten newlines to spaces, then cap at `max` chars plus an ellipsis
+  const oneLine = s.split('\n').join(' ');
+  return oneLine.length > max ? oneLine.slice(0, max).concat('…') : oneLine;
+}
diff --git a/packages/toolpack-agents/src/testing/eval-runner.ts b/packages/toolpack-agents/src/testing/eval-runner.ts
new file mode 100644
index 0000000..2a28089
--- /dev/null
+++ b/packages/toolpack-agents/src/testing/eval-runner.ts
@@ -0,0 +1,89 @@
+import type { BaseAgent } from '../agent/base-agent.js';
+import type { EvalDataset } from './eval-dataset.js';
+import type { EvalRun, EvalCaseResult } from './eval-types.js';
+
+export interface EvalRunnerOptions {
+  /**
+   * Identifier for this run — use something meaningful like a version or PR number.
+   * Defaults to an ISO-8601 timestamp taken when the run starts.
+   */
+  runId?: string;
+
+  /**
+   * Concurrency limit — cases run in batches of this size, each batch finishing before the next starts.
+   * Defaults to 1 (sequential) to avoid overwhelming the provider; values below 1 are treated as 1.
+   */
+  concurrency?: number;
+}
+
+/**
+ * Runs an agent against every case in an `EvalDataset` and collects the
+ * results into an `EvalRun`.
+ *
+ * @example
+ * ```ts
+ * const runner = new EvalRunner(agent);
+ * const run = await runner.run(dataset, { runId: 'v1.2' });
+ *
+ * console.log(`${run.results.length} cases run in ${run.totalDurationMs}ms`);
+ * ```
+ */
+export class EvalRunner {
+  private agent: BaseAgent;
+
+  constructor(agent: BaseAgent) {
+    this.agent = agent;
+  }
+
+  /**
+   * Run all cases in batches of `concurrency`; a throwing agent yields a result with `error` set.
+   */
+  async run(dataset: EvalDataset, options: EvalRunnerOptions = {}): Promise<EvalRun> {
+    const runId = options.runId ?? new Date().toISOString();
+    // Normalise to an integer >= 1: a NaN step would end the loop after one empty batch (zero results).
+    const requested = Math.floor(options.concurrency ?? 1);
+    const concurrency = requested >= 1 ? requested : 1;
+    const startedAt = new Date().toISOString();
+    const runStart = Date.now();
+    const cases = dataset.cases;
+    const results: EvalCaseResult[] = [];
+    // Process in batches of `concurrency`
+    for (let i = 0; i < cases.length; i += concurrency) {
+      const batch = cases.slice(i, i + concurrency);
+      const batchResults = await Promise.all(
+        batch.map(async (evalCase) => {
+          const caseStart = Date.now();
+          try {
+            const result = await this.agent.invokeAgent({
+              message: evalCase.input.message,
+              intent: evalCase.input.intent,
+              conversationId: evalCase.input.conversationId,
+              context: evalCase.input.context,
+            });
+            return {
+              evalCase,
+              actualOutput: result.output,
+              durationMs: Date.now() - caseStart,
+            } satisfies EvalCaseResult;
+          } catch (err) {
+            return { // never rethrown, so one failing case cannot reject the whole batch
+              evalCase,
+              actualOutput: '',
+              durationMs: Date.now() - caseStart,
+              error: err instanceof Error ? err.message : String(err),
+            } satisfies EvalCaseResult;
+          }
+        }),
+      );
+      results.push(...batchResults); // Promise.all keeps batch order, so results stay in dataset order
+    }
+
+    return {
+      runId,
+      startedAt,
+      completedAt: new Date().toISOString(),
+      totalDurationMs: Date.now() - runStart,
+      results,
+    };
+  }
+}
diff --git a/packages/toolpack-agents/src/testing/eval-scorer.ts b/packages/toolpack-agents/src/testing/eval-scorer.ts
new file mode 100644
index 0000000..283360e
--- /dev/null
+++ b/packages/toolpack-agents/src/testing/eval-scorer.ts
@@ -0,0 +1,248 @@
+import type { BaseAgent } from '../agent/base-agent.js';
+import type {
+  EvalRun,
+  EvalCaseResult,
+  EvalScoredResult,
+  EvalScoredRun,
+  EvalVerdict,
+} from './eval-types.js';
+
+// ─── Scorer interface ─────────────────────────────────────────────────────────
+
+/**
+ * A scorer evaluates each `EvalCaseResult` in a run and produces a
+ * pass/fail verdict with an optional explanation.
+ *
+ * Implement this interface to create custom scoring logic.
+ */
+export interface EvalScorer {
+  score(run: EvalRun): Promise<EvalScoredRun>; // the built-in scorers resolve to per-case verdicts plus pass/fail counts and a pass rate
+}
+
+// ─── Shared helper ────────────────────────────────────────────────────────────
+
+function buildScoredRun(run: EvalRun, scoredResults: EvalScoredResult[]): EvalScoredRun {
+  // Tally passes in one sweep; every non-pass verdict counts as a failure.
+  let passCount = 0;
+  for (const { verdict } of scoredResults) {
+    if (verdict === 'pass') passCount += 1;
+  }
+  const total = scoredResults.length;
+  // An empty run reports a pass rate of 0 rather than NaN (0 / 0).
+  const passRate = total === 0 ? 0 : passCount / total;
+  return { run, scoredResults, passCount, failCount: total - passCount, passRate };
+}
+
+function scoreResult(result: EvalCaseResult, verdict: EvalVerdict, explanation?: string): EvalScoredResult {
+  // Leave the `explanation` key off entirely (not set to undefined) when none was given.
+  const base: EvalScoredResult = { caseResult: result, verdict };
+  return explanation === undefined ? base : { ...base, explanation };
+}
+
+// ─── ExactMatchScorer ─────────────────────────────────────────────────────────
+
+/**
+ * Passes a case when `actualOutput` exactly equals `expectedOutput`.
+ * Optionally case-insensitive and/or trimmed.
+ *
+ * @example
+ * ```ts
+ * const scorer = new ExactMatchScorer({ trim: true, caseInsensitive: true });
+ * const scored = await scorer.score(run);
+ * ```
+ */
+export class ExactMatchScorer implements EvalScorer {
+  private trim: boolean;
+  private caseInsensitive: boolean;
+
+  constructor(options: { trim?: boolean; caseInsensitive?: boolean } = {}) {
+    // Trimming is on by default; case folding is opt-in.
+    this.trim = options.trim ?? true;
+    this.caseInsensitive = options.caseInsensitive ?? false;
+  }
+
+  /** Applies the configured trimming and lower-casing to one side of the comparison. */
+  private normalize(text: string): string {
+    const trimmed = this.trim ? text.trim() : text;
+    return this.caseInsensitive ? trimmed.toLowerCase() : trimmed;
+  }
+
+  /** Scores every case in `run`; a case whose agent call errored fails without comparison. */
+  async score(run: EvalRun): Promise<EvalScoredRun> {
+    const scoredResults: EvalScoredResult[] = [];
+
+    for (const result of run.results) {
+      if (result.error) {
+        scoredResults.push(scoreResult(result, 'fail', `Agent threw: ${result.error}`));
+        continue;
+      }
+
+      const actual = this.normalize(result.actualOutput);
+      const expected = this.normalize(result.evalCase.expectedOutput);
+      scoredResults.push(scoreResult(result, actual === expected ? 'pass' : 'fail'));
+    }
+
+    return buildScoredRun(run, scoredResults);
+  }
+}
+
+// ─── ContainsScorer ───────────────────────────────────────────────────────────
+
+/**
+ * Passes a case when `actualOutput` contains `expectedOutput` as a substring.
+ * Optionally case-insensitive.
+ *
+ * @example
+ * ```ts
+ * const scorer = new ContainsScorer({ caseInsensitive: true });
+ * const scored = await scorer.score(run);
+ * ```
+ */
+export class ContainsScorer implements EvalScorer {
+  private caseInsensitive: boolean;
+
+  constructor(options: { caseInsensitive?: boolean } = {}) {
+    // Case-insensitive matching is the default for substring checks.
+    this.caseInsensitive = options.caseInsensitive ?? true;
+  }
+
+  /** Scores every case in `run`; a case whose agent call errored fails without comparison. */
+  async score(run: EvalRun): Promise<EvalScoredRun> {
+    const fold = (text: string): string => (this.caseInsensitive ? text.toLowerCase() : text);
+    const scoredResults: EvalScoredResult[] = [];
+
+    for (const result of run.results) {
+      if (result.error) {
+        scoredResults.push(scoreResult(result, 'fail', `Agent threw: ${result.error}`));
+        continue;
+      }
+
+      const haystack = fold(result.actualOutput);
+      const needle = fold(result.evalCase.expectedOutput);
+      const verdict: EvalVerdict = haystack.includes(needle) ? 'pass' : 'fail';
+      scoredResults.push(scoreResult(result, verdict));
+    }
+
+    return buildScoredRun(run, scoredResults);
+  }
+}
+
+// ─── LLMJudgeScorer ───────────────────────────────────────────────────────────
+
+export interface LLMJudgeScorerOptions {
+  /**
+   * Custom judge prompt template with `{{question}}`, `{{expected}}`, and `{{actual}}` placeholders.
+   * Must instruct the LLM to respond with only "pass" or "fail" on the first line,
+   * optionally followed by an explanation. A reply passes only when its first line
+   * starts with "pass" (case-insensitive); anything else is scored as a fail.
+   */
+  promptTemplate?: string;
+}
+
+const DEFAULT_JUDGE_PROMPT = `You are an impartial evaluator assessing whether an AI agent's answer is correct.
+
+Question / Task:
+{{question}}
+
+Expected answer:
+{{expected}}
+
+Actual answer:
+{{actual}}
+
+Is the actual answer correct or equivalent to the expected answer?
+Respond with ONLY "pass" or "fail" on the first line, then optionally a one-sentence explanation.`; // used when LLMJudgeScorerOptions.promptTemplate is not given
+
+/**
+ * Uses an LLM agent as a judge to score each case.
+ * The judge is prompted with the question, expected answer, and actual answer.
+ *
+ * @example
+ * ```ts
+ * const judgeAgent = new MyAgent({ toolpack });
+ * const scorer = new LLMJudgeScorer(judgeAgent);
+ * const scored = await scorer.score(run);
+ * ```
+ */
+export class LLMJudgeScorer implements EvalScorer {
+  private judgeAgent: BaseAgent;
+  private promptTemplate: string;
+
+  constructor(judgeAgent: BaseAgent, options: LLMJudgeScorerOptions = {}) {
+    this.judgeAgent = judgeAgent;
+    this.promptTemplate = options.promptTemplate ?? DEFAULT_JUDGE_PROMPT;
+  }
+
+  async score(run: EvalRun): Promise<EvalScoredRun> {
+    const scoredResults: EvalScoredResult[] = [];
+    // Cases are judged one at a time; a case whose agent call already errored fails without asking the judge.
+    for (const result of run.results) {
+      if (result.error) {
+        scoredResults.push(scoreResult(result, 'fail', `Agent threw: ${result.error}`));
+        continue;
+      }
+      // Fill all placeholders in a single pass with a replacer function, so `$&`-style
+      // sequences in the texts stay literal and inserted text is never re-scanned.
+      const { input, expectedOutput } = result.evalCase;
+      const fill = { question: input.message, expected: expectedOutput, actual: result.actualOutput };
+      const prompt = this.promptTemplate.replace(/\{\{(question|expected|actual)\}\}/g,
+        (_whole, key: string) => fill[key as keyof typeof fill]);
+      try {
+        const judgeResult = await this.judgeAgent.invokeAgent({ message: prompt });
+        const lines = judgeResult.output.trim().split('\n'); // first line = verdict, the rest = explanation
+        const verdict: EvalVerdict = lines[0].toLowerCase().startsWith('pass') ? 'pass' : 'fail';
+        const explanation = lines.slice(1).join(' ').trim() || undefined;
+        scoredResults.push(scoreResult(result, verdict, explanation));
+      } catch (err) {
+        scoredResults.push(
+          scoreResult(result, 'fail', `Judge threw: ${err instanceof Error ? err.message : String(err)}`),
+        );
+      }
+    }
+
+    return buildScoredRun(run, scoredResults);
+  }
+}
+
+// ─── CustomScorer ─────────────────────────────────────────────────────────────
+
+/**
+ * Wraps a user-supplied scoring function.
+ *
+ * @example
+ * ```ts
+ * const scorer = new CustomScorer(async (result) => {
+ *   const pass = result.actualOutput.includes('Paris');
+ *   return { verdict: pass ? 'pass' : 'fail' };
+ * });
+ * ```
+ */
+export class CustomScorer implements EvalScorer {
+  private fn: (result: EvalCaseResult) => Promise<{ verdict: EvalVerdict; explanation?: string }>;
+
+  constructor(fn: (result: EvalCaseResult) => Promise<{ verdict: EvalVerdict; explanation?: string }>) {
+    this.fn = fn;
+  }
+
+  /** Scores one case: agent errors and scorer exceptions both become explained failures. */
+  private async scoreOne(result: EvalCaseResult): Promise<EvalScoredResult> {
+    if (result.error) return scoreResult(result, 'fail', `Agent threw: ${result.error}`);
+
+    try {
+      const { verdict, explanation } = await this.fn(result);
+      return scoreResult(result, verdict, explanation);
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      return scoreResult(result, 'fail', `Scorer threw: ${reason}`);
+    }
+  }
+
+  /** Runs the user-supplied function over the cases one at a time, in run order. */
+  async score(run: EvalRun): Promise<EvalScoredRun> {
+    const scoredResults: EvalScoredResult[] = [];
+    for (const result of run.results) {
+      scoredResults.push(await this.scoreOne(result));
+    }
+    return buildScoredRun(run, scoredResults);
+  }
+}
diff --git a/packages/toolpack-agents/src/testing/eval-types.ts b/packages/toolpack-agents/src/testing/eval-types.ts
new file mode 100644
index 0000000..88b28ec
--- /dev/null
+++ b/packages/toolpack-agents/src/testing/eval-types.ts
@@ -0,0 +1,160 @@
+/**
+ * Eval primitives — shared types across EvalDataset, EvalRunner, EvalScorer, and EvalReport.
+ */
+
+// ─── Dataset ──────────────────────────────────────────────────────────────────
+
+/**
+ * A single eval case: an input fed to the agent and the expected output used
+ * for scoring.
+ */
+export interface EvalCase {
+  /** Unique identifier for this case. */
+  id: string;
+
+  /** The input passed to `agent.invokeAgent()`. */
+  input: {
+    message: string;
+    intent?: string;
+    conversationId?: string;
+    context?: Record<string, unknown>;
+  };
+
+  /**
+   * The expected output used by scorers.
+   * Exact-match and contains scorers compare `actualOutput` against this.
+   * LLM-judge scorers use it as the reference answer.
+   */
+  expectedOutput: string;
+
+  /** Optional free-form metadata (e.g. tags, difficulty, source). */
+  metadata?: Record<string, unknown>;
+}
+
+// ─── Runner ───────────────────────────────────────────────────────────────────
+
+/**
+ * The actual output produced by running a single eval case against an agent.
+ */
+export interface EvalCaseResult {
+  /** The eval case that was run. */
+  evalCase: EvalCase;
+
+  /** The output produced by the agent; the EvalRunner tests expect '' here when the agent threw. */
+  actualOutput: string;
+
+  /** Wall-clock duration in milliseconds. */
+  durationMs: number;
+
+  /** Error message if the agent threw, otherwise undefined. CustomScorer fails such cases without calling its callback. */
+  error?: string;
+}
+
+/**
+ * The result of running an entire dataset through an agent.
+ */
+export interface EvalRun {
+  /** Identifier for this run (e.g. "v1.2", "pr-456"). */
+  runId: string;
+
+  /** ISO timestamp of when the run started. */
+  startedAt: string;
+
+  /** ISO timestamp of when the run completed. */
+  completedAt: string;
+
+  /** Total wall-clock duration in milliseconds. */
+  totalDurationMs: number;
+
+  /** Per-case results, in dataset order. */
+  results: EvalCaseResult[];
+}
+
+// ─── Scorer ───────────────────────────────────────────────────────────────────
+
+/** The verdict for a single scored case. */
+export type EvalVerdict = 'pass' | 'fail';
+
+/**
+ * A scored result — wraps an EvalCaseResult with a pass/fail verdict and
+ * an optional explanation.
+ */
+export interface EvalScoredResult {
+  /** The underlying case result. */
+  caseResult: EvalCaseResult;
+
+  /** Pass or fail. */
+  verdict: EvalVerdict;
+
+  /**
+   * Optional human-readable explanation of the verdict.
+   * LLMJudgeScorer fills it from the judge's reply; scorers also record failure reasons here (e.g. `Agent threw: …`).
+   */
+  explanation?: string;
+}
+
+/**
+ * A fully scored run — an EvalRun annotated with per-case verdicts and
+ * aggregate pass/fail counts.
+ */
+export interface EvalScoredRun {
+  /** The original run. */
+  run: EvalRun;
+
+  /** Scored results, in run order. */
+  scoredResults: EvalScoredResult[];
+
+  /** Number of passing cases. */
+  passCount: number;
+
+  /** Number of failing cases. */
+  failCount: number;
+
+  /** Pass rate as a fraction between 0 and 1. */
+  passRate: number;
+}
+
+// ─── Report ───────────────────────────────────────────────────────────────────
+
+/**
+ * A regression entry — a case that passed in the baseline but fails in the
+ * candidate.
+ */
+export interface EvalRegression {
+  caseId: string;
+  baselineOutput: string;
+  candidateOutput: string;
+}
+
+/**
+ * An improvement entry — a case that failed in the baseline but passes in the
+ * candidate.
+ */
+export interface EvalImprovement {
+  caseId: string;
+  baselineOutput: string;
+  candidateOutput: string;
+}
+
+/**
+ * Comparison report between a baseline scored run and a candidate scored run.
+ */
+export interface EvalReport {
+  baselineRunId: string;
+  candidateRunId: string;
+
+  baselinePassRate: number;
+  candidatePassRate: number;
+
+  /** Δ pass rate (candidate − baseline). Positive = improvement. */
+  delta: number;
+
+  regressions: EvalRegression[];
+  improvements: EvalImprovement[];
+
+  /** Cases that passed in both runs. */
+  stablePasses: string[];
+
+  /** Cases that failed in both runs. */
+  stableFails: string[];
+}
diff --git a/packages/toolpack-agents/src/testing/eval.test.ts b/packages/toolpack-agents/src/testing/eval.test.ts
new file mode 100644
index 0000000..5bda5e4
--- /dev/null
+++ b/packages/toolpack-agents/src/testing/eval.test.ts
@@ -0,0 +1,406 @@
+import { describe, it, expect, vi } from 'vitest';
+import { EvalDataset } from './eval-dataset.js';
+import { EvalRunner } from './eval-runner.js';
+import {
+  ExactMatchScorer,
+  ContainsScorer,
+  LLMJudgeScorer,
+  CustomScorer,
+} from './eval-scorer.js';
+import { compareEvalRuns, formatEvalReport } from './eval-report.js';
+import type { EvalRun, EvalCase } from './eval-types.js';
+import type { BaseAgent } from '../agent/base-agent.js';
+
+// ─── Fixtures ─────────────────────────────────────────────────────────────────
+
+const cases: EvalCase[] = [
+  { id: 'q1', input: { message: 'What is 2+2?' }, expectedOutput: '4' },
+  { id: 'q2', input: { message: 'Capital of France?' }, expectedOutput: 'Paris' },
+  { id: 'q3', input: { message: 'Colour of the sky?' }, expectedOutput: 'blue' },
+];
+
+function makeRun(outputs: string[], runId = 'test-run'): EvalRun {
+  // One result per fixture case, in fixture order; a missing output becomes ''.
+  const results = cases.map(
+    (evalCase, index) => ({ evalCase, actualOutput: outputs[index] ?? '', durationMs: 10 }),
+  );
+  return {
+    runId,
+    startedAt: new Date().toISOString(),
+    completedAt: new Date().toISOString(),
+    totalDurationMs: 100,
+    results,
+  };
+}
+
+// ─── EvalDataset ──────────────────────────────────────────────────────────────
+
+describe('EvalDataset', () => {
+  it('stores cases passed to constructor', () => {
+    const dataset = new EvalDataset(cases);
+    expect(dataset.size).toBe(3);
+    expect(dataset.cases).toHaveLength(3);
+  });
+
+  it('get() returns case by id', () => {
+    const dataset = new EvalDataset(cases);
+    expect(dataset.get('q2')?.expectedOutput).toBe('Paris');
+  });
+
+  it('get() returns undefined for unknown id', () => {
+    const dataset = new EvalDataset(cases);
+    expect(dataset.get('nope')).toBeUndefined();
+  });
+
+  it('add() appends cases', () => {
+    const dataset = new EvalDataset(cases);
+    dataset.add({ id: 'q4', input: { message: 'Hi' }, expectedOutput: 'Hello' });
+    expect(dataset.size).toBe(4);
+  });
+
+  it('add() throws on duplicate id vs existing', () => {
+    const dataset = new EvalDataset(cases);
+    expect(() => dataset.add({ id: 'q1', input: { message: 'x' }, expectedOutput: 'x' }))
+      .toThrow('already exists');
+  });
+
+  it('add() does not partially mutate when duplicate is within the batch', () => {
+    const dataset = new EvalDataset([]);
+    const newCase = { id: 'n1', input: { message: 'x' }, expectedOutput: 'x' };
+    expect(() => dataset.add(newCase, { ...newCase })).toThrow('already exists');
+    expect(dataset.size).toBe(0); // no partial add
+  });
+
+  it('remove() deletes a case and returns true', () => {
+    const dataset = new EvalDataset(cases);
+    expect(dataset.remove('q1')).toBe(true);
+    expect(dataset.size).toBe(2);
+    expect(dataset.get('q1')).toBeUndefined();
+  });
+
+  it('remove() returns false for unknown id', () => {
+    const dataset = new EvalDataset(cases);
+    expect(dataset.remove('nope')).toBe(false);
+  });
+
+  it('filter() returns a new dataset matching the predicate', () => {
+    const dataset = new EvalDataset(cases);
+    const filtered = dataset.filter(c => c.id !== 'q1');
+    expect(filtered.size).toBe(2);
+    expect(dataset.size).toBe(3); // original unchanged
+  });
+
+  it('toJSON() returns a plain array', () => {
+    const dataset = new EvalDataset(cases);
+    const json = dataset.toJSON();
+    expect(Array.isArray(json)).toBe(true);
+    expect(json).toHaveLength(3);
+  });
+
+  it('EvalDataset.from() creates from an array', () => {
+    const dataset = EvalDataset.from(cases);
+    expect(dataset.size).toBe(3);
+  });
+
+  it('cases getter returns a defensive copy', () => {
+    const dataset = new EvalDataset(cases);
+    const first = dataset.cases;
+    first.push({ id: 'injected', input: { message: 'x' }, expectedOutput: 'x' });
+    expect(dataset.size).toBe(3);
+  });
+});
+
+// ─── EvalRunner ───────────────────────────────────────────────────────────────
+
+describe('EvalRunner', () => {
+  function makeAgent(responses: string[]): BaseAgent {
+    let callIdx = 0;
+    return {
+      invokeAgent: vi.fn(async () => ({
+        output: responses[callIdx++] ?? '',
+        steps: undefined,
+        metadata: undefined,
+      })),
+    } as unknown as BaseAgent;
+  }
+
+  it('runs all cases and returns an EvalRun', async () => {
+    const agent = makeAgent(['4', 'Paris', 'blue']);
+    const dataset = new EvalDataset(cases);
+    const runner = new EvalRunner(agent);
+    const run = await runner.run(dataset, { runId: 'v1' });
+
+    expect(run.runId).toBe('v1');
+    expect(run.results).toHaveLength(3);
+    expect(run.results[0].actualOutput).toBe('4');
+    expect(run.results[1].actualOutput).toBe('Paris');
+    expect(run.results[2].actualOutput).toBe('blue');
+  });
+
+  it('captures errors without throwing', async () => {
+    const agent = {
+      invokeAgent: vi.fn().mockRejectedValue(new Error('network error')),
+    } as unknown as BaseAgent;
+    const dataset = new EvalDataset([cases[0]]);
+    const runner = new EvalRunner(agent);
+    const run = await runner.run(dataset);
+
+    expect(run.results[0].error).toBe('network error');
+    expect(run.results[0].actualOutput).toBe('');
+  });
+
+  it('records durationMs per case', async () => {
+    const agent = makeAgent(['4']);
+    const dataset = new EvalDataset([cases[0]]);
+    const runner = new EvalRunner(agent);
+    const run = await runner.run(dataset);
+
+    expect(run.results[0].durationMs).toBeGreaterThanOrEqual(0);
+  });
+
+  it('defaults runId to an ISO timestamp', async () => {
+    const agent = makeAgent(['4']);
+    const dataset = new EvalDataset([cases[0]]);
+    const runner = new EvalRunner(agent);
+    const run = await runner.run(dataset);
+
+    expect(run.runId).toMatch(/^\d{4}-\d{2}-\d{2}T/);
+  });
+});
+
+// ─── ExactMatchScorer ─────────────────────────────────────────────────────────
+
+describe('ExactMatchScorer', () => {
+  it('passes when actual equals expected (trimmed)', async () => {
+    const scorer = new ExactMatchScorer();
+    const run = makeRun(['4', 'Paris', 'blue']);
+    const result = await scorer.score(run);
+
+    expect(result.passCount).toBe(3);
+    expect(result.failCount).toBe(0);
+    expect(result.passRate).toBe(1);
+  });
+
+  it('fails when actual differs', async () => {
+    const scorer = new ExactMatchScorer();
+    const run = makeRun(['5', 'Paris', 'blue']);
+    const result = await scorer.score(run);
+
+    expect(result.passCount).toBe(2);
+    expect(result.failCount).toBe(1);
+    expect(result.scoredResults[0].verdict).toBe('fail');
+  });
+
+  it('trims whitespace by default', async () => {
+    const scorer = new ExactMatchScorer(); // no options: exercise the default, not an explicit `trim: true`
+    const run = makeRun(['  4  ', 'Paris', 'blue']);
+    const result = await scorer.score(run);
+    expect(result.scoredResults[0].verdict).toBe('pass');
+  });
+
+  it('is case-sensitive by default', async () => {
+    const scorer = new ExactMatchScorer();
+    const run = makeRun(['4', 'paris', 'blue']); // lowercase paris
+    const result = await scorer.score(run);
+    expect(result.scoredResults[1].verdict).toBe('fail');
+  });
+
+  it('caseInsensitive option ignores case', async () => {
+    const scorer = new ExactMatchScorer({ caseInsensitive: true });
+    const run = makeRun(['4', 'PARIS', 'BLUE']);
+    const result = await scorer.score(run);
+    expect(result.passCount).toBe(3);
+  });
+
+  it('fails cases that errored', async () => {
+    const scorer = new ExactMatchScorer();
+    const run: EvalRun = {
+      runId: 'x', startedAt: '', completedAt: '', totalDurationMs: 0,
+      results: [{ evalCase: cases[0], actualOutput: '', durationMs: 0, error: 'boom' }],
+    };
+    const result = await scorer.score(run);
+    expect(result.scoredResults[0].verdict).toBe('fail');
+    expect(result.scoredResults[0].explanation).toContain('boom');
+  });
+});
+
+// ─── ContainsScorer ───────────────────────────────────────────────────────────
+
+describe('ContainsScorer', () => {
+  it('passes when actual contains expected', async () => {
+    const scorer = new ContainsScorer();
+    const run = makeRun(['The answer is 4.', 'The capital is Paris!', 'The sky is blue.']);
+    const result = await scorer.score(run);
+    expect(result.passCount).toBe(3);
+  });
+
+  it('fails when actual does not contain expected', async () => {
+    const scorer = new ContainsScorer();
+    const run = makeRun(['The answer is 5.', 'Paris', 'blue']);
+    const result = await scorer.score(run);
+    expect(result.scoredResults[0].verdict).toBe('fail');
+  });
+
+  it('is case-insensitive by default', async () => {
+    const scorer = new ContainsScorer();
+    const run = makeRun(['4', 'PARIS IS THE CAPITAL', 'blue']);
+    const result = await scorer.score(run);
+    expect(result.scoredResults[1].verdict).toBe('pass');
+  });
+
+  it('can be made case-sensitive', async () => {
+    const scorer = new ContainsScorer({ caseInsensitive: false });
+    const run = makeRun(['4', 'paris', 'blue']); // lowercase but expected is 'Paris'
+    const result = await scorer.score(run);
+    expect(result.scoredResults[1].verdict).toBe('fail');
+  });
+});
+
+// ─── CustomScorer ─────────────────────────────────────────────────────────────
+
+describe('CustomScorer', () => {
+  it('uses the provided function', async () => {
+    const scorer = new CustomScorer(async (result) => ({
+      verdict: result.actualOutput.length > 0 ? 'pass' : 'fail',
+    }));
+    const run = makeRun(['4', '', 'blue']);
+    const scored = await scorer.score(run);
+    expect(scored.scoredResults[0].verdict).toBe('pass');
+    expect(scored.scoredResults[1].verdict).toBe('fail');
+  });
+
+  it('catches scorer errors and marks as fail', async () => {
+    const scorer = new CustomScorer(async () => { throw new Error('scorer crash'); });
+    const run = makeRun(['4']);
+    const scored = await scorer.score(run);
+    expect(scored.scoredResults[0].verdict).toBe('fail');
+    expect(scored.scoredResults[0].explanation).toContain('scorer crash');
+  });
+});
+
+// ─── LLMJudgeScorer ───────────────────────────────────────────────────────────
+
+describe('LLMJudgeScorer', () => {
+  function makeJudgeAgent(verdict: 'pass' | 'fail', explanation = ''): BaseAgent {
+    return {
+      invokeAgent: vi.fn(async () => ({
+        output: explanation ? `${verdict}\n${explanation}` : verdict,
+      })),
+    } as unknown as BaseAgent;
+  }
+
+  it('passes when judge returns "pass"', async () => {
+    const scorer = new LLMJudgeScorer(makeJudgeAgent('pass'));
+    const run = makeRun(['4', 'Paris', 'blue']);
+    const scored = await scorer.score(run);
+    expect(scored.passCount).toBe(3);
+  });
+
+  it('fails when judge returns "fail"', async () => {
+    const scorer = new LLMJudgeScorer(makeJudgeAgent('fail'));
+    const run = makeRun(['5', 'London', 'red']);
+    const scored = await scorer.score(run);
+    expect(scored.failCount).toBe(3);
+  });
+
+  it('captures explanation from second line', async () => {
+    const scorer = new LLMJudgeScorer(makeJudgeAgent('pass', 'The answer is correct.'));
+    const scored = await scorer.score(makeRun(['4']));
+    expect(scored.scoredResults[0].explanation).toBe('The answer is correct.');
+  });
+
+  it('handles judge throwing', async () => {
+    const judgeAgent = {
+      invokeAgent: vi.fn().mockRejectedValue(new Error('judge exploded')),
+    } as unknown as BaseAgent;
+    const scorer = new LLMJudgeScorer(judgeAgent);
+    const run = makeRun(['4']);
+    const scored = await scorer.score(run);
+    expect(scored.scoredResults[0].verdict).toBe('fail');
+    expect(scored.scoredResults[0].explanation).toContain('judge exploded');
+  });
+});
+
+// ─── compareEvalRuns ──────────────────────────────────────────────────────────
+
+describe('compareEvalRuns', () => {
+  async function scoredRun(outputs: string[], runId: string) {
+    const scorer = new ExactMatchScorer();
+    return scorer.score(makeRun(outputs, runId));
+  }
+
+  it('detects regressions (pass → fail)', async () => {
+    const baseline  = await scoredRun(['4', 'Paris', 'blue'], 'v1');
+    const candidate = await scoredRun(['5', 'Paris', 'blue'], 'v2');
+    const report = compareEvalRuns(baseline, candidate);
+
+    expect(report.regressions).toHaveLength(1);
+    expect(report.regressions[0].caseId).toBe('q1');
+  });
+
+  it('detects improvements (fail → pass)', async () => {
+    const baseline  = await scoredRun(['5', 'Paris', 'blue'], 'v1');
+    const candidate = await scoredRun(['4', 'Paris', 'blue'], 'v2');
+    const report = compareEvalRuns(baseline, candidate);
+
+    expect(report.improvements).toHaveLength(1);
+    expect(report.improvements[0].caseId).toBe('q1');
+  });
+
+  it('computes stable passes and fails', async () => {
+    const baseline  = await scoredRun(['4', 'Paris', 'WRONG'], 'v1');
+    const candidate = await scoredRun(['4', 'Paris', 'WRONG'], 'v2');
+    const report = compareEvalRuns(baseline, candidate);
+
+    expect(report.stablePasses).toContain('q1');
+    expect(report.stablePasses).toContain('q2');
+    expect(report.stableFails).toContain('q3');
+    expect(report.regressions).toHaveLength(0);
+    expect(report.improvements).toHaveLength(0);
+  });
+
+  it('computes delta correctly', async () => {
+    const baseline  = await scoredRun(['4', 'WRONG', 'WRONG'], 'v1'); // 1/3 pass
+    const candidate = await scoredRun(['4', 'Paris', 'WRONG'], 'v2'); // 2/3 pass
+    const report = compareEvalRuns(baseline, candidate);
+
+    expect(report.baselinePassRate).toBeCloseTo(1 / 3);
+    expect(report.candidatePassRate).toBeCloseTo(2 / 3);
+    expect(report.delta).toBeCloseTo(1 / 3);
+  });
+
+  it('sets correct run IDs', async () => {
+    const baseline  = await scoredRun(['4', 'Paris', 'blue'], 'baseline-v1');
+    const candidate = await scoredRun(['4', 'Paris', 'blue'], 'candidate-v2');
+    const report = compareEvalRuns(baseline, candidate);
+
+    expect(report.baselineRunId).toBe('baseline-v1');
+    expect(report.candidateRunId).toBe('candidate-v2');
+  });
+});
+
+// ─── formatEvalReport ─────────────────────────────────────────────────────────
+
+describe('formatEvalReport', () => {
+  it('includes run IDs and pass rates', async () => {
+    const scorer = new ExactMatchScorer();
+    const baseline  = await scorer.score(makeRun(['4', 'Paris', 'blue'], 'v1'));
+    const candidate = await scorer.score(makeRun(['5', 'Paris', 'blue'], 'v2'));
+    const report = compareEvalRuns(baseline, candidate);
+    const formatted = formatEvalReport(report);
+
+    expect(formatted).toContain('v1');
+    expect(formatted).toContain('v2');
+    expect(formatted).toContain('Regressions');
+  });
+
+  it('does not include Regressions section when there are none', async () => {
+    const scorer = new ExactMatchScorer();
+    const baseline  = await scorer.score(makeRun(['4', 'Paris', 'blue'], 'v1'));
+    const candidate = await scorer.score(makeRun(['4', 'Paris', 'blue'], 'v2'));
+    const report = compareEvalRuns(baseline, candidate);
+    const formatted = formatEvalReport(report);
+
+    expect(formatted).not.toContain('Regressions');
+  });
+});
diff --git a/packages/toolpack-agents/src/testing/index.ts b/packages/toolpack-agents/src/testing/index.ts
index ca66673..5d5b93b 100644
--- a/packages/toolpack-agents/src/testing/index.ts
+++ b/packages/toolpack-agents/src/testing/index.ts
@@ -20,6 +20,30 @@ export type {
   TestAgentResult,
 } from './create-test-agent.js';
 
+// Eval primitives
+export { EvalDataset } from './eval-dataset.js';
+export { EvalRunner } from './eval-runner.js';
+export type { EvalRunnerOptions } from './eval-runner.js';
+export {
+  ExactMatchScorer,
+  ContainsScorer,
+  LLMJudgeScorer,
+  CustomScorer,
+} from './eval-scorer.js';
+export type { EvalScorer, LLMJudgeScorerOptions } from './eval-scorer.js';
+export { compareEvalRuns, formatEvalReport } from './eval-report.js';
+export type {
+  EvalCase,
+  EvalCaseResult,
+  EvalRun,
+  EvalVerdict,
+  EvalScoredResult,
+  EvalScoredRun,
+  EvalRegression,
+  EvalImprovement,
+  EvalReport,
+} from './eval-types.js';
+
 // Event Capture
 export { captureEvents, registerEventMatchers } from './capture-events.js';
 export type {
diff --git a/packages/toolpack-knowledge/package.json b/packages/toolpack-knowledge/package.json
index 704f894..e2a6e31 100644
--- a/packages/toolpack-knowledge/package.json
+++ b/packages/toolpack-knowledge/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@toolpack-sdk/knowledge",
   "version": "2.1.1",
-  "description": "RAG (Retrieval-Augmented Generation) package for Toolpack SDK",
+  "description": "Knowledge/RAG package for Toolpack SDK — web crawling, REST API ingestion, hybrid semantic + keyword search, and streaming indexing across 6 source types (Markdown, Web, API, JSON, SQLite, PostgreSQL)",
   "type": "module",
   "main": "dist/index.cjs",
   "module": "dist/index.js",
diff --git a/packages/toolpack-sdk/README.md b/packages/toolpack-sdk/README.md
index dbf225b..7bcfdb9 100644
--- a/packages/toolpack-sdk/README.md
+++ b/packages/toolpack-sdk/README.md
@@ -1,6 +1,6 @@
 # Toolpack SDK
 
-A unified TypeScript/Node.js SDK for building AI-powered applications with multiple providers, 100+ built-in tools, a workflow engine, and a flexible mode system — all through a single API.
+The TypeScript SDK for building production AI agents — 100+ built-in tools, 8 channel integrations, a persistent cognitive layer, and full Knowledge/RAG, all in one package.
 
 [![npm version](https://img.shields.io/npm/v/toolpack-sdk.svg)](https://www.npmjs.com/package/toolpack-sdk)
 [![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
@@ -17,9 +17,9 @@ A unified TypeScript/Node.js SDK for building AI-powered applications with multi
 - **Workflow Engine** — AI-driven planning with plan-direct execution and parallel tool orchestration
 - **Mode System** — Built-in Agent and Chat modes, plus `createMode()` for custom modes with tool filtering
 - **HITL Confirmation** — Human-in-the-loop approval for high-risk operations with configurable bypass rules
-- **Custom Providers** — Bring your own provider by implementing the `ProviderAdapter` interface
-- **101 Built-in Tools** across 14 categories (including 4 skill-tools and 2 mcp-tools):
-- **MCP Tool Server Integration** — dynamically bridge external Model Context Protocol servers into Toolpack as first-class tools via `createMcpToolProject()` and `disconnectMcpToolProject()`.
+- **Extensible at Every Layer** — Every built-in component is a plug-in point: custom tools (`ToolDefinition`), custom channels (`BaseChannel`), custom provider adapters (`ProviderAdapter`), custom agents (`BaseAgent`), custom modes (`createMode()`), and custom interceptors — all using the same interfaces as the built-ins
+- **100+ Built-in Tools** across 12 categories:
+- **MCP Client & Server** — consume external MCP servers via `createMcpToolProject()`, or expose Toolpack as an MCP server via `sdk.startMcpServer()` with static/JWT/custom auth, search mode, and agent exposure.
 
 | Category | Tools | Description |
 |----------|-------|-------------|
diff --git a/packages/toolpack-sdk/docs/examples/mcp-server-example.ts b/packages/toolpack-sdk/docs/examples/mcp-server-example.ts
new file mode 100644
index 0000000..06b1db0
--- /dev/null
+++ b/packages/toolpack-sdk/docs/examples/mcp-server-example.ts
@@ -0,0 +1,121 @@
+/**
+ * Toolpack MCP Server — entry point example
+ *
+ * Exposes Toolpack's 110+ built-in tools as an MCP server so any MCP-compatible
+ * client (Claude Desktop, Cursor, Windsurf, custom agents) can use them.
+ *
+ * Prerequisites:
+ *   npm install toolpack-sdk @modelcontextprotocol/sdk
+ *
+ * ─── stdio transport (Claude Desktop / Cursor) ────────────────────────────────
+ *
+ * 1. Run this file: node mcp-server-example.js
+ * 2. Add to ~/Library/Application Support/Claude/claude_desktop_config.json:
+ *
+ *    {
+ *      "mcpServers": {
+ *        "toolpack": {
+ *          "command": "node",
+ *          "args": ["/absolute/path/to/mcp-server-example.js"]
+ *        }
+ *      }
+ *    }
+ *
+ * ─── HTTP transport (remote / hosted) ────────────────────────────────────────
+ *
+ * Set TOOLPACK_MCP_TRANSPORT=http and TOOLPACK_MCP_PORT=3000 to run as an HTTP server.
+ * MCP clients connect to http://localhost:3000.
+ *
+ * Always set MCP_AUTH_MODE when using HTTP outside of localhost.
+ * Supported values: 'static', 'jwt', 'none' (localhost only, not recommended in production)
+ */
+
+import { Toolpack } from 'toolpack-sdk';
+import type { McpAuthConfig } from 'toolpack-sdk';
+
+const transport = (process.env.TOOLPACK_MCP_TRANSPORT ?? 'stdio') as 'stdio' | 'http';
+const port = Number(process.env.TOOLPACK_MCP_PORT ?? 3000);
+
+const sdk = await Toolpack.init({
+    provider: 'anthropic',
+    tools: true,
+    apiKey: process.env.ANTHROPIC_API_KEY,
+});
+
+// ─── Auth configuration ───────────────────────────────────────────────────────
+// Auth is only enforced on the HTTP transport. stdio is process-isolated.
+// When transport is 'http' and no auth is set, a warning is logged and all
+// requests are accepted — safe for localhost only.
+
+/** Builds the auth config from MCP_AUTH_MODE; throws on a missing secret or an unrecognised mode. */
+function buildAuth(): McpAuthConfig | undefined {
+    const mode = process.env.MCP_AUTH_MODE;
+    if (mode === 'static') {
+        // Pre-shared bearer token — suitable for self-hosted / dev deployments.
+        // Generate: node -e "console.log(require('crypto').randomBytes(32).toString('hex'))"
+        const token = process.env.MCP_TOKEN;
+        if (!token) throw new Error('MCP_TOKEN env var required for static auth mode');
+        return { mode: 'static', tokens: [token] };
+    }
+    if (mode === 'jwt') {
+        // JWT verification via JWKS — works with Auth0, Supabase, Clerk, Keycloak, etc.
+        const jwksUrl = process.env.MCP_JWKS_URL;
+        if (!jwksUrl) throw new Error('MCP_JWKS_URL env var required for jwt auth mode');
+        return {
+            mode: 'jwt',
+            jwksUrl,
+            audience: process.env.MCP_JWT_AUDIENCE,   // e.g. 'https://your-mcp-server.example.com'
+            issuer:   process.env.MCP_JWT_ISSUER,      // e.g. 'https://your-tenant.auth0.com/'
+        };
+    }
+    // Fail closed: a typo such as 'JWT' or 'statc' must not silently start an unauthenticated server.
+    if (mode && mode !== 'none') throw new Error(`Unsupported MCP_AUTH_MODE "${mode}" (expected 'static', 'jwt' or 'none')`);
+    // No auth (unset, empty or 'none') — open server. Only safe on localhost.
+    return undefined;
+}
+
+// ─── Start server ─────────────────────────────────────────────────────────────
+
+const handle = await sdk.startMcpServer({
+    transport,
+    port,
+    auth: buildAuth(),
+
+    // Optional: advertise which OAuth server issues tokens for this server.
+    // Only used with jwt auth mode — enables MCP client auto-discovery.
+    serverUrl: process.env.MCP_SERVER_URL,   // e.g. 'https://your-mcp-server.example.com'
+
+    // Optional: search mode — dramatically reduces context token usage for 110+ tools.
+    // tools/list returns only `tool.search` + always-loaded tools.
+    // Clients call tool.search to discover tools on-demand instead of loading all upfront.
+    //
+    // Requires this addition to your system prompt:
+    //   "You have access to a large library of tools via tool.search.
+    //    Before calling any tool that is not already listed, call tool.search
+    //    with a short description of what you want to do."
+    //
+    // searchMode: true,
+
+    // Optional: expose only specific tool categories instead of all 110+ tools
+    // expose: { categories: ['filesystem', 'github', 'slack', 'database'] },
+
+    // Optional: expose specific tools by name
+    // expose: { tools: ['fs.read_file', 'fs.write_file', 'slack.chat.postMessage'] },
+});
+
+console.error(
+    `Toolpack MCP server started — ${handle.toolCount} tools exposed over ${transport}` +
+    (transport === 'http' ? ` on port ${port}` : ''),
+);
+
+// ─── Graceful shutdown ────────────────────────────────────────────────────────
+
+// Both termination signals take the same path: stop the MCP server, then
+// exit with status 0. Registration order (SIGINT, then SIGTERM) is unchanged.
+
+for (const signal of ['SIGINT', 'SIGTERM'] as const) {
+    process.on(signal, async () => {
+        await handle.stop();
+        process.exit(0);
+    });
+}
diff --git a/packages/toolpack-sdk/package.json b/packages/toolpack-sdk/package.json
index 8cfff73..7af84bc 100644
--- a/packages/toolpack-sdk/package.json
+++ b/packages/toolpack-sdk/package.json
@@ -1,7 +1,7 @@
 {
   "name": "toolpack-sdk",
   "version": "2.1.1",
-  "description": "Unified TypeScript SDK for AI providers (OpenAI, Anthropic, Gemini, Ollama) with 100+ built-in tools, workflow engine, and mode system for building AI-powered applications",
+  "description": "TypeScript SDK for production AI agents — 110+ built-in tools across 14 categories, multi-provider support (OpenAI, Anthropic, Gemini, Ollama), workflow engine, and mode system",
   "engines": {
     "node": ">=20"
   },
@@ -55,18 +55,22 @@
     "publish:npm": "npm run build && npm run test && npm publish --access public"
   },
   "keywords": [
+    "ai-agent",
+    "production-ai",
     "ai",
     "llm",
+    "typescript",
+    "sdk",
     "openai",
     "anthropic",
     "claude",
     "gemini",
-    "ai-agent",
-    "coding-agent",
+    "ollama",
     "tools",
     "function-calling",
-    "typescript",
-    "sdk"
+    "rag",
+    "knowledge-base",
+    "multi-provider"
   ],
   "author": "Sajeer (https://sajeerzeji.com)",
   "license": "Apache-2.0",
@@ -80,6 +84,7 @@
   },
   "devDependencies": {
     "@eslint/js": "^9.39.2",
+    "@modelcontextprotocol/sdk": "^1.29.0",
     "@types/babel__core": "^7.20.5",
     "@types/babel__traverse": "^7.28.0",
     "@types/better-sqlite3": "^7.6.13",
@@ -100,6 +105,9 @@
   },
   "dependencies": {
     "@anthropic-ai/sdk": "^0.73.0",
+    "jose": "^6.2.3",
+    "zod": "^3.25.0",
+    "zod-to-json-schema": "^3.24.5",
     "@babel/parser": "^7.29.0",
     "@babel/traverse": "^7.29.0",
     "@babel/types": "^7.29.0",
@@ -118,6 +126,14 @@
     "simple-git": "^3.32.3",
     "web-tree-sitter": "^0.22.6"
   },
+  "peerDependencies": {
+    "@modelcontextprotocol/sdk": "^1.29.0"
+  },
+  "peerDependenciesMeta": {
+    "@modelcontextprotocol/sdk": {
+      "optional": true
+    }
+  },
   "directories": {
     "doc": "docs",
     "test": "tests"
diff --git a/packages/toolpack-sdk/src/client/index.ts b/packages/toolpack-sdk/src/client/index.ts
index 21e3ba6..a71a544 100644
--- a/packages/toolpack-sdk/src/client/index.ts
+++ b/packages/toolpack-sdk/src/client/index.ts
@@ -494,7 +494,7 @@ export class AIClient extends EventEmitter {
      * When tools are enabled and autoExecute is true, handles the full
      * tool call → execute → send result → get final answer loop.
      */
-    async generate(request: CompletionRequest, providerName?: string): Promise<CompletionResponse> {
+    async generate<T = unknown>(request: CompletionRequest<T>, providerName?: string): Promise<CompletionResponse<T>> {
         const provider = this.getProvider(providerName);
         try {
             const requestId = newRequestId();
@@ -728,7 +728,13 @@ export class AIClient extends EventEmitter {
                 }
             }
 
-            return response;
+            if (response.data === undefined && response.content && request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format) {
+                response.data = (request.response_format as import('zod').ZodType<T>).parse(
+                    JSON.parse(response.content),
+                );
+            }
+
+            return response as CompletionResponse<T>;
         } catch (error) {
             throw this.wrapError(error);
         }
@@ -1806,7 +1812,7 @@ NEVER guess or hallucinate tool names. ALWAYS use tool.search to discover tools
     /**
      * Execute tool.search using BM25 engine.
      */
-    private executeToolSearch(args: Record<string, any>): string {
+    executeToolSearch(args: Record<string, any>): string {
         const { query, category } = args;
         const limit = this.toolsConfig.toolSearch?.searchResultLimit ?? 5;
         const requestedCategory = typeof category === 'string' && category.length > 0 ? category : undefined;
diff --git a/packages/toolpack-sdk/src/mcp/index.ts b/packages/toolpack-sdk/src/mcp/index.ts
index 337266e..b6b879e 100644
--- a/packages/toolpack-sdk/src/mcp/index.ts
+++ b/packages/toolpack-sdk/src/mcp/index.ts
@@ -1,10 +1,30 @@
-// MCP Client Module
-// Provides JSON-RPC transport for communicating with MCP servers (e.g., chrome-devtools-mcp)
+// MCP Module
+// Client: JSON-RPC transport for consuming external MCP servers
+// Server: expose Toolpack's built-in tools as an MCP server
 
+// ─── Client ───────────────────────────────────────────────────────────────────
 export { McpClient, McpClientConfig, McpTimeoutError, McpConnectionError } from './client.js';
 export {
     JsonRpcRequest,
     JsonRpcResponse,
     McpTool,
     McpServerCapabilities,
-} from "./types.js";
+} from './types.js';
+
+// ─── Server ───────────────────────────────────────────────────────────────────
+// startMcpServer() is intentionally NOT re-exported here. server.ts has static
+// imports of @modelcontextprotocol/sdk (an optional peer dep). A static re-export
+// would eagerly load those imports and break users who haven't installed the SDK.
+// Use Toolpack.startMcpServer() instead — it gates the load behind a dynamic import.
+export type {
+    ToolpackMcpServerConfig,
+    McpServerHandle,
+    McpTransport,
+    McpServerExposeConfig,
+    McpAgentDefinition,
+    McpAuthConfig,
+    McpStaticAuthConfig,
+    McpJwtAuthConfig,
+    McpCustomAuthConfig,
+    AuthInfo,
+} from './server-types.js';
diff --git a/packages/toolpack-sdk/src/mcp/server-auth.ts b/packages/toolpack-sdk/src/mcp/server-auth.ts
new file mode 100644
index 0000000..a0d63dd
--- /dev/null
+++ b/packages/toolpack-sdk/src/mcp/server-auth.ts
@@ -0,0 +1,134 @@
+import { createRemoteJWKSet, jwtVerify } from 'jose';
+import type { IncomingMessage, ServerResponse } from 'node:http';
+import type { AuthInfo } from '@modelcontextprotocol/sdk/server/auth/types.js';
+import type { OAuthTokenVerifier } from '@modelcontextprotocol/sdk/server/auth/provider.js';
+import type { McpAuthConfig } from './server-types.js';
+
+// ─── Public helpers ───────────────────────────────────────────────────────────
+
+/**
+ * Build the OAuthTokenVerifier for `auth`. JwtVerifier caches its JWKS: create once per server, not per request.
+ * Throws on an unrecognised mode so a mistyped `mode` from untyped callers cannot silently yield no verifier.
+ */
+export function buildVerifier(auth: McpAuthConfig): OAuthTokenVerifier {
+    switch (auth.mode) {
+        case 'static': return new StaticBearerVerifier(auth.tokens);
+        case 'jwt':    return new JwtVerifier(auth);
+        case 'custom': return { verifyAccessToken: token => auth.verifyAccessToken(token) }; // call on `auth` to keep `this`
+        default: throw new Error(`Unknown MCP auth mode: "${String((auth as { mode?: unknown }).mode)}". Use 'static', 'jwt' or 'custom'.`);
+    }
+}
+
+/**
+ * Extract and verify a bearer token from an incoming HTTP request.
+ *
+ * On success: attaches AuthInfo to req.auth and returns true.
+ * On failure: writes 401 (missing/invalid token, or any verifier error) or 403 (missing scopes)
+ *             and returns false. The caller must stop processing the request when false is returned.
+ */
+export async function applyBearerAuth(
+    req: IncomingMessage & { auth?: AuthInfo },
+    res: ServerResponse,
+    auth: McpAuthConfig,
+    verifier: OAuthTokenVerifier,
+): Promise<boolean> {
+    const authHeader = req.headers['authorization'];
+    // Auth scheme names are case-insensitive (RFC 9110 §11.1), so "bearer <token>" is accepted too.
+    const match = typeof authHeader === 'string' ? /^Bearer +(.+)$/i.exec(authHeader) : null;
+    const token = match ? match[1] : null;
+
+    if (!token) {
+        res.writeHead(401, { 'WWW-Authenticate': 'Bearer' }).end();
+        return false;
+    }
+
+    let authInfo: AuthInfo;
+    try {
+        authInfo = await verifier.verifyAccessToken(token);
+    } catch {
+        res.writeHead(401, { 'WWW-Authenticate': 'Bearer' }).end();
+        return false;
+    }
+
+    // Scope enforcement — only when requiredScopes is explicitly provided (static mode has no such field).
+    const required = 'requiredScopes' in auth ? auth.requiredScopes : undefined;
+    if (required?.length) {
+        const granted = new Set(authInfo.scopes);
+        const missing = required.filter(s => !granted.has(s));
+        if (missing.length > 0) {
+            res.writeHead(403, { 'WWW-Authenticate': 'Bearer error="insufficient_scope"' }).end(`Missing required scopes: ${missing.join(', ')}`);
+            return false;
+        }
+    }
+
+    req.auth = authInfo;
+    return true;
+}
+
+// ─── StaticBearerVerifier ─────────────────────────────────────────────────────
+
+class StaticBearerVerifier implements OAuthTokenVerifier {
+    // Accepted tokens, held in a Set for O(1) lookup. Set.has() is not a constant-time
+    // comparison. NOTE(review): bearer tokens are credentials — if timing side channels are
+    // in scope, compare digests with crypto.timingSafeEqual instead.
+    private readonly tokenSet: Set<string>;
+
+    constructor(tokens: string[]) {
+        // Ignore blank/non-string entries (e.g. an unset env var) and fail at startup when none is usable.
+        const usable = tokens.filter(t => typeof t === 'string' && t.length > 0);
+        if (usable.length === 0) {
+            throw new Error('McpAuthConfig static mode: tokens array must not be empty. ' +
+                'Generate a token with: crypto.randomBytes(32).toString("hex")');
+        }
+        this.tokenSet = new Set(usable);
+    }
+
+    async verifyAccessToken(token: string): Promise<AuthInfo> {
+        if (!this.tokenSet.has(token)) {
+            throw new Error('Invalid bearer token.');
+        }
+        return { token, clientId: 'static-client', scopes: [] };
+    }
+}
+
+// ─── JwtVerifier ──────────────────────────────────────────────────────────────
+
+class JwtVerifier implements OAuthTokenVerifier {
+    // Remote key set built once by createRemoteJWKSet and reused for every verification,
+    // so a single JwtVerifier should live as long as the server does.
+    private readonly keySet: ReturnType<typeof createRemoteJWKSet>;
+    private readonly audience?: string;
+    private readonly issuer?: string;
+
+    constructor({ jwksUrl, audience, issuer }: { jwksUrl: string; audience?: string; issuer?: string }) {
+        this.keySet = createRemoteJWKSet(new URL(jwksUrl));
+        this.audience = audience;
+        this.issuer = issuer;
+    }
+
+    async verifyAccessToken(token: string): Promise<AuthInfo> {
+        const { audience, issuer } = this;
+        const { payload: claims } = await jwtVerify(token, this.keySet, { audience, issuer });
+
+        // Scopes may arrive as one space-separated string (RFC 9068 `scope`) or as an array
+        // of strings (some providers use `scp` for this). `scp` is consulted only when
+        // `scope` is null/undefined; any other shape yields no scopes.
+        const rawScopes = claims['scope'] ?? claims['scp'];
+        let scopes: string[] = [];
+        if (Array.isArray(rawScopes)) {
+            scopes = rawScopes.filter((entry): entry is string => typeof entry === 'string');
+        } else if (typeof rawScopes === 'string') {
+            scopes = rawScopes.split(' ').filter(Boolean);
+        }
+
+        // Client identity: the `client_id` claim wins, then `sub`, otherwise the literal 'unknown'.
+        let clientId = 'unknown';
+        if (typeof claims['client_id'] === 'string') {
+            clientId = claims['client_id'];
+        } else if (typeof claims.sub === 'string') {
+            clientId = claims.sub;
+        }
+
+        return { token, clientId, scopes, expiresAt: claims.exp };
+    }
+}
diff --git a/packages/toolpack-sdk/src/mcp/server-types.ts b/packages/toolpack-sdk/src/mcp/server-types.ts
new file mode 100644
index 0000000..f6181b0
--- /dev/null
+++ b/packages/toolpack-sdk/src/mcp/server-types.ts
@@ -0,0 +1,226 @@
+import type { AuthInfo } from '@modelcontextprotocol/sdk/server/auth/types.js';
+
+export type McpTransport = 'stdio' | 'http';
+
+// Re-export AuthInfo so users implementing custom verifiers don't need to
+// import from the SDK's internal path directly.
+export type { AuthInfo };
+
+// ─── Expose filter & auth config ──────────────────────────────────────────────
+
+export interface McpServerExposeConfig {
+    /** Expose only tools in these categories. Wins over `tools` when both are non-empty; an empty array is treated as unset. */
+    categories?: string[];
+    /** Expose only these exact tool names. Applied only when `categories` is absent or empty; an empty array is treated as unset. */
+    tools?: string[];
+}
+
+export interface McpStaticAuthConfig {
+    mode: 'static';
+    /**
+     * One or more pre-shared bearer tokens that grant access.
+     * Generate with: crypto.randomBytes(32).toString('hex')
+     * All tokens in the array are valid — useful for token rotation.
+     */
+    tokens: string[];
+}
+
+export interface McpJwtAuthConfig {
+    mode: 'jwt';
+    /**
+     * JWKS endpoint URL for JWT signature verification.
+     * @example 'https://your-tenant.auth0.com/.well-known/jwks.json'
+     * @example 'https://your-project.supabase.co/auth/v1/jwks'
+     */
+    jwksUrl: string;
+    /**
+     * Expected `aud` claim in the JWT.
+     * Required for most OIDC providers — omitting may accept tokens intended for other services.
+     */
+    audience?: string;
+    /**
+     * Expected `iss` claim in the JWT. Recommended.
+     * Also used to populate the `authorization_servers` field in
+     * /.well-known/oauth-protected-resource when serverUrl is set.
+     */
+    issuer?: string;
+    /** JWT must have all of these scopes. Checked after signature verification. */
+    requiredScopes?: string[];
+}
+
+export interface McpCustomAuthConfig {
+    mode: 'custom';
+    /**
+     * Your own token verification logic.
+     * Throw any error to reject the token — the caller receives a 401.
+     * Return a valid AuthInfo on success.
+     *
+     * @example
+     * ```typescript
+     * verifyAccessToken: async (token) => {
+     *   const user = await db.findByToken(token);
+     *   if (!user) throw new Error('Invalid token');
+     *   return { token, clientId: user.id, scopes: user.scopes };
+     * }
+     * ```
+     */
+    verifyAccessToken(token: string): Promise<AuthInfo>;
+    /** Token must have all of these scopes. Checked after verifyAccessToken resolves. */
+    requiredScopes?: string[];
+}
+
+export type McpAuthConfig = McpStaticAuthConfig | McpJwtAuthConfig | McpCustomAuthConfig;
+
+// ─── Agent definition ─────────────────────────────────────────────────────────
+
+/**
+ * Minimal contract for exposing an agent as an MCP tool.
+ * Satisfied by McpChannel.asAgentDefinition() from toolpack-agents,
+ * or by any plain object with these fields (`inputSchema` may be omitted).
+ */
+export interface McpAgentDefinition {
+    /** Exposed as "agent.<name>" in tools/list. Must be unique: tools/call dispatches to the first agent whose name matches. */
+    name: string;
+    /** Shown to the MCP client as the tool description. */
+    description: string;
+    /** JSON Schema for the arguments the agent accepts. Defaults to empty object schema. */
+    inputSchema?: Record<string, unknown>;
+    /**
+     * Called when tools/call arrives for this agent; `args` is `{}` when the client sent none.
+     * Must return the agent's output as a string.
+     * Throw to signal an error — the MCP client receives isError: true.
+     */
+    invoke(args: Record<string, unknown>): Promise<string>;
+}
+
+// ─── Server config ────────────────────────────────────────────────────────────
+
+export interface ToolpackMcpServerConfig {
+    /** Transport type. 'stdio' for Claude Desktop / Cursor. 'http' for remote use. */
+    transport: McpTransport;
+    /** Port for HTTP transport. Default: 3000. Only used when transport is 'http'. */
+    port?: number;
+    /** Filter which tools to expose. Exposes all enabled tools when omitted. */
+    expose?: McpServerExposeConfig;
+    /** Server name shown to MCP clients. Default: 'Toolpack SDK'. */
+    serverName?: string;
+    /** Server version shown to MCP clients. Default: '2.0.0'. */
+    serverVersion?: string;
+
+    /**
+     * Authentication for the HTTP transport. Ignored when transport is 'stdio'.
+     *
+     * When omitted, the HTTP server accepts all requests — safe for localhost only.
+     * When set, every request must carry a valid Bearer token; missing or invalid
+     * tokens are rejected with 401. Scope violations are rejected with 403.
+     *
+     * @example Static tokens (dev / self-hosted)
+     * ```typescript
+     * auth: { mode: 'static', tokens: [process.env.MCP_TOKEN!] }
+     * ```
+     *
+     * @example JWT with Auth0 / Supabase / Clerk
+     * ```typescript
+     * auth: {
+     *   mode: 'jwt',
+     *   jwksUrl: 'https://your-tenant.auth0.com/.well-known/jwks.json',
+     *   audience: 'https://your-mcp-server.example.com',
+     *   issuer:   'https://your-tenant.auth0.com/',
+     * }
+     * ```
+     *
+     * @example Custom verification
+     * ```typescript
+     * auth: {
+     *   mode: 'custom',
+     *   verifyAccessToken: async (token) => {
+     *     const user = await db.findByToken(token);
+     *     if (!user) throw new Error('invalid');
+     *     return { token, clientId: user.id, scopes: user.scopes };
+     *   }
+     * }
+     * ```
+     */
+    auth?: McpAuthConfig;
+
+    /**
+     * Agents to expose as MCP tools alongside regular tools.
+     * Each agent appears in tools/list as "agent.<name>".
+     *
+     * Agents run to completion before returning — synchronous from the MCP
+     * client's perspective. For long-running agents, ensure the MCP client's
+     * timeout is set appropriately.
+     *
+     * The easiest way to produce an entry is via McpChannel.asAgentDefinition()
+     * from toolpack-agents. A plain object with { name, description, invoke }
+     * also works — no import from toolpack-agents required.
+     *
+     * @example using McpChannel (toolpack-agents)
+     * ```typescript
+     * const ch = new McpChannel();
+     * const agent = new PrReviewerAgent({ channels: [ch] });
+     * await agent.start();
+     * await sdk.startMcpServer({
+     *   transport: 'stdio',
+     *   agents: [ch.asAgentDefinition(agent)],
+     * });
+     * ```
+     *
+     * @example plain object (no extra dependency)
+     * ```typescript
+     * await sdk.startMcpServer({
+     *   transport: 'stdio',
+     *   agents: [{
+     *     name: 'pr_reviewer',
+     *     description: 'Reviews a pull request end-to-end.',
+     *     inputSchema: { type: 'object', properties: { pr_url: { type: 'string' } }, required: ['pr_url'] },
+     *     invoke: async (args) => {
+     *       const result = await prReviewer.invokeAgent({ data: args });
+     *       return result.output;
+     *     },
+     *   }],
+     * });
+     * ```
+     */
+    agents?: McpAgentDefinition[];
+
+    /**
+     * Enable tool search mode.
+     *
+     * When true, tools/list returns only `tool.search`, any always-loaded tools configured in
+     * ToolSearchConfig (alwaysLoadedTools / alwaysLoadedCategories), and every entry in `agents`.
+     * MCP clients call `tool.search` first to discover tools on-demand, dramatically
+     * reducing context token usage for registries with 110+ tools.
+     *
+     * Requires the system prompt to instruct the client to use tool.search.
+     * See docs/examples/mcp-server-example.ts for the recommended prompt snippet.
+     *
+     * Default: false — all enabled tools sent upfront.
+     */
+    searchMode?: boolean;
+
+    /**
+     * Public base URL of this MCP server (e.g. 'https://mcp.example.com').
+     * Only used when auth.mode is 'jwt'.
+     *
+     * When provided alongside jwt auth, the server mounts
+     * /.well-known/oauth-protected-resource so MCP clients can
+     * auto-discover which OAuth server issues tokens for this server.
+     *
+     * Ignored for static and custom auth modes.
+     */
+    serverUrl?: string;
+}
+
+export interface McpServerHandle {
+    /** Stop the MCP server and release all resources. */
+    stop(): Promise<void>;
+    /** Number of registry tools passing the `expose` filter, recomputed on each read. Agents and `searchMode` are not reflected. */
+    toolCount: number;
+    /**
+     * Actual bound port (HTTP transport only). Useful when port:0 is passed and
+     * the OS assigns a free port — integration tests read this to know where to connect.
+     * Always 0 for stdio transport.
+     */
+    port: number;
+}
diff --git a/packages/toolpack-sdk/src/mcp/server.ts b/packages/toolpack-sdk/src/mcp/server.ts
new file mode 100644
index 0000000..e329377
--- /dev/null
+++ b/packages/toolpack-sdk/src/mcp/server.ts
@@ -0,0 +1,378 @@
+import { createServer, type IncomingMessage } from 'node:http';
+import { randomUUID } from 'node:crypto';
+import { Server } from '@modelcontextprotocol/sdk/server/index.js';
+import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
+import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
+import {
+    ListToolsRequestSchema,
+    CallToolRequestSchema,
+} from '@modelcontextprotocol/sdk/types.js';
+import type { AuthInfo } from '@modelcontextprotocol/sdk/server/auth/types.js';
+import type { ToolRegistry } from '../tools/registry.js';
+import type { ToolContext, ToolDefinition } from '../tools/types.js';
+import type { ToolpackMcpServerConfig, McpServerHandle } from './server-types.js';
+import { logInfo } from '../providers/provider-logger.js';
+import { buildVerifier, applyBearerAuth } from './server-auth.js';
+import { getToolSearchSchema, isToolSearchTool, TOOL_SEARCH_NAME } from '../tools/search/index.js';
+
+// ─── Public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Start an MCP server exposing Toolpack's registry tools (and any configured agents) over stdio or HTTP.
+ * Uses the low-level Server class because tools carry plain JSON Schema and McpServer.tool() only accepts Zod.
+ * @param searchFn Handler for `tool.search`; consulted only when `config.searchMode` is true.
+ * @returns Handle with `stop()`, a live `toolCount` and the bound `port` (0 for stdio). Throws on an unknown transport or a busy port.
+ */
+export async function startMcpServer(
+    registry: ToolRegistry,
+    config: ToolpackMcpServerConfig,
+    searchFn?: (args: Record<string, unknown>) => string,
+): Promise<McpServerHandle> {
+
+    // 1. Create the low-level MCP Server with tools capability declared
+    const server = new Server(
+        {
+            name: config.serverName ?? 'Toolpack SDK',
+            version: config.serverVersion ?? '2.0.0',
+        },
+        {
+            capabilities: { tools: {} },
+        },
+    );
+
+    // 2. Handle tools/list — resolve fresh on every request so tools added
+    //    via loadToolProject() after startMcpServer() are always included.
+    server.setRequestHandler(ListToolsRequestSchema, async () => {
+        if (config.searchMode) {
+            // Search mode: expose tool.search + always-loaded tools only.
+            // All other tools are deferred — clients call tool.search to discover them.
+            const alwaysLoaded = resolveAlwaysLoadedTools(registry, config);
+            const searchToolSchema = getToolSearchSchema();
+            const alwaysLoadedEntries = alwaysLoaded.map(tool => {
+                const annotations = deriveAnnotations(tool);
+                return {
+                    name: tool.name,
+                    description: tool.description ?? '',
+                    inputSchema: (tool.parameters ?? { type: 'object', properties: {} }) as unknown as Record<string, unknown>,
+                    ...(annotations !== undefined && { annotations }),
+                };
+            });
+            // Agents are always listed even in search mode — they are not in the
+            // ToolRegistry so tool.search cannot discover them. Omitting them here
+            // would make them completely invisible and uncallable.
+            return {
+                tools: [
+                    {
+                        name: searchToolSchema.name,
+                        description: searchToolSchema.description ?? '',
+                        inputSchema: searchToolSchema.parameters as unknown as Record<string, unknown>,
+                        annotations: { readOnlyHint: true },
+                    },
+                    ...alwaysLoadedEntries,
+                    ...buildAgentEntries(config),
+                ],
+            };
+        }
+
+        const tools = resolveTools(registry, config.expose);
+        const toolEntries = tools.map(tool => {
+            const annotations = deriveAnnotations(tool);
+            return {
+                name: tool.name,
+                description: tool.description ?? '',
+                inputSchema: (tool.parameters ?? { type: 'object', properties: {} }) as unknown as Record<string, unknown>,
+                // Only include annotations when there is actual signal — omitting lets
+                // MCP spec defaults apply (destructiveHint: true, openWorldHint: true),
+                // which are the correct conservative defaults for uncategorised tools.
+                ...(annotations !== undefined && { annotations }),
+            };
+        });
+
+        return { tools: [...toolEntries, ...buildAgentEntries(config)] };
+    });
+
+    // 3. Handle tools/call — intercept tool.search in search mode, then normal lookup
+    server.setRequestHandler(CallToolRequestSchema, async (request: { params: { name: string; arguments?: Record<string, unknown> } }) => {
+        const { name, arguments: args } = request.params;
+
+        // Intercept tool.search when search mode is enabled.
+        if (config.searchMode && isToolSearchTool(name)) {
+            if (!searchFn) {
+                return toMcpResult('tool.search is not available: searchFn was not provided to startMcpServer.', true);
+            }
+            try {
+                const result = searchFn(args ?? {});
+                return toMcpResult(result);
+            } catch (err) {
+                const message = err instanceof Error ? err.message : String(err);
+                return toMcpResult(`tool.search error: ${message}`, true);
+            }
+        }
+
+        // Intercept agent.* calls before the normal tool lookup.
+        if (name.startsWith('agent.')) {
+            const agentName = name.slice('agent.'.length);
+            const agentDef = (config.agents ?? []).find(a => a.name === agentName);
+            if (!agentDef) {
+                return toMcpResult(`Agent not found: ${agentName}`, true);
+            }
+            try {
+                const output = await agentDef.invoke(args ?? {});
+                return toMcpResult(output);
+            } catch (err) {
+                const message = err instanceof Error ? err.message : String(err);
+                return toMcpResult(`Agent error (${agentName}): ${message}`, true);
+            }
+        }
+
+        const tool = resolveToolByName(registry, name, config.expose);
+
+        if (!tool) {
+            return toMcpResult(`Tool not found: ${name}`, true);
+        }
+
+        try {
+            const ctx: ToolContext = {
+                workspaceRoot: process.cwd(),
+                config: registry.getConfig().additionalConfigurations ?? {},
+                log: (msg) => logInfo(`[MCP Tool] ${msg}`),
+            };
+            const result = await tool.execute(args ?? {}, ctx);
+            return toMcpResult(result);
+        } catch (err) {
+            const message = err instanceof Error ? err.message : String(err);
+            return toMcpResult(`Error executing ${name}: ${message}`, true);
+        }
+    });
+
+    // 4. Connect the appropriate transport
+    if (config.transport === 'stdio') {
+        const transport = new StdioServerTransport();
+        await server.connect(transport);
+
+        return {
+            get toolCount() { return resolveTools(registry, config.expose).length; },
+            port: 0,
+            stop: async () => { await server.close(); },
+        };
+    }
+
+    if (config.transport === 'http') {
+        const port = config.port ?? 3000;
+
+        // Warn when running without auth — listen() below is given no host, so Node binds the unspecified address (all interfaces).
+        if (!config.auth) {
+            logInfo(
+                '[MCP Server] Warning: HTTP transport started without authentication. ' +
+                'Safe for localhost only. Set `auth` in startMcpServer() before ' +
+                'exposing this server to a network.',
+            );
+        }
+
+        // StreamableHTTPServerTransport is middleware — it does NOT bind to a port; the http.Server below feeds it.
+        // NOTE(review): one session-generating transport serves every client — confirm multi-client support in the SDK.
+        const transport = new StreamableHTTPServerTransport({
+            sessionIdGenerator: () => randomUUID(),
+        });
+
+        // Build the verifier once — JwtVerifier caches the JWKS key set internally,
+        // so creating it per-request would defeat the caching and cause unnecessary
+        // network fetches.
+        const verifier = config.auth ? buildVerifier(config.auth) : null;
+
+        const httpServer = createServer(async (req, res) => {
+            // ── OAuth Protected Resource Metadata (RFC 9728) ──────────────────
+            // Allows MCP clients to discover which OAuth server issues tokens for
+            // this server. Only mounted for jwt mode — static/custom auth has no
+            // external OAuth server to advertise.
+            if (
+                config.auth?.mode === 'jwt' &&
+                config.serverUrl &&
+                req.url?.split('?')[0] === '/.well-known/oauth-protected-resource' // req.url carries the query string
+            ) {
+                const metadata: Record<string, unknown> = { resource: config.serverUrl };
+                if (config.auth.issuer) {
+                    metadata['authorization_servers'] = [config.auth.issuer];
+                }
+                res.writeHead(200, { 'Content-Type': 'application/json' })
+                   .end(JSON.stringify(metadata));
+                return;
+            }
+
+            // ── Bearer auth ───────────────────────────────────────────────────
+            // Gated on config.auth rather than on the verifier, so a verifier that could not
+            // be built rejects requests (fail closed) instead of skipping auth entirely.
+            if (config.auth) {
+                const ok = await applyBearerAuth(
+                    req as IncomingMessage & { auth?: AuthInfo },
+                    res,
+                    config.auth,
+                    verifier!,
+                ).catch(err => {
+                    // Reached only for failures applyBearerAuth does not handle itself (it maps verifier errors to 401).
+                    logInfo(`[MCP Server] Auth error: ${err instanceof Error ? err.message : String(err)}`);
+                    if (!res.headersSent) res.writeHead(500).end('Internal Server Error');
+                    return false;
+                });
+                if (!ok) return;
+            }
+
+            // ── MCP transport ─────────────────────────────────────────────────
+            transport.handleRequest(req, res).catch(err => {
+                logInfo(`[MCP Server] HTTP request handler error: ${err instanceof Error ? err.message : String(err)}`);
+                if (!res.headersSent) {
+                    res.writeHead(500).end('Internal Server Error');
+                }
+            });
+        });
+
+        await server.connect(transport);
+
+        try {
+            await new Promise<void>((resolve, reject) => {
+                const onError = (err: NodeJS.ErrnoException) => {
+                    reject(err.code === 'EADDRINUSE'
+                        ? new Error(`MCP HTTP server failed to start: port ${port} is already in use.`)
+                        : err,
+                    );
+                };
+                httpServer.once('error', onError);
+                httpServer.listen(port, () => {
+                    httpServer.off('error', onError);
+                    resolve();
+                });
+            });
+        } catch (listenErr) {
+            // httpServer failed to start — close the already-connected server
+            // and transport so they don't leak.
+            await server.close().catch(() => { /* ignore close errors during cleanup */ });
+            throw listenErr;
+        }
+
+        const boundPort = (httpServer.address() as { port: number }).port;
+
+        return {
+            get toolCount() { return resolveTools(registry, config.expose).length; },
+            port: boundPort,
+            stop: async () => {
+                try {
+                    await server.close();
+                } finally {
+                    await new Promise<void>((resolve, reject) => {
+                        httpServer.close(err => (err ? reject(err) : resolve()));
+                    });
+                }
+            },
+        };
+    }
+
+    throw new Error(`Unknown MCP transport: "${config.transport}". Use 'stdio' or 'http'.`);
+}
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+function resolveTools(registry: ToolRegistry, expose?: ToolpackMcpServerConfig['expose']) {
+    // Category filter beats name filter; a missing or empty list means "no filter" → the registry's enabled set.
+    const categories = expose?.categories;
+    const names = expose?.tools;
+    if (categories?.length) return registry.getByCategories(categories);
+    return names?.length ? registry.getByNames(names) : registry.getEnabled();
+}
+
+// Single-tool lookup used by tools/call: fetches the tool by name, then applies the same
+// precedence as resolveTools (categories, then names, then the registry's enabled config)
+// so that a tool hidden from tools/list cannot be invoked.
+function resolveToolByName(
+    registry: ToolRegistry,
+    name: string,
+    expose?: ToolpackMcpServerConfig['expose'],
+): ToolDefinition | undefined {
+    const candidate = registry.get(name);
+    if (!candidate) return undefined;
+
+    let allowed: boolean;
+    if (expose?.categories?.length) allowed = expose.categories.includes(candidate.category);
+    else if (expose?.tools?.length) allowed = expose.tools.includes(name);
+    // Neither filter is active (expose missing or both lists empty): defer to the registry's enabled config.
+    else allowed = isEnabledInRegistry(registry, candidate, name);
+    return allowed ? candidate : undefined;
+}
+
+// A tool counts as enabled when the registry config lists neither tools nor categories, or when
+// it names this tool / this tool's category explicitly.
+function isEnabledInRegistry(registry: ToolRegistry, tool: ToolDefinition, name: string): boolean {
+    const cfg = registry.getConfig();
+    const unrestricted = cfg.enabledTools.length === 0 && cfg.enabledToolCategories.length === 0;
+    return unrestricted || cfg.enabledTools.includes(name) || cfg.enabledToolCategories.includes(tool.category);
+}
+
+/** Map each configured agent to a tools/list entry named "agent.<name>", with an empty object schema as fallback. */
+function buildAgentEntries(config: ToolpackMcpServerConfig) {
+    const agents = config.agents ?? [];
+    return agents.map(({ name, description, inputSchema }) => {
+        const schema = (inputSchema ?? { type: 'object', properties: {} }) as Record<string, unknown>;
+        return { name: `agent.${name}`, description, inputSchema: schema };
+    });
+}
+
+/**
+ * Collect the tools that search mode lists up front next to tool.search: those named in
+ * toolSearch.alwaysLoadedTools plus those in toolSearch.alwaysLoadedCategories, de-duplicated
+ * by name and narrowed by the MCP-level expose filter when one is active.
+ */
+function resolveAlwaysLoadedTools(
+    registry: ToolRegistry,
+    config: ToolpackMcpServerConfig,
+): ToolDefinition[] {
+    const searchConfig = registry.getConfig().toolSearch;
+    if (!searchConfig) return [];
+
+    const named = registry.getByNames(searchConfig.alwaysLoadedTools);
+    const categorised = registry.getByCategories(searchConfig.alwaysLoadedCategories);
+
+    // First occurrence of each name wins (name matches come before category matches);
+    // tool.search itself is skipped because the caller lists it separately.
+    const unique = new Map<string, ToolDefinition>();
+    for (const tool of [...named, ...categorised]) {
+        if (tool.name === TOOL_SEARCH_NAME || unique.has(tool.name)) continue;
+        unique.set(tool.name, tool);
+    }
+    const candidates = Array.from(unique.values());
+
+    // Same precedence as the rest of the server: category filter first, then name filter,
+    // and empty lists count as "no filter".
+    const exposedCategories = config.expose?.categories;
+    const exposedNames = config.expose?.tools;
+
+    if (exposedCategories?.length) {
+        return candidates.filter(tool => exposedCategories.includes(tool.category));
+    }
+    if (exposedNames?.length) return candidates.filter(tool => exposedNames.includes(tool.name));
+    return candidates;
+}
+
+/**
+ * Pick the MCP annotations to advertise for a tool.
+ *
+ * Resolution order:
+ *   1. tool.annotations, when set — passed through unchanged.
+ *   2. tool.confirmation, when set — { destructiveHint: true }.
+ *   3. Otherwise undefined, so callers omit the annotations field altogether.
+ *
+ * Per the original design note, an omitted field lets the MCP spec defaults apply
+ * (readOnlyHint: false, destructiveHint: true, openWorldHint: true).
+ *
+ * Those cautious defaults suit tools that carry no signal either way
+ * (e.g. slack.post, gh.create_pr, create-dir — not read-only, yet no confirmation
+ * configured); claiming readOnlyHint: true for them would misdescribe what they do.
+ */
+function deriveAnnotations(tool: ToolDefinition): Record<string, unknown> | undefined {
+    const explicit = tool.annotations;
+    if (explicit) return explicit as Record<string, unknown>;
+    return tool.confirmation ? { destructiveHint: true } : undefined;
+}
+
+// Wrap a result as MCP text content; values JSON.stringify cannot encode (undefined, functions, symbols) fall back to String().
+function toMcpResult(result: unknown, isError = false) {
+    return { content: [{ type: 'text' as const, text: typeof result === 'string' ? result : (JSON.stringify(result, null, 2) ?? String(result)) }], isError };
+}
diff --git a/packages/toolpack-sdk/src/providers/anthropic/index.ts b/packages/toolpack-sdk/src/providers/anthropic/index.ts
index a2acecc..1eb67b6 100644
--- a/packages/toolpack-sdk/src/providers/anthropic/index.ts
+++ b/packages/toolpack-sdk/src/providers/anthropic/index.ts
@@ -1,4 +1,6 @@
 import Anthropic from '@anthropic-ai/sdk';
+import { zodOutputFormat } from '@anthropic-ai/sdk/helpers/zod';
+import { type ZodType } from 'zod';
 import { ProviderAdapter } from "../base/index.js";
 import { CompletionRequest, CompletionResponse, CompletionChunk, ToolCallResult, Message, EmbeddingRequest, EmbeddingResponse, ProviderModelInfo, FileUploadRequest, FileUploadResponse } from "../../types/index.js";
 import { AuthenticationError, RateLimitError, InvalidRequestError, ProviderError } from "../../errors/index.js";
@@ -160,10 +162,16 @@ export class AnthropicAdapter extends ProviderAdapter {
             logDebug(`[Anthropic][${requestId}] generate() request: model=${params.model}, messages=${params.messages.length}, tools=${params.tools?.length || 0}, tool_choice=${params.tool_choice?.type ?? 'unset'}`);
             logMessagePreview(requestId, 'Anthropic', params.messages);
 
-            const response = await this.client.messages.create(
-                params,
-                request.signal ? { signal: request.signal } : undefined,
-            );
+            const isZodSchema = request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format;
+
+            if (isZodSchema) {
+                params.output_config = { format: zodOutputFormat(request.response_format as ZodType) };
+            }
+
+            const rawResponse = isZodSchema
+                ? await (this.client.messages as any).parse(params, request.signal ? { signal: request.signal } : undefined)
+                : await this.client.messages.create(params, request.signal ? { signal: request.signal } : undefined);
+            const response = rawResponse as any;
 
             const textParts: string[] = [];
             const toolCalls: ToolCallResult[] = [];
@@ -182,7 +190,7 @@ export class AnthropicAdapter extends ProviderAdapter {
 
             logDebug(`[Anthropic][${requestId}] Response finish_reason=${response.stop_reason} tool_calls=${toolCalls.length} content_preview=${safePreview(textParts.join(''), 200)}`);
 
-            return {
+            const completionResponse: CompletionResponse = {
                 content: textParts.length > 0 ? textParts.join('') : null,
                 usage: {
                     prompt_tokens: response.usage.input_tokens,
@@ -193,6 +201,12 @@ export class AnthropicAdapter extends ProviderAdapter {
                 tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
                 raw: response,
             };
+
+            if (isZodSchema && response.parsed_output !== undefined) {
+                completionResponse.data = response.parsed_output;
+            }
+
+            return completionResponse;
         } catch (error) {
             throw this.handleError(error);
         }
diff --git a/packages/toolpack-sdk/src/providers/gemini/index.ts b/packages/toolpack-sdk/src/providers/gemini/index.ts
index fbe75f5..c4de5dc 100644
--- a/packages/toolpack-sdk/src/providers/gemini/index.ts
+++ b/packages/toolpack-sdk/src/providers/gemini/index.ts
@@ -1,4 +1,6 @@
 import { GoogleGenerativeAI } from '@google/generative-ai';
+import { zodToJsonSchema } from 'zod-to-json-schema';
+import { type ZodType } from 'zod';
 import { ProviderAdapter } from "../base/index.js";
 import { CompletionRequest, CompletionResponse, CompletionChunk, ToolCallResult, Message, EmbeddingRequest, EmbeddingResponse, ProviderModelInfo, FileUploadRequest, FileUploadResponse } from "../../types/index.js";
 import { AuthenticationError, RateLimitError, InvalidRequestError, ProviderError } from "../../errors/index.js";
@@ -208,9 +210,12 @@ export class GeminiAdapter extends ProviderAdapter {
                     topP: request.top_p,
                     // responseMimeType must not be 'application/json' when tools are present —
                     // Gemini does not support both simultaneously and will truncate responses.
-                    responseMimeType: (request.response_format === 'json_object' && !(request.tools?.length))
+                    responseMimeType: ((request.response_format === 'json_object' || (request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format)) && !(request.tools?.length))
                         ? 'application/json'
                         : 'text/plain',
+                    ...(request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format && !(request.tools?.length)
+                        ? { responseSchema: this.sanitizeSchema(zodToJsonSchema(request.response_format as ZodType)) }
+                        : {}),
                 },
             });
 
@@ -291,9 +296,12 @@ export class GeminiAdapter extends ProviderAdapter {
                     topP: request.top_p,
                     // responseMimeType must not be 'application/json' when tools are present —
                     // Gemini does not support both simultaneously and will truncate responses.
-                    responseMimeType: (request.response_format === 'json_object' && !(request.tools?.length))
+                    responseMimeType: ((request.response_format === 'json_object' || (request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format)) && !(request.tools?.length))
                         ? 'application/json'
                         : 'text/plain',
+                    ...(request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format && !(request.tools?.length)
+                        ? { responseSchema: this.sanitizeSchema(zodToJsonSchema(request.response_format as ZodType)) }
+                        : {}),
                 },
             });
 
@@ -409,7 +417,7 @@ export class GeminiAdapter extends ProviderAdapter {
             return parts.filter(Boolean);
         };
 
-        const history = await Promise.all(historyMsgs.map(async m => {
+        const rawHistory = await Promise.all(historyMsgs.map(async m => {
             if (m.role === 'tool' && m.tool_call_id) {
                 return {
                     role: 'function',
@@ -455,8 +463,21 @@ export class GeminiAdapter extends ProviderAdapter {
             };
         }));
 
-        const lastUserContent = typeof lastMsg.content === 'string' 
-            ? lastMsg.content 
+        // Gemini requires consecutive tool responses from the same multi-call turn
+        // to be grouped into a single role:'function' entry with multiple parts.
+        // Merge consecutive function entries to satisfy this requirement.
+        const history: any[] = [];
+        for (const entry of rawHistory) {
+            const prev = history[history.length - 1];
+            if (entry.role === 'function' && prev?.role === 'function') {
+                prev.parts.push(...entry.parts);
+            } else {
+                history.push(entry);
+            }
+        }
+
+        const lastUserContent = typeof lastMsg.content === 'string'
+            ? lastMsg.content
             : await mapContentParts(lastMsg.content);
 
         return { history, lastUserMessage: lastUserContent };
diff --git a/packages/toolpack-sdk/src/providers/ollama/adapter.ts b/packages/toolpack-sdk/src/providers/ollama/adapter.ts
index 1ed827e..2bfdf51 100644
--- a/packages/toolpack-sdk/src/providers/ollama/adapter.ts
+++ b/packages/toolpack-sdk/src/providers/ollama/adapter.ts
@@ -6,6 +6,8 @@
  * Framework-agnostic — usable from CLI, web servers, Electron, etc.
  */
 
+import { zodToJsonSchema } from 'zod-to-json-schema';
+import { type ZodType } from 'zod';
 import { ProviderAdapter } from "../base/index.js";
 import {
     CompletionRequest,
@@ -192,6 +194,10 @@ export class OllamaAdapter extends ProviderAdapter {
             },
         };
 
+        if (request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format) {
+            payload.format = zodToJsonSchema(request.response_format as ZodType);
+        }
+
         if (request.tools && request.tools.length > 0 && request.tool_choice !== 'none') {
             payload.tools = request.tools.map(t => ({
                 type: 'function',
@@ -276,6 +282,10 @@ export class OllamaAdapter extends ProviderAdapter {
             },
         };
 
+        if (request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format) {
+            payload.format = zodToJsonSchema(request.response_format as ZodType);
+        }
+
         if (request.tools && request.tools.length > 0 && request.tool_choice !== 'none') {
             payload.tools = request.tools.map(t => ({
                 type: 'function',
diff --git a/packages/toolpack-sdk/src/providers/openai/index.ts b/packages/toolpack-sdk/src/providers/openai/index.ts
index 949a609..abe7f18 100644
--- a/packages/toolpack-sdk/src/providers/openai/index.ts
+++ b/packages/toolpack-sdk/src/providers/openai/index.ts
@@ -1,4 +1,6 @@
 import OpenAI from 'openai';
+import { zodResponseFormat } from 'openai/helpers/zod';
+import { type ZodType } from 'zod';
 import { ProviderAdapter } from "../base/index.js";
 import { CompletionRequest, CompletionResponse, CompletionChunk, ToolCallResult, Message, EmbeddingRequest, EmbeddingResponse, ProviderModelInfo, FileUploadRequest, FileUploadResponse } from "../../types/index.js";
 import { AuthenticationError, RateLimitError, InvalidRequestError, ProviderError } from "../../errors/index.js";
@@ -140,7 +142,11 @@ export class OpenAIAdapter extends ProviderAdapter {
                 temperature: request.temperature,
                 max_tokens: request.max_tokens,
                 top_p: request.top_p,
-                response_format: request.response_format === 'json_object' ? { type: 'json_object' } : undefined,
+                response_format: request.response_format === 'json_object'
+                    ? { type: 'json_object' }
+                    : (request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format)
+                        ? zodResponseFormat(request.response_format as ZodType, 'structured_output')
+                        : undefined,
                 stream: false,
             };
 
@@ -224,7 +230,11 @@ export class OpenAIAdapter extends ProviderAdapter {
                 temperature: request.temperature,
                 max_tokens: request.max_tokens,
                 top_p: request.top_p,
-                response_format: request.response_format === 'json_object' ? { type: 'json_object' } : undefined,
+                response_format: request.response_format === 'json_object'
+                    ? { type: 'json_object' }
+                    : (request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format)
+                        ? zodResponseFormat(request.response_format as ZodType, 'structured_output')
+                        : undefined,
                 stream: true,
             };
 
diff --git a/packages/toolpack-sdk/src/providers/vertexai/index.ts b/packages/toolpack-sdk/src/providers/vertexai/index.ts
index 0694cd9..fed4f4e 100644
--- a/packages/toolpack-sdk/src/providers/vertexai/index.ts
+++ b/packages/toolpack-sdk/src/providers/vertexai/index.ts
@@ -1,4 +1,6 @@
 import { GoogleGenAI } from '@google/genai';
+import { zodToJsonSchema } from 'zod-to-json-schema';
+import { type ZodType } from 'zod';
 import type { Content, Part } from '@google/genai';
 import { ProviderAdapter } from '../base/index.js';
 import type {
@@ -209,6 +211,7 @@ export class VertexAIAdapter extends ProviderAdapter {
     // ─── Private helpers ────────────────────────────────────────────────────────
 
     private buildRequestParams(request: CompletionRequest): { model: string; config: any } {
+        const isZodSchema = request.response_format && typeof request.response_format === 'object' && 'parse' in request.response_format;
         const config: any = {
             systemInstruction: this.extractSystemInstruction(request.messages),
             maxOutputTokens: request.max_tokens,
@@ -217,9 +220,12 @@ export class VertexAIAdapter extends ProviderAdapter {
             // responseMimeType must not be set to 'application/json' when function declarations
             // are present — Vertex AI / Gemini does not support both simultaneously and will
             // truncate the response to a single token. JSON mode is honoured only for tool-free requests.
-            responseMimeType: (request.response_format === 'json_object' && !(request.tools?.length))
+            responseMimeType: ((request.response_format === 'json_object' || isZodSchema) && !(request.tools?.length))
                 ? 'application/json'
                 : 'text/plain',
+            ...(isZodSchema && !(request.tools?.length)
+                ? { responseSchema: this.sanitizeSchema(zodToJsonSchema(request.response_format as ZodType)) }
+                : {}),
         };
 
         if (request.tools && request.tools.length > 0) {
@@ -245,7 +251,7 @@ export class VertexAIAdapter extends ProviderAdapter {
         const historyMsgs = conversation.slice(0, -1);
         const lastMsg = conversation[conversation.length - 1];
 
-        const history: Content[] = historyMsgs.map(m => {
+        const rawHistory: Content[] = historyMsgs.map(m => {
             if (m.role === 'tool' && m.tool_call_id) {
                 return {
                     role: 'function',
@@ -283,6 +289,20 @@ export class VertexAIAdapter extends ProviderAdapter {
             };
         });
 
+        // Vertex AI requires that consecutive tool responses belonging to the same
+        // multi-call turn are grouped into a single role:'function' Content with
+        // multiple functionResponse parts — not emitted as separate Content entries.
+        const history: Content[] = [];
+        for (const entry of rawHistory) {
+            const prev = history[history.length - 1];
+            if (entry.role === 'function' && prev?.role === 'function') {
+                // Merge into the previous function Content
+                (prev.parts as Part[]).push(...(entry.parts as Part[]));
+            } else {
+                history.push(entry);
+            }
+        }
+
         return {
             history,
             lastUserMessage: this.contentToParts(lastMsg.content),
diff --git a/packages/toolpack-sdk/src/toolpack.ts b/packages/toolpack-sdk/src/toolpack.ts
index f9793f4..6a5d4ed 100644
--- a/packages/toolpack-sdk/src/toolpack.ts
+++ b/packages/toolpack-sdk/src/toolpack.ts
@@ -26,6 +26,7 @@ import { WorkflowExecutor } from './workflows/workflow-executor.js';
 import { DEFAULT_WORKFLOW_CONFIG } from './workflows/workflow-types.js';
 import { createMcpToolProject, disconnectMcpToolProject, McpToolsConfig } from './tools/index.js';
 import type { ToolpackInterceptor, ToolpackNextFunction } from './interceptors/index.js';
+import type { ToolpackMcpServerConfig, McpServerHandle } from './mcp/server-types.js';
 
 export interface ProviderOptions {
     /**
@@ -868,6 +869,107 @@ export class Toolpack extends EventEmitter {
         }
     }
 
+    /**
+     * Expose Toolpack's built-in tools as an MCP server.
+     *
+     * Any MCP-compatible client (Claude Desktop, Cursor, Windsurf, custom agents)
+     * can connect and use the full tool catalog without importing this SDK.
+     *
+     * Requires `@modelcontextprotocol/sdk` to be installed:
+     *   npm install @modelcontextprotocol/sdk
+     *
+     * @example stdio — Claude Desktop / Cursor
+     * ```typescript
+     * const sdk = await Toolpack.init({ provider: 'anthropic', tools: true });
+     * await sdk.startMcpServer({ transport: 'stdio' });
+     * ```
+     *
+     * @example HTTP — open (localhost only)
+     * ```typescript
+     * await sdk.startMcpServer({ transport: 'http', port: 3000 });
+     * ```
+     *
+     * @example HTTP — with static bearer token auth (dev / self-hosted)
+     * ```typescript
+     * await sdk.startMcpServer({
+     *   transport: 'http',
+     *   port: 3000,
+     *   auth: { mode: 'static', tokens: [process.env.MCP_TOKEN!] },
+     * });
+     * ```
+     *
+     * @example HTTP — with JWT auth (Auth0 / Supabase / Clerk / any OIDC provider)
+     * ```typescript
+     * await sdk.startMcpServer({
+     *   transport: 'http',
+     *   port: 3000,
+     *   auth: {
+     *     mode: 'jwt',
+     *     jwksUrl: 'https://your-tenant.auth0.com/.well-known/jwks.json',
+     *     audience: 'https://your-mcp-server.example.com',
+     *     issuer:   'https://your-tenant.auth0.com/',
+     *   },
+     *   serverUrl: 'https://your-mcp-server.example.com',
+     * });
+     * ```
+     *
+     * @example expose only specific categories
+     * ```typescript
+     * await sdk.startMcpServer({
+     *   transport: 'stdio',
+     *   expose: { categories: ['filesystem', 'github', 'slack'] },
+     * });
+     * ```
+     *
+     * @example search mode — reduces context token usage for 110+ tools
+     * ```typescript
+     * // tools/list returns only tool.search; clients discover tools on-demand.
+     * // Add this to your system prompt:
+     * //   "Use tool.search to discover tools before calling them."
+     * await sdk.startMcpServer({ transport: 'stdio', searchMode: true });
+     * ```
+     */
+    async startMcpServer(config: ToolpackMcpServerConfig): Promise<McpServerHandle> {
+        const registry = this.client.getToolRegistry();
+        if (!registry) {
+            throw new Error(
+                'No tool registry configured. Initialize Toolpack with tools enabled: Toolpack.init({ tools: true })',
+            );
+        }
+
+        // Dynamic import — @modelcontextprotocol/sdk is an optional peer dependency.
+        // Only loaded when startMcpServer() is actually called.
+        // Users who don't use MCP server pay zero overhead.
+        let startMcpServerFn: typeof import('./mcp/server.js').startMcpServer;
+        try {
+            const mod = await import('./mcp/server.js');
+            startMcpServerFn = mod.startMcpServer;
+        } catch (err) {
+            // Only rewrite the error when the failure is specifically a missing
+            // @modelcontextprotocol/sdk module. Node reports that as MODULE_NOT_FOUND from
+            // require() and ERR_MODULE_NOT_FOUND from ESM import(); other errors propagate as-is.
+            const errCode = err instanceof Error ? (err as NodeJS.ErrnoException).code : undefined;
+            const isMissingDep = (errCode === 'MODULE_NOT_FOUND' || errCode === 'ERR_MODULE_NOT_FOUND') &&
+                (err as Error).message.includes('@modelcontextprotocol');
+            if (isMissingDep) {
+                throw new Error(
+                    'MCP server requires @modelcontextprotocol/sdk. Install it with:\n' +
+                    '  npm install @modelcontextprotocol/sdk',
+                );
+            }
+            throw err;
+        }
+
+        // When search mode is enabled, pass the AIClient's search function so the
+        // MCP server can execute tool.search without creating a separate BM25 instance.
+        // This reuses the already-indexed engine in AIClient instead of re-indexing.
+        const searchFn = config.searchMode
+            ? (args: Record<string, unknown>) => this.client.executeToolSearch(args)
+            : undefined;
+
+        return startMcpServerFn(registry, config, searchFn);
+    }
+
     /**
      * Convenience method to get a flat list of all models across all providers.
      */
diff --git a/packages/toolpack-sdk/src/tools/coding-tools/parsers/babel-parser.ts b/packages/toolpack-sdk/src/tools/coding-tools/parsers/babel-parser.ts
index 6fbbd08..2b8da9f 100644
--- a/packages/toolpack-sdk/src/tools/coding-tools/parsers/babel-parser.ts
+++ b/packages/toolpack-sdk/src/tools/coding-tools/parsers/babel-parser.ts
@@ -542,7 +542,7 @@ export class BabelParser implements LanguageParser {
             });
 
             traverse(ast, {
-                CallExpression(callPath) {
+                CallExpression(callPath: NodePath<t.CallExpression>) {
                     const callee = callPath.node.callee;
                     let match = false;
                     if (callee.type === 'Identifier' && callee.name === targetName) {
diff --git a/packages/toolpack-sdk/src/tools/exec-tools/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/index.test.ts
index 70d5db8..ac63454 100644
--- a/packages/toolpack-sdk/src/tools/exec-tools/index.test.ts
+++ b/packages/toolpack-sdk/src/tools/exec-tools/index.test.ts
@@ -10,9 +10,9 @@ describe('exec-tools project', () => {
         expect(execToolsProject.manifest.author).toBe('Sajeer');
     });
 
-    it('should export 6 tools matching the manifest', () => {
+    it('should export 8 tools matching the manifest', () => {
         expect(execToolsProject.tools).toHaveLength(execToolsProject.manifest.tools.length);
-        expect(execToolsProject.tools).toHaveLength(6);
+        expect(execToolsProject.tools).toHaveLength(8);
     });
 
     it('should have tool names matching the manifest list', () => {
diff --git a/packages/toolpack-sdk/src/tools/exec-tools/index.ts b/packages/toolpack-sdk/src/tools/exec-tools/index.ts
index da142b6..f610948 100644
--- a/packages/toolpack-sdk/src/tools/exec-tools/index.ts
+++ b/packages/toolpack-sdk/src/tools/exec-tools/index.ts
@@ -2,14 +2,18 @@ import type { ToolProject } from "../types.js";
 import { execRunTool } from './tools/run/index.js';
 import { execRunShellTool } from './tools/run-shell/index.js';
 import { execRunBackgroundTool } from './tools/run-background/index.js';
+import { execRunBlockingTool } from './tools/run-blocking/index.js';
 import { execReadOutputTool } from './tools/read-output/index.js';
+import { execTailOutputTool } from './tools/tail-output/index.js';
 import { execKillTool } from './tools/kill/index.js';
 import { execListProcessesTool } from './tools/list-processes/index.js';
 
 export { execRunTool } from './tools/run/index.js';
 export { execRunShellTool } from './tools/run-shell/index.js';
 export { execRunBackgroundTool } from './tools/run-background/index.js';
+export { execRunBlockingTool } from './tools/run-blocking/index.js';
 export { execReadOutputTool } from './tools/read-output/index.js';
+export { execTailOutputTool } from './tools/tail-output/index.js';
 export { execKillTool } from './tools/kill/index.js';
 export { execListProcessesTool } from './tools/list-processes/index.js';
 
@@ -22,14 +26,14 @@ export const execToolsProject: ToolProject = {
         description: 'Code execution tools for running commands, managing background processes, and automation.',
         author: 'Sajeer',
         tools: [
-            'exec.run', 'exec.run_shell', 'exec.run_background',
-            'exec.read_output', 'exec.kill', 'exec.list_processes',
+            'exec.run', 'exec.run_shell', 'exec.run_background', 'exec.run_blocking',
+            'exec.read_output', 'exec.tail_output', 'exec.kill', 'exec.list_processes',
         ],
         category: 'execution',
     },
     tools: [
-        execRunTool, execRunShellTool, execRunBackgroundTool,
-        execReadOutputTool, execKillTool, execListProcessesTool,
+        execRunTool, execRunShellTool, execRunBackgroundTool, execRunBlockingTool,
+        execReadOutputTool, execTailOutputTool, execKillTool, execListProcessesTool,
     ],
     dependencies: {},
 };
diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-background/index.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-background/index.ts
index c69cd8a..4ddf2c1 100644
--- a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-background/index.ts
+++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-background/index.ts
@@ -42,7 +42,7 @@ export const execRunBackgroundTool: ToolDefinition = {
     category,
     execute,
     confirmation: {
-        level: 'high',
+        level: 'medium',
         reason: 'This will spawn a background process that runs unsupervised.',
         showArgs: ['command'],
     },
diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
new file mode 100644
index 0000000..db5ff33
--- /dev/null
+++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
@@ -0,0 +1,70 @@
+import { describe, it, expect } from 'vitest';
+import { execRunBlockingTool } from './index.js';
+
+describe('exec.run_blocking tool', () => {
+    it('should have correct metadata', () => {
+        expect(execRunBlockingTool.name).toBe('exec.run_blocking');
+        expect(execRunBlockingTool.category).toBe('execution');
+        expect(execRunBlockingTool.confirmation?.level).toBe('medium');
+    });
+
+    it('should execute a command and return structured result', async () => {
+        const result = JSON.parse(await execRunBlockingTool.execute({ command: 'echo hello' }));
+        expect(result.exitCode).toBe(0);
+        expect(result.success).toBe(true);
+        expect(result.stdout.trim()).toBe('hello');
+        expect(result.stderr).toBe('');
+    });
+
+    it('should support pipes and shell features', async () => {
+        const isWindows = process.platform === 'win32';
+        const command = isWindows
+            ? 'echo "hello world" | ForEach-Object { $_ -replace " ", "_" }'
+            : 'echo "hello world" | tr " " "_"';
+        const result = JSON.parse(await execRunBlockingTool.execute({ command }));
+        expect(result.exitCode).toBe(0);
+        expect(result.stdout.trim()).toBe('hello_world');
+    });
+
+    it('should wait for slow commands to complete naturally', async () => {
+        const start = Date.now();
+        const result = JSON.parse(await execRunBlockingTool.execute({
+            command: 'sleep 1 && echo done',
+        }));
+        const elapsed = Date.now() - start;
+        expect(result.exitCode).toBe(0);
+        expect(result.stdout.trim()).toBe('done');
+        expect(elapsed).toBeGreaterThanOrEqual(1000);
+    }, 10000);
+
+    it('should return non-zero exitCode and success=false for failing commands', async () => {
+        const result = JSON.parse(await execRunBlockingTool.execute({
+            command: 'ls /nonexistent-path-xyz 2>&1',
+        }));
+        expect(result.exitCode).not.toBe(0);
+        expect(result.success).toBe(false);
+    });
+
+    it('should capture stderr separately', async () => {
+        const isWindows = process.platform === 'win32';
+        if (isWindows) return; // skip on Windows
+        const result = JSON.parse(await execRunBlockingTool.execute({
+            command: 'echo out && echo err >&2',
+        }));
+        expect(result.stdout.trim()).toBe('out');
+        expect(result.stderr.trim()).toBe('err');
+    });
+
+    it('should throw if command is missing', async () => {
+        await expect(execRunBlockingTool.execute({})).rejects.toThrow('command is required');
+    });
+
+    it('should accept a cwd argument', async () => {
+        const result = JSON.parse(await execRunBlockingTool.execute({
+            command: 'pwd',
+            cwd: '/tmp',
+        }));
+        expect(result.exitCode).toBe(0);
+        expect(result.stdout.trim()).toContain('tmp');
+    });
+});
diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.ts
new file mode 100644
index 0000000..900eee0
--- /dev/null
+++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.ts
@@ -0,0 +1,81 @@
+import { spawn } from 'child_process';
+import { ToolDefinition } from '../../../types.js';
+import { name, displayName, description, parameters, category } from './schema.js';
+import { logDebug } from '../../../../providers/provider-logger.js';
+
+function getDefaultShell(): string {
+    // PowerShell on Windows; elsewhere the user's $SHELL, falling back to /bin/sh.
+    return process.platform === 'win32' ? 'powershell.exe' : (process.env.SHELL || '/bin/sh');
+}
+
+/**
+ * Run a shell command to completion (no timeout) and report its outcome.
+ *
+ * @param args - `command` (required) is the shell command line; `cwd` (optional) is the working directory.
+ * @returns JSON string with `exitCode`, `stdout`, `stderr` and `success`; spawn failures resolve with exitCode 1.
+ * @throws Error if `command` is missing or empty.
+ */
+async function execute(args: Record<string, any>): Promise<string> {
+    const command = args.command as string;
+    const cwd = args.cwd as string | undefined;
+    if (!command) throw new Error('command is required');
+
+    logDebug(`[exec.run-blocking] execute command="${command.substring(0, 100)}" cwd=${cwd ?? 'default'} (no timeout)`);
+
+    return new Promise((resolve) => {
+        let stdout = '';
+        let stderr = '';
+        // Once a buffer grows past 2MB, keep only its most recent 1MB.
+        const cap = (buf: string) => (buf.length > 2_000_000 ? buf.slice(-1_000_000) : buf);
+
+        const proc = spawn(command, [], {
+            cwd,
+            shell: getDefaultShell(),
+            stdio: ['ignore', 'pipe', 'pipe'],
+        });
+
+        proc.stdout?.on('data', (data: Buffer) => {
+            stdout = cap(stdout + data.toString());
+        });
+        proc.stderr?.on('data', (data: Buffer) => {
+            stderr = cap(stderr + data.toString());
+        });
+
+        proc.on('close', (code, signal) => {
+            // `code` is null when the child was terminated by a signal. That is a
+            // failure, so it must not be reported as exit code 0 / success.
+            const exitCode = code ?? 1;
+            logDebug(`[exec.run-blocking] finished exitCode=${exitCode} signal=${signal ?? 'none'} stdout_len=${stdout.length} stderr_len=${stderr.length}`);
+            resolve(JSON.stringify({
+                exitCode,
+                stdout: stdout || '(no output)',
+                stderr: stderr || (signal ? `Process terminated by signal ${signal}` : ''),
+                success: exitCode === 0,
+            }));
+        });
+
+        proc.on('error', (err) => {
+            logDebug(`[exec.run-blocking] spawn error: ${err.message}`);
+            resolve(JSON.stringify({
+                exitCode: 1,
+                stdout: '',
+                stderr: err.message,
+                success: false,
+            }));
+        });
+    });
+}
+
+/**
+ * exec.run_blocking tool definition: metadata from ./schema.js plus a medium-level confirmation showing `command`.
+ */
+export const execRunBlockingTool: ToolDefinition = {
+    name, displayName, description,
+    parameters, category,
+    execute,
+    confirmation: {
+        level: 'medium',
+        reason: 'This will execute a shell command and block until it completes.',
+        showArgs: ['command'],
+    },
+};
diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/schema.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/schema.ts
new file mode 100644
index 0000000..a62b8fc
--- /dev/null
+++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/schema.ts
@@ -0,0 +1,24 @@
+import { ToolParameters } from '../../../types.js';
+
+export const name = 'exec.run_blocking';
+export const displayName = 'Run Blocking';
+export const description = 'Execute a shell command and wait for it to finish naturally — no timeout. ' +
+    'Use this for commands that take variable or unknown time (e.g. npm install, builds, tests). ' +
+    'Returns exit code, stdout, and stderr when the process exits. ' +
+    'For processes that never exit (servers, watchers), use exec.run_background instead.';
+export const category = 'execution';
+
+export const parameters: ToolParameters = {
+    type: 'object',
+    properties: {
+        command: {
+            type: 'string',
+            description: 'The shell command to execute. Supports pipes, redirects, and shell features.',
+        },
+        cwd: {
+            type: 'string',
+            description: 'Working directory for the command (optional). Defaults to the current working directory.',
+        },
+    },
+    required: ['command'],
+};
diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.test.ts
new file mode 100644
index 0000000..fc668de
--- /dev/null
+++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.test.ts
@@ -0,0 +1,79 @@
+import { describe, it, expect, afterEach } from 'vitest';
+import { execTailOutputTool } from './index.js';
+import { execRunBackgroundTool } from '../run-background/index.js';
+import { killProcess } from '../../process-registry.js';
+
+describe('exec.tail_output tool', () => {
+    const startedIds: string[] = [];
+
+    afterEach(() => {
+        for (const id of startedIds) {
+            killProcess(id);
+        }
+        startedIds.length = 0;
+    });
+
+    it('should have correct metadata', () => {
+        expect(execTailOutputTool.name).toBe('exec.tail_output');
+        expect(execTailOutputTool.category).toBe('execution');
+    });
+
+    it('should return error for unknown process id', async () => {
+        const result = JSON.parse(await execTailOutputTool.execute({ process_id: 'proc_unknown_xyz' }));
+        expect(result.error).toBeDefined();
+        expect(result.hint).toBeDefined();
+    });
+
+    it('should return alive=true while process is running', async () => {
+        const bg = JSON.parse(await execRunBackgroundTool.execute({ command: 'sleep 5' }));
+        startedIds.push(bg.id);
+
+        const tail = JSON.parse(await execTailOutputTool.execute({ process_id: bg.id }));
+        expect(tail.alive).toBe(true);
+        expect(tail.exitCode).toBeNull();
+    });
+
+    it('should return last N lines of stdout', async () => {
+        const isWindows = process.platform === 'win32';
+        const command = isWindows
+            ? 'for ($i=1; $i -le 10; $i++) { Write-Output "line $i" }'
+            : 'for i in $(seq 1 10); do echo "line $i"; done';
+
+        const bg = JSON.parse(await execRunBackgroundTool.execute({ command }));
+        startedIds.push(bg.id);
+
+        // Wait for the process to produce output
+        await new Promise(r => setTimeout(r, 500));
+
+        const tail = JSON.parse(await execTailOutputTool.execute({ process_id: bg.id, lines: 3 }));
+        const lines = tail.lastLines.split('\n').filter((l: string) => l.trim());
+        expect(lines.length).toBeLessThanOrEqual(3);
+    }, 10000);
+
+    it('should return alive=false and exitCode after process exits', async () => {
+        const bg = JSON.parse(await execRunBackgroundTool.execute({ command: 'echo done' }));
+        startedIds.push(bg.id);
+
+        // Wait for the process to finish
+        await new Promise(r => setTimeout(r, 300));
+
+        const tail = JSON.parse(await execTailOutputTool.execute({ process_id: bg.id }));
+        expect(tail.alive).toBe(false);
+        expect(tail.exitCode).toBe(0);
+        expect(tail.lastLines).toContain('done');
+    }, 10000);
+
+    it('should default to 20 lines when lines not specified', async () => {
+        const bg = JSON.parse(await execRunBackgroundTool.execute({ command: 'echo hello' }));
+        startedIds.push(bg.id);
+        await new Promise(r => setTimeout(r, 300));
+
+        const tail = JSON.parse(await execTailOutputTool.execute({ process_id: bg.id }));
+        expect(tail).toHaveProperty('lastLines');
+        expect(tail).toHaveProperty('totalStdoutLines');
+    }, 10000);
+
+    it('should throw if process_id is missing', async () => {
+        await expect(execTailOutputTool.execute({})).rejects.toThrow('process_id is required');
+    });
+});
diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.ts
new file mode 100644
index 0000000..bd10367
--- /dev/null
+++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/index.ts
@@ -0,0 +1,49 @@
+import { ToolDefinition } from '../../../types.js';
+import { getProcess } from '../../process-registry.js';
+import { name, displayName, description, parameters, category } from './schema.js';
+
+/**
+ * Tail a background process: the last N stdout lines (default 20), the last three non-empty
+ * stderr lines, and exit status, as a JSON string. An unknown id yields `{ error, hint }`.
+ * @throws Error if `process_id` is missing.
+ */
+async function execute(args: Record<string, any>): Promise<string> {
+    const processId = args.process_id as string;
+    if (!processId) {
+        throw new Error('process_id is required');
+    }
+    // Non-numeric, NaN or infinite `lines` falls back to 20 (slice(-NaN) would return everything).
+    const numLines = Number.isFinite(args.lines) ? Math.max(1, Math.floor(args.lines)) : 20;
+    const managed = getProcess(processId);
+    if (!managed) {
+        return JSON.stringify({
+            error: `Process not found: ${processId}`,
+            hint: 'Use exec.run_background to start a process first, then pass its id here.',
+        });
+    }
+
+    // NOTE(review): presumably exitCode stays null for a signal-killed child, so `alive` could
+    // read true after a kill; confirm against process-registry before relying on it.
+    const exitCode = managed.process.exitCode;
+    // Strip one trailing newline first; otherwise split() yields a phantom empty last line that
+    // eats a slot in the tail and inflates totalStdoutLines.
+    const stdoutLines = managed.stdout === '' ? [] : managed.stdout.replace(/\r?\n$/, '').split('\n');
+    const stderrLines = managed.stderr.split('\n').filter(l => l.trim());
+    return JSON.stringify({
+        id: processId,
+        alive: exitCode === null,
+        exitCode,
+        lastLines: stdoutLines.slice(-numLines).join('\n').trim() || '(no output yet)',
+        lastStderr: stderrLines.slice(-3).join('\n').trim(),
+        totalStdoutLines: stdoutLines.length,
+    });
+}
+
+export const execTailOutputTool: ToolDefinition = {
+    name,
+    displayName,
+    description,
+    parameters,
+    category,
+    execute,
+};
diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/schema.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/schema.ts
new file mode 100644
index 0000000..5caf304
--- /dev/null
+++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/tail-output/schema.ts
@@ -0,0 +1,25 @@
+import { ToolParameters } from '../../../types.js';
+
+export const name = 'exec.tail_output';
+export const displayName = 'Tail Process Output';
+export const description = 'Read the last N lines of output from a background process started with exec.run_background. ' +
+    'Use this to monitor long-running or non-exiting processes (e.g. dev servers, watchers) ' +
+    'to detect ready signals, errors, or progress without reading all accumulated output. ' +
+    'Returns alive status, exit code, and the most recent lines of stdout and stderr.';
+export const category = 'execution';
+
+export const parameters: ToolParameters = {
+    type: 'object',
+    properties: {
+        process_id: {
+            type: 'string',
+            description: 'The process ID returned by exec.run_background.',
+        },
+        lines: {
+            type: 'integer',
+            description: 'Number of lines to return from the end of stdout (default: 20).',
+            default: 20,
+        },
+    },
+    required: ['process_id'],
+};
diff --git a/packages/toolpack-sdk/src/tools/index.ts b/packages/toolpack-sdk/src/tools/index.ts
index a6c984b..9172487 100644
--- a/packages/toolpack-sdk/src/tools/index.ts
+++ b/packages/toolpack-sdk/src/tools/index.ts
@@ -20,8 +20,8 @@ export {
 // exec-tools
 export {
     execToolsProject,
-    execRunTool, execRunShellTool, execRunBackgroundTool,
-    execReadOutputTool, execKillTool, execListProcessesTool,
+    execRunTool, execRunShellTool, execRunBackgroundTool, execRunBlockingTool,
+    execReadOutputTool, execTailOutputTool, execKillTool, execListProcessesTool,
 } from './exec-tools/index.js';
 
 // system-tools
diff --git a/packages/toolpack-sdk/src/tools/registry.ts b/packages/toolpack-sdk/src/tools/registry.ts
index 6c8f786..61efbe5 100644
--- a/packages/toolpack-sdk/src/tools/registry.ts
+++ b/packages/toolpack-sdk/src/tools/registry.ts
@@ -95,6 +95,8 @@ export class ToolRegistry {
             description: t.description,
             parameters: t.parameters,
             category: t.category,
+            ...(t.cacheable !== undefined && { cacheable: t.cacheable }),
+            ...(t.annotations !== undefined && { annotations: t.annotations }),
         }));
     }
 
@@ -236,6 +238,7 @@ export class ToolRegistry {
         const { dbToolsProject } = await import('./db-tools/index.js');
         const { cloudToolsProject } = await import('./cloud-tools/index.js');
         const { slackToolsProject } = await import('./slack-tools/index.js');
-        await this.loadProjects([fsToolsProject, execToolsProject, systemToolsProject, httpToolsProject, githubToolsProject, webToolsProject, codingToolsProject, gitToolsProject, diffToolsProject, dbToolsProject, cloudToolsProject, slackToolsProject]);
+        const { k8sToolsProject } = await import('./k8s-tools/index.js');
+        await this.loadProjects([fsToolsProject, execToolsProject, systemToolsProject, httpToolsProject, githubToolsProject, webToolsProject, codingToolsProject, gitToolsProject, diffToolsProject, dbToolsProject, cloudToolsProject, slackToolsProject, k8sToolsProject]);
     }
 }
diff --git a/packages/toolpack-sdk/src/tools/types.ts b/packages/toolpack-sdk/src/tools/types.ts
index 6d7dbac..e884e48 100644
--- a/packages/toolpack-sdk/src/tools/types.ts
+++ b/packages/toolpack-sdk/src/tools/types.ts
@@ -30,6 +30,48 @@ export interface ToolContext {
     log: (message: string) => void;
 }
 
+// ── Tool Annotations (MCP) ────────────────────────────────────
+
+/**
+ * Hints about tool behaviour sent to MCP clients in tools/list.
+ * All fields are optional — clients use them for safety UX (e.g. confirmation
+ * dialogs before destructive actions) but must not rely on them for security.
+ *
+ * MCP spec defaults when annotations are omitted entirely:
+ *   readOnlyHint: false, destructiveHint: true, openWorldHint: true, idempotentHint: false
+ *
+ * The MCP server auto-derives annotations when this field is not set:
+ *   - confirmation present → { destructiveHint: true }
+ *   - neither set          → annotations omitted (MCP spec defaults apply)
+ * Set explicitly to override.
+ */
+export interface ToolAnnotations {
+    /**
+     * Tool does not modify its environment — it only reads data and never writes or mutates state.
+     * MCP spec default (when absent): false.
+     * Set to true for pure read tools: fs.read_file, search, list-dir.
+     */
+    readOnlyHint?: boolean;
+    /**
+     * Tool may cause irreversible side-effects (delete, overwrite, deploy, send).
+     * MCP spec default (when absent): true — clients assume worst case.
+     * Set to false for additive-only writes (e.g. create-if-not-exists). Only meaningful when readOnlyHint is false.
+     */
+    destructiveHint?: boolean;
+    /**
+     * Calling the tool multiple times with the same args has no additional effect.
+     * MCP spec default (when absent): false.
+     * Set to true for idempotent operations. Only meaningful when readOnlyHint is false.
+     */
+    idempotentHint?: boolean;
+    /**
+     * Tool may interact with external systems: web, APIs, databases, shell, filesystem.
+     * MCP spec default (when absent): true.
+     * Set to false only for purely in-process, local tools with no side-effects.
+     */
+    openWorldHint?: boolean;
+}
+
 // ── Tool Confirmation (HITL) ─────────────────────────────────
 
 export type ConfirmationLevel = 'high' | 'medium';
@@ -59,6 +101,15 @@ export interface ToolDefinition {
      * Note: Only effective when onToolConfirm callback is provided to AIClient.
      */
     confirmation?: ToolConfirmation;
+    /**
+     * MCP annotation hints describing tool behaviour to clients.
+     * When omitted, the MCP server auto-derives from `confirmation`:
+     *   - confirmation set  → { destructiveHint: true }
+     *   - no confirmation   → annotations omitted (MCP spec defaults apply)
+     * Set explicitly to override — particularly useful for marking read-only tools
+     * (readOnlyHint: true) or idempotent tools (idempotentHint: true).
+     */
+    annotations?: ToolAnnotations;
 }
 
 /**
@@ -71,12 +122,14 @@ export interface ToolSchema {
     description: string;
     parameters: ToolParameters;
     category: string;
-    /** 
+    /**
      * Whether this tool should be cached after discovery via tool.search.
      * If false, the tool must be re-discovered each time it's needed.
      * Default: true
      */
     cacheable?: boolean;
+    /** MCP annotation hints. See ToolAnnotations for details. */
+    annotations?: ToolAnnotations;
 }
 
 // ── Tool Project ──────────────────────────────────────────────
diff --git a/packages/toolpack-sdk/src/types/index.ts b/packages/toolpack-sdk/src/types/index.ts
index 989a5e6..3f4d43b 100644
--- a/packages/toolpack-sdk/src/types/index.ts
+++ b/packages/toolpack-sdk/src/types/index.ts
@@ -109,13 +109,30 @@ export interface ToolCallResult {
     duration?: number;
 }
 
-export interface CompletionRequest {
+export interface CompletionRequest<T = unknown> {
     messages: Message[];
     model: string;
     temperature?: number;
     max_tokens?: number;
     top_p?: number;
-    response_format?: 'text' | 'json_object';
+    /**
+     * Controls the output format:
+     * - `'text'`        — plain text (default)
+     * - `'json_object'` — unstructured JSON; you parse `response.content` yourself
+     * - `ZodType<T>`    — structured JSON matching the schema; parsed and validated result
+     *                     available in `response.data` as fully typed `T`
+     *
+     * @example structured output
+     * ```typescript
+     * import { z } from 'zod'
+     * const result = await sdk.generate({
+     *   messages,
+     *   response_format: z.object({ sentiment: z.string(), score: z.number() }),
+     * })
+     * result.data?.sentiment // typed as string | undefined (data is optional)
+     * ```
+     */
+    response_format?: 'text' | 'json_object' | import('zod').ZodType<T>;
     stream?: boolean;
     tools?: ToolCallRequest[];
     requestTools?: RequestToolDefinition[];
@@ -139,8 +156,14 @@ export interface Usage {
     total_tokens: number;
 }
 
-export interface CompletionResponse {
+export interface CompletionResponse<T = unknown> {
     content: string | null;  // null if only tool calls
+    /**
+     * Parsed and validated structured output.
+     * Only present when `response_format` is a ZodType.
+     * TypeScript type is inferred from the schema via the generic on `generate<T>()`.
+     */
+    data?: T;
     usage?: Usage;
     /** Detailed breakdown of token usage when executed in agent/workflow mode */
     usage_details?: {
diff --git a/packages/toolpack-sdk/tests/integration/mcp-server.test.ts b/packages/toolpack-sdk/tests/integration/mcp-server.test.ts
new file mode 100644
index 0000000..b0e4bc0
--- /dev/null
+++ b/packages/toolpack-sdk/tests/integration/mcp-server.test.ts
@@ -0,0 +1,316 @@
+/**
+ * MCP Server — HTTP integration tests
+ *
+ * Spins up a real HTTP MCP server on port 0 (OS-assigned), sends real JSON-RPC
+ * requests via fetch, and asserts on the responses. No mocking.
+ *
+ * Run with:  RUN_INTEGRATION_TESTS=1 npx vitest run tests/integration/mcp-server.test.ts
+ *
+ * ANTHROPIC_API_KEY is optional — startServer() falls back to 'test-key'. The
+ * server only routes tool *definitions* (not LLM calls) for these tests, so the
+ * key just needs to be non-empty for Toolpack.init() to succeed.
+ */
+
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import { Toolpack } from '../../src/index.js';
+import type { McpServerHandle } from '../../src/mcp/server-types.js';
+
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * MCP client session. The Streamable HTTP transport is stateful:
+ * - First call must be `initialize` to get a session ID
+ * - All subsequent calls include the `mcp-session-id` header
+ */
+class McpSession {
+    private sessionId?: string;
+    constructor(private url: string, private authHeaders: Record<string, string> = {}) {}
+
+    async initialize() {
+        const res = await this.raw('initialize', {
+            protocolVersion: '2024-11-05',
+            capabilities: {},
+            clientInfo: { name: 'test-client', version: '1.0' },
+        });
+        this.sessionId = res.sessionId;
+        // Send initialized notification; cancel the unread reply so its connection is released.
+        const ack = await fetch(this.url, {
+            method: 'POST',
+            headers: this.headers(),
+            body: JSON.stringify({ jsonrpc: '2.0', method: 'notifications/initialized' }),
+        });
+        await ack.body?.cancel();
+        return res;
+    }
+
+    async call(method: string, params: Record<string, unknown> = {}) {
+        return this.raw(method, params);
+    }
+
+    private headers(): Record<string, string> {
+        return {
+            'Content-Type': 'application/json',
+            'Accept': 'application/json, text/event-stream',
+            ...(this.sessionId ? { 'mcp-session-id': this.sessionId } : {}),
+            ...this.authHeaders,
+        };
+    }
+
+    /** POST one JSON-RPC request (id 1) and return `{ status, body, sessionId }`. */
+    private async raw(method: string, params: Record<string, unknown>) {
+        const res = await fetch(this.url, {
+            method: 'POST',
+            headers: this.headers(),
+            body: JSON.stringify({ jsonrpc: '2.0', id: 1, method, params }),
+        });
+        if (res.status === 401 || res.status === 403) {
+            // Auth rejections are reported by status only; discard the body instead of leaving it unread.
+            await res.body?.cancel();
+            return { status: res.status, body: {}, sessionId: undefined };
+        }
+        const contentType = res.headers.get('content-type') ?? '';
+        const sessionId = res.headers.get('mcp-session-id') ?? undefined;
+        let body: Record<string, unknown>;
+        if (contentType.includes('text/event-stream')) {
+            const text = await res.text();
+            const dataLine = text.split('\n').find(l => l.startsWith('data:'));
+            body = dataLine ? JSON.parse(dataLine.slice(5).trim()) as Record<string, unknown> : {};
+        } else {
+            body = await res.json() as Record<string, unknown>;
+        }
+        return { status: res.status, body, sessionId };
+    }
+}
+
+/** Open a session and return it ready for tool calls. */
+async function openSession(url: string, authHeaders: Record<string, string> = {}) {
+    const session = new McpSession(url, authHeaders);
+    await session.initialize();
+    return session;
+}
+
+/** Raw fetch without session — for testing auth rejection before initialize. */
+async function rawPost(url: string, headers: Record<string, string> = {}) {
+    const res = await fetch(url, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json', 'Accept': 'application/json, text/event-stream', ...headers },
+        body: JSON.stringify({ jsonrpc: '2.0', id: 1, method: 'tools/list', params: {} }),
+    });
+    return { status: res.status };
+}
+
+async function startServer(overrides: Partial<Parameters<Toolpack['startMcpServer']>[0]> = {}) {
+    const searchMode = (overrides as Record<string, unknown>).searchMode === true; // only a literal `true` counts
+    const sdk = await Toolpack.init({
+        provider: 'anthropic',
+        tools: true,
+        apiKey: process.env.ANTHROPIC_API_KEY ?? 'test-key', // placeholder key when the env var is unset
+        // Enable tool search in the default mode when MCP server is in search mode
+        ...(searchMode ? { modeOverrides: { default: { toolSearch: { enabled: true } } } } : {}),
+    });
+    const handle = await sdk.startMcpServer({
+        transport: 'http',
+        port: 0, // default only: a `port` in `overrides` (spread below) replaces it
+        ...overrides,
+    } as Parameters<Toolpack['startMcpServer']>[0]);
+    const url = `http://localhost:${handle.port}`; // built from the port reported on the handle
+    return { handle, url, sdk }; // callers are responsible for calling handle.stop()
+}
+
+// ─── tests ────────────────────────────────────────────────────────────────────
+
+// Cross-platform path to a file that always exists and contains 'localhost'
+const HOSTS_FILE = process.platform === 'win32'
+    ? 'C:\\Windows\\System32\\drivers\\etc\\hosts'
+    : '/etc/hosts';
+
+// Opt-in: set RUN_INTEGRATION_TESTS=1 to run. Skipped in CI by default.
+describe.runIf(process.env.RUN_INTEGRATION_TESTS === '1')('MCP Server — HTTP integration', () => {
+    describe('tools/list', () => {
+        let handle: McpServerHandle;
+        let session: McpSession;
+
+        beforeAll(async () => {
+            const s = await startServer();
+            handle = s.handle;
+            session = await openSession(s.url);
+        });
+        afterAll(() => handle.stop());
+
+        it('returns 100+ tools', async () => {
+            const { status, body } = await session.call('tools/list');
+            expect(status).toBe(200);
+            const result = body.result as { tools: unknown[] };
+            expect(result.tools.length).toBeGreaterThan(100);
+        });
+
+        it('each tool has name, description, inputSchema', async () => {
+            const { body } = await session.call('tools/list');
+            const result = body.result as { tools: Record<string, unknown>[] };
+            for (const tool of result.tools.slice(0, 5)) {
+                expect(typeof tool.name).toBe('string');
+                expect(typeof tool.description).toBe('string');
+                expect(tool.inputSchema).toBeDefined();
+            }
+        });
+
+        it('toolCount matches tools/list length', async () => {
+            const { body } = await session.call('tools/list');
+            const result = body.result as { tools: unknown[] };
+            expect(handle.toolCount).toBe(result.tools.length);
+        });
+    });
+
+    describe('tools/call', () => {
+        let handle: McpServerHandle;
+        let session: McpSession;
+
+        beforeAll(async () => {
+            const s = await startServer();
+            handle = s.handle;
+            session = await openSession(s.url);
+        });
+        afterAll(() => handle.stop());
+
+        it('executes fs.read_file and returns real file content', async () => {
+            const { status, body } = await session.call('tools/call', {
+                name: 'fs.read_file',
+                arguments: { path: HOSTS_FILE },
+            });
+            expect(status).toBe(200);
+            const result = body.result as { content: Array<{ type: string; text: string }> };
+            expect(result.content[0]?.type).toBe('text');
+            expect(result.content[0]?.text).toContain('localhost');
+        });
+
+        it('returns isError:true for unknown tool', async () => {
+            const { body } = await session.call('tools/call', {
+                name: 'does.not.exist',
+                arguments: {},
+            });
+            const result = body.result as { isError: boolean };
+            expect(result.isError).toBe(true);
+        });
+
+        it('returns isError:true for tool execution error', async () => {
+            const { body } = await session.call('tools/call', {
+                name: 'fs.read_file',
+                arguments: { path: '/this/path/does/not/exist/ever' },
+            });
+            const result = body.result as { isError: boolean };
+            expect(result.isError).toBe(true);
+        });
+    });
+
+    describe('static auth', () => {
+        const TOKEN = 'integration-test-secret-token';
+        let handle: McpServerHandle;
+        let url: string;
+
+        beforeAll(async () => {
+            ({ handle, url } = await startServer({
+                auth: { mode: 'static', tokens: [TOKEN] },
+            }));
+        });
+        afterAll(() => handle.stop());
+
+        it('rejects request with no token — HTTP 401', async () => {
+            const { status } = await rawPost(url);
+            expect(status).toBe(401);
+        });
+
+        it('rejects request with wrong token — HTTP 401', async () => {
+            const { status } = await rawPost(url, { Authorization: 'Bearer wrong-token' });
+            expect(status).toBe(401);
+        });
+
+        it('accepts request with correct token and lists tools', async () => {
+            const session = await openSession(url, { Authorization: `Bearer ${TOKEN}` });
+            const { status, body } = await session.call('tools/list');
+            expect(status).toBe(200);
+            const result = body.result as { tools: unknown[] };
+            expect(result.tools.length).toBeGreaterThan(0);
+        });
+    });
+
+    describe('search mode', () => {
+        let handle: McpServerHandle;
+        let session: McpSession;
+
+        beforeAll(async () => {
+            const s = await startServer({ searchMode: true });
+            handle = s.handle;
+            session = await openSession(s.url);
+        });
+        afterAll(() => handle.stop());
+
+        it('tools/list returns only tool.search (+ always-loaded)', async () => {
+            const { body } = await session.call('tools/list');
+            const result = body.result as { tools: Array<{ name: string }> };
+            const names = result.tools.map(t => t.name);
+            expect(names).toContain('tool.search');
+            expect(names).not.toContain('fs.read_file');
+        });
+
+        it('tool.search returns a JSON response with found and tools fields', async () => {
+            const { status, body } = await session.call('tools/call', {
+                name: 'tool.search',
+                arguments: { query: 'git commit log' },
+            });
+            expect(status).toBe(200);
+            const result = body.result as { content: Array<{ text: string }> };
+            const text = result.content[0]?.text ?? '';
+            const parsed = JSON.parse(text);
+            expect(parsed).toHaveProperty('query');
+            expect(parsed).toHaveProperty('found');
+            expect(parsed).toHaveProperty('tools');
+        });
+
+        it('tool.search for git returns git tools', async () => {
+            const { body } = await session.call('tools/call', {
+                name: 'tool.search',
+                arguments: { query: 'git commit log' },
+            });
+            const result = body.result as { content: Array<{ text: string }> };
+            const text = result.content[0]?.text.toLowerCase() ?? '';
+            expect(text).toContain('git');
+        });
+    });
+
+    describe('expose config', () => {
+        let handle: McpServerHandle;
+        let session: McpSession;
+
+        beforeAll(async () => {
+            const s = await startServer({ expose: { categories: ['filesystem'] } });
+            handle = s.handle;
+            session = await openSession(s.url);
+        });
+        afterAll(() => handle.stop());
+
+        it('only exposes tools from the specified category', async () => {
+            const { body } = await session.call('tools/list');
+            const result = body.result as { tools: Array<{ name: string }> };
+            const names = result.tools.map(t => t.name);
+            expect(names.every(n => n.startsWith('fs.'))).toBe(true);
+            expect(names).not.toContain('git.commit');
+            expect(names).not.toContain('slack.chat.postMessage');
+        });
+    });
+
+    describe('port: 0 (OS-assigned port)', () => {
+        it('handle.port is a non-zero number', async () => {
+            const { handle } = await startServer();
+            try { expect(handle.port).toBeGreaterThan(0); }
+            finally { await handle.stop(); }
+        });
+        // stop() runs in `finally` so a failed assertion cannot leave a server listening.
+        it('two servers on port:0 get different ports', async () => {
+            const a = await startServer();
+            const b = await startServer();
+            try { expect(a.handle.port).not.toBe(b.handle.port); }
+            finally { await Promise.all([a.handle.stop(), b.handle.stop()]); }
+        });
+    });
+});
diff --git a/packages/toolpack-sdk/tests/unit/mcp-server-auth.test.ts b/packages/toolpack-sdk/tests/unit/mcp-server-auth.test.ts
new file mode 100644
index 0000000..526735c
--- /dev/null
+++ b/packages/toolpack-sdk/tests/unit/mcp-server-auth.test.ts
@@ -0,0 +1,249 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import type { IncomingMessage, ServerResponse } from 'node:http';
+import type { AuthInfo } from '@modelcontextprotocol/sdk/server/auth/types.js';
+
+// ─── jose mock ────────────────────────────────────────────────────────────────
+// We mock jose so JwtVerifier tests never make real network requests.
+
+vi.mock('jose', () => ({
+    createRemoteJWKSet: vi.fn().mockReturnValue('mock-jwks'),
+    jwtVerify: vi.fn(),
+}));
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+function makeReq(authHeader?: string): IncomingMessage & { auth?: AuthInfo } {
+    // Bare-bones request stub: carries only `headers`, with `authorization` set when a value is given.
+    const headers = authHeader ? { authorization: authHeader } : {};
+    return { headers } as unknown as IncomingMessage & { auth?: AuthInfo };
+}
+
+function makeRes() {
+    // Response stub that records writeHead()/end() input on `_written` for later assertions.
+    const captured = { statusCode: 200, headers: {} as Record<string, string>, body: '' };
+    const stub = {
+        writeHead: vi.fn((code: number, headers?: Record<string, string>) => {
+            captured.statusCode = code;
+            Object.assign(captured.headers, headers ?? {});
+            return stub;
+        }),
+        end: vi.fn((body?: string) => {
+            captured.body = body ?? '';
+            return stub;
+        }),
+        // Heuristic: reports "sent" once the status differs from 200 or a non-empty body was written.
+        get headersSent() {
+            return !(captured.statusCode === 200 && captured.body === '');
+        },
+        _written: captured,
+    };
+    return stub as unknown as ServerResponse & { _written: typeof captured };
+}
+
+// ─── Tests ────────────────────────────────────────────────────────────────────
+
+describe('buildVerifier', () => {
+    it('returns a verifier for each mode without throwing', async () => { // smoke test: construction only
+        const { buildVerifier } = await import('../../src/mcp/server-auth.js');
+        expect(buildVerifier({ mode: 'static', tokens: ['tok'] })).toBeDefined();
+        expect(buildVerifier({ mode: 'jwt', jwksUrl: 'https://example.com/.well-known/jwks.json' })).toBeDefined();
+        expect(buildVerifier({ mode: 'custom', verifyAccessToken: async () => ({ token: 't', clientId: 'c', scopes: [] }) })).toBeDefined();
+    });
+});
+
+describe('StaticBearerVerifier', () => {
+    beforeEach(() => vi.resetModules());
+
+    it('resolves with AuthInfo for a valid token', async () => {
+        const { buildVerifier } = await import('../../src/mcp/server-auth.js');
+        const verifier = buildVerifier({ mode: 'static', tokens: ['secret-token', 'other-token'] });
+        const info = await verifier.verifyAccessToken('secret-token');
+        expect(info.token).toBe('secret-token');
+        expect(info.clientId).toBe('static-client');
+        expect(info.scopes).toEqual([]);
+    });
+
+    it('throws for an invalid token', async () => {
+        const { buildVerifier } = await import('../../src/mcp/server-auth.js');
+        const verifier = buildVerifier({ mode: 'static', tokens: ['correct'] });
+        await expect(verifier.verifyAccessToken('wrong')).rejects.toThrow();
+    });
+
+    it('throws at construction when tokens array is empty', async () => {
+        const { buildVerifier } = await import('../../src/mcp/server-auth.js');
+        expect(() => buildVerifier({ mode: 'static', tokens: [] })).toThrow(/empty/i);
+    });
+
+    it('accepts any token from the allowlist', async () => {
+        const { buildVerifier } = await import('../../src/mcp/server-auth.js');
+        const verifier = buildVerifier({ mode: 'static', tokens: ['a', 'b', 'c'] });
+        await expect(verifier.verifyAccessToken('a')).resolves.toBeDefined();
+        await expect(verifier.verifyAccessToken('b')).resolves.toBeDefined();
+        await expect(verifier.verifyAccessToken('c')).resolves.toBeDefined();
+    });
+});
+
+describe('JwtVerifier', () => {
+    beforeEach(() => vi.resetModules());
+
+    async function getJwtVerifier(config = {}) { // `config` entries override the jwt defaults below
+        const serverAuth = await import('../../src/mcp/server-auth.js');
+        return serverAuth.buildVerifier({ mode: 'jwt', jwksUrl: 'https://example.com/.well-known/jwks.json', ...config });
+    }
+
+    it('resolves with AuthInfo for a valid JWT — scope string', async () => {
+        const { jwtVerify } = await import('jose');
+        vi.mocked(jwtVerify).mockResolvedValueOnce({
+            payload: { sub: 'user-123', scope: 'read write', exp: 9999999999 },
+            protectedHeader: { alg: 'RS256' },
+        } as never);
+        const verifier = await getJwtVerifier();
+        const info = await verifier.verifyAccessToken('jwt-token');
+        expect(info.clientId).toBe('user-123');
+        expect(info.scopes).toEqual(['read', 'write']);
+        expect(info.expiresAt).toBe(9999999999);
+    });
+
+    it('handles scp array claim (Okta / Azure AD)', async () => {
+        const { jwtVerify } = await import('jose');
+        vi.mocked(jwtVerify).mockResolvedValueOnce({
+            payload: { sub: 'user-456', scp: ['api:read', 'api:write'] },
+            protectedHeader: { alg: 'RS256' },
+        } as never);
+        const verifier = await getJwtVerifier();
+        const info = await verifier.verifyAccessToken('jwt-token');
+        expect(info.scopes).toEqual(['api:read', 'api:write']);
+    });
+
+    it('prefers client_id claim over sub for clientId', async () => {
+        const { jwtVerify } = await import('jose');
+        vi.mocked(jwtVerify).mockResolvedValueOnce({
+            payload: { sub: 'user-123', client_id: 'my-app', scope: '' },
+            protectedHeader: { alg: 'RS256' },
+        } as never);
+        const verifier = await getJwtVerifier();
+        const info = await verifier.verifyAccessToken('jwt-token');
+        expect(info.clientId).toBe('my-app');
+    });
+
+    it('falls back to "unknown" clientId when neither client_id nor sub present', async () => {
+        const { jwtVerify } = await import('jose');
+        vi.mocked(jwtVerify).mockResolvedValueOnce({
+            payload: { scope: '' },
+            protectedHeader: { alg: 'RS256' },
+        } as never);
+        const verifier = await getJwtVerifier();
+        const info = await verifier.verifyAccessToken('jwt-token');
+        expect(info.clientId).toBe('unknown');
+    });
+
+    it('propagates errors from jwtVerify (expired, invalid signature, etc.)', async () => {
+        const { jwtVerify } = await import('jose');
+        vi.mocked(jwtVerify).mockRejectedValueOnce(new Error('JWTExpired'));
+        const verifier = await getJwtVerifier();
+        await expect(verifier.verifyAccessToken('expired-jwt')).rejects.toThrow('JWTExpired');
+    });
+
+    it('returns empty scopes when no scope claim present', async () => {
+        const { jwtVerify } = await import('jose');
+        vi.mocked(jwtVerify).mockResolvedValueOnce({
+            payload: { sub: 'u', client_id: 'c' },
+            protectedHeader: { alg: 'RS256' },
+        } as never);
+        const verifier = await getJwtVerifier();
+        const info = await verifier.verifyAccessToken('jwt');
+        expect(info.scopes).toEqual([]);
+    });
+});
+
+describe('applyBearerAuth', () => {
+    beforeEach(() => vi.resetModules());
+
+    const mockVerifier = (result: 'ok' | 'throw') => ({ // stub verifier with a scripted outcome
+        verifyAccessToken: result === 'ok'
+            ? vi.fn().mockResolvedValue({ token: 'tok', clientId: 'c', scopes: ['read'] }) // 'ok': resolves with scopes ['read']
+            : vi.fn().mockRejectedValue(new Error('bad token')), // 'throw': rejects with 'bad token'
+    });
+
+    it('returns false and writes 401 when Authorization header is missing', async () => {
+        const { applyBearerAuth } = await import('../../src/mcp/server-auth.js');
+        const req = makeReq();
+        const res = makeRes();
+        const ok = await applyBearerAuth(req, res as unknown as ServerResponse, { mode: 'static', tokens: ['t'] }, mockVerifier('ok'));
+        expect(ok).toBe(false);
+        expect(res._written.statusCode).toBe(401);
+        expect(res._written.headers['WWW-Authenticate']).toBe('Bearer');
+    });
+
+    it('returns false and writes 401 when Authorization header is not Bearer', async () => {
+        const { applyBearerAuth } = await import('../../src/mcp/server-auth.js');
+        const req = makeReq('Basic dXNlcjpwYXNz');
+        const res = makeRes();
+        const ok = await applyBearerAuth(req, res as unknown as ServerResponse, { mode: 'static', tokens: ['t'] }, mockVerifier('ok'));
+        expect(ok).toBe(false);
+        expect(res._written.statusCode).toBe(401);
+    });
+
+    it('returns false and writes 401 when verifier throws', async () => {
+        const { applyBearerAuth } = await import('../../src/mcp/server-auth.js');
+        const req = makeReq('Bearer invalid-token');
+        const res = makeRes();
+        const ok = await applyBearerAuth(req, res as unknown as ServerResponse, { mode: 'static', tokens: ['correct'] }, mockVerifier('throw'));
+        expect(ok).toBe(false);
+        expect(res._written.statusCode).toBe(401);
+    });
+
+    it('returns true and sets req.auth when token is valid', async () => {
+        const { applyBearerAuth } = await import('../../src/mcp/server-auth.js');
+        const req = makeReq('Bearer valid-token');
+        const res = makeRes();
+        const ok = await applyBearerAuth(req, res as unknown as ServerResponse, { mode: 'static', tokens: ['valid-token'] }, mockVerifier('ok'));
+        expect(ok).toBe(true);
+        expect(req.auth).toBeDefined();
+        expect(req.auth?.clientId).toBe('c');
+    });
+
+    it('returns false and writes 403 when required scope is missing', async () => {
+        const { applyBearerAuth } = await import('../../src/mcp/server-auth.js');
+        const req = makeReq('Bearer tok');
+        const res = makeRes();
+        // verifier returns scopes: ['read'], but we require 'write'
+        // Use mode: 'custom' — it properly declares requiredScopes in its type
+        const ok = await applyBearerAuth(
+            req,
+            res as unknown as ServerResponse,
+            { mode: 'custom', verifyAccessToken: async () => ({ token: 'tok', clientId: 'c', scopes: ['read'] }), requiredScopes: ['write'] },
+            mockVerifier('ok'),
+        );
+        expect(ok).toBe(false);
+        expect(res._written.statusCode).toBe(403);
+        expect(res._written.body).toContain('write');
+    });
+
+    it('passes when token has all required scopes', async () => {
+        const { applyBearerAuth } = await import('../../src/mcp/server-auth.js');
+        const req = makeReq('Bearer tok');
+        const res = makeRes();
+        // verifier returns scopes: ['read'], requiring only 'read'
+        const ok = await applyBearerAuth(
+            req,
+            res as unknown as ServerResponse,
+            { mode: 'custom', verifyAccessToken: async () => ({ token: 'tok', clientId: 'c', scopes: ['read'] }), requiredScopes: ['read'] },
+            mockVerifier('ok'),
+        );
+        expect(ok).toBe(true);
+    });
+
+    it('passes when requiredScopes is empty', async () => {
+        const { applyBearerAuth } = await import('../../src/mcp/server-auth.js');
+        const req = makeReq('Bearer tok');
+        const res = makeRes();
+        const ok = await applyBearerAuth(
+            req,
+            res as unknown as ServerResponse,
+            { mode: 'custom', verifyAccessToken: async () => ({ token: 'tok', clientId: 'c', scopes: [] }), requiredScopes: [] },
+            mockVerifier('ok'),
+        );
+        expect(ok).toBe(true);
+    });
+});
diff --git a/packages/toolpack-sdk/tests/unit/mcp-server.test.ts b/packages/toolpack-sdk/tests/unit/mcp-server.test.ts
new file mode 100644
index 0000000..be49ce8
--- /dev/null
+++ b/packages/toolpack-sdk/tests/unit/mcp-server.test.ts
@@ -0,0 +1,609 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { ToolRegistry } from '../../src/tools/registry.js';
+import type { ToolDefinition } from '../../src/tools/types.js';
+import type { ToolpackMcpServerConfig } from '../../src/mcp/server-types.js';
+
+// ─── MCP SDK mocks ────────────────────────────────────────────────────────────
+// We mock the entire SDK so no real transport (stdin/stdout, HTTP) is created.
+// The fake Server captures setRequestHandler calls so we can invoke them directly.
+
+type HandlerFn = (req: { params: Record<string, unknown> }) => Promise<unknown>;
+
+// Captured request handlers, keyed by method; replaced with a new Map whenever FakeServer is constructed
+let capturedHandlers: Map<string, HandlerFn>;
+
+vi.mock('@modelcontextprotocol/sdk/server/index.js', () => ({
+    // Stand-in for the SDK Server: it stores request handlers instead of serving requests.
+    Server: class FakeServer {
+        connect = vi.fn().mockResolvedValue(undefined);
+        close = vi.fn().mockResolvedValue(undefined);
+        constructor() {
+            // Every construction swaps in a new, empty handler map.
+            capturedHandlers = new Map();
+        }
+
+        // Files the handler under the schema's method literal, or String(schema) when absent.
+        setRequestHandler(schema: { shape: { method: { _def: { value: string } } } }, handler: HandlerFn) {
+            const methodName = schema?.shape?.method?._def?.value ?? String(schema);
+            capturedHandlers.set(methodName, handler);
+        }
+    },
+}));
+
+vi.mock('@modelcontextprotocol/sdk/server/stdio.js', () => ({
+    StdioServerTransport: class FakeStdio {},
+}));
+
+vi.mock('@modelcontextprotocol/sdk/server/streamableHttp.js', () => ({
+    StreamableHTTPServerTransport: class FakeHttp {
+        handleRequest = vi.fn();
+    },
+}));
+
+vi.mock('@modelcontextprotocol/sdk/types.js', () => ({
+    ListToolsRequestSchema: { shape: { method: { _def: { value: 'tools/list' } } } },
+    CallToolRequestSchema: { shape: { method: { _def: { value: 'tools/call' } } } },
+}));
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+function makeTool(overrides: Partial<ToolDefinition> = {}): ToolDefinition {
+    // Default fixture: a 'test'-category tool taking one required string `path`
+    // whose execute() mock resolves to 'ok'.
+    const defaults: ToolDefinition = {
+        name: 'test.tool',
+        displayName: 'Test Tool',
+        description: 'A test tool',
+        category: 'test',
+        parameters: { type: 'object', properties: { path: { type: 'string' } }, required: ['path'] },
+        execute: vi.fn().mockResolvedValue('ok'),
+    };
+
+    // Spread last so any caller-supplied field replaces its default.
+    return { ...defaults, ...overrides };
+}
+
+function makeRegistry(tools: ToolDefinition[] = []): ToolRegistry {
+    const registry = new ToolRegistry(); // new instance on every call
+    tools.forEach((tool) => { registry.register(tool); }); // registered in the order given
+    return registry;
+}
+
+async function callList() { // invokes the captured 'tools/list' handler with empty params
+    const handler = capturedHandlers.get('tools/list');
+    if (!handler) throw new Error('tools/list handler not registered'); // nothing was captured under this key
+    return handler({ params: {} }) as Promise<{ tools: { name: string; description: string; inputSchema: unknown; annotations?: Record<string, unknown> }[] }>;
+}
+
+async function callTool(name: string, args: Record<string, unknown> = {}) { // invokes the captured 'tools/call' handler
+    const handler = capturedHandlers.get('tools/call');
+    if (!handler) throw new Error('tools/call handler not registered'); // nothing was captured under this key
+    return handler({ params: { name, arguments: args } }) as Promise<{ // `args` is forwarded as params.arguments
+        content: { type: string; text: string }[];
+        isError: boolean;
+    }>;
+}
+
+// ─── Tests ────────────────────────────────────────────────────────────────────
+
+describe('startMcpServer — unit', () => {
+    let startMcpServer: (
+        registry: ToolRegistry,
+        config: ToolpackMcpServerConfig,
+        searchFn?: (args: Record<string, unknown>) => string,
+    ) => Promise<unknown>;
+
+    beforeEach(async () => {
+        vi.resetModules();
+        ({ startMcpServer } = await import('../../src/mcp/server.js'));
+    });
+
+    // ── Search mode ───────────────────────────────────────────────────────────
+
+    describe('search mode', () => {
+        const threeTools = () => [ // three tools, each in a different category; new array per call
+            makeTool({ name: 'fs.read', category: 'filesystem' }),
+            makeTool({ name: 'slack.post', category: 'slack' }),
+            makeTool({ name: 'gh.pr', category: 'github' }),
+        ];
+
+        it('tools/list returns tool.search as first entry when searchMode is true', async () => {
+            const registry = makeRegistry(threeTools());
+            await startMcpServer(registry, { transport: 'stdio', searchMode: true });
+            const result = await callList();
+            expect(result.tools[0].name).toBe('tool.search');
+        });
+
+        it('tools/list does NOT include non-always-loaded tools in search mode', async () => {
+            const registry = makeRegistry(threeTools());
+            await startMcpServer(registry, { transport: 'stdio', searchMode: true });
+            const result = await callList();
+            const names = result.tools.map(t => t.name);
+            expect(names).not.toContain('fs.read');
+            expect(names).not.toContain('slack.post');
+            expect(names).not.toContain('gh.pr');
+        });
+
+        it('tools/list includes always-loaded tools alongside tool.search', async () => {
+            const registry = makeRegistry(threeTools());
+            registry.setConfig({
+                enabled: true,
+                autoExecute: true,
+                maxToolRounds: 5,
+                toolChoicePolicy: 'auto',
+                resultMaxChars: 20_000,
+                enabledTools: [],
+                enabledToolCategories: [],
+                toolSearch: {
+                    enabled: true,
+                    alwaysLoadedTools: ['fs.read'],
+                    alwaysLoadedCategories: [],
+                    searchResultLimit: 5,
+                    cacheDiscoveredTools: true,
+                },
+            });
+            await startMcpServer(registry, { transport: 'stdio', searchMode: true });
+            const result = await callList();
+            const names = result.tools.map(t => t.name);
+            expect(names).toContain('tool.search');
+            expect(names).toContain('fs.read');
+            expect(names).not.toContain('slack.post');
+        });
+
+        it('tool.search entry has readOnlyHint annotation', async () => {
+            const registry = makeRegistry(threeTools());
+            await startMcpServer(registry, { transport: 'stdio', searchMode: true });
+            const result = await callList();
+            expect(result.tools[0].annotations).toEqual({ readOnlyHint: true });
+        });
+
+        it('tools/list returns all tools when searchMode is false (default)', async () => {
+            const registry = makeRegistry(threeTools());
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callList();
+            const names = result.tools.map(t => t.name);
+            expect(names).not.toContain('tool.search');
+            expect(names).toContain('fs.read');
+            expect(names).toContain('slack.post');
+            expect(names).toContain('gh.pr');
+        });
+
+        it('tools/call for tool.search invokes searchFn and returns result', async () => {
+            const registry = makeRegistry(threeTools());
+            const searchFn = vi.fn().mockReturnValue(JSON.stringify({ found: 1, tools: [{ name: 'fs.read' }] }));
+            await startMcpServer(registry, { transport: 'stdio', searchMode: true }, searchFn);
+            const result = await callTool('tool.search', { query: 'read file' });
+            expect(searchFn).toHaveBeenCalledWith({ query: 'read file' });
+            expect(result.isError).toBe(false);
+            expect(result.content[0].text).toContain('fs.read');
+        });
+
+        it('tools/call for tool.search returns isError when searchFn throws', async () => {
+            const registry = makeRegistry(threeTools());
+            const searchFn = vi.fn().mockImplementation(() => { throw new Error('search failed'); });
+            await startMcpServer(registry, { transport: 'stdio', searchMode: true }, searchFn);
+            const result = await callTool('tool.search', { query: 'whatever' });
+            expect(result.isError).toBe(true);
+            expect(result.content[0].text).toContain('search failed');
+        });
+
+        it('tools/call for tool.search returns isError when searchMode is true but searchFn missing', async () => {
+            const registry = makeRegistry(threeTools());
+            await startMcpServer(registry, { transport: 'stdio', searchMode: true }); // no searchFn
+            const result = await callTool('tool.search', { query: 'test' });
+            expect(result.isError).toBe(true);
+        });
+
+        it('tools/call for tool.search falls through to "not found" when searchMode is false', async () => {
+            const registry = makeRegistry(threeTools());
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callTool('tool.search', { query: 'test' });
+            expect(result.isError).toBe(true);
+            expect(result.content[0].text).toContain('tool.search');
+        });
+    });
+
+    // ── Tool annotations ──────────────────────────────────────────────────────
+
+    describe('tool annotations', () => {
+        it('uses explicit annotations when set on the tool', async () => {
+            const registry = makeRegistry([makeTool({
+                name: 'x',
+                annotations: { readOnlyHint: true, openWorldHint: false },
+            })]);
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callList();
+            expect(result.tools[0].annotations).toEqual({ readOnlyHint: true, openWorldHint: false });
+        });
+
+        it('derives { destructiveHint: true } when confirmation is set and no explicit annotations', async () => {
+            const registry = makeRegistry([makeTool({
+                name: 'x',
+                confirmation: { level: 'high', reason: 'This will delete.' },
+            })]);
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callList();
+            expect(result.tools[0].annotations).toEqual({ destructiveHint: true });
+        });
+
+        it('derives { destructiveHint: true } for confirmation.level medium as well', async () => {
+            const registry = makeRegistry([makeTool({
+                name: 'x',
+                confirmation: { level: 'medium', reason: 'This will modify.' },
+            })]);
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callList();
+            expect(result.tools[0].annotations).toEqual({ destructiveHint: true });
+        });
+
+        it('omits annotations entirely when neither annotations nor confirmation is set', async () => {
+            // MCP spec defaults apply: destructiveHint=true, openWorldHint=true, readOnlyHint=false.
+            // We must NOT claim readOnlyHint:true for tools we have no signal about
+            // (e.g. slack.post, create-dir — not read-only but no confirmation set).
+            const registry = makeRegistry([makeTool({ name: 'x' })]);
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callList();
+            expect(result.tools[0].annotations).toBeUndefined();
+        });
+
+        it('explicit annotations take priority over confirmation', async () => {
+            // Tool has both — explicit annotations must win
+            const registry = makeRegistry([makeTool({
+                name: 'x',
+                confirmation: { level: 'high', reason: 'danger' },
+                annotations: { destructiveHint: false, idempotentHint: true },
+            })]);
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callList();
+            expect(result.tools[0].annotations).toEqual({ destructiveHint: false, idempotentHint: true });
+        });
+    });
+
+    // ── Schema translation ────────────────────────────────────────────────────
+
+    describe('schema translation', () => {
+        it('maps parameters → inputSchema in tools/list response', async () => {
+            const params = { type: 'object', properties: { path: { type: 'string' } }, required: ['path'] };
+            const registry = makeRegistry([makeTool({ name: 'fs.read_file', parameters: params })]);
+
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callList();
+
+            expect(result.tools).toHaveLength(1);
+            expect(result.tools[0].name).toBe('fs.read_file');
+            expect(result.tools[0].inputSchema).toEqual(params);
+        });
+
+        it('falls back to empty-object inputSchema when parameters is undefined', async () => {
+            const tool = makeTool({ name: 'no.params' });
+            // @ts-expect-error intentional: testing undefined parameters path
+            delete tool.parameters;
+            const registry = makeRegistry([tool]);
+
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callList();
+
+            expect(result.tools[0].inputSchema).toEqual({ type: 'object', properties: {} });
+        });
+
+        it('preserves tool description in tools/list', async () => {
+            const registry = makeRegistry([makeTool({ name: 'x', description: 'does something' })]);
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callList();
+            expect(result.tools[0].description).toBe('does something');
+        });
+    });
+
+    // ── Agent exposure ────────────────────────────────────────────────────────
+
+    describe('agent exposure', () => {
+        const makeAgentDef = (name: string, overrides: Partial<{ description: string; inputSchema: Record<string, unknown>; invoke: () => Promise<string> }> = {}) => ({
+            name,
+            description: overrides.description ?? `${name} agent`, // default: "<name> agent"
+            ...(overrides.inputSchema !== undefined && { inputSchema: overrides.inputSchema }), // key omitted unless provided
+            invoke: overrides.invoke ?? vi.fn().mockResolvedValue(`${name} result`), // default mock resolves "<name> result"
+        });
+
+        it('tools/list includes agent entries as agent.<name>', async () => {
+            const registry = makeRegistry([makeTool({ name: 'fs.read' })]);
+            await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('pr_reviewer')] });
+            const result = await callList();
+            const names = result.tools.map(t => t.name);
+            expect(names).toContain('agent.pr_reviewer');
+        });
+
+        it('agents coexist with regular tools in tools/list', async () => {
+            const registry = makeRegistry([makeTool({ name: 'fs.read' })]);
+            await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('pr_reviewer')] });
+            const result = await callList();
+            const names = result.tools.map(t => t.name);
+            expect(names).toContain('fs.read');
+            expect(names).toContain('agent.pr_reviewer');
+        });
+
+        it('agent entry uses provided inputSchema', async () => {
+            const schema = { type: 'object', properties: { pr_url: { type: 'string' } }, required: ['pr_url'] };
+            const registry = makeRegistry([]);
+            await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('x', { inputSchema: schema })] });
+            const result = await callList();
+            expect(result.tools[0].inputSchema).toEqual(schema);
+        });
+
+        it('agent entry defaults to empty-object inputSchema when not provided', async () => {
+            const registry = makeRegistry([]);
+            await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('x')] });
+            const result = await callList();
+            expect(result.tools[0].inputSchema).toEqual({ type: 'object', properties: {} });
+        });
+
+        it('tools/call invokes the agent and returns its output', async () => {
+            const invoke = vi.fn().mockResolvedValue('LGTM!');
+            const registry = makeRegistry([]);
+            await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('pr_reviewer', { invoke })] });
+            const result = await callTool('agent.pr_reviewer', { pr_url: 'https://github.com/...' });
+            expect(invoke).toHaveBeenCalledWith({ pr_url: 'https://github.com/...' });
+            expect(result.isError).toBe(false);
+            expect(result.content[0].text).toBe('LGTM!');
+        });
+
+        it('tools/call returns isError when invoke() throws', async () => {
+            const invoke = vi.fn().mockRejectedValue(new Error('agent failed'));
+            const registry = makeRegistry([]);
+            await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('x', { invoke })] });
+            const result = await callTool('agent.x', {});
+            expect(result.isError).toBe(true);
+            expect(result.content[0].text).toContain('agent failed');
+        });
+
+        it('tools/call returns isError for unknown agent name', async () => {
+            const registry = makeRegistry([]);
+            await startMcpServer(registry, { transport: 'stdio', agents: [makeAgentDef('pr_reviewer')] });
+            const result = await callTool('agent.unknown', {});
+            expect(result.isError).toBe(true);
+            expect(result.content[0].text).toContain('unknown');
+        });
+
+        it('agents appear in tools/list even when searchMode is true', async () => {
+            // Agents are not in the ToolRegistry — tool.search cannot find them.
+            // They must always be listed explicitly.
+            const registry = makeRegistry([makeTool({ name: 'fs.read' })]);
+            await startMcpServer(registry, {
+                transport: 'stdio',
+                searchMode: true,
+                agents: [makeAgentDef('pr_reviewer')],
+            });
+            const result = await callList();
+            const names = result.tools.map(t => t.name);
+            expect(names).toContain('tool.search');
+            expect(names).toContain('agent.pr_reviewer');
+            expect(names).not.toContain('fs.read'); // regular tools deferred
+        });
+
+        it('tools/list has no agent entries when agents array is empty', async () => {
+            const registry = makeRegistry([makeTool({ name: 'fs.read' })]);
+            await startMcpServer(registry, { transport: 'stdio', agents: [] });
+            const result = await callList();
+            expect(result.tools.every(t => !t.name.startsWith('agent.'))).toBe(true);
+        });
+
+        it('multiple agents all appear in tools/list', async () => {
+            const registry = makeRegistry([]);
+            await startMcpServer(registry, {
+                transport: 'stdio',
+                agents: [makeAgentDef('pr_reviewer'), makeAgentDef('code_analyst')],
+            });
+            const result = await callList();
+            const names = result.tools.map(t => t.name);
+            expect(names).toContain('agent.pr_reviewer');
+            expect(names).toContain('agent.code_analyst');
+        });
+    });
+
+    // ── Tool filtering ────────────────────────────────────────────────────────
+
+    describe('tool filtering', () => {
+        const threeTools = () => [ // three tools, each in a different category; new array per call
+            makeTool({ name: 'fs.read', category: 'filesystem' }),
+            makeTool({ name: 'slack.post', category: 'slack' }),
+            makeTool({ name: 'gh.pr', category: 'github' }),
+        ];
+
+        it('exposes all enabled tools when expose is omitted', async () => {
+            const registry = makeRegistry(threeTools());
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callList();
+            expect(result.tools).toHaveLength(3);
+        });
+
+        it('filters by categories when expose.categories is set', async () => {
+            const registry = makeRegistry(threeTools());
+            await startMcpServer(registry, { transport: 'stdio', expose: { categories: ['filesystem', 'slack'] } });
+            const result = await callList();
+            expect(result.tools.map(t => t.name).sort()).toEqual(['fs.read', 'slack.post']);
+        });
+
+        it('filters by exact names when expose.tools is set', async () => {
+            const registry = makeRegistry(threeTools());
+            await startMcpServer(registry, { transport: 'stdio', expose: { tools: ['fs.read', 'gh.pr'] } });
+            const result = await callList();
+            expect(result.tools.map(t => t.name).sort()).toEqual(['fs.read', 'gh.pr']);
+        });
+
+        it('falls back to all enabled tools when expose.categories is an empty array', async () => {
+            const registry = makeRegistry(threeTools());
+            await startMcpServer(registry, { transport: 'stdio', expose: { categories: [] } });
+            const result = await callList();
+            expect(result.tools).toHaveLength(3);
+        });
+
+        it('falls back to all enabled tools when expose.tools is an empty array', async () => {
+            const registry = makeRegistry(threeTools());
+            await startMcpServer(registry, { transport: 'stdio', expose: { tools: [] } });
+            const result = await callList();
+            expect(result.tools).toHaveLength(3);
+        });
+    });
+
+    // ── Result translation ────────────────────────────────────────────────────
+
+    describe('result translation', () => { // how a tool's execute() outcome is mapped onto the tools/call result
+        it('wraps a string result in MCP content', async () => {
+            const registry = makeRegistry([makeTool({ execute: vi.fn().mockResolvedValue('hello world') })]);
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callTool('test.tool'); // NOTE(review): 'test.tool' is presumably makeTool's default name — confirm in the helper
+            expect(result.content).toEqual([{ type: 'text', text: 'hello world' }]); // exactly one text item carrying the raw string
+            expect(result.isError).toBe(false);
+        });
+
+        it('JSON-stringifies an object result', async () => {
+            const registry = makeRegistry([makeTool({ execute: vi.fn().mockResolvedValue({ files: ['a.ts', 'b.ts'] }) })]);
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callTool('test.tool');
+            expect(result.content[0].type).toBe('text');
+            expect(JSON.parse(result.content[0].text)).toEqual({ files: ['a.ts', 'b.ts'] }); // text must parse back to the original object
+            expect(result.isError).toBe(false);
+        });
+
+        it('returns isError: true when execute() throws', async () => {
+            const registry = makeRegistry([makeTool({ execute: vi.fn().mockRejectedValue(new Error('disk full')) })]);
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callTool('test.tool'); // the call itself resolves; the failure is reported in the result
+            expect(result.isError).toBe(true);
+            expect(result.content[0].text).toContain('disk full'); // original error message is preserved in the text
+        });
+
+        it('includes the tool name in the error message when execute() throws', async () => {
+            const registry = makeRegistry([makeTool({ name: 'my.tool', execute: vi.fn().mockRejectedValue(new Error('boom')) })]);
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callTool('my.tool');
+            expect(result.content[0].text).toContain('my.tool');
+        });
+    });
+
+    // ── Tool not found ────────────────────────────────────────────────────────
+
+    describe('tool not found', () => { // tools/call for names that are unknown, filtered out by expose, or disabled in the registry
+        it('returns isError: true for an unknown tool name without throwing', async () => {
+            const registry = makeRegistry([makeTool({ name: 'real.tool' })]);
+            await startMcpServer(registry, { transport: 'stdio' });
+            const result = await callTool('ghost.tool'); // never registered; the call is expected to resolve, not reject
+            expect(result.isError).toBe(true);
+            expect(result.content[0].text).toContain('ghost.tool'); // error text names the requested tool
+        });
+
+        it('returns isError: true for a tool registered but excluded by expose.categories', async () => {
+            const tools = [
+                makeTool({ name: 'fs.read', category: 'filesystem' }),
+                makeTool({ name: 'slack.post', category: 'slack' }),
+            ];
+            const registry = makeRegistry(tools);
+            await startMcpServer(registry, { transport: 'stdio', expose: { categories: ['filesystem'] } });
+
+            // slack.post is registered but not in the exposed category
+            const result = await callTool('slack.post');
+            expect(result.isError).toBe(true);
+            expect(result.content[0].text).toContain('slack.post');
+        });
+
+        it('returns isError: true for a tool disabled in registry config (no expose filter set)', async () => {
+            // Simulates: toolpack.config.json has enabledToolCategories: ['filesystem']
+            // but the MCP server is started with no expose filter.
+            // tools/call must not execute a tool outside the enabled set.
+            const tools = [
+                makeTool({ name: 'fs.read', category: 'filesystem' }),
+                makeTool({ name: 'slack.post', category: 'slack' }),
+            ];
+            const registry = makeRegistry(tools);
+            // Restrict the registry to only the 'filesystem' category
+            registry.setConfig({
+                enabled: true,
+                autoExecute: true,
+                maxToolRounds: 5,
+                toolChoicePolicy: 'auto',
+                resultMaxChars: 20_000,
+                enabledTools: [],
+                enabledToolCategories: ['filesystem'],
+            });
+
+            await startMcpServer(registry, { transport: 'stdio' }); // no expose filter
+            const result = await callTool('slack.post'); // registered, but its category is not enabled
+            expect(result.isError).toBe(true);
+            expect(result.content[0].text).toContain('slack.post');
+        });
+
+        it('returns isError: true for a tool disabled in registry config when expose arrays are empty', async () => {
+            // expose = { categories: [] } falls back to getEnabled() in resolveTools.
+            // resolveToolByName must do the same — not bypass the registry filter.
+            const tools = [
+                makeTool({ name: 'fs.read', category: 'filesystem' }),
+                makeTool({ name: 'slack.post', category: 'slack' }),
+            ];
+            const registry = makeRegistry(tools);
+            registry.setConfig({ // same restriction as the previous test: only 'filesystem' is enabled
+                enabled: true,
+                autoExecute: true,
+                maxToolRounds: 5,
+                toolChoicePolicy: 'auto',
+                resultMaxChars: 20_000,
+                enabledTools: [],
+                enabledToolCategories: ['filesystem'],
+            });
+
+            // Empty array → falls back to registry enabled filter
+            await startMcpServer(registry, { transport: 'stdio', expose: { categories: [] } });
+            const result = await callTool('slack.post');
+            expect(result.isError).toBe(true);
+            expect(result.content[0].text).toContain('slack.post');
+        });
+
+        it('returns isError: true for a tool registered but excluded by expose.tools', async () => {
+            const tools = [
+                makeTool({ name: 'fs.read', category: 'filesystem' }),
+                makeTool({ name: 'fs.write', category: 'filesystem' }), // same category as fs.read, so only the name list can exclude it
+            ];
+            const registry = makeRegistry(tools);
+            await startMcpServer(registry, { transport: 'stdio', expose: { tools: ['fs.read'] } });
+
+            // fs.write is registered but not in the explicit allow-list
+            const result = await callTool('fs.write');
+            expect(result.isError).toBe(true);
+            expect(result.content[0].text).toContain('fs.write');
+        });
+    });
+
+    // ── Unknown transport ─────────────────────────────────────────────────────
+
+    describe('unknown transport', () => { // startMcpServer must reject a transport value it does not support
+        it('throws a descriptive error for an unsupported transport value', async () => {
+            const registry = makeRegistry([makeTool()]);
+            const config = { transport: 'grpc' } as unknown as ToolpackMcpServerConfig; // double cast to get an invalid transport past the type checker
+            await expect(startMcpServer(registry, config)).rejects.toThrow(/grpc/); // the error message must mention the offending value
+        });
+    });
+
+    // ── McpServerHandle ───────────────────────────────────────────────────────
+
+    describe('McpServerHandle', () => { // checks on the value startMcpServer resolves to
+        it('toolCount reflects the number of tools currently exposed', async () => {
+            const tools = [
+                makeTool({ name: 'a', category: 'x' }),
+                makeTool({ name: 'b', category: 'x' }),
+                makeTool({ name: 'c', category: 'y' }),
+            ];
+            const registry = makeRegistry(tools);
+            const handle = await startMcpServer(registry, {
+                transport: 'stdio',
+                expose: { categories: ['x'] },
+            }) as { toolCount: number; stop(): Promise<void> }; // NOTE(review): cast narrows to the members used here — confirm against the declared handle type
+            expect(handle.toolCount).toBe(2); // only 'a' and 'b' are in the exposed category 'x'
+        });
+
+        it('uses custom serverName and serverVersion when provided', async () => {
+            // NOTE(review): only asserts that startup resolves to a defined value; the serverName/serverVersion values are not checked.
+            const registry = makeRegistry([makeTool()]);
+            await expect(
+                startMcpServer(registry, { transport: 'stdio', serverName: 'My Server', serverVersion: '3.0.0' })
+            ).resolves.toBeDefined();
+        });
+    });
+});

From 426a452d89bbf2434100f3f3236bf20851ee40f4 Mon Sep 17 00:00:00 2001
From: sajeerzeji <sajeerzeji44@gmail.com>
Date: Mon, 8 Jun 2026 00:05:04 +0530
Subject: [PATCH 2/5] Fix Windows CI: use platform-specific commands in run-blocking tests

---
 .../tools/run-blocking/index.test.ts          | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
index db5ff33..4816e0e 100644
--- a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
+++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
@@ -27,10 +27,12 @@ describe('exec.run_blocking tool', () => {
     });
 
     it('should wait for slow commands to complete naturally', async () => {
+        const isWindows = process.platform === 'win32';
+        const command = isWindows
+            ? 'ping -n 2 127.0.0.1 > nul && echo done'
+            : 'sleep 1 && echo done';
         const start = Date.now();
-        const result = JSON.parse(await execRunBlockingTool.execute({
-            command: 'sleep 1 && echo done',
-        }));
+        const result = JSON.parse(await execRunBlockingTool.execute({ command }));
         const elapsed = Date.now() - start;
         expect(result.exitCode).toBe(0);
         expect(result.stdout.trim()).toBe('done');
@@ -60,11 +62,12 @@ describe('exec.run_blocking tool', () => {
     });
 
     it('should accept a cwd argument', async () => {
-        const result = JSON.parse(await execRunBlockingTool.execute({
-            command: 'pwd',
-            cwd: '/tmp',
-        }));
+        const isWindows = process.platform === 'win32';
+        const cwd = isWindows ? process.env.TEMP ?? 'C:\\Windows\\Temp' : '/tmp';
+        const command = isWindows ? 'cd' : 'pwd';
+        const expectedSubstring = isWindows ? 'temp' : 'tmp';
+        const result = JSON.parse(await execRunBlockingTool.execute({ command, cwd }));
         expect(result.exitCode).toBe(0);
-        expect(result.stdout.trim()).toContain('tmp');
+        expect(result.stdout.trim().toLowerCase()).toContain(expectedSubstring);
     });
 });

From 40920c2ffbd7962a08d50b9190025ae3059e7e0c Mon Sep 17 00:00:00 2001
From: sajeerzeji <sajeerzeji44@gmail.com>
Date: Mon, 8 Jun 2026 00:11:56 +0530
Subject: [PATCH 3/5] Fix Windows CI: make run-blocking tests portable via node -e

---
 .../tools/run-blocking/index.test.ts          | 21 +++++++++----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
index 4816e0e..b336797 100644
--- a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
+++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
@@ -1,4 +1,5 @@
 import { describe, it, expect } from 'vitest';
+import { tmpdir } from 'node:os';
 import { execRunBlockingTool } from './index.js';
 
 describe('exec.run_blocking tool', () => {
@@ -27,12 +28,10 @@ describe('exec.run_blocking tool', () => {
     });
 
     it('should wait for slow commands to complete naturally', async () => {
-        const isWindows = process.platform === 'win32';
-        const command = isWindows
-            ? 'ping -n 2 127.0.0.1 > nul && echo done'
-            : 'sleep 1 && echo done';
         const start = Date.now();
-        const result = JSON.parse(await execRunBlockingTool.execute({ command }));
+        const result = JSON.parse(await execRunBlockingTool.execute({
+            command: `node -e "setTimeout(() => { process.stdout.write('done\\n'); }, 1000)"`,
+        }));
         const elapsed = Date.now() - start;
         expect(result.exitCode).toBe(0);
         expect(result.stdout.trim()).toBe('done');
@@ -62,12 +61,12 @@ describe('exec.run_blocking tool', () => {
     });
 
     it('should accept a cwd argument', async () => {
-        const isWindows = process.platform === 'win32';
-        const cwd = isWindows ? process.env.TEMP ?? 'C:\\Windows\\Temp' : '/tmp';
-        const command = isWindows ? 'cd' : 'pwd';
-        const expectedSubstring = isWindows ? 'temp' : 'tmp';
-        const result = JSON.parse(await execRunBlockingTool.execute({ command, cwd }));
+        const cwd = tmpdir();
+        const result = JSON.parse(await execRunBlockingTool.execute({
+            command: `node -e "process.stdout.write(process.cwd())"`,
+            cwd,
+        }));
         expect(result.exitCode).toBe(0);
-        expect(result.stdout.trim().toLowerCase()).toContain(expectedSubstring);
+        expect(result.stdout.trim()).toBe(cwd);
     });
 });

From f1217a2a49b164b9f482597c7d20b1c979709944 Mon Sep 17 00:00:00 2001
From: sajeerzeji <sajeerzeji44@gmail.com>
Date: Mon, 8 Jun 2026 00:17:05 +0530
Subject: [PATCH 4/5] Fix GitHub CI: resolve tmpdir symlinks in run-blocking cwd test

---
 .../src/tools/exec-tools/tools/run-blocking/index.test.ts      | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
index b336797..9e90bfe 100644
--- a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
+++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
@@ -1,5 +1,6 @@
 import { describe, it, expect } from 'vitest';
 import { tmpdir } from 'node:os';
+import { realpathSync } from 'node:fs';
 import { execRunBlockingTool } from './index.js';
 
 describe('exec.run_blocking tool', () => {
@@ -61,7 +62,7 @@ describe('exec.run_blocking tool', () => {
     });
 
     it('should accept a cwd argument', async () => {
-        const cwd = tmpdir();
+        const cwd = realpathSync(tmpdir());
         const result = JSON.parse(await execRunBlockingTool.execute({
             command: `node -e "process.stdout.write(process.cwd())"`,
             cwd,

From a1ec8ddcea6606f220c00da353e4a27db9f2f02e Mon Sep 17 00:00:00 2001
From: sajeerzeji <sajeerzeji44@gmail.com>
Date: Mon, 8 Jun 2026 00:24:55 +0530
Subject: [PATCH 5/5] Fix GitHub CI: canonicalise both paths in run-blocking cwd test

---
 .../src/tools/exec-tools/tools/run-blocking/index.test.ts   | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
index 9e90bfe..9e0f3a8 100644
--- a/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
+++ b/packages/toolpack-sdk/src/tools/exec-tools/tools/run-blocking/index.test.ts
@@ -62,12 +62,14 @@ describe('exec.run_blocking tool', () => {
     });
 
     it('should accept a cwd argument', async () => {
-        const cwd = realpathSync(tmpdir());
+        const cwd = tmpdir();
         const result = JSON.parse(await execRunBlockingTool.execute({
             command: `node -e "process.stdout.write(process.cwd())"`,
             cwd,
         }));
         expect(result.exitCode).toBe(0);
-        expect(result.stdout.trim()).toBe(cwd);
+        // Canonicalise both sides: macOS symlinks (/var → /private/var) and
+        // Windows 8.3 short paths (RUNNER~1 → runneradmin) differ in raw form.
+        expect(realpathSync.native(result.stdout.trim())).toBe(realpathSync.native(cwd));
     });
 });