From 10727f7d4d6ccd3fe025df2c651f6c56c861c968 Mon Sep 17 00:00:00 2001 From: JacobSampson Date: Sun, 7 Jun 2026 23:14:28 -0500 Subject: [PATCH] feat: scaffold MCP plugin interface for 3rd-party model data sources (APR-207) Add @aprovan/model-sources package that defines the plugin interface contract for external model performance data sources (Artificial Analysis, Chatbot Arena, etc.) and wires a hook point for the upcoming model selection engine (APR-205). - ModelDataPlugin interface: listModels() / getModel() / getRecommendations() - ModelSourceRegistry: manages registered plugins, aggregates results - noopSelectionHook / createRegistryHook: APR-205 integration site - ArtificialAnalysisPlugin: reference stub implementation (no live API calls) - 38 unit tests covering all public surfaces Co-Authored-By: Claude Sonnet 4.6 --- packages/model-sources/eslint.config.mjs | 3 + packages/model-sources/package.json | 45 ++++ .../src/__tests__/artificial-analysis.test.ts | 144 +++++++++++ .../src/__tests__/registry.test.ts | 232 ++++++++++++++++++ .../src/__tests__/selection-hook.test.ts | 112 +++++++++ packages/model-sources/src/index.ts | 37 +++ .../src/plugins/artificial-analysis.ts | 220 +++++++++++++++++ packages/model-sources/src/registry.ts | 156 ++++++++++++ packages/model-sources/src/selection-hook.ts | 72 ++++++ packages/model-sources/src/types.ts | 170 +++++++++++++ packages/model-sources/tsconfig.json | 9 + packages/model-sources/tsup.config.ts | 12 + packages/model-sources/vitest.config.ts | 9 + 13 files changed, 1221 insertions(+) create mode 100644 packages/model-sources/eslint.config.mjs create mode 100644 packages/model-sources/package.json create mode 100644 packages/model-sources/src/__tests__/artificial-analysis.test.ts create mode 100644 packages/model-sources/src/__tests__/registry.test.ts create mode 100644 packages/model-sources/src/__tests__/selection-hook.test.ts create mode 100644 packages/model-sources/src/index.ts create mode 100644 
packages/model-sources/src/plugins/artificial-analysis.ts create mode 100644 packages/model-sources/src/registry.ts create mode 100644 packages/model-sources/src/selection-hook.ts create mode 100644 packages/model-sources/src/types.ts create mode 100644 packages/model-sources/tsconfig.json create mode 100644 packages/model-sources/tsup.config.ts create mode 100644 packages/model-sources/vitest.config.ts diff --git a/packages/model-sources/eslint.config.mjs b/packages/model-sources/eslint.config.mjs new file mode 100644 index 0000000..1109e8d --- /dev/null +++ b/packages/model-sources/eslint.config.mjs @@ -0,0 +1,3 @@ +import baseConfig from "@aprovan/eslint-config/base"; + +export default [...baseConfig]; diff --git a/packages/model-sources/package.json b/packages/model-sources/package.json new file mode 100644 index 0000000..9f33bef --- /dev/null +++ b/packages/model-sources/package.json @@ -0,0 +1,45 @@ +{ + "name": "@aprovan/model-sources", + "version": "0.1.0", + "description": "MCP plugin interface for 3rd-party model performance data sources", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "require": "./dist/index.cjs" + } + }, + "files": [ + "dist" + ], + "publishConfig": { + "access": "public" + }, + "scripts": { + "build": "tsup", + "dev": "tsup --watch", + "lint": "eslint \"src/**/*.ts\"", + "typecheck": "tsc --noEmit", + "test": "vitest run" + }, + "devDependencies": { + "@aprovan/eslint-config": "workspace:*", + "@aprovan/prettier-config": "workspace:*", + "@aprovan/tsconfig": "workspace:*", + "@types/node": "^22.10.0", + "eslint": "^9.0.0", + "tsup": "^8.3.5", + "typescript": "^5.7.3", + "vitest": "^3.0.0" + }, + "engines": { + "node": ">=20.0.0" + }, + "prettier": "@aprovan/prettier-config", + "license": "MIT", + "packageManager": 
"pnpm@9.15.9+sha512.68046141893c66fad01c079231128e9afb89ef87e2691d69e4d40eee228988295fd4682181bae55b58418c3a253bde65a505ec7c5f9403ece5cc3cd37dcf2531" +} diff --git a/packages/model-sources/src/__tests__/artificial-analysis.test.ts b/packages/model-sources/src/__tests__/artificial-analysis.test.ts new file mode 100644 index 0000000..8dfbda2 --- /dev/null +++ b/packages/model-sources/src/__tests__/artificial-analysis.test.ts @@ -0,0 +1,144 @@ +import { describe, it, expect, beforeEach } from "vitest"; +import { ArtificialAnalysisPlugin } from "../plugins/artificial-analysis.js"; + +describe("ArtificialAnalysisPlugin", () => { + let plugin: ArtificialAnalysisPlugin; + + beforeEach(() => { + plugin = new ArtificialAnalysisPlugin(); + }); + + describe("metadata", () => { + it("has the expected id", () => { + expect(plugin.id).toBe("artificial-analysis"); + }); + + it("has a non-empty name and description", () => { + expect(plugin.name.length).toBeGreaterThan(0); + expect(plugin.description.length).toBeGreaterThan(0); + }); + }); + + describe("before init()", () => { + it("listModels() throws if not initialised", async () => { + await expect(plugin.listModels()).rejects.toThrow(/initialised/); + }); + + it("getModel() throws if not initialised", async () => { + await expect(plugin.getModel("any")).rejects.toThrow(/initialised/); + }); + + it("getRecommendations() throws if not initialised", async () => { + await expect( + plugin.getRecommendations({ complexity: 1 }), + ).rejects.toThrow(/initialised/); + }); + }); + + describe("after init()", () => { + beforeEach(async () => { + await plugin.init({ credentials: { apiKey: "test-key" } }); + }); + + describe("listModels()", () => { + it("returns a non-empty array of models", async () => { + const models = await plugin.listModels(); + expect(models.length).toBeGreaterThan(0); + }); + + it("each model has required fields", async () => { + const models = await plugin.listModels(); + for (const model of models) { + 
expect(typeof model.id).toBe("string"); + expect(model.id.length).toBeGreaterThan(0); + expect(typeof model.name).toBe("string"); + expect(typeof model.provider).toBe("string"); + expect(typeof model.available).toBe("boolean"); + } + }); + }); + + describe("getModel()", () => { + it("returns the model for a known ID", async () => { + const models = await plugin.listModels(); + const first = models[0]; + if (first === undefined) throw new Error("no stub models"); + + const result = await plugin.getModel(first.id); + expect(result).not.toBeNull(); + expect(result?.id).toBe(first.id); + }); + + it("returns null for an unknown model ID", async () => { + const result = await plugin.getModel("not-a-real-model-id"); + expect(result).toBeNull(); + }); + }); + + describe("getRecommendations()", () => { + it("returns an array of recommendations", async () => { + const recs = await plugin.getRecommendations({ complexity: 3 }); + expect(Array.isArray(recs)).toBe(true); + }); + + it("each recommendation has model, score, and rationale", async () => { + const recs = await plugin.getRecommendations({ complexity: 2 }); + for (const rec of recs) { + expect(rec.model).toBeDefined(); + expect(typeof rec.score).toBe("number"); + expect(rec.score).toBeGreaterThanOrEqual(0); + expect(rec.score).toBeLessThanOrEqual(1); + expect(typeof rec.rationale).toBe("string"); + } + }); + + it("respects the limit parameter", async () => { + const recs = await plugin.getRecommendations({ + complexity: 3, + limit: 2, + }); + expect(recs.length).toBeLessThanOrEqual(2); + }); + + it("excludes models that exceed the budget cap", async () => { + // Very low budget should exclude expensive models + const recs = await plugin.getRecommendations({ + complexity: 1, + maxCostPer1kOutputTokens: 0.001, // lower than all stub models + }); + for (const rec of recs) { + const cost = rec.model.pricing?.outputPer1kTokens; + if (cost !== undefined) { + expect(cost).toBeLessThanOrEqual(0.001); + } + } + }); + + 
it("returns results sorted by score descending", async () => { + const recs = await plugin.getRecommendations({ complexity: 4 }); + for (let i = 1; i < recs.length; i++) { + const prev = recs[i - 1]; + const curr = recs[i]; + if (prev !== undefined && curr !== undefined) { + expect(prev.score).toBeGreaterThanOrEqual(curr.score); + } + } + }); + + it("includes complexity tier in the rationale string", async () => { + const recs = await plugin.getRecommendations({ complexity: 3 }); + for (const rec of recs) { + expect(rec.rationale).toContain("3"); + } + }); + }); + }); + + describe("init() with endpoint override", () => { + it("accepts a custom endpoint without throwing", async () => { + await expect( + plugin.init({ endpoint: "https://custom.example.com/v1" }), + ).resolves.not.toThrow(); + }); + }); +}); diff --git a/packages/model-sources/src/__tests__/registry.test.ts b/packages/model-sources/src/__tests__/registry.test.ts new file mode 100644 index 0000000..9709f46 --- /dev/null +++ b/packages/model-sources/src/__tests__/registry.test.ts @@ -0,0 +1,232 @@ +import { describe, it, expect, beforeEach } from "vitest"; +import { ModelSourceRegistry } from "../registry.js"; +import type { + ModelDataPlugin, + ModelInfo, + ModelRecommendation, + PluginConfig, + RecommendationQuery, +} from "../types.js"; + +// --------------------------------------------------------------------------- +// Fixtures +// --------------------------------------------------------------------------- + +function makeModel(id: string, provider = "Test"): ModelInfo { + return { + id, + name: id, + provider, + available: true, + pricing: { inputPer1kTokens: 0.001, outputPer1kTokens: 0.002 }, + benchmarks: { coding: 70 }, + }; +} + +class StubPlugin implements ModelDataPlugin { + readonly id: string; + readonly name: string; + readonly description = "Stub plugin for testing"; + private models: ModelInfo[]; + private shouldThrow: boolean; + initCalled = false; + + constructor( + id: string, + 
models: ModelInfo[] = [], + shouldThrow = false, + ) { + this.id = id; + this.name = id; + this.models = models; + this.shouldThrow = shouldThrow; + } + + async init(_config: PluginConfig): Promise<void> { + this.initCalled = true; + } + + async listModels(): Promise<ModelInfo[]> { + if (this.shouldThrow) throw new Error("plugin error"); + return this.models; + } + + async getModel(id: string): Promise<ModelInfo | null> { + if (this.shouldThrow) throw new Error("plugin error"); + return this.models.find((m) => m.id === id) ?? null; + } + + async getRecommendations( + _query: RecommendationQuery, + ): Promise<ModelRecommendation[]> { + if (this.shouldThrow) throw new Error("plugin error"); + return this.models.map((m) => ({ model: m, score: 0.5, rationale: "stub" })); + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("ModelSourceRegistry", () => { + let reg: ModelSourceRegistry; + + beforeEach(() => { + reg = new ModelSourceRegistry(); + }); + + describe("register / getPlugin / listPluginIds", () => { + it("registers a plugin and makes it retrievable by ID", () => { + const plugin = new StubPlugin("p1"); + reg.register(plugin); + expect(reg.getPlugin("p1")).toBe(plugin); + }); + + it("replaces an existing plugin with the same ID", () => { + const first = new StubPlugin("p1"); + const second = new StubPlugin("p1"); + reg.register(first); + reg.register(second); + expect(reg.getPlugin("p1")).toBe(second); + }); + + it("returns undefined for an unknown plugin ID", () => { + expect(reg.getPlugin("unknown")).toBeUndefined(); + }); + + it("lists all registered plugin IDs", () => { + reg.register(new StubPlugin("a")); + reg.register(new StubPlugin("b")); + expect(reg.listPluginIds()).toEqual(expect.arrayContaining(["a", "b"])); + expect(reg.listPluginIds()).toHaveLength(2); + }); + }); + + describe("unregister", () => { + it("removes a registered plugin", () => { + reg.register(new 
StubPlugin("p1")); + reg.unregister("p1"); + expect(reg.getPlugin("p1")).toBeUndefined(); + expect(reg.listPluginIds()).toHaveLength(0); + }); + + it("is a no-op for an unknown plugin ID", () => { + expect(() => reg.unregister("missing")).not.toThrow(); + }); + }); + + describe("initPlugin", () => { + it("calls init() on the target plugin", async () => { + const plugin = new StubPlugin("p1"); + reg.register(plugin); + await reg.initPlugin("p1", {}); + expect(plugin.initCalled).toBe(true); + }); + + it("throws when the plugin ID is not registered", async () => { + await expect(reg.initPlugin("not-there", {})).rejects.toThrow( + /Plugin not registered/, + ); + }); + }); + + describe("getAllModels", () => { + it("merges models from all plugins", async () => { + reg.register(new StubPlugin("a", [makeModel("m1"), makeModel("m2")])); + reg.register(new StubPlugin("b", [makeModel("m3")])); + const models = await reg.getAllModels(); + expect(models).toHaveLength(3); + expect(models.map((m) => m.id)).toEqual( + expect.arrayContaining(["m1", "m2", "m3"]), + ); + }); + + it("skips plugins that throw and returns data from healthy ones", async () => { + reg.register(new StubPlugin("ok", [makeModel("m1")])); + reg.register(new StubPlugin("bad", [], true)); + const models = await reg.getAllModels(); + expect(models).toHaveLength(1); + expect(models[0]?.id).toBe("m1"); + }); + + it("returns an empty array when no plugins are registered", async () => { + expect(await reg.getAllModels()).toEqual([]); + }); + }); + + describe("getModel", () => { + it("returns the model when found in one of the plugins", async () => { + reg.register(new StubPlugin("a", [makeModel("target")])); + const result = await reg.getModel("target"); + expect(result).not.toBeNull(); + expect(result?.id).toBe("target"); + }); + + it("returns null when no plugin has the model", async () => { + reg.register(new StubPlugin("a", [makeModel("m1")])); + expect(await reg.getModel("unknown-model")).toBeNull(); + }); + + 
it("skips failing plugins and searches remaining ones", async () => { + reg.register(new StubPlugin("bad", [], true)); + reg.register(new StubPlugin("ok", [makeModel("m1")])); + const result = await reg.getModel("m1"); + expect(result?.id).toBe("m1"); + }); + }); + + describe("getRecommendations", () => { + it("merges and deduplicates recommendations by model ID", async () => { + const shared = makeModel("shared"); + reg.register(new StubPlugin("a", [shared, makeModel("a-only")])); + reg.register(new StubPlugin("b", [shared, makeModel("b-only")])); + + const recs = await reg.getRecommendations({ complexity: 3 }); + const ids = recs.map((r) => r.model.id); + + // shared should appear once; both unique models should be present + expect(ids.filter((id) => id === "shared")).toHaveLength(1); + expect(ids).toContain("a-only"); + expect(ids).toContain("b-only"); + }); + + it("returns results sorted by score descending", async () => { + // Use a custom plugin to control scores precisely + const highScorePlugin: ModelDataPlugin = { + id: "high", + name: "high", + description: "", + init: async () => undefined, + listModels: async () => [], + getModel: async () => null, + getRecommendations: async () => [ + { model: makeModel("high-model"), score: 0.9, rationale: "" }, + ], + }; + const lowScorePlugin: ModelDataPlugin = { + id: "low", + name: "low", + description: "", + init: async () => undefined, + listModels: async () => [], + getModel: async () => null, + getRecommendations: async () => [ + { model: makeModel("low-model"), score: 0.2, rationale: "" }, + ], + }; + + reg.register(lowScorePlugin); + reg.register(highScorePlugin); + + const recs = await reg.getRecommendations({ complexity: 3 }); + expect(recs[0]?.score).toBeGreaterThanOrEqual(recs[1]?.score ?? 
0); + }); + + it("skips failing plugins gracefully", async () => { + reg.register(new StubPlugin("bad", [], true)); + reg.register(new StubPlugin("ok", [makeModel("m1")])); + const recs = await reg.getRecommendations({ complexity: 1 }); + expect(recs.length).toBeGreaterThan(0); + }); + }); +}); diff --git a/packages/model-sources/src/__tests__/selection-hook.test.ts b/packages/model-sources/src/__tests__/selection-hook.test.ts new file mode 100644 index 0000000..b38bed2 --- /dev/null +++ b/packages/model-sources/src/__tests__/selection-hook.test.ts @@ -0,0 +1,112 @@ +import { describe, it, expect } from "vitest"; +import { + noopSelectionHook, + createRegistryHook, +} from "../selection-hook.js"; +import { ModelSourceRegistry } from "../registry.js"; +import type { + ModelDataPlugin, + ModelInfo, + ModelRecommendation, + PluginConfig, +} from "../types.js"; + +// --------------------------------------------------------------------------- +// Fixture +// --------------------------------------------------------------------------- + +function makeModel(id: string): ModelInfo { + return { + id, + name: id, + provider: "Test", + available: true, + }; +} + +function makePlugin( + id: string, + recs: ModelRecommendation[], +): ModelDataPlugin { + return { + id, + name: id, + description: "", + init: async (_config: PluginConfig) => undefined, + listModels: async () => [], + getModel: async () => null, + getRecommendations: async () => recs, + }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("noopSelectionHook", () => { + it("returns an empty array regardless of query", async () => { + const reg = new ModelSourceRegistry(); + const result = await noopSelectionHook({ complexity: 3 }, reg); + expect(result).toEqual([]); + }); + + it("does not call any registry methods", async () => { + const reg = new ModelSourceRegistry(); + 
reg.register( + makePlugin("p1", [ + { model: makeModel("m1"), score: 0.9, rationale: "" }, + ]), + ); + const result = await noopSelectionHook({ complexity: 3 }, reg); + // Even with a registered plugin, noop always returns [] + expect(result).toEqual([]); + }); +}); + +describe("createRegistryHook", () => { + it("returns a hook that queries the bound registry", async () => { + const reg = new ModelSourceRegistry(); + reg.register( + makePlugin("p1", [ + { model: makeModel("m1"), score: 0.8, rationale: "great" }, + ]), + ); + + const hook = createRegistryHook(reg); + const result = await hook({ complexity: 2 }, reg); + + expect(result.length).toBeGreaterThan(0); + expect(result[0]?.model.id).toBe("m1"); + }); + + it("uses the registry captured at creation time, ignoring the passed registry arg", async () => { + const capturedReg = new ModelSourceRegistry(); + capturedReg.register( + makePlugin("from-captured", [ + { model: makeModel("captured-model"), score: 0.7, rationale: "" }, + ]), + ); + + const otherReg = new ModelSourceRegistry(); + otherReg.register( + makePlugin("from-other", [ + { model: makeModel("other-model"), score: 0.9, rationale: "" }, + ]), + ); + + const hook = createRegistryHook(capturedReg); + // Pass otherReg as the second arg — hook should use capturedReg + const result = await hook({ complexity: 3 }, otherReg); + + const ids = result.map((r) => r.model.id); + expect(ids).toContain("captured-model"); + expect(ids).not.toContain("other-model"); + }); + + it("returns an empty array when the registry has no plugins", async () => { + const reg = new ModelSourceRegistry(); + const hook = createRegistryHook(reg); + const result = await hook({ complexity: 1 }, reg); + expect(result).toEqual([]); + }); +}); diff --git a/packages/model-sources/src/index.ts b/packages/model-sources/src/index.ts new file mode 100644 index 0000000..ac7f965 --- /dev/null +++ b/packages/model-sources/src/index.ts @@ -0,0 +1,37 @@ +/** + * @aprovan/model-sources + * + * MCP 
plugin interface for 3rd-party model performance data sources. + * + * Provides: + * - `ModelDataPlugin` — interface every data-source plugin must implement + * - `ModelSourceRegistry` — in-memory registry for managing plugins + * - `registry` — shared singleton registry instance + * - `noopSelectionHook` / `createRegistryHook` — hook point for APR-205 + * - `ArtificialAnalysisPlugin` — reference implementation (stub data) + * + * **Scaffolding note (APR-207):** Plugin data is not yet wired into model + * selection decisions. See `selection-hook.ts` for the integration site. + */ + +// Types +export type { + ComplexityTier, + ModelPricing, + ModelBenchmarks, + ModelInfo, + RecommendationQuery, + ModelRecommendation, + PluginConfig, + ModelDataPlugin, +} from "./types.js"; + +// Registry +export { ModelSourceRegistry, registry } from "./registry.js"; + +// APR-205 hook point +export type { ModelSelectionHook } from "./selection-hook.js"; +export { noopSelectionHook, createRegistryHook } from "./selection-hook.js"; + +// Reference plugins +export { ArtificialAnalysisPlugin } from "./plugins/artificial-analysis.js"; diff --git a/packages/model-sources/src/plugins/artificial-analysis.ts b/packages/model-sources/src/plugins/artificial-analysis.ts new file mode 100644 index 0000000..f52bd7e --- /dev/null +++ b/packages/model-sources/src/plugins/artificial-analysis.ts @@ -0,0 +1,220 @@ +/** + * Reference plugin: Artificial Analysis (https://artificialanalysis.ai). + * + * Artificial Analysis publishes independent benchmark scores, pricing, and + * latency measurements for frontier models. Their free tier allows up to + * 1 000 requests/day. + * + * **This is a scaffold / reference implementation (APR-207).** + * The three query methods (`listModels`, `getModel`, `getRecommendations`) + * currently return static stub data so that consumers and tests can exercise + * the interface without a live API key. + * + * To activate the real integration once APR-205 is ready: + * 1. 
Replace the stub bodies with actual `fetch` calls (see the TODO + * comments in `listModels` and `getModel`). + * 2. Map the API response shape to `ModelInfo` (the TODOs assume `parseApiModel` / `parseApiModels` helpers, which are not yet written). + * 3. Provide a real API key via `PluginConfig.credentials.apiKey`. + * + * Real API endpoints (reference): + * GET /v1/models → listModels + * GET /v1/models/:id → getModel + */ + +import type { + ModelDataPlugin, + ModelInfo, + ModelRecommendation, + ModelPricing, + ModelBenchmarks, + PluginConfig, + RecommendationQuery, + ComplexityTier, +} from "../types.js"; + +export class ArtificialAnalysisPlugin implements ModelDataPlugin { + readonly id = "artificial-analysis"; + readonly name = "Artificial Analysis"; + readonly description = + "Independent model benchmarks, pricing, and latency data from artificialanalysis.ai"; + + private apiKey = ""; + private endpoint = "https://api.artificialanalysis.ai/v1"; + private initialized = false; + + async init(config: PluginConfig): Promise<void> { + if (config.endpoint !== undefined) { + this.endpoint = config.endpoint; + } + this.apiKey = config.credentials?.["apiKey"] ?? 
""; + this.initialized = true; + } + + async listModels(): Promise<ModelInfo[]> { + this.assertInitialized(); + // TODO (APR-205): Replace stub with real API call, e.g.: + // const res = await fetch(`${this.endpoint}/models`, { + // headers: { Authorization: `Bearer ${this.apiKey}` }, + // }); + // if (!res.ok) return []; + // const data: unknown = await res.json(); + // return parseApiModels(data); + void this.endpoint; // referenced by TODO above — suppress unused lint + void this.apiKey; // referenced by TODO above — suppress unused lint + return STUB_MODELS; + } + + async getModel(id: string): Promise<ModelInfo | null> { + this.assertInitialized(); + // TODO (APR-205): Replace stub with real API call, e.g.: + // const res = await fetch( + // `${this.endpoint}/models/${encodeURIComponent(id)}`, + // { headers: { Authorization: `Bearer ${this.apiKey}` } }, + // ); + // if (res.status === 404) return null; + // if (!res.ok) return null; + // return parseApiModel(await res.json()); + return STUB_MODELS.find((m) => m.id === id) ?? null; + } + + async getRecommendations( + query: RecommendationQuery, + ): Promise<ModelRecommendation[]> { + this.assertInitialized(); + const models = await this.listModels(); + return rankModels(models, query); + } + + private assertInitialized(): void { + if (!this.initialized) { + throw new Error( + `${this.name} plugin must be initialised before use. Call init() first.`, + ); + } + } +} + +// --------------------------------------------------------------------------- +// Internal ranking helpers +// --------------------------------------------------------------------------- + +function rankModels( + models: ModelInfo[], + query: RecommendationQuery, +): ModelRecommendation[] { + const limit = query.limit ?? 
5; + + const candidates = models.filter((m) => { + if (!m.available) return false; + if ( + query.maxCostPer1kOutputTokens !== undefined && + m.pricing !== undefined && + m.pricing.outputPer1kTokens > query.maxCostPer1kOutputTokens + ) { + return false; + } + return true; + }); + + const scored: ModelRecommendation[] = candidates.map((m) => ({ + model: m, + score: computeScore(m, query.complexity), + rationale: buildRationale(m, query.complexity), + })); + + return scored.sort((a, b) => b.score - a.score).slice(0, limit); +} + +function computeScore(model: ModelInfo, complexity: ComplexityTier): number { + // Blend normalised coding benchmark with an inverse cost term. + // Higher complexity → weight performance more; lower complexity → weight cost more. + const benchScore = (model.benchmarks?.coding ?? 50) / 100; + const perfWeight = complexity / 5; + + const rawCost = model.pricing?.outputPer1kTokens; + // Normalise cost: 0 $/1K → costScore 1.0; $0.02/1K → costScore 0.0 + const costScore = + rawCost !== undefined ? Math.max(0, 1 - rawCost / 0.02) : 0.5; + + return benchScore * perfWeight + costScore * (1 - perfWeight); +} + +function buildRationale( + model: ModelInfo, + complexity: ComplexityTier, +): string { + const parts: string[] = [`complexity tier ${complexity}`]; + + if (model.benchmarks?.coding !== undefined) { + parts.push(`coding score ${model.benchmarks.coding}/100`); + } + if (model.pricing !== undefined) { + parts.push(`$${model.pricing.outputPer1kTokens}/1K output tokens`); + } + if (model.benchmarks?.tokensPerSecond !== undefined) { + parts.push(`${model.benchmarks.tokensPerSecond} tok/s`); + } + + return parts.join(" · "); +} + +// --------------------------------------------------------------------------- +// Stub data +// +// Representative values as of mid-2025 (not real-time). +// Replace with actual API responses once APR-205 integrates this plugin. 
+// --------------------------------------------------------------------------- + +function makeModel( + id: string, + name: string, + provider: string, + pricing: ModelPricing, + benchmarks: ModelBenchmarks, + contextWindow: number, +): ModelInfo { + return { id, name, provider, pricing, benchmarks, contextWindow, available: true }; +} + +const STUB_MODELS: ModelInfo[] = [ + makeModel( + "anthropic/claude-haiku-4-5", + "Claude Haiku 4.5", + "Anthropic", + { inputPer1kTokens: 0.0008, outputPer1kTokens: 0.004 }, + { coding: 68, reasoning: 72, tokensPerSecond: 140, ttftMs: 350 }, + 200_000, + ), + makeModel( + "anthropic/claude-sonnet-4-6", + "Claude Sonnet 4.6", + "Anthropic", + { inputPer1kTokens: 0.003, outputPer1kTokens: 0.015 }, + { coding: 84, reasoning: 87, tokensPerSecond: 95, ttftMs: 520 }, + 200_000, + ), + makeModel( + "openai/gpt-4.1-mini", + "GPT-4.1 mini", + "OpenAI", + { inputPer1kTokens: 0.0004, outputPer1kTokens: 0.0016 }, + { coding: 65, reasoning: 70, tokensPerSecond: 130, ttftMs: 300 }, + 128_000, + ), + makeModel( + "google/gemini-2.5-flash", + "Gemini 2.5 Flash", + "Google", + { inputPer1kTokens: 0.0005, outputPer1kTokens: 0.0015 }, + { coding: 75, reasoning: 80, tokensPerSecond: 120, ttftMs: 400 }, + 1_000_000, + ), + makeModel( + "openai/gpt-4.1", + "GPT-4.1", + "OpenAI", + { inputPer1kTokens: 0.002, outputPer1kTokens: 0.008 }, + { coding: 80, reasoning: 83, tokensPerSecond: 100, ttftMs: 450 }, + 128_000, + ), +]; diff --git a/packages/model-sources/src/registry.ts b/packages/model-sources/src/registry.ts new file mode 100644 index 0000000..0fbc47e --- /dev/null +++ b/packages/model-sources/src/registry.ts @@ -0,0 +1,156 @@ +/** + * Plugin registry for model data sources. + * + * Maintains a live collection of registered ModelDataPlugin instances and + * provides aggregate query methods. The registry itself does not make + * selection decisions — it is a data-layer aggregator. 
+ * + * **Scaffolding note (APR-207):** Data returned by this registry is not yet + * consumed by the model selection engine. The integration hook lives in + * `selection-hook.ts` and will be activated by APR-205. + */ + +import type { + ModelDataPlugin, + ModelInfo, + ModelRecommendation, + PluginConfig, + RecommendationQuery, +} from "./types.js"; + +/** + * In-memory registry for {@link ModelDataPlugin} instances. + * + * @example + * ```typescript + * import { registry } from "@aprovan/model-sources"; + * import { ArtificialAnalysisPlugin } from "@aprovan/model-sources"; + * + * registry.register(new ArtificialAnalysisPlugin()); + * await registry.initPlugin("artificial-analysis", { + * credentials: { apiKey: process.env.AA_API_KEY ?? "" }, + * }); + * + * const models = await registry.getAllModels(); + * ``` + */ +export class ModelSourceRegistry { + private readonly plugins = new Map<string, ModelDataPlugin>(); + + /** + * Register a plugin. If a plugin with the same `id` is already + * registered it will be replaced. + */ + register(plugin: ModelDataPlugin): void { + this.plugins.set(plugin.id, plugin); + } + + /** + * Remove a plugin from the registry by its ID. + * No-op if the plugin is not registered. + */ + unregister(id: string): void { + this.plugins.delete(id); + } + + /** + * Retrieve a registered plugin by ID. + * Returns `undefined` if no plugin with that ID is registered. + */ + getPlugin(id: string): ModelDataPlugin | undefined { + return this.plugins.get(id); + } + + /** + * Return the IDs of all currently registered plugins. + */ + listPluginIds(): string[] { + return [...this.plugins.keys()]; + } + + /** + * Initialise a specific registered plugin with the given config. + * Throws if the plugin ID is not registered. 
+ */ + async initPlugin(id: string, config: PluginConfig): Promise<void> { + const plugin = this.plugins.get(id); + if (plugin === undefined) { + throw new Error(`Plugin not registered: "${id}"`); + } + await plugin.init(config); + } + + /** + * Query every registered plugin for its full model list and merge results. + * + * Plugins that throw (e.g. because they have not been initialised) are + * silently skipped so that one failing source does not block the rest. + */ + async getAllModels(): Promise<ModelInfo[]> { + const results: ModelInfo[] = []; + for (const plugin of this.plugins.values()) { + try { + const models = await plugin.listModels(); + results.push(...models); + } catch { + // Plugin unavailable or not initialised — skip silently + } + } + return results; + } + + /** + * Search all registered plugins for a model with the given ID. + * Returns the first match found, or `null` if no plugin knows the model. + */ + async getModel(modelId: string): Promise<ModelInfo | null> { + for (const plugin of this.plugins.values()) { + try { + const info = await plugin.getModel(modelId); + if (info !== null) return info; + } catch { + // Plugin unavailable — continue searching others + } + } + return null; + } + + /** + * Collect recommendations from all registered plugins for the given query, + * deduplicate by model ID (keeping the highest score), and return results + * sorted by score descending. 
+ */ + async getRecommendations( + query: RecommendationQuery, + ): Promise<ModelRecommendation[]> { + const all: ModelRecommendation[] = []; + + for (const plugin of this.plugins.values()) { + try { + const recs = await plugin.getRecommendations(query); + all.push(...recs); + } catch { + // Plugin unavailable — continue + } + } + + // Deduplicate: for each model ID keep the highest-scoring entry + const byModelId = new Map<string, ModelRecommendation>(); + for (const rec of all) { + const existing = byModelId.get(rec.model.id); + if (existing === undefined || rec.score > existing.score) { + byModelId.set(rec.model.id, rec); + } + } + + return [...byModelId.values()].sort((a, b) => b.score - a.score); + } +} + +/** + * Shared singleton registry. + * + * Suitable for most use cases. Consumers that need strict isolation + * (e.g. tests) should instantiate `new ModelSourceRegistry()` directly. + */ +export const registry = new ModelSourceRegistry(); diff --git a/packages/model-sources/src/selection-hook.ts b/packages/model-sources/src/selection-hook.ts new file mode 100644 index 0000000..a65f302 --- /dev/null +++ b/packages/model-sources/src/selection-hook.ts @@ -0,0 +1,72 @@ +/** + * Hook point for injecting plugin data into the model selection engine. + * + * APR-205 (model selection engine) will call the hook with a complexity + * query and receive ranked recommendations sourced from registered plugins. + * The engine is solely responsible for combining these recommendations with + * its own scoring logic and making the final routing decision. + * + * **Scaffolding note (APR-207):** The hook exists but is wired to a no-op + * by default. To activate, APR-205 should call `createRegistryHook(registry)` + * during engine initialisation and replace the no-op reference. 
+ * + * @example Activating plugin data in APR-205: + * ```typescript + * import { createRegistryHook, registry } from "@aprovan/model-sources"; + * + * // Engine setup + * const pluginHook = createRegistryHook(registry); + * + * // Inside model selection logic + * const pluginRecs = await pluginHook({ complexity: task.complexity }, registry); + * // … merge pluginRecs with internal scoring + * ``` + */ + +import type { ModelRecommendation, RecommendationQuery } from "./types.js"; +import type { ModelSourceRegistry } from "./registry.js"; + +/** + * Signature for the plugin data hook consumed by the model selection engine. + * + * @param query - Complexity tier and optional budget constraint. + * @param registry - The registry to query (passed by the engine so the hook + * can be used with any registry instance, including mocks). Hooks built by `createRegistryHook` ignore this argument and use the registry they were created with. + */ +export type ModelSelectionHook = ( + query: RecommendationQuery, + registry: ModelSourceRegistry, +) => Promise; + +/** + * Default no-op hook. + * + * Returns an empty array — plugin data is not consulted. + * Used until APR-205 replaces this with `createRegistryHook(registry)`. + */ +export const noopSelectionHook: ModelSelectionHook = async ( + _query: RecommendationQuery, + _registry: ModelSourceRegistry, +): Promise => { + return []; +}; + +/** + * Build a live selection hook backed by the given registry. + * + * The returned function queries the registry for recommendations and returns + * them sorted by score descending. Plugins that fail are silently skipped + * (handled internally by `ModelSourceRegistry.getRecommendations`). + * + * @param registry - The plugin registry to query when the hook is invoked. The registry argument supplied at call time is ignored by the returned hook.
+ */ +export function createRegistryHook( + registry: ModelSourceRegistry, +): ModelSelectionHook { + return async ( + query: RecommendationQuery, + _registry: ModelSourceRegistry, + ): Promise => { + return registry.getRecommendations(query); + }; +} diff --git a/packages/model-sources/src/types.ts b/packages/model-sources/src/types.ts new file mode 100644 index 0000000..3d8c2ba --- /dev/null +++ b/packages/model-sources/src/types.ts @@ -0,0 +1,170 @@ +/** + * Core types for the model data source plugin interface. + * + * These types define the contract between 3rd-party data plugins and the + * model selection engine (APR-205). Plugins are read-only data providers; + * they surface benchmarks, pricing, and availability information but do + * NOT make routing or selection decisions — that logic lives in APR-205. + */ + +/** + * Task complexity tier (1 = simplest, 5 = most demanding). + * Mirrors the complexity scoring system used across Aprovan agents. + * + * Rough guidelines: + * 1 — trivial / well-defined single-step tasks + * 2 — straightforward multi-step tasks + * 3 — moderately complex tasks requiring reasoning + * 4 — hard tasks requiring frontier capabilities + * 5 — research / highest-complexity work + */ +export type ComplexityTier = 1 | 2 | 3 | 4 | 5; + +/** Pricing for a model in USD. All amounts are per 1K tokens. */ +export interface ModelPricing { + /** Cost in USD per 1K input (prompt) tokens */ + inputPer1kTokens: number; + /** Cost in USD per 1K output (completion) tokens */ + outputPer1kTokens: number; +} + +/** + * Benchmark scores sourced from an external data provider. + * Quality scores are normalised to 0–100; `tokensPerSecond` and `ttftMs` are raw measurements in the units stated on each field. + */ +export interface ModelBenchmarks { + /** General coding ability (e.g. HumanEval, SWE-bench) */ + coding?: number; + /** General reasoning (e.g. MMLU, GPQA) */ + reasoning?: number; + /** Math (e.g. MATH, AIME) */ + math?: number; + /** Instruction following / chat (e.g.
MT-Bench) */ + instruction?: number; + /** Median output throughput in tokens per second */ + tokensPerSecond?: number; + /** Median time-to-first-token in milliseconds */ + ttftMs?: number; +} + +/** A model entry as returned by a model data plugin */ +export interface ModelInfo { + /** + * Provider-scoped unique model identifier; `ModelSourceRegistry.getRecommendations` deduplicates on this value. + * Convention: "<provider>/<model>" + * e.g. "anthropic/claude-sonnet-4-6" + */ + id: string; + /** Human-readable model name */ + name: string; + /** Provider name (e.g. "Anthropic", "OpenAI", "Google") */ + provider: string; + /** Pricing information (undefined if not reported by this source) */ + pricing?: ModelPricing; + /** Benchmark scores (undefined if not reported by this source) */ + benchmarks?: ModelBenchmarks; + /** Maximum context window in tokens */ + contextWindow?: number; + /** Whether the model is currently available / not deprecated */ + available: boolean; + /** Arbitrary extra data from the source (e.g. tags, release date) */ + metadata?: Record; +} + +/** Query parameters for recommendation requests */ +export interface RecommendationQuery { + /** Task complexity tier — drives the performance/cost trade-off */ + complexity: ComplexityTier; + /** + * Hard cap on cost per 1K output tokens in USD. + * Models exceeding this are excluded from results. + */ + maxCostPer1kOutputTokens?: number; + /** Maximum number of recommendations to return (default: 5) */ + limit?: number; +} + +/** A model recommendation produced by a plugin's ranking logic */ +export interface ModelRecommendation { + /** The recommended model */ + model: ModelInfo; + /** + * Fit score from 0 to 1 (higher = better match for the query). + * This is a data-layer score, not a final routing decision. + */ + score: number; + /** Human-readable rationale produced by the plugin */ + rationale: string; +} + +/** + * Configuration passed to a plugin at initialisation time. + * Plugins must not be queried before `init()` has been called.
+ */ +export interface PluginConfig { + /** Override the default API endpoint */ + endpoint?: string; + /** Plugin-specific credentials keyed by name, e.g. `apiKey` (API keys, tokens, etc.) */ + credentials?: Record; + /** Additional plugin-specific settings */ + options?: Record; +} + +/** + * MCP plugin interface for a 3rd-party model data source. + * + * Each concrete plugin represents one external data provider (e.g. Artificial + * Analysis, Chatbot Arena). Plugins expose three query methods that map + * directly to the draft interface from APR-207: + * + * list_models() → listModels() + * get_model(id) → getModel(id) + * get_recommendations(complexity, ...) → getRecommendations(query) + * + * **Scaffolding note (APR-207):** Plugin data is not yet wired into model + * selection decisions. The hook point in `selection-hook.ts` is the intended + * integration site for APR-205. + * + * @example + * ```typescript + * const plugin = new ArtificialAnalysisPlugin(); + * await plugin.init({ credentials: { apiKey: process.env.AA_API_KEY ?? "" } }); + * const models = await plugin.listModels(); + * const top = await plugin.getRecommendations({ complexity: 3, limit: 3 }); + * ``` + */ +export interface ModelDataPlugin { + /** Stable unique identifier for this plugin (e.g. "artificial-analysis") */ + readonly id: string; + /** Human-readable name of the data source */ + readonly name: string; + /** One-line description of what this data source provides */ + readonly description: string; + + /** + * Initialise the plugin with its configuration. + * Must be called exactly once, and awaited, before any query method is used. + */ + init(config: PluginConfig): Promise; + + /** + * Return all models known to this source with benchmark scores and pricing. + * Returns an empty array when the source is unavailable or returns no data. + */ + listModels(): Promise; + + /** + * Return detailed info for one model by its ID. + * Returns `null` if the model is not found in this source.
+ */ + getModel(id: string): Promise; + + /** + * Return a ranked list of model recommendations for the given query (see `RecommendationQuery` for the complexity tier, cost cap and result limit). + * + * The plugin provides a *data-layer* ranking only. The model selection + * engine (APR-205) is solely responsible for final routing decisions and + * may discard, re-rank, or augment these results. + */ + getRecommendations(query: RecommendationQuery): Promise; +} diff --git a/packages/model-sources/tsconfig.json b/packages/model-sources/tsconfig.json new file mode 100644 index 0000000..21aa77c --- /dev/null +++ b/packages/model-sources/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "@aprovan/tsconfig/node.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} diff --git a/packages/model-sources/tsup.config.ts b/packages/model-sources/tsup.config.ts new file mode 100644 index 0000000..a383fe9 --- /dev/null +++ b/packages/model-sources/tsup.config.ts @@ -0,0 +1,12 @@ +import { defineConfig } from "tsup"; + +export default defineConfig({ + entry: { + index: "src/index.ts", + }, + format: ["esm", "cjs"], + dts: true, + clean: true, + sourcemap: true, + target: "node20", +}); diff --git a/packages/model-sources/vitest.config.ts b/packages/model-sources/vitest.config.ts new file mode 100644 index 0000000..4c5d757 --- /dev/null +++ b/packages/model-sources/vitest.config.ts @@ -0,0 +1,9 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + environment: "node", + testTimeout: 10_000, + hookTimeout: 10_000, + }, +});