diff --git a/.env.example b/.env.example index 77ca0f3a..d91b71cf 100644 --- a/.env.example +++ b/.env.example @@ -39,6 +39,8 @@ # OPENROUTER_API_KEY=sk-or-... # OPENROUTER_MODEL=anthropic/claude-sonnet-4-20250514 +# OPENROUTER_REASONING_EFFORT=high # Optional: xhigh | high | medium | low | minimal | none +# OPENROUTER_INCLUDE_REASONING=true # Optional: include reasoning tokens in supported OpenRouter responses # MINIMAX_API_KEY=... # MINIMAX_MODEL=MiniMax-M2.7 diff --git a/README.md b/README.md index d5f4c414..257dda5f 100644 --- a/README.md +++ b/README.md @@ -1204,6 +1204,8 @@ OPENAI_MODEL=qwen2.5-coder-7b-instruct # match the model name from LM St Reasoning-class models (`o1`-style with `<think>` blocks) can return empty `content` with a `reasoning` field your local server may not surface. If extractions come back blank, switch to a non-reasoning model first. The `OPENAI_REASONING_EFFORT=none` env can also disable thinking on Ollama Cloud thinking models that mirror the OpenAI reasoning schema. +OpenRouter reasoning models can be configured directly with `OPENROUTER_REASONING_EFFORT=xhigh|high|medium|low|minimal|none`. Set `OPENROUTER_INCLUDE_REASONING=true` when you want supported models to return reasoning tokens in the response. + Local embeddings ship out of the box via `@xenova/transformers` — `EMBEDDING_PROVIDER=local` (default) gives you BGE-small entirely on-device. No extra config needed. ### Cost-aware model selection @@ -1314,6 +1316,9 @@ Create `~/.agentmemory/.env`: # ANTHROPIC_BASE_URL=... # Optional: Anthropic-compatible proxy / Azure # GEMINI_API_KEY=... # OPENROUTER_API_KEY=... +# OPENROUTER_MODEL=anthropic/claude-sonnet-4-20250514 +# OPENROUTER_REASONING_EFFORT=high # Optional: xhigh | high | medium | low | minimal | none +# OPENROUTER_INCLUDE_REASONING=true # Optional: include reasoning tokens when supported # MINIMAX_API_KEY=... 
# OPENAI_API_KEY=*** # NOTE: this same key auto-activates BOTH the # # OpenAI LLM provider (here) AND the OpenAI diff --git a/src/providers/openrouter.ts b/src/providers/openrouter.ts index 5c47bb0a..ecb12ad6 100644 --- a/src/providers/openrouter.ts +++ b/src/providers/openrouter.ts @@ -1,12 +1,23 @@ import type { MemoryProvider } from "../types.js"; +import { getEnvVar } from "../config.js"; import { fetchWithTimeout } from "./_fetch.js"; +type OpenRouterRequestBody = { + model: string; + max_tokens: number; + messages: Array<{ role: "system" | "user"; content: string }>; + reasoning?: { effort: string }; + include_reasoning?: boolean; +}; + export class OpenRouterProvider implements MemoryProvider { name: string; private apiKey: string; private model: string; private maxTokens: number; private baseUrl: string; + private reasoningEffort?: string; + private includeReasoning: boolean; constructor( apiKey: string, @@ -19,6 +30,17 @@ export class OpenRouterProvider implements MemoryProvider { this.maxTokens = maxTokens; this.baseUrl = baseUrl; this.name = baseUrl.includes("openrouter") ? "openrouter" : "gemini"; + const reasoningEffort = + getEnvVar("OPENROUTER_REASONING_EFFORT")?.trim().toLowerCase() || + undefined; + const includeReasoning = getEnvVar("OPENROUTER_INCLUDE_REASONING") + ?.trim() + .toLowerCase(); + this.reasoningEffort = + this.name === "openrouter" ? 
reasoningEffort : undefined; + this.includeReasoning = + this.name === "openrouter" && + (includeReasoning === "true" || includeReasoning === "1"); } async compress(systemPrompt: string, userPrompt: string): Promise { @@ -33,6 +55,21 @@ export class OpenRouterProvider implements MemoryProvider { systemPrompt: string, userPrompt: string, ): Promise { + const body: OpenRouterRequestBody = { + model: this.model, + max_tokens: this.maxTokens, + messages: [ + { role: "system", content: systemPrompt }, + { role: "user", content: userPrompt }, + ], + }; + if (this.reasoningEffort) { + body.reasoning = { effort: this.reasoningEffort }; + } + if (this.includeReasoning) { + body.include_reasoning = true; + } + const response = await fetchWithTimeout(this.baseUrl, { method: "POST", headers: { @@ -42,14 +79,7 @@ export class OpenRouterProvider implements MemoryProvider { ? { "HTTP-Referer": "https://github.com/rohitg00/agentmemory" } : {}), }, - body: JSON.stringify({ - model: this.model, - max_tokens: this.maxTokens, - messages: [ - { role: "system", content: systemPrompt }, - { role: "user", content: userPrompt }, - ], - }), + body: JSON.stringify(body), }); if (!response.ok) { @@ -57,16 +87,22 @@ export class OpenRouterProvider implements MemoryProvider { throw new Error(`${this.name} API error (${response.status}): ${text}`); } - const data = (await response.json()) as Record; - const choices = data.choices as - | Array<{ message: { content: string } }> - | undefined; - const content = choices?.[0]?.message?.content; - if (!content) { - throw new Error( - `${this.name} returned unexpected response: ${JSON.stringify(data).slice(0, 200)}`, - ); + const data = (await response.json()) as { + choices?: Array<{ + message?: { content?: string; reasoning?: string; reasoning_content?: string }; + }>; + }; + const message = data.choices?.[0]?.message; + const content = message?.content; + if (content) { + return content; + } + const reasoning = message?.reasoning || 
message?.reasoning_content; + if (reasoning) { + return reasoning; } - return content; + throw new Error( + `${this.name} returned unexpected response: ${JSON.stringify(data).slice(0, 200)}`, + ); } } diff --git a/test/fetch-timeout.test.ts b/test/fetch-timeout.test.ts index 5b2cd7c9..b51618b7 100644 --- a/test/fetch-timeout.test.ts +++ b/test/fetch-timeout.test.ts @@ -104,6 +104,8 @@ describe("Provider hang regression — OpenRouterProvider (covers Gemini LLM pat afterEach(() => { vi.restoreAllMocks(); delete process.env["AGENTMEMORY_LLM_TIMEOUT_MS"]; + delete process.env["OPENROUTER_REASONING_EFFORT"]; + delete process.env["OPENROUTER_INCLUDE_REASONING"]; }); it("compress() aborts after timeout when upstream hangs", async () => { @@ -117,6 +119,130 @@ describe("Provider hang regression — OpenRouterProvider (covers Gemini LLM pat }); }); +describe("OpenRouterProvider reasoning options", () => { + beforeEach(() => { + delete process.env["OPENROUTER_REASONING_EFFORT"]; + delete process.env["OPENROUTER_INCLUDE_REASONING"]; + delete process.env["AGENTMEMORY_LLM_TIMEOUT_MS"]; + }); + + afterEach(() => { + vi.restoreAllMocks(); + delete process.env["OPENROUTER_REASONING_EFFORT"]; + delete process.env["OPENROUTER_INCLUDE_REASONING"]; + delete process.env["AGENTMEMORY_LLM_TIMEOUT_MS"]; + }); + + function mockChatResponse(message: { + content?: string; + reasoning?: string; + reasoning_content?: string; + }): ReturnType { + return vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({ choices: [{ message }] }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + ); + } + + function requestBody(fetchSpy: ReturnType): Record { + const init = fetchSpy.mock.calls[0]?.[1] as RequestInit; + return JSON.parse(init.body as string) as Record; + } + + it("passes OpenRouter reasoning controls when explicitly enabled", async () => { + process.env["OPENROUTER_REASONING_EFFORT"] = "HIGH"; + process.env["OPENROUTER_INCLUDE_REASONING"] = 
"TRUE"; + const fetchSpy = mockChatResponse({ content: "compressed memory" }); + + const provider = new OpenRouterProvider( + "test-key", + "moonshotai/kimi-k2.6", + 32000, + "https://openrouter.ai/api/v1/chat/completions", + ); + + await expect(provider.compress("system", "user")).resolves.toBe( + "compressed memory", + ); + expect(requestBody(fetchSpy)).toMatchObject({ + model: "moonshotai/kimi-k2.6", + max_tokens: 32000, + reasoning: { effort: "high" }, + include_reasoning: true, + }); + }); + + it("does not send OpenRouter-only reasoning controls on the Gemini-compatible path", async () => { + process.env["OPENROUTER_REASONING_EFFORT"] = "high"; + process.env["OPENROUTER_INCLUDE_REASONING"] = "true"; + const fetchSpy = mockChatResponse({ content: "compressed memory" }); + + const provider = new OpenRouterProvider( + "test-key", + "gemini-2.5-flash", + 1024, + "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions", + ); + + await provider.compress("system", "user"); + const body = requestBody(fetchSpy); + expect(body.reasoning).toBeUndefined(); + expect(body.include_reasoning).toBeUndefined(); + }); + + it("returns reasoning-only OpenRouter responses when content is empty", async () => { + mockChatResponse({ content: "", reasoning_content: "reasoning output" }); + const provider = new OpenRouterProvider( + "test-key", + "moonshotai/kimi-k2.6", + 1024, + "https://openrouter.ai/api/v1/chat/completions", + ); + + await expect(provider.compress("system", "user")).resolves.toBe( + "reasoning output", + ); + }); + + it("falls through empty reasoning to reasoning_content", async () => { + mockChatResponse({ + content: "", + reasoning: "", + reasoning_content: "reasoning content fallback", + }); + const provider = new OpenRouterProvider( + "test-key", + "moonshotai/kimi-k2.6", + 1024, + "https://openrouter.ai/api/v1/chat/completions", + ); + + await expect(provider.compress("system", "user")).resolves.toBe( + "reasoning content fallback", + ); + }); 
+ + it("prefers reasoning over reasoning_content when both are present", async () => { + mockChatResponse({ + content: "", + reasoning: "explicit reasoning", + reasoning_content: "fallback reasoning", + }); + const provider = new OpenRouterProvider( + "test-key", + "moonshotai/kimi-k2.6", + 1024, + "https://openrouter.ai/api/v1/chat/completions", + ); + + await expect(provider.compress("system", "user")).resolves.toBe( + "explicit reasoning", + ); + }); +}); + describe("Provider hang regression — GeminiEmbeddingProvider", () => { beforeEach(() => { vi.spyOn(globalThis, "fetch").mockImplementation(hangingFetch as typeof fetch); @@ -343,4 +469,3 @@ describe("OpenAIProvider thinking-model fallback (#627)", () => { expect(out).toBe("real content"); }); }); -