rohitg00 · lukeaus · Jun 3, 2026 · Jun 3, 2026
diff --git a/.env.example b/.env.example
@@ -39,6 +39,8 @@
 
 # OPENROUTER_API_KEY=sk-or-...
 # OPENROUTER_MODEL=anthropic/claude-sonnet-4-20250514
+# OPENROUTER_REASONING_EFFORT=high              # Optional: xhigh | high | medium | low | minimal | none
+# OPENROUTER_INCLUDE_REASONING=true             # Optional: true | 1 — include reasoning tokens in supported OpenRouter responses
 
 # MINIMAX_API_KEY=...
 # MINIMAX_MODEL=MiniMax-M2.7

diff --git a/README.md b/README.md
@@ -1204,6 +1204,8 @@ OPENAI_MODEL=qwen2.5-coder-7b-instruct         # match the model name from LM St
 
 Reasoning-class models (`o1`-style with `<think>` blocks) can return empty `content` with a `reasoning` field your local server may not surface. If extractions come back blank, switch to a non-reasoning model first. The `OPENAI_REASONING_EFFORT=none` env can also disable thinking on Ollama Cloud thinking models that mirror the OpenAI reasoning schema.
 
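+OpenRouter reasoning models can be configured directly with `OPENROUTER_REASONING_EFFORT=xhigh|high|medium|low|minimal|none`. The value is case-insensitive and is forwarded to the API without local validation. Set `OPENROUTER_INCLUDE_REASONING=true` (or `1`) when you want supported models to return reasoning tokens in the response. Both settings are applied only when the provider's base URL contains `openrouter`; they are ignored on the Gemini-compatible path.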
+
 Local embeddings ship out of the box via `@xenova/transformers` — `EMBEDDING_PROVIDER=local` (default) gives you BGE-small entirely on-device. No extra config needed.
 
 ### Cost-aware model selection
@@ -1314,6 +1316,9 @@ Create `~/.agentmemory/.env`:
 # ANTHROPIC_BASE_URL=...              # Optional: Anthropic-compatible proxy / Azure
 # GEMINI_API_KEY=...
 # OPENROUTER_API_KEY=...
+# OPENROUTER_MODEL=anthropic/claude-sonnet-4-20250514
+# OPENROUTER_REASONING_EFFORT=high       # Optional: xhigh | high | medium | low | minimal | none
+# OPENROUTER_INCLUDE_REASONING=true      # Optional: include reasoning tokens when supported
 # MINIMAX_API_KEY=...
 # OPENAI_API_KEY=***                       # NOTE: this same key auto-activates BOTH the
 #                                          # OpenAI LLM provider (here) AND the OpenAI

diff --git a/src/providers/openrouter.ts b/src/providers/openrouter.ts
@@ -1,12 +1,23 @@
 import type { MemoryProvider } from "../types.js";
+import { getEnvVar } from "../config.js";
 import { fetchWithTimeout } from "./_fetch.js";
 
+type OpenRouterRequestBody = { // shape of the JSON request body this provider serializes and POSTs
+  model: string;
+  max_tokens: number;
+  messages: Array<{ role: "system" | "user"; content: string }>;
+  reasoning?: { effort: string }; // added only when a reasoning effort is configured
+  include_reasoning?: boolean; // added (as true) only when reasoning output is requested
+};
+
 export class OpenRouterProvider implements MemoryProvider {
   name: string;
   private apiKey: string;
   private model: string;
   private maxTokens: number;
   private baseUrl: string;
+  private reasoningEffort?: string;
+  private includeReasoning: boolean;
 
   constructor(
     apiKey: string,
@@ -19,6 +30,17 @@ export class OpenRouterProvider implements MemoryProvider {
     this.maxTokens = maxTokens;
     this.baseUrl = baseUrl;
     this.name = baseUrl.includes("openrouter") ? "openrouter" : "gemini";
+    const reasoningEffort =
+      getEnvVar("OPENROUTER_REASONING_EFFORT")?.trim().toLowerCase() ||
+      undefined;
+    const includeReasoning = getEnvVar("OPENROUTER_INCLUDE_REASONING")
+      ?.trim()
+      .toLowerCase();
+    this.reasoningEffort =
+      this.name === "openrouter" ? reasoningEffort : undefined;
+    this.includeReasoning =
+      this.name === "openrouter" &&
+      (includeReasoning === "true" || includeReasoning === "1");
   }
 
   async compress(systemPrompt: string, userPrompt: string): Promise<string> {
@@ -33,6 +55,21 @@ export class OpenRouterProvider implements MemoryProvider {
     systemPrompt: string,
     userPrompt: string,
   ): Promise<string> {
+    const body: OpenRouterRequestBody = {
+      model: this.model,
+      max_tokens: this.maxTokens,
+      messages: [
+        { role: "system", content: systemPrompt },
+        { role: "user", content: userPrompt },
+      ],
+    };
+    if (this.reasoningEffort) {
+      body.reasoning = { effort: this.reasoningEffort };
+    }
+    if (this.includeReasoning) {
+      body.include_reasoning = true;
+    }
+
     const response = await fetchWithTimeout(this.baseUrl, {
       method: "POST",
       headers: {
@@ -42,31 +79,30 @@ export class OpenRouterProvider implements MemoryProvider {
           ? { "HTTP-Referer": "https://github.com/rohitg00/agentmemory" }
           : {}),
       },
-      body: JSON.stringify({
-        model: this.model,
-        max_tokens: this.maxTokens,
-        messages: [
-          { role: "system", content: systemPrompt },
-          { role: "user", content: userPrompt },
-        ],
-      }),
+      body: JSON.stringify(body),
     });
 
     if (!response.ok) {
       const text = await response.text();
       throw new Error(`${this.name} API error (${response.status}): ${text}`);
     }
 
-    const data = (await response.json()) as Record<string, unknown>;
-    const choices = data.choices as
-      | Array<{ message: { content: string } }>
-      | undefined;
-    const content = choices?.[0]?.message?.content;
-    if (!content) {
-      throw new Error(
-        `${this.name} returned unexpected response: ${JSON.stringify(data).slice(0, 200)}`,
-      );
+    const data = (await response.json()) as {
+      choices?: Array<{
+        message?: { content?: string; reasoning?: string; reasoning_content?: string };
+      }>;
+    };
+    const message = data.choices?.[0]?.message;
+    const content = message?.content;
+    if (content) {
+      return content;
+    }
+    const reasoning = message?.reasoning || message?.reasoning_content;
+    if (reasoning) {
+      return reasoning;
     }
-    return content;
+    throw new Error(
+      `${this.name} returned unexpected response: ${JSON.stringify(data).slice(0, 200)}`,
+    );
   }
 }
diff --git a/test/fetch-timeout.test.ts b/test/fetch-timeout.test.ts
@@ -104,6 +104,8 @@ describe("Provider hang regression — OpenRouterProvider (covers Gemini LLM pat
   afterEach(() => {
     vi.restoreAllMocks();
     delete process.env["AGENTMEMORY_LLM_TIMEOUT_MS"];
+    delete process.env["OPENROUTER_REASONING_EFFORT"];
+    delete process.env["OPENROUTER_INCLUDE_REASONING"];
   });
 
   it("compress() aborts after timeout when upstream hangs", async () => {
@@ -117,6 +119,130 @@ describe("Provider hang regression — OpenRouterProvider (covers Gemini LLM pat
   });
 });
 
+describe("OpenRouterProvider reasoning options", () => {
+  beforeEach(() => { // start every test with the reasoning and timeout env vars unset
+    delete process.env["OPENROUTER_REASONING_EFFORT"];
+    delete process.env["OPENROUTER_INCLUDE_REASONING"];
+    delete process.env["AGENTMEMORY_LLM_TIMEOUT_MS"];
+  });
+
+  afterEach(() => { // undo the fetch spy and clear env vars so later suites are unaffected
+    vi.restoreAllMocks();
+    delete process.env["OPENROUTER_REASONING_EFFORT"];
+    delete process.env["OPENROUTER_INCLUDE_REASONING"];
+    delete process.env["AGENTMEMORY_LLM_TIMEOUT_MS"];
+  });
+
+  function mockChatResponse(message: { // stubs global fetch with a 200 JSON reply of the form { choices: [{ message }] }
+    content?: string;
+    reasoning?: string;
+    reasoning_content?: string;
+  }): ReturnType<typeof vi.spyOn> { // the spy is returned so a test can inspect the recorded request
+    return vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response(JSON.stringify({ choices: [{ message }] }), { // one Response instance is reused for every call; each test here issues a single request
+        status: 200,
+        headers: { "content-type": "application/json" },
+      }),
+    );
+  }
+
+  function requestBody(fetchSpy: ReturnType<typeof vi.spyOn>): Record<string, unknown> { // decodes the JSON body of the first recorded fetch call
+    const init = fetchSpy.mock.calls[0]?.[1] as RequestInit; // second fetch argument; undefined (so the next line throws) if fetch was never called
+    return JSON.parse(init.body as string) as Record<string, unknown>; // assumes the body was sent as a JSON string
+  }
+
+  it("passes OpenRouter reasoning controls when explicitly enabled", async () => {
+    process.env["OPENROUTER_REASONING_EFFORT"] = "HIGH"; // upper-case on purpose: the request is expected to carry the lower-cased value
+    process.env["OPENROUTER_INCLUDE_REASONING"] = "TRUE"; // upper-case on purpose: the flag is expected to be parsed case-insensitively
+    const fetchSpy = mockChatResponse({ content: "compressed memory" });
+
+    const provider = new OpenRouterProvider( // constructed after the env vars are set because the provider reads them in its constructor
+      "test-key",
+      "moonshotai/kimi-k2.6",
+      32000,
+      "https://openrouter.ai/api/v1/chat/completions",
+    );
+
+    await expect(provider.compress("system", "user")).resolves.toBe(
+      "compressed memory",
+    );
+    expect(requestBody(fetchSpy)).toMatchObject({ // partial match: fields not listed here (e.g. messages) are not asserted
+      model: "moonshotai/kimi-k2.6",
+      max_tokens: 32000,
+      reasoning: { effort: "high" },
+      include_reasoning: true,
+    });
+  });
+
+  it("does not send OpenRouter-only reasoning controls on the Gemini-compatible path", async () => {
+    process.env["OPENROUTER_REASONING_EFFORT"] = "high"; // both vars are set so that only the base URL decides the outcome
+    process.env["OPENROUTER_INCLUDE_REASONING"] = "true";
+    const fetchSpy = mockChatResponse({ content: "compressed memory" });
+
+    const provider = new OpenRouterProvider(
+      "test-key",
+      "gemini-2.5-flash",
+      1024,
+      "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions", // no "openrouter" in the URL, so the provider names itself "gemini"
+    );
+
+    await provider.compress("system", "user");
+    const body = requestBody(fetchSpy);
+    expect(body.reasoning).toBeUndefined(); // neither reasoning field may appear in the serialized request
+    expect(body.include_reasoning).toBeUndefined();
+  });
+
+  it("returns reasoning-only OpenRouter responses when content is empty", async () => {
+    mockChatResponse({ content: "", reasoning_content: "reasoning output" }); // empty content is falsy, so the provider must fall back to a reasoning field
+    const provider = new OpenRouterProvider(
+      "test-key",
+      "moonshotai/kimi-k2.6",
+      1024,
+      "https://openrouter.ai/api/v1/chat/completions",
+    );
+
+    await expect(provider.compress("system", "user")).resolves.toBe(
+      "reasoning output",
+    );
+  });
+
+  it("falls through empty reasoning to reasoning_content", async () => {
+    mockChatResponse({
+      content: "",
+      reasoning: "", // present but empty: must not be returned as the result
+      reasoning_content: "reasoning content fallback",
+    });
+    const provider = new OpenRouterProvider(
+      "test-key",
+      "moonshotai/kimi-k2.6",
+      1024,
+      "https://openrouter.ai/api/v1/chat/completions",
+    );
+
+    await expect(provider.compress("system", "user")).resolves.toBe(
+      "reasoning content fallback",
+    );
+  });
+
+  it("prefers reasoning over reasoning_content when both are present", async () => {
+    mockChatResponse({
+      content: "",
+      reasoning: "explicit reasoning", // expected winner when both reasoning fields are non-empty
+      reasoning_content: "fallback reasoning",
+    });
+    const provider = new OpenRouterProvider(
+      "test-key",
+      "moonshotai/kimi-k2.6",
+      1024,
+      "https://openrouter.ai/api/v1/chat/completions",
+    );
+
+    await expect(provider.compress("system", "user")).resolves.toBe(
+      "explicit reasoning",
+    );
+  });
+});
+
 describe("Provider hang regression — GeminiEmbeddingProvider", () => {
   beforeEach(() => {
     vi.spyOn(globalThis, "fetch").mockImplementation(hangingFetch as typeof fetch);
@@ -343,4 +469,3 @@ describe("OpenAIProvider thinking-model fallback (#627)", () => {
     expect(out).toBe("real content");
   });
 });
-