Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@

# OPENROUTER_API_KEY=sk-or-...
# OPENROUTER_MODEL=anthropic/claude-sonnet-4-20250514
# OPENROUTER_REASONING_EFFORT=high # Optional: xhigh | high | medium | low | minimal | none
# OPENROUTER_INCLUDE_REASONING=true # Optional: include reasoning tokens in supported OpenRouter responses

# MINIMAX_API_KEY=...
# MINIMAX_MODEL=MiniMax-M2.7
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1204,6 +1204,8 @@ OPENAI_MODEL=qwen2.5-coder-7b-instruct # match the model name from LM St

Reasoning-class models (`o1`-style models that emit `<think>` blocks) can return an empty `content` field alongside a `reasoning` field that your local server may not surface. If extractions come back blank, switch to a non-reasoning model first. Setting the `OPENAI_REASONING_EFFORT=none` environment variable can also disable thinking on Ollama Cloud thinking models that mirror the OpenAI reasoning schema.

OpenRouter reasoning models can be configured directly with `OPENROUTER_REASONING_EFFORT=xhigh|high|medium|low|minimal|none`. Set `OPENROUTER_INCLUDE_REASONING=true` when you want supported models to return reasoning tokens in the response.

Local embeddings ship out of the box via `@xenova/transformers` — `EMBEDDING_PROVIDER=local` (default) gives you BGE-small entirely on-device. No extra config needed.

### Cost-aware model selection
Expand Down Expand Up @@ -1314,6 +1316,9 @@ Create `~/.agentmemory/.env`:
# ANTHROPIC_BASE_URL=... # Optional: Anthropic-compatible proxy / Azure
# GEMINI_API_KEY=...
# OPENROUTER_API_KEY=...
# OPENROUTER_MODEL=anthropic/claude-sonnet-4-20250514
# OPENROUTER_REASONING_EFFORT=high # Optional: xhigh | high | medium | low | minimal | none
# OPENROUTER_INCLUDE_REASONING=true # Optional: include reasoning tokens when supported
# MINIMAX_API_KEY=...
# OPENAI_API_KEY=*** # NOTE: this same key auto-activates BOTH the
# # OpenAI LLM provider (here) AND the OpenAI
Expand Down
72 changes: 54 additions & 18 deletions src/providers/openrouter.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
import type { MemoryProvider } from "../types.js";
import { getEnvVar } from "../config.js";
import { fetchWithTimeout } from "./_fetch.js";

/**
 * JSON payload that `OpenRouterProvider` serializes and POSTs to its
 * chat-completions endpoint.
 *
 * `reasoning` and `include_reasoning` are optional: the provider only assigns
 * them when the corresponding option is enabled, so they are absent from the
 * serialized body otherwise.
 */
type OpenRouterRequestBody = {
  /** Model identifier sent with the request. */
  model: string;
  /** Token limit sent with the request. */
  max_tokens: number;
  /** Chat turns; the provider sends one system and one user message. */
  messages: { role: "system" | "user"; content: string }[];
  /** Requested reasoning effort level, wrapped in an object. */
  reasoning?: { effort: string };
  /** Set to `true` to request reasoning tokens in the response. */
  include_reasoning?: boolean;
};
Comment thread
lukeaus marked this conversation as resolved.

export class OpenRouterProvider implements MemoryProvider {
name: string;
private apiKey: string;
private model: string;
private maxTokens: number;
private baseUrl: string;
private reasoningEffort?: string;
private includeReasoning: boolean;

constructor(
apiKey: string,
Expand All @@ -19,6 +30,17 @@ export class OpenRouterProvider implements MemoryProvider {
this.maxTokens = maxTokens;
this.baseUrl = baseUrl;
this.name = baseUrl.includes("openrouter") ? "openrouter" : "gemini";
const reasoningEffort =
getEnvVar("OPENROUTER_REASONING_EFFORT")?.trim().toLowerCase() ||
undefined;
const includeReasoning = getEnvVar("OPENROUTER_INCLUDE_REASONING")
?.trim()
.toLowerCase();
this.reasoningEffort =
this.name === "openrouter" ? reasoningEffort : undefined;
this.includeReasoning =
this.name === "openrouter" &&
(includeReasoning === "true" || includeReasoning === "1");
}

async compress(systemPrompt: string, userPrompt: string): Promise<string> {
Expand All @@ -33,6 +55,21 @@ export class OpenRouterProvider implements MemoryProvider {
systemPrompt: string,
userPrompt: string,
): Promise<string> {
const body: OpenRouterRequestBody = {
model: this.model,
max_tokens: this.maxTokens,
messages: [
{ role: "system", content: systemPrompt },
{ role: "user", content: userPrompt },
],
};
if (this.reasoningEffort) {
body.reasoning = { effort: this.reasoningEffort };
}
if (this.includeReasoning) {
body.include_reasoning = true;
}

const response = await fetchWithTimeout(this.baseUrl, {
method: "POST",
headers: {
Expand All @@ -42,31 +79,30 @@ export class OpenRouterProvider implements MemoryProvider {
? { "HTTP-Referer": "https://github.com/rohitg00/agentmemory" }
: {}),
},
body: JSON.stringify({
model: this.model,
max_tokens: this.maxTokens,
messages: [
{ role: "system", content: systemPrompt },
{ role: "user", content: userPrompt },
],
}),
body: JSON.stringify(body),
});

if (!response.ok) {
const text = await response.text();
throw new Error(`${this.name} API error (${response.status}): ${text}`);
}

const data = (await response.json()) as Record<string, unknown>;
const choices = data.choices as
| Array<{ message: { content: string } }>
| undefined;
const content = choices?.[0]?.message?.content;
if (!content) {
throw new Error(
`${this.name} returned unexpected response: ${JSON.stringify(data).slice(0, 200)}`,
);
const data = (await response.json()) as {
choices?: Array<{
message?: { content?: string; reasoning?: string; reasoning_content?: string };
}>;
};
const message = data.choices?.[0]?.message;
const content = message?.content;
if (content) {
return content;
}
const reasoning = message?.reasoning || message?.reasoning_content;
if (reasoning) {
return reasoning;
}
return content;
throw new Error(
`${this.name} returned unexpected response: ${JSON.stringify(data).slice(0, 200)}`,
);
}
}
127 changes: 126 additions & 1 deletion test/fetch-timeout.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ describe("Provider hang regression — OpenRouterProvider (covers Gemini LLM pat
// Restore mocked globals first, then drop every env override a test in this
// suite may have set so later tests start from a clean environment.
afterEach(() => {
  vi.restoreAllMocks();
  for (const envName of [
    "AGENTMEMORY_LLM_TIMEOUT_MS",
    "OPENROUTER_REASONING_EFFORT",
    "OPENROUTER_INCLUDE_REASONING",
  ]) {
    delete process.env[envName];
  }
});

it("compress() aborts after timeout when upstream hangs", async () => {
Expand All @@ -117,6 +119,130 @@ describe("Provider hang regression — OpenRouterProvider (covers Gemini LLM pat
});
});

describe("OpenRouterProvider reasoning options", () => {
  const OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions";
  const GEMINI_COMPAT_URL =
    "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions";

  // Every env var these tests may set; cleared before and after each test so
  // no override leaks between cases.
  const ENV_KEYS = [
    "OPENROUTER_REASONING_EFFORT",
    "OPENROUTER_INCLUDE_REASONING",
    "AGENTMEMORY_LLM_TIMEOUT_MS",
  ] as const;

  /** Removes all env overrides used by this suite. */
  function clearEnv(): void {
    for (const key of ENV_KEYS) {
      delete process.env[key];
    }
  }

  beforeEach(() => {
    clearEnv();
  });

  afterEach(() => {
    vi.restoreAllMocks();
    clearEnv();
  });

  /**
   * Stubs global `fetch` so it resolves with a 200 JSON response containing a
   * single choice whose `message` is the given object.
   *
   * @returns the spy, so a test can inspect the outgoing request.
   */
  function mockChatResponse(message: {
    content?: string;
    reasoning?: string;
    reasoning_content?: string;
  }): ReturnType<typeof vi.spyOn> {
    return vi.spyOn(globalThis, "fetch").mockResolvedValue(
      new Response(JSON.stringify({ choices: [{ message }] }), {
        status: 200,
        headers: { "content-type": "application/json" },
      }),
    );
  }

  /**
   * Parses the JSON body of the first request recorded by `fetchSpy`.
   *
   * @throws Error when no call with an init argument was recorded, so the
   *   test fails with a clear message instead of a TypeError on `undefined`.
   */
  function requestBody(fetchSpy: ReturnType<typeof vi.spyOn>): Record<string, unknown> {
    const init = fetchSpy.mock.calls[0]?.[1] as RequestInit | undefined;
    if (!init) {
      throw new Error("expected fetch to have been called with an init argument");
    }
    return JSON.parse(init.body as string) as Record<string, unknown>;
  }

  /**
   * Builds a provider aimed at the OpenRouter endpoint. Call this only after
   * the env vars for the test are set: the constructor reads them.
   */
  function openRouterProvider(maxTokens = 1024): OpenRouterProvider {
    return new OpenRouterProvider(
      "test-key",
      "moonshotai/kimi-k2.6",
      maxTokens,
      OPENROUTER_URL,
    );
  }

  it("passes OpenRouter reasoning controls when explicitly enabled", async () => {
    // Upper-case values check that the provider normalizes case.
    process.env["OPENROUTER_REASONING_EFFORT"] = "HIGH";
    process.env["OPENROUTER_INCLUDE_REASONING"] = "TRUE";
    const fetchSpy = mockChatResponse({ content: "compressed memory" });

    const provider = openRouterProvider(32000);

    await expect(provider.compress("system", "user")).resolves.toBe(
      "compressed memory",
    );
    expect(requestBody(fetchSpy)).toMatchObject({
      model: "moonshotai/kimi-k2.6",
      max_tokens: 32000,
      reasoning: { effort: "high" },
      include_reasoning: true,
    });
  });

  it("does not send OpenRouter-only reasoning controls on the Gemini-compatible path", async () => {
    process.env["OPENROUTER_REASONING_EFFORT"] = "high";
    process.env["OPENROUTER_INCLUDE_REASONING"] = "true";
    const fetchSpy = mockChatResponse({ content: "compressed memory" });

    const provider = new OpenRouterProvider(
      "test-key",
      "gemini-2.5-flash",
      1024,
      GEMINI_COMPAT_URL,
    );

    await provider.compress("system", "user");
    const body = requestBody(fetchSpy);
    expect(body.reasoning).toBeUndefined();
    expect(body.include_reasoning).toBeUndefined();
  });

  it("returns reasoning-only OpenRouter responses when content is empty", async () => {
    mockChatResponse({ content: "", reasoning_content: "reasoning output" });
    const provider = openRouterProvider();

    await expect(provider.compress("system", "user")).resolves.toBe(
      "reasoning output",
    );
  });

  it("falls through empty reasoning to reasoning_content", async () => {
    mockChatResponse({
      content: "",
      reasoning: "",
      reasoning_content: "reasoning content fallback",
    });
    const provider = openRouterProvider();

    await expect(provider.compress("system", "user")).resolves.toBe(
      "reasoning content fallback",
    );
  });

  it("prefers reasoning over reasoning_content when both are present", async () => {
    mockChatResponse({
      content: "",
      reasoning: "explicit reasoning",
      reasoning_content: "fallback reasoning",
    });
    const provider = openRouterProvider();

    await expect(provider.compress("system", "user")).resolves.toBe(
      "explicit reasoning",
    );
  });
});

describe("Provider hang regression — GeminiEmbeddingProvider", () => {
beforeEach(() => {
vi.spyOn(globalThis, "fetch").mockImplementation(hangingFetch as typeof fetch);
Expand Down Expand Up @@ -343,4 +469,3 @@ describe("OpenAIProvider thinking-model fallback (#627)", () => {
expect(out).toBe("real content");
});
});