From bd537d2da012073e8f7f1c2b5afe8748f320f3df Mon Sep 17 00:00:00 2001 From: Ammar Date: Mon, 23 Feb 2026 13:12:01 -0600 Subject: [PATCH 1/5] wip: add update-models skill baseline --- .mux/skills/update-models/SKILL.md | 109 +++++++++++++++++++++++ src/common/constants/knownModels.test.ts | 49 +++++----- 2 files changed, 132 insertions(+), 26 deletions(-) create mode 100644 .mux/skills/update-models/SKILL.md diff --git a/.mux/skills/update-models/SKILL.md b/.mux/skills/update-models/SKILL.md new file mode 100644 index 0000000000..2376049a58 --- /dev/null +++ b/.mux/skills/update-models/SKILL.md @@ -0,0 +1,109 @@ +--- +name: update-models +description: Upgrade models.json from LiteLLM upstream and prune models-extra entries that are now covered. +--- + +# Update Models + +Refresh the LiteLLM pricing database (`models.json`) and remove entries from `models-extra.ts` +that upstream now covers accurately. + +## File Map + +| File | Role | +| ----------------------------------------- | ------------------------------------------------------------------------- | +| `src/common/utils/tokens/models.json` | LiteLLM upstream pricing/token-limit database (~1 MB JSON) | +| `src/common/utils/tokens/models-extra.ts` | Local overrides for models missing or wrong in upstream | +| `src/common/utils/tokens/modelStats.ts` | Runtime lookup: checks models-extra **first**, then models.json | +| `src/common/constants/knownModels.ts` | UI-facing model definitions (aliases, warm flags, tokenizer overrides) | +| `scripts/update_models.ts` | Fetches latest `model_prices_and_context_window.json` from LiteLLM GitHub | + +## Procedure + +### 1. Fetch the latest models.json + +```bash +bun scripts/update_models.ts +``` + +This overwrites `src/common/utils/tokens/models.json` with the latest LiteLLM data. + +### 2. Identify removable models-extra entries + +For **each** model key in `models-extra.ts`, check whether upstream `models.json` now contains +a matching entry. 
The lookup keys follow the same logic as `modelStats.ts`: + +- Bare model name (e.g., `gpt-5.2`) +- Provider-prefixed name (e.g., `openai/gpt-5.2`) + +### 3. Decide: remove, keep, or update + +For each models-extra entry found upstream, compare the **critical fields**: + +| Field | Priority | +| --------------------------------- | ------------------------------ | +| `max_input_tokens` | Must match or be acceptable | +| `max_output_tokens` | Must match or be acceptable | +| `input_cost_per_token` | Must match exactly | +| `output_cost_per_token` | Must match exactly | +| `cache_creation_input_token_cost` | Must match if present in extra | +| `cache_read_input_token_cost` | Must match if present in extra | + +**Decision matrix:** + +- **Remove** from models-extra: upstream data matches on all critical fields (or upstream is + strictly better—e.g., has cache costs that extra omitted). +- **Keep** in models-extra: upstream data is wrong (e.g., wrong context window, wrong pricing). + Update the comment explaining _why_ it's kept. +- **Update** in models-extra: the model is in upstream but upstream has a specific field wrong. + Only override the minimum necessary fields. + +> Remember: `modelStats.ts` checks models-extra **first**. An entry in models-extra always +> wins over models.json, which means stale overrides will shadow corrected upstream data. + +### 4. Remove entries from models-extra.ts + +Delete the full object entry (key + value + preceding comment block) for each model being removed. +Keep the file clean — no orphaned comments or trailing commas. + +After removal, if `models-extra.ts` is empty (all models are upstream), keep the file with just +the `ModelData` interface and an empty `modelsExtra` export: + +```typescript +export const modelsExtra: Record<string, ModelData> = {}; +``` + +### 5. 
Validate + +Run these checks in order — all must pass: + +```bash +# Type-check (catches import/type errors from removed entries) +make typecheck + +# Unit tests for model lookups (catches broken pricing/limits) +bun test src/common/utils/tokens/modelStats.test.ts + +# Known-models integration test — verifies every KNOWN_MODELS entry resolves +# through getModelStats() and has valid token limits and costs. +# This catches premature models-extra removals automatically. +bun test src/common/constants/knownModels.test.ts + +# Model capabilities (uses models-extra data) +bun test src/common/utils/ai/modelCapabilities.test.ts +``` + +If any test hard-codes a value from a removed models-extra entry (e.g., asserting +`max_input_tokens === 272000` for a model that now resolves from upstream with a +different value), update the test expectation to match the new upstream data. + +## Common Pitfalls + +- **LiteLLM key format varies.** Some models use bare names (`gpt-5.2`), some use + `provider/model` (`anthropic/claude-opus-4-6`). Always check both forms. +- **models-extra shadows upstream.** If you leave a stale entry in models-extra, users will + get outdated pricing even after upstream is fixed. Always prune. +- **The `mode` field matters.** Some Codex models use `"responses"` mode instead of `"chat"`. + If upstream has the wrong mode, keep the models-extra override. +- **Cache costs may be absent upstream.** If models-extra has cache pricing that upstream lacks, + keep the entry (cache cost accuracy affects user-facing cost estimates). 
diff --git a/src/common/constants/knownModels.test.ts b/src/common/constants/knownModels.test.ts index ad8f9882c9..3f869ab32d 100644 --- a/src/common/constants/knownModels.test.ts +++ b/src/common/constants/knownModels.test.ts @@ -1,49 +1,46 @@ /** - * Integration test for known models - verifies all models exist in models.json + * Integration test for known models — verifies every model in KNOWN_MODELS + * resolves through the real getModelStats() lookup chain (models-extra → models.json). * - * This test does NOT go through IPC - it directly uses data from models.json - * to verify that every providerModelId in KNOWN_MODELS exists. + * This catches: + * - A knownModels entry whose providerModelId doesn't exist anywhere + * - A models-extra pruning that removed an entry upstream doesn't cover yet + * - An upstream models.json update that drops a model we rely on */ import { describe, test, expect } from "@jest/globals"; import { KNOWN_MODELS } from "@/common/constants/knownModels"; -import modelsJson from "@/common/utils/tokens/models.json"; -import { modelsExtra } from "@/common/utils/tokens/models-extra"; +import { getModelStats } from "@/common/utils/tokens/modelStats"; describe("Known Models Integration", () => { - test("all known models exist in models.json", () => { - const missingModels: string[] = []; + test("all known models resolve via getModelStats()", () => { + const missing: string[] = []; for (const [key, model] of Object.entries(KNOWN_MODELS)) { - const modelId = model.providerModelId; - - // Check if model exists in models.json or models-extra - // xAI models are prefixed with "xai/" in models.json - const lookupKey = model.provider === "xai" ? 
`xai/${modelId}` : modelId; - if (!(lookupKey in modelsJson) && !(modelId in modelsExtra)) { - missingModels.push(`${key}: ${model.provider}:${modelId}`); + const stats = getModelStats(model.id); + if (!stats) { + missing.push(`${key}: ${model.id}`); } } - // Report all missing models at once for easier debugging - if (missingModels.length > 0) { + if (missing.length > 0) { throw new Error( - `The following known models are missing from models.json:\n${missingModels.join("\n")}\n\n` + - `Run 'bun scripts/update_models.ts' to refresh models.json from LiteLLM.` + `The following known models have no stats (not in models.json or models-extra):\n` + + `${missing.join("\n")}\n\n` + + `Either add the model to models-extra.ts or run 'bun scripts/update_models.ts' to refresh models.json.` ); } }); - test("all known models have required metadata", () => { + test("all known models have positive token limits and non-negative costs", () => { for (const [, model] of Object.entries(KNOWN_MODELS)) { - const modelId = model.providerModelId; - // xAI models are prefixed with "xai/" in models.json - const lookupKey = model.provider === "xai" ? `xai/${modelId}` : modelId; - const modelData = modelsJson[lookupKey as keyof typeof modelsJson] ?? modelsExtra[modelId]; + const stats = getModelStats(model.id); + // Existence is covered by the test above; skip if null to avoid noise. 
+ if (!stats) continue; - expect(modelData).toBeDefined(); - // Check that basic metadata fields exist (not all models have all fields) - expect(typeof modelData.litellm_provider).toBe("string"); + expect(stats.max_input_tokens).toBeGreaterThan(0); + expect(stats.input_cost_per_token).toBeGreaterThanOrEqual(0); + expect(stats.output_cost_per_token).toBeGreaterThanOrEqual(0); } }); }); From b84b1c15a61177dc7a3b7dc7eda3e7aa0728b4e9 Mon Sep 17 00:00:00 2001 From: Ammar Date: Mon, 23 Feb 2026 13:19:55 -0600 Subject: [PATCH 2/5] feat: refresh models.json and prune models-extra overrides --- .mux/skills/update-models/SKILL.md | 92 +- src/common/utils/ai/modelCapabilities.test.ts | 10 +- src/common/utils/tokens/modelStats.test.ts | 4 +- src/common/utils/tokens/models-extra.ts | 191 +- src/common/utils/tokens/models.json | 4278 ++++++++++++++--- 5 files changed, 3673 insertions(+), 902 deletions(-) diff --git a/.mux/skills/update-models/SKILL.md b/.mux/skills/update-models/SKILL.md index 2376049a58..ec3e93eb58 100644 --- a/.mux/skills/update-models/SKILL.md +++ b/.mux/skills/update-models/SKILL.md @@ -36,18 +36,82 @@ a matching entry. The lookup keys follow the same logic as `modelStats.ts`: - Bare model name (e.g., `gpt-5.2`) - Provider-prefixed name (e.g., `openai/gpt-5.2`) +Use this script to print each `models-extra` entry, whether upstream has it, and which critical +fields differ: + +```bash +bun -e ' +import modelsJson from "./src/common/utils/tokens/models.json"; +import { modelsExtra } from "./src/common/utils/tokens/models-extra"; + +const critical = [ + "max_input_tokens", + "max_output_tokens", + "input_cost_per_token", + "output_cost_per_token", + "cache_creation_input_token_cost", + "cache_read_input_token_cost", + "mode", +] as const; + +function parseNum(v: unknown): number | null { + if (typeof v === "number" && Number.isFinite(v)) return v; + if (typeof v === "string") { + const n = Number(v.replace(/,/g, "").trim()); + return Number.isFinite(n) ? 
n : null; + } + return null; +} + +for (const [model, extra] of Object.entries(modelsExtra)) { + const provider = extra.litellm_provider ?? ""; + const candidates = [ + model, + provider ? `${provider}/${model}` : null, + provider ? `${provider}/${model}-cloud` : null, + ].filter(Boolean) as string[]; + + const foundKey = candidates.find((k) => (modelsJson as Record<string, unknown>)[k]); + if (!foundKey) { + console.log(`${model} | upstream=missing | decision=keep`); + continue; + } + + const upstream = (modelsJson as Record<string, Record<string, unknown>>)[foundKey]; + const diffs = critical.filter((field) => { + const ev = extra[field]; + const uv = upstream[field]; + if (ev == null && uv == null) return false; + const en = parseNum(ev); + const un = parseNum(uv); + return en != null || un != null ? en !== un : ev !== uv; + }); + + console.log( + `${model} | upstream=${foundKey} | diffs=${diffs.join(",") || "none"} | decision=${ + diffs.length === 0 ? "remove" : "review" + }` + ); +} +' +``` + +Then manually inspect each `review` entry to decide whether upstream is now accurate enough to +remove the local override. + ### 3. 
Decide: remove, keep, or update For each models-extra entry found upstream, compare the **critical fields**: -| Field | Priority | -| --------------------------------- | ------------------------------ | -| `max_input_tokens` | Must match or be acceptable | -| `max_output_tokens` | Must match or be acceptable | -| `input_cost_per_token` | Must match exactly | -| `output_cost_per_token` | Must match exactly | -| `cache_creation_input_token_cost` | Must match if present in extra | -| `cache_read_input_token_cost` | Must match if present in extra | +| Field | Priority | +| --------------------------------- | ---------------------------------------------- | +| `max_input_tokens` | Must match or be acceptable | +| `max_output_tokens` | Must match or be acceptable | +| `input_cost_per_token` | Must match exactly | +| `output_cost_per_token` | Must match exactly | +| `cache_creation_input_token_cost` | Must match if present in extra | +| `cache_read_input_token_cost` | Must match if present in extra | +| `mode` | Must match when provider routing depends on it | **Decision matrix:** @@ -97,6 +161,18 @@ If any test hard-codes a value from a removed models-extra entry (e.g., assertin `max_input_tokens === 272000` for a model that now resolves from upstream with a different value), update the test expectation to match the new upstream data. +## Findings from 2026-02-23 update cycle + +- Upstream LiteLLM had caught up on most previously custom entries; only the GPT-5.3 Codex models + (`gpt-5.3-codex`, `gpt-5.3-codex-spark`) still required local `models-extra` entries. +- Several stale overrides were **worse** than upstream (e.g., lower max token limits or outdated + `mode: "chat"` where upstream now uses `mode: "responses"`). +- `max_output_tokens` changed for some models without cost changes, so pruning decisions should + always compare token limits in addition to pricing fields. 
+ +**Lesson:** default to removing local overrides once upstream is present, unless there is a +clear, documented mismatch that affects runtime behavior or cost accounting. + ## Common Pitfalls - **LiteLLM key format varies.** Some models use bare names (`gpt-5.2`), some use diff --git a/src/common/utils/ai/modelCapabilities.test.ts b/src/common/utils/ai/modelCapabilities.test.ts index d0cd1649e9..9e39bcea53 100644 --- a/src/common/utils/ai/modelCapabilities.test.ts +++ b/src/common/utils/ai/modelCapabilities.test.ts @@ -9,15 +9,13 @@ describe("getModelCapabilities", () => { expect(caps?.supportsVision).toBe(true); }); - it("merges models.json + modelsExtra so overrides don't wipe capabilities", () => { - // claude-opus-4-5 exists in both sources; modelsExtra intentionally overrides - // pricing/token limits, but it should not wipe upstream capability flags. + it("returns capabilities for upstream Anthropic models", () => { const caps = getModelCapabilities("anthropic:claude-opus-4-5"); expect(caps).not.toBeNull(); expect(caps?.supportsPdfInput).toBe(true); }); - it("keeps explicit PDF support for Opus 4.6 from models-extra", () => { + it("retains PDF support for Opus 4.6", () => { const caps = getModelCapabilities("anthropic:claude-opus-4-6"); expect(caps).not.toBeNull(); expect(caps?.supportsPdfInput).toBe(true); @@ -29,9 +27,9 @@ describe("getModelCapabilities", () => { }); it("returns capabilities for models present only in models-extra", () => { - // This model is defined in models-extra.ts but not (yet) in upstream models.json. 
- const caps = getModelCapabilities("openrouter:z-ai/glm-4.6"); + const caps = getModelCapabilities("openai:gpt-5.3-codex"); expect(caps).not.toBeNull(); + expect(caps?.supportsVision).toBe(true); }); it("returns maxPdfSizeMb when present in model metadata", () => { diff --git a/src/common/utils/tokens/modelStats.test.ts b/src/common/utils/tokens/modelStats.test.ts index c7f6fdc68a..99ba11a28f 100644 --- a/src/common/utils/tokens/modelStats.test.ts +++ b/src/common/utils/tokens/modelStats.test.ts @@ -25,9 +25,7 @@ describe("getModelStats", () => { expect(stats?.input_cost_per_token).toBeGreaterThan(0); }); - test("models-extra.ts should override models.json", () => { - // gpt-5.2-codex exists in both files - models-extra.ts has correct 272k, models.json has incorrect 400k. - // The exact value matters here: it proves the override mechanism works. + test("should return synced upstream values for gpt-5.2-codex", () => { const stats = getModelStats("openai:gpt-5.2-codex"); expect(stats).not.toBeNull(); expect(stats?.max_input_tokens).toBe(272000); diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts index a7ed364fca..f15b474a3f 100644 --- a/src/common/utils/tokens/models-extra.ts +++ b/src/common/utils/tokens/models-extra.ts @@ -1,7 +1,8 @@ /** - * Extra models not yet in LiteLLM's official models.json - * This file is consulted as a fallback when a model is not found in the main file. - * Models should be removed from here once they appear in the upstream LiteLLM repository. + * Extra models not yet in LiteLLM's official models.json. + * + * modelStats.ts checks this file first, so stale entries here will shadow fixed upstream data. + * Keep this list as small as possible and remove entries as soon as upstream covers them. 
*/ interface ModelData { @@ -26,121 +27,7 @@ interface ModelData { } export const modelsExtra: Record = { - // Claude Opus 4.6 - Released February 2026 - // Standard: $5/M input, $25/M output (≤200k context) - // Premium (1M context): $10/M input, $37.50/M output - // 128K max output tokens - "claude-opus-4-6": { - max_input_tokens: 200000, - max_output_tokens: 128000, - input_cost_per_token: 0.000005, // $5 per million input tokens - output_cost_per_token: 0.000025, // $25 per million output tokens - cache_creation_input_token_cost: 0.00000625, // $6.25 per million tokens - cache_read_input_token_cost: 0.0000005, // $0.50 per million tokens - litellm_provider: "anthropic", - mode: "chat", - supports_function_calling: true, - supports_vision: true, - // User-reported issue: Opus 4.6 should accept PDF attachments like other Claude 4.x models. - supports_pdf_input: true, - supports_reasoning: true, - supports_response_schema: true, - }, - - // Claude Sonnet 4.6 - Released February 2026 - // $3/M input, $15/M output (same as Sonnet 4.5) - // 64K max output tokens, supports adaptive thinking + effort parameter - "claude-sonnet-4-6": { - max_input_tokens: 200000, - max_output_tokens: 64000, - input_cost_per_token: 0.000003, // $3 per million input tokens - output_cost_per_token: 0.000015, // $15 per million output tokens - cache_creation_input_token_cost: 0.00000375, // $3.75 per million tokens - cache_read_input_token_cost: 0.0000003, // $0.30 per million tokens - litellm_provider: "anthropic", - mode: "chat", - supports_function_calling: true, - supports_vision: true, - supports_pdf_input: true, - supports_reasoning: true, - supports_response_schema: true, - }, - - // Claude Opus 4.5 - Released November 24, 2025 - // $5/M input, $25/M output (price drop from Opus 4.1's $15/$75) - // 64K max output tokens (matches Sonnet 4.5) - "claude-opus-4-5": { - max_input_tokens: 200000, - max_output_tokens: 64000, - input_cost_per_token: 0.000005, // $5 per million input tokens - 
output_cost_per_token: 0.000025, // $25 per million output tokens - cache_creation_input_token_cost: 0.00000625, // $6.25 per million tokens (estimated) - cache_read_input_token_cost: 0.0000005, // $0.50 per million tokens (estimated) - litellm_provider: "anthropic", - mode: "chat", - supports_function_calling: true, - supports_vision: true, - supports_reasoning: true, - supports_response_schema: true, - }, - - // GPT-5.2 / GPT-5.2 Codex - keep aligned - // LiteLLM reports 400k context for Codex, but it should match GPT-5.2 (272k) - // $1.75/M input, $14/M output - // Cached input: $0.175/M - // Supports off, low, medium, high, xhigh reasoning levels - "gpt-5.2": { - max_input_tokens: 272000, - max_output_tokens: 128000, - input_cost_per_token: 0.00000175, // $1.75 per million input tokens - output_cost_per_token: 0.000014, // $14 per million output tokens - // OpenAI model page lists "cached input" pricing, which corresponds to prompt cache reads. - cache_read_input_token_cost: 0.000000175, // $0.175 per million cached input tokens - litellm_provider: "openai", - mode: "chat", - supports_function_calling: true, - supports_vision: true, - supports_reasoning: true, - supports_response_schema: true, - knowledge_cutoff: "2025-08-31", - }, - "gpt-5.2-codex": { - max_input_tokens: 272000, - max_output_tokens: 128000, - input_cost_per_token: 0.00000175, // $1.75 per million input tokens - output_cost_per_token: 0.000014, // $14 per million output tokens - // OpenAI model page lists "cached input" pricing, which corresponds to prompt cache reads. 
- cache_read_input_token_cost: 0.000000175, // $0.175 per million cached input tokens - litellm_provider: "openai", - mode: "responses", - supports_function_calling: true, - supports_vision: true, - supports_reasoning: true, - supports_response_schema: true, - }, - - // Gemini 3.1 Pro Preview - Released February 19, 2026 - // Tiered pricing: ≤200K tokens $2/M input, $12/M output; >200K tokens $4/M input, $18/M output - // 1M input context, ~64K max output tokens - "gemini-3.1-pro-preview": { - max_input_tokens: 1048576, - max_output_tokens: 65535, - input_cost_per_token: 0.000002, // $2 per million input tokens (≤200K) - output_cost_per_token: 0.000012, // $12 per million output tokens (≤200K) - input_cost_per_token_above_200k_tokens: 0.000004, // $4 per million input tokens (>200K) - output_cost_per_token_above_200k_tokens: 0.000018, // $18 per million output tokens (>200K) - cache_read_input_token_cost: 2e-7, - litellm_provider: "vertex_ai-language-models", - mode: "chat", - supports_function_calling: true, - supports_vision: true, - supports_pdf_input: true, - supports_reasoning: true, - supports_response_schema: true, - knowledge_cutoff: "2025-01", - }, - - // GPT-5.3-Codex - same pricing as gpt-5.2-codex + // Not present in LiteLLM upstream models.json as of 2026-02-23. "gpt-5.3-codex": { max_input_tokens: 272000, max_output_tokens: 128000, @@ -155,7 +42,7 @@ export const modelsExtra: Record = { supports_response_schema: true, }, - // GPT-5.3 Codex Spark - research preview (text-only) and currently available as 128k-context model + // GPT-5.3 Codex Spark - research preview (text-only) and currently available as 128k-context model. // Pricing is not published separately; reuse GPT-5.3-Codex pricing until confirmed. 
"gpt-5.3-codex-spark": { max_input_tokens: 128000, @@ -170,70 +57,4 @@ export const modelsExtra: Record = { supports_reasoning: true, supports_response_schema: true, }, - - // GPT-5.2 Pro - Released December 11, 2025 - // $21/M input, $168/M output - // Supports medium, high, xhigh reasoning levels - "gpt-5.2-pro": { - max_input_tokens: 272000, - max_output_tokens: 128000, - input_cost_per_token: 0.000021, // $21 per million input tokens - output_cost_per_token: 0.000168, // $168 per million output tokens - knowledge_cutoff: "2025-08-31", - litellm_provider: "openai", - mode: "chat", - supports_function_calling: true, - supports_vision: true, - supports_reasoning: true, - supports_response_schema: true, - supported_endpoints: ["/v1/responses"], - }, - - // Claude Haiku 4.5 - Released October 15, 2025 - // $1/M input, $5/M output - "claude-haiku-4-5": { - max_input_tokens: 200000, - max_output_tokens: 8192, - input_cost_per_token: 0.000001, // $1 per million input tokens - output_cost_per_token: 0.000005, // $5 per million output tokens - cache_creation_input_token_cost: 0.00000125, // $1.25 per million tokens - cache_read_input_token_cost: 0.0000001, // $0.10 per million tokens - litellm_provider: "anthropic", - mode: "chat", - supports_function_calling: true, - supports_vision: true, - supports_response_schema: true, - }, - - // Z.AI GLM 4.6 via OpenRouter - // $0.40/M input, $1.75/M output (OpenRouter pricing) - // 200K context window, supports tool use and reasoning - "openrouter/z-ai/glm-4.6": { - max_input_tokens: 202752, - max_output_tokens: 202752, - input_cost_per_token: 0.0000004, // $0.40 per million input tokens - output_cost_per_token: 0.00000175, // $1.75 per million output tokens - litellm_provider: "openrouter", - mode: "chat", - supports_function_calling: true, - supports_reasoning: true, - supports_response_schema: true, - }, - - // GPT-5.1-Codex-Max - Extended reasoning model with xhigh support - // Same pricing as gpt-5.1-codex: $1.25/M input, 
$10/M output - // Supports 5 reasoning levels: off, low, medium, high, xhigh - "gpt-5.1-codex-max": { - max_input_tokens: 272000, // Same as gpt-5.1-codex - max_output_tokens: 128000, // Same as gpt-5.1-codex - input_cost_per_token: 0.00000125, // $1.25 per million input tokens - output_cost_per_token: 0.00001, // $10 per million output tokens - litellm_provider: "openai", - mode: "chat", - supports_function_calling: true, - supports_vision: true, - supports_reasoning: true, - supports_response_schema: true, - supported_endpoints: ["/v1/responses"], - }, }; diff --git a/src/common/utils/tokens/models.json b/src/common/utils/tokens/models.json index 7e353b714e..3fdf7225d4 100644 --- a/src/common/utils/tokens/models.json +++ b/src/common/utils/tokens/models.json @@ -324,6 +324,25 @@ "supports_video_input": true, "supports_vision": true }, + "amazon.nova-2-pro-preview-20251202-v1:0": { + "cache_read_input_token_cost": 5.46875e-7, + "input_cost_per_token": 0.0000021875, + "input_cost_per_image_token": 0.0000021875, + "input_cost_per_audio_token": 0.0000021875, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000175, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, "apac.amazon.nova-2-lite-v1:0": { "cache_read_input_token_cost": 8.25e-8, "input_cost_per_token": 3.3e-7, @@ -341,6 +360,25 @@ "supports_video_input": true, "supports_vision": true }, + "apac.amazon.nova-2-pro-preview-20251202-v1:0": { + "cache_read_input_token_cost": 5.46875e-7, + "input_cost_per_token": 0.0000021875, + "input_cost_per_image_token": 0.0000021875, + "input_cost_per_audio_token": 0.0000021875, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 
64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000175, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, "eu.amazon.nova-2-lite-v1:0": { "cache_read_input_token_cost": 8.25e-8, "input_cost_per_token": 3.3e-7, @@ -358,6 +396,25 @@ "supports_video_input": true, "supports_vision": true }, + "eu.amazon.nova-2-pro-preview-20251202-v1:0": { + "cache_read_input_token_cost": 5.46875e-7, + "input_cost_per_token": 0.0000021875, + "input_cost_per_image_token": 0.0000021875, + "input_cost_per_audio_token": 0.0000021875, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000175, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, "us.amazon.nova-2-lite-v1:0": { "cache_read_input_token_cost": 8.25e-8, "input_cost_per_token": 3.3e-7, @@ -375,6 +432,25 @@ "supports_video_input": true, "supports_vision": true }, + "us.amazon.nova-2-pro-preview-20251202-v1:0": { + "cache_read_input_token_cost": 5.46875e-7, + "input_cost_per_token": 0.0000021875, + "input_cost_per_image_token": 0.0000021875, + "input_cost_per_audio_token": 0.0000021875, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000175, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, 
"amazon.nova-2-multimodal-embeddings-v1:0": { "litellm_provider": "bedrock", "max_input_tokens": 8172, @@ -638,12 +714,13 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "tool_use_system_prompt_tokens": 346, + "supports_native_streaming": true }, "anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", - "max_input_tokens": 200000, + "max_input_tokens": 1000000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", @@ -652,14 +729,22 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.00003, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "cache_creation_input_token_cost_above_1hr": 0.0000075, + "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7 }, "anthropic.claude-3-5-sonnet-20241022-v2:0": { "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, "litellm_provider": "bedrock", - "max_input_tokens": 200000, + "max_input_tokens": 1000000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", @@ -671,7 +756,13 @@ "supports_prompt_caching": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.00003, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "cache_creation_input_token_cost_above_1hr": 0.0000075, + 
"cache_creation_input_token_cost_above_1hr_above_200k_tokens": 0.000015 }, "anthropic.claude-3-7-sonnet-20240620-v1:0": { "cache_creation_input_token_cost": 0.0000045, @@ -842,20 +933,170 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, - "anthropic.claude-sonnet-4-20250514-v1:0": { + "anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000125, + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.000001, + "input_cost_per_token": 0.000005, + "input_cost_per_token_above_200k_tokens": 0.00001, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "output_cost_per_token_above_200k_tokens": 0.0000375, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "global.anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000125, + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.000001, + "input_cost_per_token": 0.000005, + "input_cost_per_token_above_200k_tokens": 0.00001, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "output_cost_per_token_above_200k_tokens": 0.0000375, + "search_context_cost_per_query": { + 
"search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "us.anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 0.000006875, + "cache_creation_input_token_cost_above_200k_tokens": 0.00001375, + "cache_read_input_token_cost": 5.5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.0000011, + "input_cost_per_token": 0.0000055, + "input_cost_per_token_above_200k_tokens": 0.000011, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.0000275, + "output_cost_per_token_above_200k_tokens": 0.00004125, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "eu.anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 0.000006875, + "cache_creation_input_token_cost_above_200k_tokens": 0.00001375, + "cache_read_input_token_cost": 5.5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.0000011, + "input_cost_per_token": 0.0000055, + "input_cost_per_token_above_200k_tokens": 0.000011, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + 
"max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.0000275, + "output_cost_per_token_above_200k_tokens": 0.00004125, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "au.anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 0.000006875, + "cache_creation_input_token_cost_above_200k_tokens": 0.00001375, + "cache_read_input_token_cost": 5.5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.0000011, + "input_cost_per_token": 0.0000055, + "input_cost_per_token_above_200k_tokens": 0.000011, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.0000275, + "output_cost_per_token_above_200k_tokens": 0.00004125, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost": 3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "input_cost_per_token": 0.000003, 
"input_cost_per_token_above_200k_tokens": 0.000006, - "output_cost_per_token_above_200k_tokens": 0.0000225, - "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, - "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", - "max_input_tokens": 1000000, + "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.0000225, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -870,22 +1111,22 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 346 }, - "anthropic.claude-sonnet-4-5-20250929-v1:0": { + "global.anthropic.claude-sonnet-4-6": { "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost": 3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "input_cost_per_token": 0.000003, "input_cost_per_token_above_200k_tokens": 0.000006, - "output_cost_per_token_above_200k_tokens": 0.0000225, - "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, - "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.0000225, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -900,31 +1141,181 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 - }, - "anthropic.claude-v1": { - "input_cost_per_token": 0.000008, - "litellm_provider": "bedrock", - "max_input_tokens": 100000, - "max_output_tokens": 8191, - "max_tokens": 
8191, - "mode": "chat", - "output_cost_per_token": 0.000024 + "tool_use_system_prompt_tokens": 346 }, - "anthropic.claude-v2:1": { - "input_cost_per_token": 0.000008, - "litellm_provider": "bedrock", - "max_input_tokens": 100000, - "max_output_tokens": 8191, - "max_tokens": 8191, + "us.anthropic.claude-sonnet-4-6": { + "cache_creation_input_token_cost": 0.000004125, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, + "cache_read_input_token_cost": 3.3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 0.000024, - "supports_tool_choice": true + "output_cost_per_token": 0.0000165, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 }, - "anyscale/HuggingFaceH4/zephyr-7b-beta": { - "input_cost_per_token": 1.5e-7, - "litellm_provider": "anyscale", - "max_input_tokens": 16384, + "eu.anthropic.claude-sonnet-4-6": { + "cache_creation_input_token_cost": 0.000004125, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, + "cache_read_input_token_cost": 3.3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + 
"max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000165, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "apac.anthropic.claude-sonnet-4-6": { + "cache_creation_input_token_cost": 0.000004125, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, + "cache_read_input_token_cost": 3.3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000165, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + 
"output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "anthropic.claude-v1": { + "input_cost_per_token": 0.000008, + 
"litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.000024 + }, + "anthropic.claude-v2:1": { + "input_cost_per_token": 0.000008, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.000024, + "supports_tool_choice": true + }, + "anyscale/HuggingFaceH4/zephyr-7b-beta": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "anyscale", + "max_input_tokens": 16384, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", @@ -1316,6 +1707,33 @@ "supports_tool_choice": true, "supports_vision": true }, + "azure_ai/claude-opus-4-6": { + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000025, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, "azure_ai/claude-opus-4-1": { "cache_creation_input_token_cost": 0.00001875, "cache_creation_input_token_cost_above_1hr": 0.00003, @@ -1358,6 +1776,28 @@ "supports_tool_choice": true, "supports_vision": true }, + "azure_ai/claude-sonnet-4-6": { + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_1hr": 0.000006, + "cache_read_input_token_cost": 3e-7, + 
"input_cost_per_token": 0.000003, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "azure/computer-use-preview": { "input_cost_per_token": 0.000003, "litellm_provider": "azure", @@ -1397,6 +1837,14 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "azure_ai/model_router": { + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 0, + "litellm_provider": "azure_ai", + "mode": "chat", + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-services/", + "comment": "Flat cost of $0.14 per M input tokens for Azure AI Foundry Model Router infrastructure. 
Use pattern: azure_ai/model_router/ where deployment-name is your Azure deployment (e.g., azure-model-router)" + }, "azure/eu/gpt-4o-2024-08-06": { "deprecation_date": "2026-02-27", "cache_read_input_token_cost": 0.000001375, @@ -2803,7 +3251,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, - "supports_tool_choice": false, + "supports_tool_choice": true, "supports_vision": true }, "azure/gpt-5-chat-latest": { @@ -2826,7 +3274,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, - "supports_tool_choice": false, + "supports_tool_choice": true, "supports_vision": true }, "azure/gpt-5-codex": { @@ -3187,12 +3635,12 @@ "cache_read_input_token_cost": 1.75e-7, "input_cost_per_token": 0.00000175, "litellm_provider": "azure", - "max_input_tokens": 128000, - "max_output_tokens": 16384, - "max_tokens": 16384, - "mode": "chat", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", "output_cost_per_token": 0.000014, - "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], "supports_function_calling": true, @@ -4862,6 +5310,20 @@ "output_cost_per_token": 7e-7, "supports_tool_choice": true }, + "azure_ai/kimi-k2.5": { + "input_cost_per_token": 6e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/kimi-k2-5-now-in-microsoft-foundry/4492321", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true + }, "azure_ai/ministral-3b": { "input_cost_per_token": 4e-8, "litellm_provider": "azure_ai", @@ -5105,42 +5567,290 @@ 
"output_cost_per_token": 0.000024, "supports_tool_choice": true }, - "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 0.00000318, + "bedrock/ap-northeast-1/deepseek.v3.2": { + "input_cost_per_token": 7.4e-7, "litellm_provider": "bedrock", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, "mode": "chat", - "output_cost_per_token": 0.0000042 + "output_cost_per_token": 0.00000222, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" }, - "bedrock/ap-south-1/meta.llama3-8b-instruct-v1:0": { + "bedrock/ap-northeast-1/minimax.minimax-m2.1": { "input_cost_per_token": 3.6e-7, "litellm_provider": "bedrock", - "max_input_tokens": 8192, + "max_input_tokens": 196000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 7.2e-7 + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" }, - "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": { - "input_cost_per_token": 0.00000305, + "bedrock/ap-northeast-1/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 7.3e-7, "litellm_provider": "bedrock", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 0.00000403 + "output_cost_per_token": 0.00000303, + "supports_function_calling": true, + "supports_reasoning": true }, - "bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0": { - "input_cost_per_token": 3.5e-7, + "bedrock/ap-northeast-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-7, "litellm_provider": "bedrock", - "max_input_tokens": 8192, - 
"max_output_tokens": 8192, - "max_tokens": 8192, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000036, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-northeast-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 7.3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.00000303, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.00000303, + "source": "https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true + }, + "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 0.00000318, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000042 + }, + "bedrock/ap-south-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3.6e-7, + 
"litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.2e-7 + }, + "bedrock/ap-south-1/deepseek.v3.2": { + "input_cost_per_token": 7.4e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000222, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-south-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-south-1/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 7.1e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.00000294, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/ap-south-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000036, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-south-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + 
"output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-southeast-3/deepseek.v3.2": { + "input_cost_per_token": 7.4e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000222, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-southeast-3/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-southeast-3/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000036, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-southeast-3/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 0.00000305, + 
"litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000403 + }, + "bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3.5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 6.9e-7 }, + "bedrock/eu-north-1/deepseek.v3.2": { + "input_cost_per_token": 7.4e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000222, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-north-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-north-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000036, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1": { "input_cost_per_second": 0.01635, "litellm_provider": "bedrock", @@ -5228,6 +5938,32 @@ "output_cost_per_token": 0.000024, "supports_tool_choice": true }, + "bedrock/eu-central-1/minimax.minimax-m2.1": { + "input_cost_per_token": 
3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-central-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 0.00000286, "litellm_provider": "bedrock", @@ -5246,6 +5982,32 @@ "mode": "chat", "output_cost_per_token": 6.5e-7 }, + "bedrock/eu-west-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-west-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 0.00000345, "litellm_provider": "bedrock", @@ -5264,6 +6026,32 @@ "mode": "chat", "output_cost_per_token": 7.8e-7 }, + 
"bedrock/eu-west-2/minimax.minimax-m2.1": { + "input_cost_per_token": 4.7e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000186, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-west-2/qwen.qwen3-coder-next": { + "input_cost_per_token": 7.8e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000186, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "bedrock/eu-west-3/mistral.mistral-7b-instruct-v0:2": { "input_cost_per_token": 2e-7, "litellm_provider": "bedrock", @@ -5294,21 +6082,47 @@ "output_cost_per_token": 9.1e-7, "supports_tool_choice": true }, - "bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0": { - "input_cost_per_token": 0.000003, + "bedrock/eu-south-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, "litellm_provider": "bedrock", - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "max_tokens": 4096, - "metadata": { - "notes": "Anthropic via Invoke route does not currently support pdf input." 
- }, + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.000015, + "output_cost_per_token": 0.00000144, "supports_function_calling": true, - "supports_response_schema": true, + "supports_system_messages": true, "supports_tool_choice": true, - "supports_vision": true + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-south-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 0.000003, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Anthropic via Invoke route does not currently support pdf input." 
+ }, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true }, "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 0.00000445, @@ -5328,6 +6142,70 @@ "mode": "chat", "output_cost_per_token": 0.00000101 }, + "bedrock/sa-east-1/deepseek.v3.2": { + "input_cost_per_token": 7.4e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000222, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/sa-east-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/sa-east-1/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 7.3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.00000303, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/sa-east-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000036, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + 
"bedrock/sa-east-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000144, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": { "input_cost_per_second": 0.011, "litellm_provider": "bedrock", @@ -5464,6 +6342,134 @@ "output_cost_per_token": 7e-7, "supports_tool_choice": true }, + "bedrock/us-east-1/deepseek.v3.2": { + "input_cost_per_token": 6.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000185, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-1/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/us-east-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + 
"output_cost_per_token": 0.000003, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-2/deepseek.v3.2": { + "input_cost_per_token": 6.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000185, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-2/minimax.minimax-m2.1": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-2/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/us-east-2/moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": 
"chat", + "output_cost_per_token": 0.000003, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-2/qwen.qwen3-coder-next": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "bedrock/us-gov-east-1/amazon.nova-pro-v1:0": { "input_cost_per_token": 9.6e-7, "litellm_provider": "bedrock", @@ -5870,6 +6876,70 @@ "output_cost_per_token": 7e-7, "supports_tool_choice": true }, + "bedrock/us-west-2/deepseek.v3.2": { + "input_cost_per_token": 6.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000185, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-west-2/minimax.minimax-m2.1": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-west-2/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "supports_function_calling": true, + "supports_reasoning": true + }, + 
"bedrock/us-west-2/moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-west-2/qwen.qwen3-coder-next": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0": { "cache_creation_input_token_cost": 0.000001, "cache_read_input_token_cost": 8e-8, @@ -5921,13 +6991,13 @@ "supports_tool_choice": true }, "cerebras/gpt-oss-120b": { - "input_cost_per_token": 2.5e-7, + "input_cost_per_token": 3.5e-7, "litellm_provider": "cerebras", "max_input_tokens": 131072, "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 6.9e-7, + "output_cost_per_token": 7.5e-7, "source": "https://www.cerebras.ai/blog/openai-gpt-oss-120b-runs-fastest-on-cerebras", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -5945,6 +7015,7 @@ "output_cost_per_token": 8e-7, "source": "https://inference-docs.cerebras.ai/support/pricing", "supports_function_calling": true, + "supports_reasoning": true, "supports_tool_choice": true }, "cerebras/zai-glm-4.6": { @@ -6418,6 +7489,7 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, + "supports_web_search": true, "tool_use_system_prompt_tokens": 159 }, "claude-sonnet-4-5": { @@ -6481,20 +7553,25 @@ "supports_web_search": 
true, "tool_use_system_prompt_tokens": 346 }, - "claude-sonnet-4-5-20250929-v1:0": { + "claude-sonnet-4-6": { "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, "cache_read_input_token_cost": 3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "input_cost_per_token": 0.000003, "input_cost_per_token_above_200k_tokens": 0.000006, - "output_cost_per_token_above_200k_tokens": 0.0000225, - "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, - "cache_read_input_token_cost_above_200k_tokens": 6e-7, - "litellm_provider": "bedrock", + "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, @@ -6504,11 +7581,36 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 346 }, - "claude-opus-4-1": { - "cache_creation_input_token_cost": 0.00001875, - "cache_creation_input_token_cost_above_1hr": 0.00003, + "claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + 
"supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-1": { + "cache_creation_input_token_cost": 0.00001875, + "cache_creation_input_token_cost_above_1hr": 0.00003, "cache_read_input_token_cost": 0.0000015, "input_cost_per_token": 0.000015, "litellm_provider": "anthropic", @@ -6643,6 +7745,76 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, + "claude-opus-4-6": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000125, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.000001, + "input_cost_per_token": 0.000005, + "input_cost_per_token_above_200k_tokens": 0.00001, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "output_cost_per_token_above_200k_tokens": 0.0000375, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "provider_specific_entry": { + "us": 1.1, + "fast": 6 + } + }, + "claude-opus-4-6-20260205": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000125, + 
"cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.000001, + "input_cost_per_token": 0.000005, + "input_cost_per_token_above_200k_tokens": 0.00001, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "output_cost_per_token_above_200k_tokens": 0.0000375, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "provider_specific_entry": { + "us": 1.1, + "fast": 6 + } + }, "claude-sonnet-4-20250514": { "deprecation_date": "2026-05-14", "cache_creation_input_token_cost": 0.00000375, @@ -7628,6 +8800,34 @@ } ] }, + "dashscope/qwen3-max": { + "litellm_provider": "dashscope", + "max_input_tokens": 258048, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 0.0000012, + "output_cost_per_token": 0.000006, + "range": [0, 32000] + }, + { + "input_cost_per_token": 0.0000024, + "output_cost_per_token": 0.000012, + "range": [32000, 128000] + }, + { + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "range": [128000, 252000] + } + ] + }, "dashscope/qwq-plus": { "input_cost_per_token": 8e-7, "litellm_provider": "dashscope", @@ -8892,6 +10092,7 @@ "supports_tool_choice": true 
}, "deepinfra/google/gemini-2.0-flash-001": { + "deprecation_date": "2026-03-31", "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, @@ -9228,14 +10429,20 @@ "input_cost_per_token": 2.8e-7, "input_cost_per_token_cache_hit": 2.8e-8, "litellm_provider": "deepseek", - "max_input_tokens": 128000, + "max_input_tokens": 131072, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 4.2e-7, + "source": "https://api-docs.deepseek.com/quick_start/pricing", + "supported_endpoints": ["/v1/chat/completions"], "supports_assistant_prefill": true, "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, "supports_tool_choice": true }, "deepseek/deepseek-coder": { @@ -9272,16 +10479,22 @@ "input_cost_per_token": 2.8e-7, "input_cost_per_token_cache_hit": 2.8e-8, "litellm_provider": "deepseek", - "max_input_tokens": 128000, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, "mode": "chat", "output_cost_per_token": 4.2e-7, + "source": "https://api-docs.deepseek.com/quick_start/pricing", + "supported_endpoints": ["/v1/chat/completions"], "supports_assistant_prefill": true, - "supports_function_calling": true, + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, "supports_prompt_caching": true, "supports_reasoning": true, - "supports_tool_choice": true + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false }, "deepseek/deepseek-v3": { "cache_creation_input_token_cost": 0, @@ -9326,6 +10539,19 @@ "supports_reasoning": true, "supports_tool_choice": true }, + "deepseek.v3.2": { + "input_cost_per_token": 6.2e-7, + "litellm_provider": "bedrock_converse", + 
"max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000185, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "dolphin": { "input_cost_per_token": 5e-7, "litellm_provider": "nlp_cloud", @@ -9335,6 +10561,48 @@ "mode": "completion", "output_cost_per_token": 5e-7 }, + "deepseek-v3-2-251201": { + "input_cost_per_token": 0, + "litellm_provider": "volcengine", + "max_input_tokens": 98304, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "glm-4-7-251222": { + "input_cost_per_token": 0, + "litellm_provider": "volcengine", + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "kimi-k2-thinking-251104": { + "input_cost_per_token": 0, + "litellm_provider": "volcengine", + "max_input_tokens": 229376, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, "doubao-embedding": { "input_cost_per_token": 0, "litellm_provider": "volcengine", @@ -9497,6 +10765,28 @@ "source": "https://elevenlabs.io/pricing", "supported_endpoints": ["/v1/audio/transcriptions"] }, + "elevenlabs/eleven_v3": { + "input_cost_per_character": 0.00018, + "litellm_provider": "elevenlabs", + "metadata": { + "calculation": 
"$0.18/1000 characters (Scale plan pricing, 1 credit per character)", + "notes": "ElevenLabs Eleven v3 - most expressive TTS model with 70+ languages and audio tags support" + }, + "mode": "audio_speech", + "source": "https://elevenlabs.io/pricing", + "supported_endpoints": ["/v1/audio/speech"] + }, + "elevenlabs/eleven_multilingual_v2": { + "input_cost_per_character": 0.00018, + "litellm_provider": "elevenlabs", + "metadata": { + "calculation": "$0.18/1000 characters (Scale plan pricing, 1 credit per character)", + "notes": "ElevenLabs Eleven Multilingual v2 - default TTS model with 29 languages support" + }, + "mode": "audio_speech", + "source": "https://elevenlabs.io/pricing", + "supported_endpoints": ["/v1/audio/speech"] + }, "embed-english-light-v2.0": { "input_cost_per_token": 1e-7, "litellm_provider": "cohere", @@ -10178,6 +11468,21 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "fireworks_ai/accounts/fireworks/models/glm-4p7": { + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 202800, + "max_output_tokens": 202800, + "max_tokens": 202800, + "mode": "chat", + "output_cost_per_token": 0.0000022, + "source": "https://fireworks.ai/models/fireworks/glm-4p7", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "fireworks_ai/accounts/fireworks/models/gpt-oss-120b": { "input_cost_per_token": 1.5e-7, "litellm_provider": "fireworks_ai", @@ -10246,6 +11551,20 @@ "supports_tool_choice": true, "supports_web_search": true }, + "fireworks_ai/accounts/fireworks/models/kimi-k2p5": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "source": "https://fireworks.ai/pricing", + 
"supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct": { "input_cost_per_token": 0.000003, "litellm_provider": "fireworks_ai", @@ -10349,6 +11668,20 @@ "supports_response_schema": true, "supports_tool_choice": false }, + "fireworks_ai/accounts/fireworks/models/minimax-m2p1": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_token": 3e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 204800, + "max_output_tokens": 204800, + "max_tokens": 204800, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "source": "https://fireworks.ai/models/fireworks/minimax-m2p1", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf": { "input_cost_per_token": 0.0000012, "litellm_provider": "fireworks_ai", @@ -10401,22 +11734,65 @@ "supports_response_schema": true, "supports_tool_choice": false }, - "fireworks_ai/nomic-ai/nomic-embed-text-v1": { - "input_cost_per_token": 8e-9, - "litellm_provider": "fireworks_ai-embedding-models", - "max_input_tokens": 8192, - "max_tokens": 8192, - "mode": "embedding", - "output_cost_per_token": 0, - "source": "https://fireworks.ai/pricing" + "fireworks_ai/glm-4p7": { + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 202800, + "max_output_tokens": 202800, + "max_tokens": 202800, + "mode": "chat", + "output_cost_per_token": 0.0000022, + "source": "https://fireworks.ai/models/fireworks/glm-4p7", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true }, - "fireworks_ai/nomic-ai/nomic-embed-text-v1.5": { - "input_cost_per_token": 8e-9, - "litellm_provider": "fireworks_ai-embedding-models", - "max_input_tokens": 8192, - "max_tokens": 
8192, - "mode": "embedding", - "output_cost_per_token": 0, + "fireworks_ai/kimi-k2p5": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/minimax-m2p1": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_token": 3e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 204800, + "max_output_tokens": 204800, + "max_tokens": 204800, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "source": "https://fireworks.ai/models/fireworks/minimax-m2p1", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/nomic-ai/nomic-embed-text-v1": { + "input_cost_per_token": 8e-9, + "litellm_provider": "fireworks_ai-embedding-models", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/nomic-ai/nomic-embed-text-v1.5": { + "input_cost_per_token": 8e-9, + "litellm_provider": "fireworks_ai-embedding-models", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0, "source": "https://fireworks.ai/pricing" }, "fireworks_ai/thenlper/gte-base": { @@ -11145,6 +12521,7 @@ }, "gemini-2.0-flash": { "cache_read_input_token_cost": 2.5e-8, + "deprecation_date": "2026-03-31", "input_cost_per_audio_token": 7e-7, "input_cost_per_token": 1e-7, "litellm_provider": "vertex_ai-language-models", @@ -11176,7 +12553,7 @@ }, "gemini-2.0-flash-001": { "cache_read_input_token_cost": 3.75e-8, - "deprecation_date": "2026-02-05", + "deprecation_date": "2026-03-31", "input_cost_per_audio_token": 
0.000001, "input_cost_per_token": 1.5e-7, "litellm_provider": "vertex_ai-language-models", @@ -11246,6 +12623,7 @@ }, "gemini-2.0-flash-lite": { "cache_read_input_token_cost": 1.875e-8, + "deprecation_date": "2026-03-31", "input_cost_per_audio_token": 7.5e-8, "input_cost_per_token": 7.5e-8, "litellm_provider": "vertex_ai-language-models", @@ -11274,7 +12652,7 @@ }, "gemini-2.0-flash-lite-001": { "cache_read_input_token_cost": 1.875e-8, - "deprecation_date": "2026-02-25", + "deprecation_date": "2026-03-31", "input_cost_per_audio_token": 7.5e-8, "input_cost_per_token": 7.5e-8, "litellm_provider": "vertex_ai-language-models", @@ -11619,6 +12997,30 @@ "supports_vision": true, "supports_web_search": true }, + "deep-research-pro-preview-12-2025": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 0.000012, + "output_cost_per_token_batches": 0.000006, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true + }, "gemini-2.5-flash-lite": { "cache_read_input_token_cost": 1e-8, "input_cost_per_audio_token": 3e-7, @@ -11964,7 +13366,102 @@ "supports_tool_choice": true, "supports_video_input": true, "supports_vision": true, - "supports_web_search": true + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 0.0000036, + 
"input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true + }, + "gemini-3.1-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, + "output_cost_per_image": 0.00012, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + 
"output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true + }, + "gemini-3.1-pro-preview-customtools": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, + "output_cost_per_image": 0.00012, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true }, "vertex_ai/gemini-3-pro-preview": { "cache_read_input_token_cost": 2e-7, @@ -12001,7 +13498,15 @@ "supports_tool_choice": true, "supports_video_input": true, "supports_vision": true, - 
"supports_web_search": true + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true }, "vertex_ai/gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-8, @@ -12033,7 +13538,107 @@ "supports_tool_choice": true, "supports_video_input": true, "supports_vision": true, - "supports_web_search": true + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 9e-7, + "input_cost_per_audio_token_priority": 0.0000018, + "output_cost_per_token_priority": 0.0000054, + "cache_read_input_token_cost_priority": 9e-8, + "supports_service_tier": true + }, + "vertex_ai/gemini-3.1-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, + "litellm_provider": "vertex_ai", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, + "output_cost_per_image": 0.00012, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": 
["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true + }, + "vertex_ai/gemini-3.1-pro-preview-customtools": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, + "litellm_provider": "vertex_ai", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, + "output_cost_per_image": 0.00012, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + 
"supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true }, "gemini-2.5-pro-exp-03-25": { "cache_read_input_token_cost": 1.25e-7, @@ -12209,38 +13814,93 @@ "supports_vision": true, "supports_web_search": true }, - "gemini-2.5-computer-use-preview-10-2025": { - "input_cost_per_token": 0.00000125, - "input_cost_per_token_above_200k_tokens": 0.0000025, + "gemini-robotics-er-1.5-preview": { + "cache_read_input_token_cost": 0, + "input_cost_per_token": 3e-7, + "input_cost_per_audio_token": 0.000001, "litellm_provider": "vertex_ai-language-models", - "max_images_per_prompt": 3000, - "max_input_tokens": 128000, - "max_output_tokens": 64000, - "max_tokens": 64000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535, "mode": "chat", - "output_cost_per_token": 0.00001, - "output_cost_per_token_above_200k_tokens": 0.000015, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/computer-use", - "supported_modalities": ["text", "image"], + "output_cost_per_token": 0.0000025, + "output_cost_per_reasoning_token": 0.0000025, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-robotics-er-1-5-preview", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": 
["text", "image", "video", "audio"], "supported_output_modalities": ["text"], - "supports_computer_use": true, + "supports_audio_output": false, "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_url_context": true, "supports_vision": true }, - "gemini-embedding-001": { - "input_cost_per_token": 1.5e-7, - "litellm_provider": "vertex_ai-embedding-models", - "max_input_tokens": 2048, - "max_tokens": 2048, - "mode": "embedding", - "output_cost_per_token": 0, - "output_vector_size": 3072, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" - }, - "gemini-flash-experimental": { - "input_cost_per_character": 0, + "gemini/gemini-robotics-er-1.5-preview": { + "cache_read_input_token_cost": 0, + "input_cost_per_token": 3e-7, + "input_cost_per_audio_token": 0.000001, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "output_cost_per_reasoning_token": 0.0000025, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-robotics-er-1-5-preview", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "video", "audio"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini-2.5-computer-use-preview-10-2025": { + "input_cost_per_token": 0.00000125, + 
"input_cost_per_token_above_200k_tokens": 0.0000025, + "litellm_provider": "vertex_ai-language-models", + "max_images_per_prompt": 3000, + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/computer-use", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_computer_use": true, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-embedding-001": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + "gemini-flash-experimental": { + "input_cost_per_character": 0, "input_cost_per_token": 0, "litellm_provider": "vertex_ai-language-models", "max_input_tokens": 1000000, @@ -12657,6 +14317,7 @@ }, "gemini/gemini-2.0-flash": { "cache_read_input_token_cost": 2.5e-8, + "deprecation_date": "2026-03-31", "input_cost_per_audio_token": 7e-7, "input_cost_per_token": 1e-7, "litellm_provider": "gemini", @@ -12689,6 +14350,7 @@ }, "gemini/gemini-2.0-flash-001": { "cache_read_input_token_cost": 2.5e-8, + "deprecation_date": "2026-03-31", "input_cost_per_audio_token": 7e-7, "input_cost_per_token": 1e-7, "litellm_provider": "gemini", @@ -12760,6 +14422,7 @@ }, "gemini/gemini-2.0-flash-lite": { "cache_read_input_token_cost": 1.875e-8, + "deprecation_date": "2026-03-31", "input_cost_per_audio_token": 7.5e-8, "input_cost_per_token": 7.5e-8, "litellm_provider": "gemini", @@ -13152,6 +14815,32 @@ "supports_vision": true, "supports_web_search": true }, + "gemini/deep-research-pro-preview-12-2025": { + 
"input_cost_per_image": 0.0011, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, + "litellm_provider": "gemini", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 0.000012, + "rpm": 1000, + "tpm": 4000000, + "output_cost_per_token_batches": 0.000006, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true + }, "gemini/gemini-2.5-flash-lite": { "cache_read_input_token_cost": 1e-8, "input_cost_per_audio_token": 3e-7, @@ -13440,43 +15129,22 @@ "tpm": 250000 }, "gemini/gemini-2.5-flash-preview-tts": { - "cache_read_input_token_cost": 3.75e-8, - "input_cost_per_audio_token": 0.000001, - "input_cost_per_token": 1.5e-7, + "input_cost_per_token": 3e-7, "litellm_provider": "gemini", - "max_audio_length_hours": 8.4, - "max_audio_per_prompt": 1, - "max_images_per_prompt": 3000, - "max_input_tokens": 1048576, - "max_output_tokens": 65535, - "max_pdf_size_mb": 30, - "max_tokens": 65535, - "max_video_length": 1, - "max_videos_per_prompt": 10, - "mode": "chat", - "output_cost_per_reasoning_token": 0.0000035, - "output_cost_per_token": 6e-7, - "rpm": 10, - "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text"], - "supported_output_modalities": ["audio"], - "supports_audio_output": false, - "supports_function_calling": true, - "supports_prompt_caching": true, - 
"supports_reasoning": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true, - "supports_web_search": true, - "tpm": 250000 + "mode": "audio_speech", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": ["/v1/audio/speech"], + "tpm": 4000000, + "rpm": 10 }, "gemini/gemini-2.5-pro": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, "input_cost_per_token": 0.00000125, "input_cost_per_token_above_200k_tokens": 0.0000025, + "input_cost_per_token_priority": 0.00000125, + "input_cost_per_token_above_200k_tokens_priority": 0.0000025, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -13490,8 +15158,11 @@ "mode": "chat", "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, + "output_cost_per_token_priority": 0.00001, + "output_cost_per_token_above_200k_tokens_priority": 0.000015, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supports_service_tier": true, "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], "supported_modalities": ["text", "image", "audio", "video"], "supported_output_modalities": ["text"], @@ -13567,7 +15238,14 @@ "supports_video_input": true, "supports_vision": true, "supports_web_search": true, - "tpm": 800000 + "tpm": 800000, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true }, "gemini/gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-8, @@ -13603,7 +15281,107 @@ "supports_url_context": true, "supports_vision": true, 
"supports_web_search": true, - "tpm": 800000 + "supports_native_streaming": true, + "tpm": 800000, + "input_cost_per_token_priority": 9e-7, + "input_cost_per_audio_token_priority": 0.0000018, + "output_cost_per_token_priority": 0.0000054, + "cache_read_input_token_cost_priority": 9e-8, + "supports_service_tier": true + }, + "gemini/gemini-3.1-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, + "rpm": 2000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-3.1-pro-preview", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "tpm": 800000, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + 
"cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true + }, + "gemini/gemini-3.1-pro-preview-customtools": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "input_cost_per_token": 0.000002, + "input_cost_per_token_above_200k_tokens": 0.000004, + "input_cost_per_token_batches": 0.000001, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0.000012, + "output_cost_per_token_above_200k_tokens": 0.000018, + "output_cost_per_token_batches": 0.000006, + "rpm": 2000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-3.1-pro-preview", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "tpm": 800000, + "input_cost_per_token_priority": 0.0000036, + "input_cost_per_token_above_200k_tokens_priority": 0.0000072, + "output_cost_per_token_priority": 0.0000216, + "output_cost_per_token_above_200k_tokens_priority": 0.0000324, + "cache_read_input_token_cost_priority": 3.6e-7, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-7, + "supports_service_tier": true }, 
"gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-8, @@ -13637,7 +15415,13 @@ "supports_tool_choice": true, "supports_url_context": true, "supports_vision": true, - "supports_web_search": true + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 9e-7, + "input_cost_per_audio_token_priority": 0.0000018, + "output_cost_per_token_priority": 0.0000054, + "cache_read_input_token_cost_priority": 9e-8, + "supports_service_tier": true }, "gemini/gemini-2.5-pro-exp-03-25": { "cache_read_input_token_cost": 0, @@ -13880,7 +15664,9 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "tpm": 250000, + "rpm": 10 }, "gemini/gemini-gemma-2-9b-it": { "input_cost_per_token": 3.5e-7, @@ -13892,7 +15678,9 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", "supports_function_calling": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "tpm": 250000, + "rpm": 10 }, "gemini/gemini-pro": { "input_cost_per_token": 3.5e-7, @@ -14116,6 +15904,17 @@ "supports_parallel_function_calling": true, "supports_vision": true }, + "github_copilot/claude-opus-4.6-fast": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "supported_endpoints": ["/v1/chat/completions"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, "github_copilot/claude-opus-41": { "litellm_provider": "github_copilot", "max_input_tokens": 80000, @@ -14350,6 +16149,18 @@ "supports_response_schema": true, "supports_vision": true }, + "github_copilot/gpt-5.3-codex": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 128000, 
+ "max_tokens": 128000, + "mode": "responses", + "supported_endpoints": ["/v1/responses"], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, "github_copilot/text-embedding-3-small": { "litellm_provider": "github_copilot", "max_input_tokens": 8191, @@ -16610,7 +18421,7 @@ "input_cost_per_token": 0.00000175, "input_cost_per_token_priority": 0.0000035, "litellm_provider": "openai", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", @@ -18320,6 +20131,19 @@ "output_cost_per_token": 0.0000012, "supports_system_messages": true }, + "minimax.minimax-m2.1": { + "input_cost_per_token": 3e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "minimax/speech-02-hd": { "input_cost_per_character": 0.0001, "litellm_provider": "minimax", @@ -18354,6 +20178,7 @@ "supports_function_calling": true, "supports_tool_choice": true, "supports_prompt_caching": true, + "supports_reasoning": true, "supports_system_messages": true, "max_input_tokens": 1000000, "max_output_tokens": 8192 @@ -18368,11 +20193,12 @@ "supports_function_calling": true, "supports_tool_choice": true, "supports_prompt_caching": true, + "supports_reasoning": true, "supports_system_messages": true, "max_input_tokens": 1000000, "max_output_tokens": 8192 }, - "minimax/MiniMax-M2": { + "minimax/MiniMax-M2.5": { "input_cost_per_token": 3e-7, "output_cost_per_token": 0.0000012, "cache_read_input_token_cost": 3e-8, @@ -18382,21 +20208,52 @@ "supports_function_calling": true, "supports_tool_choice": true, "supports_prompt_caching": true, + "supports_reasoning": 
true, "supports_system_messages": true, - "max_input_tokens": 200000, + "max_input_tokens": 1000000, "max_output_tokens": 8192 }, - "mistral.magistral-small-2509": { - "input_cost_per_token": 5e-7, - "litellm_provider": "bedrock_converse", - "max_input_tokens": 128000, - "max_output_tokens": 8192, - "max_tokens": 8192, + "minimax/MiniMax-M2.5-lightning": { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000024, + "cache_read_input_token_cost": 3e-8, + "cache_creation_input_token_cost": 3.75e-7, + "litellm_provider": "minimax", "mode": "chat", - "output_cost_per_token": 0.0000015, "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, "supports_reasoning": true, - "supports_system_messages": true + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, + "minimax/MiniMax-M2": { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000012, + "cache_read_input_token_cost": 3e-8, + "cache_creation_input_token_cost": 3.75e-7, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 200000, + "max_output_tokens": 8192 + }, + "mistral.magistral-small-2509": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000015, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true }, "mistral.ministral-3-14b-instruct": { "input_cost_per_token": 2e-7, @@ -18607,6 +20464,20 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "mistral/devstral-small-latest": { + "input_cost_per_token": 1e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 
256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 3e-7, + "source": "https://docs.mistral.ai/models/devstral-small-2-25-12", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "mistral/labs-devstral-small-2512": { "input_cost_per_token": 1e-7, "litellm_provider": "mistral", @@ -18621,6 +20492,34 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "mistral/devstral-latest": { + "input_cost_per_token": 4e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 0.000002, + "source": "https://mistral.ai/news/devstral-2-vibe-cli", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/devstral-medium-latest": { + "input_cost_per_token": 4e-7, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 0.000002, + "source": "https://mistral.ai/news/devstral-2-vibe-cli", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "mistral/devstral-2512": { "input_cost_per_token": 4e-7, "litellm_provider": "mistral", @@ -19031,6 +20930,20 @@ "supports_reasoning": true, "supports_system_messages": true }, + "moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, 
"moonshot/kimi-k2-0711-preview": { "cache_read_input_token_cost": 1.5e-7, "input_cost_per_token": 6e-7, @@ -19073,6 +20986,21 @@ "supports_tool_choice": true, "supports_web_search": true }, + "moonshot/kimi-k2.5": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "source": "https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true + }, "moonshot/kimi-latest": { "cache_read_input_token_cost": 1.5e-7, "input_cost_per_token": 0.000002, @@ -19516,6 +21444,19 @@ "output_cost_per_token": 2.3e-7, "supports_system_messages": true }, + "nvidia.nemotron-nano-3-30b": { + "input_cost_per_token": 6e-8, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.4e-7, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "o1": { "cache_read_input_token_cost": 0.0000075, "input_cost_per_token": 0.000015, @@ -19526,7 +21467,7 @@ "mode": "chat", "output_cost_per_token": 0.00006, "supports_function_calling": true, - "supports_parallel_function_calling": true, + "supports_parallel_function_calling": false, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, @@ -20003,7 +21944,7 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.5e-7, + "output_cost_per_token": 0.000015, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": false @@ -20051,7 +21992,7 
@@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 1.5e-7, + "output_cost_per_token": 0.000015, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": false @@ -20443,36 +22384,6 @@ "output_cost_per_token": 2e-7, "supports_system_messages": true }, - "openrouter/anthropic/claude-2": { - "input_cost_per_token": 0.00001102, - "litellm_provider": "openrouter", - "max_output_tokens": 8191, - "max_tokens": 8191, - "mode": "chat", - "output_cost_per_token": 0.00003268, - "supports_tool_choice": true - }, - "openrouter/anthropic/claude-3-5-haiku": { - "input_cost_per_token": 0.000001, - "litellm_provider": "openrouter", - "max_tokens": 200000, - "mode": "chat", - "output_cost_per_token": 0.000005, - "supports_function_calling": true, - "supports_tool_choice": true - }, - "openrouter/anthropic/claude-3-5-haiku-20241022": { - "input_cost_per_token": 0.000001, - "litellm_provider": "openrouter", - "max_input_tokens": 200000, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 0.000005, - "supports_function_calling": true, - "supports_tool_choice": true, - "tool_use_system_prompt_tokens": 264 - }, "openrouter/anthropic/claude-3-haiku": { "input_cost_per_image": 0.0004, "input_cost_per_token": 2.5e-7, @@ -20484,43 +22395,6 @@ "supports_tool_choice": true, "supports_vision": true }, - "openrouter/anthropic/claude-3-haiku-20240307": { - "input_cost_per_token": 2.5e-7, - "litellm_provider": "openrouter", - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 0.00000125, - "supports_function_calling": true, - "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 264 - }, - "openrouter/anthropic/claude-3-opus": { - "input_cost_per_token": 0.000015, - "litellm_provider": 
"openrouter", - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 0.000075, - "supports_function_calling": true, - "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 395 - }, - "openrouter/anthropic/claude-3-sonnet": { - "input_cost_per_image": 0.0048, - "input_cost_per_token": 0.000003, - "litellm_provider": "openrouter", - "max_tokens": 200000, - "mode": "chat", - "output_cost_per_token": 0.000015, - "supports_function_calling": true, - "supports_tool_choice": true, - "supports_vision": true - }, "openrouter/anthropic/claude-3.5-sonnet": { "input_cost_per_token": 0.000003, "litellm_provider": "openrouter", @@ -20536,20 +22410,6 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, - "openrouter/anthropic/claude-3.5-sonnet:beta": { - "input_cost_per_token": 0.000003, - "litellm_provider": "openrouter", - "max_input_tokens": 200000, - "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 0.000015, - "supports_computer_use": true, - "supports_function_calling": true, - "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 - }, "openrouter/anthropic/claude-3.7-sonnet": { "input_cost_per_image": 0.0048, "input_cost_per_token": 0.000003, @@ -20567,31 +22427,6 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, - "openrouter/anthropic/claude-3.7-sonnet:beta": { - "input_cost_per_image": 0.0048, - "input_cost_per_token": 0.000003, - "litellm_provider": "openrouter", - "max_input_tokens": 200000, - "max_output_tokens": 128000, - "max_tokens": 128000, - "mode": "chat", - "output_cost_per_token": 0.000015, - "supports_computer_use": true, - "supports_function_calling": true, - "supports_reasoning": true, - "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 - }, - "openrouter/anthropic/claude-instant-v1": { 
- "input_cost_per_token": 0.00000163, - "litellm_provider": "openrouter", - "max_output_tokens": 8191, - "max_tokens": 8191, - "mode": "chat", - "output_cost_per_token": 0.00000551, - "supports_tool_choice": true - }, "openrouter/anthropic/claude-opus-4": { "input_cost_per_image": 0.0048, "cache_creation_input_token_cost": 0.00001875, @@ -20730,30 +22565,6 @@ "source": "https://openrouter.ai/api/v1/models/bytedance/ui-tars-1.5-7b", "supports_tool_choice": true }, - "openrouter/cognitivecomputations/dolphin-mixtral-8x7b": { - "input_cost_per_token": 5e-7, - "litellm_provider": "openrouter", - "max_tokens": 32769, - "mode": "chat", - "output_cost_per_token": 5e-7, - "supports_tool_choice": true - }, - "openrouter/cohere/command-r-plus": { - "input_cost_per_token": 0.000003, - "litellm_provider": "openrouter", - "max_tokens": 128000, - "mode": "chat", - "output_cost_per_token": 0.000015, - "supports_tool_choice": true - }, - "openrouter/databricks/dbrx-instruct": { - "input_cost_per_token": 6e-7, - "litellm_provider": "openrouter", - "max_tokens": 32768, - "mode": "chat", - "output_cost_per_token": 6e-7, - "supports_tool_choice": true - }, "openrouter/deepseek/deepseek-chat": { "input_cost_per_token": 1.4e-7, "litellm_provider": "openrouter", @@ -20821,17 +22632,6 @@ "supports_reasoning": false, "supports_tool_choice": true }, - "openrouter/deepseek/deepseek-coder": { - "input_cost_per_token": 1.4e-7, - "litellm_provider": "openrouter", - "max_input_tokens": 66000, - "max_output_tokens": 4096, - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 2.8e-7, - "supports_prompt_caching": true, - "supports_tool_choice": true - }, "openrouter/deepseek/deepseek-r1": { "input_cost_per_token": 5.5e-7, "input_cost_per_token_cache_hit": 1.4e-7, @@ -20862,15 +22662,8 @@ "supports_reasoning": true, "supports_tool_choice": true }, - "openrouter/fireworks/firellava-13b": { - "input_cost_per_token": 2e-7, - "litellm_provider": "openrouter", - "max_tokens": 4096, - "mode": 
"chat", - "output_cost_per_token": 2e-7, - "supports_tool_choice": true - }, "openrouter/google/gemini-2.0-flash-001": { + "deprecation_date": "2026-03-31", "input_cost_per_audio_token": 7e-7, "input_cost_per_token": 1e-7, "litellm_provider": "openrouter", @@ -21008,94 +22801,22 @@ "supports_web_search": true, "tpm": 800000 }, - "openrouter/google/gemini-pro-1.5": { - "input_cost_per_image": 0.00265, - "input_cost_per_token": 0.0000025, + "openrouter/gryphe/mythomax-l2-13b": { + "input_cost_per_token": 0.000001875, "litellm_provider": "openrouter", - "max_input_tokens": 1000000, - "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0000075, - "supports_function_calling": true, - "supports_tool_choice": true, - "supports_vision": true + "output_cost_per_token": 0.000001875, + "supports_tool_choice": true }, - "openrouter/google/gemini-pro-vision": { - "input_cost_per_image": 0.0025, - "input_cost_per_token": 1.25e-7, + "openrouter/mancer/weaver": { + "input_cost_per_token": 0.000005625, "litellm_provider": "openrouter", - "max_tokens": 45875, - "mode": "chat", - "output_cost_per_token": 3.75e-7, - "supports_function_calling": true, - "supports_tool_choice": true, - "supports_vision": true - }, - "openrouter/google/palm-2-chat-bison": { - "input_cost_per_token": 5e-7, - "litellm_provider": "openrouter", - "max_tokens": 25804, - "mode": "chat", - "output_cost_per_token": 5e-7, - "supports_tool_choice": true - }, - "openrouter/google/palm-2-codechat-bison": { - "input_cost_per_token": 5e-7, - "litellm_provider": "openrouter", - "max_tokens": 20070, - "mode": "chat", - "output_cost_per_token": 5e-7, - "supports_tool_choice": true - }, - "openrouter/gryphe/mythomax-l2-13b": { - "input_cost_per_token": 0.000001875, - "litellm_provider": "openrouter", - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 0.000001875, - "supports_tool_choice": true - }, - "openrouter/jondurbin/airoboros-l2-70b-2.1": { - 
"input_cost_per_token": 0.000013875, - "litellm_provider": "openrouter", - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 0.000013875, - "supports_tool_choice": true - }, - "openrouter/mancer/weaver": { - "input_cost_per_token": 0.000005625, - "litellm_provider": "openrouter", - "max_tokens": 8000, + "max_tokens": 8000, "mode": "chat", "output_cost_per_token": 0.000005625, "supports_tool_choice": true }, - "openrouter/meta-llama/codellama-34b-instruct": { - "input_cost_per_token": 5e-7, - "litellm_provider": "openrouter", - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 5e-7, - "supports_tool_choice": true - }, - "openrouter/meta-llama/llama-2-13b-chat": { - "input_cost_per_token": 2e-7, - "litellm_provider": "openrouter", - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 2e-7, - "supports_tool_choice": true - }, - "openrouter/meta-llama/llama-2-70b-chat": { - "input_cost_per_token": 0.0000015, - "litellm_provider": "openrouter", - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 0.0000015, - "supports_tool_choice": true - }, "openrouter/meta-llama/llama-3-70b-instruct": { "input_cost_per_token": 5.9e-7, "litellm_provider": "openrouter", @@ -21104,38 +22825,6 @@ "output_cost_per_token": 7.9e-7, "supports_tool_choice": true }, - "openrouter/meta-llama/llama-3-70b-instruct:nitro": { - "input_cost_per_token": 9e-7, - "litellm_provider": "openrouter", - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 9e-7, - "supports_tool_choice": true - }, - "openrouter/meta-llama/llama-3-8b-instruct:extended": { - "input_cost_per_token": 2.25e-7, - "litellm_provider": "openrouter", - "max_tokens": 16384, - "mode": "chat", - "output_cost_per_token": 0.00000225, - "supports_tool_choice": true - }, - "openrouter/meta-llama/llama-3-8b-instruct:free": { - "input_cost_per_token": 0, - "litellm_provider": "openrouter", - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 0, - 
"supports_tool_choice": true - }, - "openrouter/microsoft/wizardlm-2-8x22b:nitro": { - "input_cost_per_token": 0.000001, - "litellm_provider": "openrouter", - "max_tokens": 65536, - "mode": "chat", - "output_cost_per_token": 0.000001, - "supports_tool_choice": true - }, "openrouter/minimax/minimax-m2": { "input_cost_per_token": 2.55e-7, "litellm_provider": "openrouter", @@ -21145,24 +22834,10 @@ "mode": "chat", "output_cost_per_token": 0.00000102, "supports_function_calling": true, - "supports_prompt_caching": false, + "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true }, - "openrouter/mistralai/devstral-2512:free": { - "input_cost_per_image": 0, - "input_cost_per_token": 0, - "litellm_provider": "openrouter", - "max_input_tokens": 262144, - "max_output_tokens": 262144, - "max_tokens": 262144, - "mode": "chat", - "output_cost_per_token": 0, - "supports_function_calling": true, - "supports_prompt_caching": false, - "supports_tool_choice": true, - "supports_vision": false - }, "openrouter/mistralai/devstral-2512": { "input_cost_per_image": 0, "input_cost_per_token": 1.5e-7, @@ -21241,14 +22916,6 @@ "output_cost_per_token": 1.3e-7, "supports_tool_choice": true }, - "openrouter/mistralai/mistral-7b-instruct:free": { - "input_cost_per_token": 0, - "litellm_provider": "openrouter", - "max_tokens": 8192, - "mode": "chat", - "output_cost_per_token": 0, - "supports_tool_choice": true - }, "openrouter/mistralai/mistral-large": { "input_cost_per_token": 0.000008, "litellm_provider": "openrouter", @@ -21281,13 +22948,20 @@ "output_cost_per_token": 6.5e-7, "supports_tool_choice": true }, - "openrouter/nousresearch/nous-hermes-llama2-13b": { - "input_cost_per_token": 2e-7, + "openrouter/moonshotai/kimi-k2.5": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 6e-7, "litellm_provider": "openrouter", - "max_tokens": 4096, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, "mode": "chat", - 
"output_cost_per_token": 2e-7, - "supports_tool_choice": true + "output_cost_per_token": 0.000003, + "source": "https://openrouter.ai/moonshotai/kimi-k2.5", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true }, "openrouter/openai/gpt-3.5-turbo": { "input_cost_per_token": 0.0000015, @@ -21313,17 +22987,6 @@ "output_cost_per_token": 0.00006, "supports_tool_choice": true }, - "openrouter/openai/gpt-4-vision-preview": { - "input_cost_per_image": 0.01445, - "input_cost_per_token": 0.00001, - "litellm_provider": "openrouter", - "max_tokens": 130000, - "mode": "chat", - "output_cost_per_token": 0.00003, - "supports_function_calling": true, - "supports_tool_choice": true, - "supports_vision": true - }, "openrouter/openai/gpt-4.1": { "cache_read_input_token_cost": 5e-7, "input_cost_per_token": 0.000002, @@ -21341,23 +23004,6 @@ "supports_tool_choice": true, "supports_vision": true }, - "openrouter/openai/gpt-4.1-2025-04-14": { - "cache_read_input_token_cost": 5e-7, - "input_cost_per_token": 0.000002, - "litellm_provider": "openrouter", - "max_input_tokens": 1047576, - "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", - "output_cost_per_token": 0.000008, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_prompt_caching": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true - }, "openrouter/openai/gpt-4.1-mini": { "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 4e-7, @@ -21375,23 +23021,6 @@ "supports_tool_choice": true, "supports_vision": true }, - "openrouter/openai/gpt-4.1-mini-2025-04-14": { - "cache_read_input_token_cost": 1e-7, - "input_cost_per_token": 4e-7, - "litellm_provider": "openrouter", - "max_input_tokens": 1047576, - "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", - "output_cost_per_token": 0.0000016, - 
"supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_prompt_caching": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true - }, "openrouter/openai/gpt-4.1-nano": { "cache_read_input_token_cost": 2.5e-8, "input_cost_per_token": 1e-7, @@ -21409,23 +23038,6 @@ "supports_tool_choice": true, "supports_vision": true }, - "openrouter/openai/gpt-4.1-nano-2025-04-14": { - "cache_read_input_token_cost": 2.5e-8, - "input_cost_per_token": 1e-7, - "litellm_provider": "openrouter", - "max_input_tokens": 1047576, - "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", - "output_cost_per_token": 4e-7, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_prompt_caching": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_vision": true - }, "openrouter/openai/gpt-4o": { "input_cost_per_token": 0.0000025, "litellm_provider": "openrouter", @@ -21484,7 +23096,7 @@ "cache_read_input_token_cost": 1.75e-7, "input_cost_per_token": 0.00000175, "litellm_provider": "openrouter", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -21629,58 +23241,6 @@ "supports_tool_choice": true, "supports_vision": true }, - "openrouter/openai/o1-mini": { - "input_cost_per_token": 0.000003, - "litellm_provider": "openrouter", - "max_input_tokens": 128000, - "max_output_tokens": 65536, - "max_tokens": 65536, - "mode": "chat", - "output_cost_per_token": 0.000012, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_tool_choice": true, - "supports_vision": false - }, - "openrouter/openai/o1-mini-2024-09-12": { - "input_cost_per_token": 0.000003, - "litellm_provider": "openrouter", - "max_input_tokens": 128000, - "max_output_tokens": 65536, - 
"max_tokens": 65536, - "mode": "chat", - "output_cost_per_token": 0.000012, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_tool_choice": true, - "supports_vision": false - }, - "openrouter/openai/o1-preview": { - "input_cost_per_token": 0.000015, - "litellm_provider": "openrouter", - "max_input_tokens": 128000, - "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", - "output_cost_per_token": 0.00006, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_tool_choice": true, - "supports_vision": false - }, - "openrouter/openai/o1-preview-2024-09-12": { - "input_cost_per_token": 0.000015, - "litellm_provider": "openrouter", - "max_input_tokens": 128000, - "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", - "output_cost_per_token": 0.00006, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_tool_choice": true, - "supports_vision": false - }, "openrouter/openai/o3-mini": { "input_cost_per_token": 0.0000011, "litellm_provider": "openrouter", @@ -21709,14 +23269,6 @@ "supports_tool_choice": true, "supports_vision": false }, - "openrouter/pygmalionai/mythalion-13b": { - "input_cost_per_token": 0.000001875, - "litellm_provider": "openrouter", - "max_tokens": 4096, - "mode": "chat", - "output_cost_per_token": 0.000001875, - "supports_tool_choice": true - }, "openrouter/qwen/qwen-2.5-coder-32b-instruct": { "input_cost_per_token": 1.8e-7, "litellm_provider": "openrouter", @@ -21750,6 +23302,31 @@ "supports_tool_choice": true, "supports_function_calling": true }, + "openrouter/qwen/qwen3-235b-a22b-2507": { + "input_cost_per_token": 7.1e-8, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1e-7, + "source": "https://openrouter.ai/qwen/qwen3-235b-a22b-2507", + "supports_function_calling": true, + 
"supports_tool_choice": true + }, + "openrouter/qwen/qwen3-235b-a22b-thinking-2507": { + "input_cost_per_token": 1.1e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 6e-7, + "source": "https://openrouter.ai/qwen/qwen3-235b-a22b-thinking-2507", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, "openrouter/switchpoint/router": { "input_cost_per_token": 8.5e-7, "litellm_provider": "openrouter", @@ -21783,30 +23360,17 @@ "supports_tool_choice": true, "supports_web_search": true }, - "openrouter/x-ai/grok-4-fast:free": { - "input_cost_per_token": 0, + "openrouter/z-ai/glm-4.6": { + "input_cost_per_token": 4e-7, "litellm_provider": "openrouter", - "max_input_tokens": 2000000, - "max_output_tokens": 30000, - "max_tokens": 30000, - "mode": "chat", - "output_cost_per_token": 0, - "source": "https://openrouter.ai/x-ai/grok-4-fast:free", - "supports_function_calling": true, - "supports_reasoning": true, - "supports_tool_choice": true, - "supports_web_search": false - }, - "openrouter/z-ai/glm-4.6": { - "input_cost_per_token": 4e-7, - "litellm_provider": "openrouter", - "max_input_tokens": 202800, - "max_output_tokens": 131000, - "max_tokens": 131000, + "max_input_tokens": 202800, + "max_output_tokens": 131000, + "max_tokens": 131000, "mode": "chat", "output_cost_per_token": 0.00000175, "source": "https://openrouter.ai/z-ai/glm-4.6", "supports_function_calling": true, + "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true }, @@ -21820,9 +23384,93 @@ "output_cost_per_token": 0.0000019, "source": "https://openrouter.ai/z-ai/glm-4.6:exacto", "supports_function_calling": true, + "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true }, + "openrouter/xiaomi/mimo-v2-flash": { + "input_cost_per_token": 9e-8, + "output_cost_per_token": 2.9e-7, 
+ "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": false, + "supports_prompt_caching": false + }, + "openrouter/z-ai/glm-4.7": { + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000015, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 202752, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": true, + "supports_prompt_caching": false, + "supports_assistant_prefill": true + }, + "openrouter/z-ai/glm-4.7-flash": { + "input_cost_per_token": 7e-8, + "output_cost_per_token": 4e-7, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": true, + "supports_prompt_caching": false + }, + "openrouter/minimax/minimax-m2.1": { + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 0.0000012, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 204000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": true, + "supports_prompt_caching": false, + "supports_computer_use": false + }, + "openrouter/minimax/minimax-m2.5": { + "input_cost_per_token": 3e-7, + 
"output_cost_per_token": 0.0000011, + "cache_read_input_token_cost": 1.5e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 196608, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://openrouter.ai/minimax/minimax-m2.5", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": false, + "supports_prompt_caching": true, + "supports_computer_use": false + }, "ovhcloud/DeepSeek-R1-Distill-Llama-70B": { "input_cost_per_token": 6.7e-7, "litellm_provider": "ovhcloud", @@ -22432,6 +24080,66 @@ "supports_function_calling": true, "supports_tool_choice": true }, + "perplexity/preset/pro-search": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_preset": true + }, + "perplexity/openai/gpt-4o": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false + }, + "perplexity/openai/gpt-4o-mini": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false + }, + "perplexity/openai/gpt-5.2": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": true + }, + "perplexity/anthropic/claude-3-5-sonnet-20241022": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false + }, + "perplexity/anthropic/claude-3-5-haiku-20241022": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false + }, + "perplexity/google/gemini-2.0-flash-exp": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false + }, + "perplexity/google/gemini-2.0-flash-thinking-exp": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + 
"supports_reasoning": true + }, + "perplexity/xai/grok-2-1212": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false + }, + "perplexity/xai/grok-2-vision-1212": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false + }, "publicai/aisingapore/Qwen-SEA-LION-v4-32B-IT": { "input_cost_per_token": 0, "litellm_provider": "publicai", @@ -22541,6 +24249,19 @@ "supports_system_messages": true, "supports_vision": true }, + "qwen.qwen3-coder-next": { + "input_cost_per_token": 5e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "recraft/recraftv2": { "litellm_provider": "recraft", "mode": "image_generation", @@ -24402,6 +26123,34 @@ "supports_reasoning": true, "supports_tool_choice": true }, + "together_ai/zai-org/GLM-4.7": { + "input_cost_per_token": 4.5e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 0.000002, + "source": "https://www.together.ai/models/glm-4-7", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "together_ai/moonshotai/Kimi-K2.5": { + "input_cost_per_token": 5e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 0.0000028, + "source": "https://www.together.ai/models/kimi-k2-5", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_reasoning": 
true + }, "together_ai/moonshotai/Kimi-K2-Instruct-0905": { "input_cost_per_token": 0.000001, "litellm_provider": "together_ai", @@ -24887,6 +26636,30 @@ "supports_reasoning": true, "supports_tool_choice": false }, + "us.deepseek.v3.2": { + "input_cost_per_token": 6.2e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000185, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "eu.deepseek.v3.2": { + "input_cost_per_token": 7.4e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.00000222, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, "us.meta.llama3-1-405b-instruct-v1:0": { "input_cost_per_token": 0.00000532, "litellm_provider": "bedrock", @@ -25094,7 +26867,9 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 3e-7 + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/alibaba/qwen3-coder": { "input_cost_per_token": 4e-7, @@ -25103,7 +26878,9 @@ "max_output_tokens": 66536, "max_tokens": 66536, "mode": "chat", - "output_cost_per_token": 0.0000016 + "output_cost_per_token": 0.0000016, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/amazon/nova-lite": { "input_cost_per_token": 6e-8, @@ -25112,7 +26889,10 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.4e-7 + "output_cost_per_token": 2.4e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_response_schema": true }, "vercel_ai_gateway/amazon/nova-micro": { "input_cost_per_token": 3.5e-8, @@ -25121,7 +26901,9 @@ "max_output_tokens": 8192, 
"max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.4e-7 + "output_cost_per_token": 1.4e-7, + "supports_function_calling": true, + "supports_response_schema": true }, "vercel_ai_gateway/amazon/nova-pro": { "input_cost_per_token": 8e-7, @@ -25130,7 +26912,10 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.0000032 + "output_cost_per_token": 0.0000032, + "supports_vision": true, + "supports_function_calling": true, + "supports_response_schema": true }, "vercel_ai_gateway/amazon/titan-embed-text-v2": { "input_cost_per_token": 2e-8, @@ -25150,7 +26935,11 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.00000125 + "output_cost_per_token": 0.00000125, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/anthropic/claude-3-opus": { "cache_creation_input_token_cost": 0.00001875, @@ -25161,7 +26950,11 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.000075 + "output_cost_per_token": 0.000075, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/anthropic/claude-3.5-haiku": { "cache_creation_input_token_cost": 0.000001, @@ -25172,7 +26965,11 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.000004 + "output_cost_per_token": 0.000004, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/anthropic/claude-3.5-sonnet": { "cache_creation_input_token_cost": 0.00000375, @@ -25183,7 +26980,11 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_vision": true, + "supports_function_calling": true, + 
"supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/anthropic/claude-3.7-sonnet": { "cache_creation_input_token_cost": 0.00000375, @@ -25194,7 +26995,11 @@ "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/anthropic/claude-4-opus": { "cache_creation_input_token_cost": 0.00001875, @@ -25205,7 +27010,11 @@ "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 0.000075 + "output_cost_per_token": 0.000075, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/anthropic/claude-4-sonnet": { "cache_creation_input_token_cost": 0.00000375, @@ -25216,25 +27025,219 @@ "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_function_calling": true, + "supports_tool_choice": true }, - "vercel_ai_gateway/cohere/command-a": { - "input_cost_per_token": 0.0000025, + "vercel_ai_gateway/anthropic/claude-3-5-sonnet": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, "litellm_provider": "vercel_ai_gateway", - "max_input_tokens": 256000, - "max_output_tokens": 8000, - "max_tokens": 8000, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.00001 + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true }, - 
"vercel_ai_gateway/cohere/command-r": { - "input_cost_per_token": 1.5e-7, - "litellm_provider": "vercel_ai_gateway", - "max_input_tokens": 128000, + "vercel_ai_gateway/anthropic/claude-3-5-sonnet-20241022": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-3-7-sonnet": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-haiku-4.5": { + "cache_creation_input_token_cost": 0.00000125, + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 0.000001, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000005, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + 
"supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4": { + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.000075, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.1": { + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.000015, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.000075, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.5": { + "cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.6": { + 
"cache_creation_input_token_cost": 0.00000625, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-sonnet-4": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-sonnet-4.5": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/cohere/command-a": { + "input_cost_per_token": 0.0000025, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 256000, + "max_output_tokens": 8000, + "max_tokens": 
8000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/cohere/command-r": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 6e-7 + "output_cost_per_token": 6e-7, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/cohere/command-r-plus": { "input_cost_per_token": 0.0000025, @@ -25243,7 +27246,9 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.00001 + "output_cost_per_token": 0.00001, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/cohere/embed-v4.0": { "input_cost_per_token": 1.2e-7, @@ -25261,7 +27266,8 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 0.00000219 + "output_cost_per_token": 0.00000219, + "supports_tool_choice": true }, "vercel_ai_gateway/deepseek/deepseek-r1-distill-llama-70b": { "input_cost_per_token": 7.5e-7, @@ -25270,7 +27276,10 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 9.9e-7 + "output_cost_per_token": 9.9e-7, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/deepseek/deepseek-v3": { "input_cost_per_token": 9e-7, @@ -25279,25 +27288,36 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 9e-7 + "output_cost_per_token": 9e-7, + "supports_tool_choice": true }, "vercel_ai_gateway/google/gemini-2.0-flash": { + "deprecation_date": "2026-03-31", "input_cost_per_token": 1.5e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1048576, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 
6e-7 + "output_cost_per_token": 6e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/google/gemini-2.0-flash-lite": { + "deprecation_date": "2026-03-31", "input_cost_per_token": 7.5e-8, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1048576, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 3e-7 + "output_cost_per_token": 3e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/google/gemini-2.5-flash": { "input_cost_per_token": 3e-7, @@ -25306,7 +27326,11 @@ "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.0000025 + "output_cost_per_token": 0.0000025, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/google/gemini-2.5-pro": { "input_cost_per_token": 0.0000025, @@ -25315,7 +27339,11 @@ "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 0.00001 + "output_cost_per_token": 0.00001, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/google/gemini-embedding-001": { "input_cost_per_token": 1.5e-7, @@ -25333,7 +27361,10 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2e-7 + "output_cost_per_token": 2e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/google/text-embedding-005": { "input_cost_per_token": 2.5e-8, @@ -25369,7 +27400,8 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 7.9e-7 + "output_cost_per_token": 7.9e-7, + "supports_tool_choice": true }, 
"vercel_ai_gateway/meta/llama-3-8b": { "input_cost_per_token": 5e-8, @@ -25378,7 +27410,8 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 8e-8 + "output_cost_per_token": 8e-8, + "supports_tool_choice": true }, "vercel_ai_gateway/meta/llama-3.1-70b": { "input_cost_per_token": 7.2e-7, @@ -25387,7 +27420,8 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 7.2e-7 + "output_cost_per_token": 7.2e-7, + "supports_tool_choice": true }, "vercel_ai_gateway/meta/llama-3.1-8b": { "input_cost_per_token": 5e-8, @@ -25396,7 +27430,9 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 8e-8 + "output_cost_per_token": 8e-8, + "supports_function_calling": true, + "supports_response_schema": true }, "vercel_ai_gateway/meta/llama-3.2-11b": { "input_cost_per_token": 1.6e-7, @@ -25405,7 +27441,10 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.6e-7 + "output_cost_per_token": 1.6e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/meta/llama-3.2-1b": { "input_cost_per_token": 1e-7, @@ -25423,7 +27462,9 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.5e-7 + "output_cost_per_token": 1.5e-7, + "supports_function_calling": true, + "supports_response_schema": true }, "vercel_ai_gateway/meta/llama-3.2-90b": { "input_cost_per_token": 7.2e-7, @@ -25432,7 +27473,10 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 7.2e-7 + "output_cost_per_token": 7.2e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/meta/llama-3.3-70b": { "input_cost_per_token": 7.2e-7, @@ -25441,7 +27485,9 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 7.2e-7 + 
"output_cost_per_token": 7.2e-7, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/meta/llama-4-maverick": { "input_cost_per_token": 2e-7, @@ -25450,7 +27496,8 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 6e-7 + "output_cost_per_token": 6e-7, + "supports_tool_choice": true }, "vercel_ai_gateway/meta/llama-4-scout": { "input_cost_per_token": 1e-7, @@ -25459,16 +27506,21 @@ "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 3e-7 - }, - "vercel_ai_gateway/mistral/codestral": { - "input_cost_per_token": 3e-7, + "output_cost_per_token": 3e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/mistral/codestral": { + "input_cost_per_token": 3e-7, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 256000, "max_output_tokens": 4000, "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 9e-7 + "output_cost_per_token": 9e-7, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/mistral/codestral-embed": { "input_cost_per_token": 1.5e-7, @@ -25486,7 +27538,10 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 2.8e-7 + "output_cost_per_token": 2.8e-7, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/mistral/magistral-medium": { "input_cost_per_token": 0.000002, @@ -25495,7 +27550,10 @@ "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - "output_cost_per_token": 0.000005 + "output_cost_per_token": 0.000005, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/mistral/magistral-small": { "input_cost_per_token": 5e-7, @@ -25504,7 +27562,8 @@ "max_output_tokens": 64000, "max_tokens": 64000, "mode": "chat", - 
"output_cost_per_token": 0.0000015 + "output_cost_per_token": 0.0000015, + "supports_function_calling": true }, "vercel_ai_gateway/mistral/ministral-3b": { "input_cost_per_token": 4e-8, @@ -25513,7 +27572,9 @@ "max_output_tokens": 4000, "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 4e-8 + "output_cost_per_token": 4e-8, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/mistral/ministral-8b": { "input_cost_per_token": 1e-7, @@ -25522,7 +27583,10 @@ "max_output_tokens": 4000, "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 1e-7 + "output_cost_per_token": 1e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/mistral/mistral-embed": { "input_cost_per_token": 1e-7, @@ -25540,7 +27604,9 @@ "max_output_tokens": 4000, "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 0.000006 + "output_cost_per_token": 0.000006, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/mistral/mistral-saba-24b": { "input_cost_per_token": 7.9e-7, @@ -25558,7 +27624,10 @@ "max_output_tokens": 4000, "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 3e-7 + "output_cost_per_token": 3e-7, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/mistral/mixtral-8x22b-instruct": { "input_cost_per_token": 0.0000012, @@ -25567,7 +27636,8 @@ "max_output_tokens": 2048, "max_tokens": 2048, "mode": "chat", - "output_cost_per_token": 0.0000012 + "output_cost_per_token": 0.0000012, + "supports_function_calling": true }, "vercel_ai_gateway/mistral/pixtral-12b": { "input_cost_per_token": 1.5e-7, @@ -25576,7 +27646,11 @@ "max_output_tokens": 4000, "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 1.5e-7 + "output_cost_per_token": 1.5e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + 
"supports_response_schema": true }, "vercel_ai_gateway/mistral/pixtral-large": { "input_cost_per_token": 0.000002, @@ -25585,7 +27659,11 @@ "max_output_tokens": 4000, "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 0.000006 + "output_cost_per_token": 0.000006, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/moonshotai/kimi-k2": { "input_cost_per_token": 5.5e-7, @@ -25594,7 +27672,9 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 0.0000022 + "output_cost_per_token": 0.0000022, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/morph/morph-v3-fast": { "input_cost_per_token": 8e-7, @@ -25621,7 +27701,9 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.0000015 + "output_cost_per_token": 0.0000015, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/openai/gpt-3.5-turbo-instruct": { "input_cost_per_token": 0.0000015, @@ -25639,7 +27721,10 @@ "max_output_tokens": 4096, "max_tokens": 4096, "mode": "chat", - "output_cost_per_token": 0.00003 + "output_cost_per_token": 0.00003, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/openai/gpt-4.1": { "cache_creation_input_token_cost": 0, @@ -25650,7 +27735,11 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.000008 + "output_cost_per_token": 0.000008, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/gpt-4.1-mini": { "cache_creation_input_token_cost": 0, @@ -25661,7 +27750,11 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.0000016 + "output_cost_per_token": 0.0000016, + 
"supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/gpt-4.1-nano": { "cache_creation_input_token_cost": 0, @@ -25672,7 +27765,11 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 4e-7 + "output_cost_per_token": 4e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/gpt-4o": { "cache_creation_input_token_cost": 0, @@ -25683,7 +27780,11 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 0.00001 + "output_cost_per_token": 0.00001, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/gpt-4o-mini": { "cache_creation_input_token_cost": 0, @@ -25694,7 +27795,11 @@ "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 6e-7 + "output_cost_per_token": 6e-7, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/o1": { "cache_creation_input_token_cost": 0, @@ -25705,7 +27810,11 @@ "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0.00006 + "output_cost_per_token": 0.00006, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/o3": { "cache_creation_input_token_cost": 0, @@ -25716,7 +27825,11 @@ "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0.000008 + "output_cost_per_token": 0.000008, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, 
"vercel_ai_gateway/openai/o3-mini": { "cache_creation_input_token_cost": 0, @@ -25727,7 +27840,10 @@ "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0.0000044 + "output_cost_per_token": 0.0000044, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/o4-mini": { "cache_creation_input_token_cost": 0, @@ -25738,7 +27854,11 @@ "max_output_tokens": 100000, "max_tokens": 100000, "mode": "chat", - "output_cost_per_token": 0.0000044 + "output_cost_per_token": 0.0000044, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true }, "vercel_ai_gateway/openai/text-embedding-3-large": { "input_cost_per_token": 1.3e-7, @@ -25810,7 +27930,10 @@ "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/vercel/v0-1.5-md": { "input_cost_per_token": 0.000003, @@ -25819,7 +27942,10 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/xai/grok-2": { "input_cost_per_token": 0.000002, @@ -25828,7 +27954,9 @@ "max_output_tokens": 4000, "max_tokens": 4000, "mode": "chat", - "output_cost_per_token": 0.00001 + "output_cost_per_token": 0.00001, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/xai/grok-2-vision": { "input_cost_per_token": 0.000002, @@ -25837,7 +27965,10 @@ "max_output_tokens": 32768, "max_tokens": 32768, "mode": "chat", - "output_cost_per_token": 0.00001 + "output_cost_per_token": 0.00001, + "supports_vision": true, + "supports_function_calling": 
true, + "supports_tool_choice": true }, "vercel_ai_gateway/xai/grok-3": { "input_cost_per_token": 0.000003, @@ -25846,7 +27977,9 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/xai/grok-3-fast": { "input_cost_per_token": 0.000005, @@ -25855,7 +27988,8 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.000025 + "output_cost_per_token": 0.000025, + "supports_function_calling": true }, "vercel_ai_gateway/xai/grok-3-mini": { "input_cost_per_token": 3e-7, @@ -25864,7 +27998,9 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 5e-7 + "output_cost_per_token": 5e-7, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/xai/grok-3-mini-fast": { "input_cost_per_token": 6e-7, @@ -25873,7 +28009,9 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.000004 + "output_cost_per_token": 0.000004, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/xai/grok-4": { "input_cost_per_token": 0.000003, @@ -25882,7 +28020,9 @@ "max_output_tokens": 256000, "max_tokens": 256000, "mode": "chat", - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/zai/glm-4.5": { "input_cost_per_token": 6e-7, @@ -25891,7 +28031,9 @@ "max_output_tokens": 131072, "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.0000022 + "output_cost_per_token": 0.0000022, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/zai/glm-4.5-air": { "input_cost_per_token": 2e-7, @@ -25900,7 +28042,9 @@ "max_output_tokens": 96000, "max_tokens": 96000, "mode": "chat", - 
"output_cost_per_token": 0.0000011 + "output_cost_per_token": 0.0000011, + "supports_function_calling": true, + "supports_tool_choice": true }, "vercel_ai_gateway/zai/glm-4.6": { "litellm_provider": "vercel_ai_gateway", @@ -25966,7 +28110,9 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_vision": true }, "vertex_ai/claude-3-5-sonnet": { "input_cost_per_token": 0.000003, @@ -26237,7 +28383,68 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "tool_use_system_prompt_tokens": 159, + "supports_native_streaming": true + }, + "vertex_ai/claude-opus-4-6": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000125, + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.000001, + "input_cost_per_token": 0.000005, + "input_cost_per_token_above_200k_tokens": 0.00001, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "output_cost_per_token_above_200k_tokens": 0.0000375, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "vertex_ai/claude-opus-4-6@default": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_200k_tokens": 
0.0000125, + "cache_read_input_token_cost": 5e-7, + "cache_read_input_token_cost_above_200k_tokens": 0.000001, + "input_cost_per_token": 0.000005, + "input_cost_per_token_above_200k_tokens": 0.00001, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "output_cost_per_token_above_200k_tokens": 0.0000375, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 }, "vertex_ai/claude-sonnet-4-5": { "cache_creation_input_token_cost": 0.00000375, @@ -26265,6 +28472,36 @@ "supports_tool_choice": true, "supports_vision": true }, + "vertex_ai/claude-sonnet-4-6": { + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost": 3e-7, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + 
"tool_use_system_prompt_tokens": 346, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + } + }, "vertex_ai/claude-sonnet-4-5@20250929": { "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, @@ -26289,7 +28526,8 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "supports_native_streaming": true }, "vertex_ai/claude-opus-4@20250514": { "cache_creation_input_token_cost": 0.00001875, @@ -26555,6 +28793,21 @@ "output_cost_per_token_batches": 0.000006, "source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image" }, + "vertex_ai/deep-research-pro-preview-12-2025": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 0.000012, + "output_cost_per_token_batches": 0.000006, + "source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image" + }, "vertex_ai/imagegeneration@006": { "litellm_provider": "vertex_ai-image-models", "mode": "image_generation", @@ -26839,9 +29092,24 @@ "supports_reasoning": true, "supports_tool_choice": true }, - "vertex_ai/mistral-medium-3": { - "input_cost_per_token": 4e-7, - "litellm_provider": "vertex_ai-mistral_models", + "vertex_ai/zai-org/glm-5-maas": { + "cache_read_input_token_cost": 1e-7, + "input_cost_per_token": 0.000001, + "litellm_provider": "vertex_ai-zai_models", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.0000032, + "source": 
"https://cloud.google.com/vertex-ai/generative-ai/pricing#glm-models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-medium-3": { + "input_cost_per_token": 4e-7, + "litellm_provider": "vertex_ai-mistral_models", "max_input_tokens": 128000, "max_output_tokens": 8191, "max_tokens": 8191, @@ -27018,6 +29286,7 @@ "mode": "chat", "output_cost_per_token": 0.000001, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_regions": ["global"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -27030,6 +29299,7 @@ "mode": "chat", "output_cost_per_token": 0.000004, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_regions": ["global"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -27042,6 +29312,7 @@ "mode": "chat", "output_cost_per_token": 0.0000012, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_regions": ["global"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -27054,6 +29325,7 @@ "mode": "chat", "output_cost_per_token": 0.0000012, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_regions": ["global"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -27876,6 +30148,7 @@ "supports_web_search": true }, "xai/grok-3": { + "cache_read_input_token_cost": 7.5e-7, "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -27890,6 +30163,7 @@ "supports_web_search": true }, "xai/grok-3-beta": { + "cache_read_input_token_cost": 7.5e-7, "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -27904,6 +30178,7 @@ "supports_web_search": true }, "xai/grok-3-fast-beta": { + "cache_read_input_token_cost": 0.00000125, "input_cost_per_token": 0.000005, "litellm_provider": "xai", 
"max_input_tokens": 131072, @@ -27918,6 +30193,7 @@ "supports_web_search": true }, "xai/grok-3-fast-latest": { + "cache_read_input_token_cost": 0.00000125, "input_cost_per_token": 0.000005, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -27932,6 +30208,7 @@ "supports_web_search": true }, "xai/grok-3-latest": { + "cache_read_input_token_cost": 7.5e-7, "input_cost_per_token": 0.000003, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -27946,6 +30223,7 @@ "supports_web_search": true }, "xai/grok-3-mini": { + "cache_read_input_token_cost": 7.5e-8, "input_cost_per_token": 3e-7, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -27961,6 +30239,7 @@ "supports_web_search": true }, "xai/grok-3-mini-beta": { + "cache_read_input_token_cost": 7.5e-8, "input_cost_per_token": 3e-7, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -27976,6 +30255,7 @@ "supports_web_search": true }, "xai/grok-3-mini-fast": { + "cache_read_input_token_cost": 1.5e-7, "input_cost_per_token": 6e-7, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -27991,6 +30271,7 @@ "supports_web_search": true }, "xai/grok-3-mini-fast-beta": { + "cache_read_input_token_cost": 1.5e-7, "input_cost_per_token": 6e-7, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -28006,6 +30287,7 @@ "supports_web_search": true }, "xai/grok-3-mini-fast-latest": { + "cache_read_input_token_cost": 1.5e-7, "input_cost_per_token": 6e-7, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -28021,6 +30303,7 @@ "supports_web_search": true }, "xai/grok-3-mini-latest": { + "cache_read_input_token_cost": 7.5e-8, "input_cost_per_token": 3e-7, "litellm_provider": "xai", "max_input_tokens": 131072, @@ -28277,6 +30560,20 @@ "supports_vision": true, "supports_web_search": true }, + "zai.glm-4.7": { + "input_cost_per_token": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + 
"output_cost_per_token": 0.0000022, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, "zai/glm-4.7": { "cache_creation_input_token_cost": 0, "cache_read_input_token_cost": 1.1e-7, @@ -28287,11 +30584,14 @@ "max_output_tokens": 128000, "mode": "chat", "supports_function_calling": true, + "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, "source": "https://docs.z.ai/guides/overview/pricing" }, "zai/glm-4.6": { + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 1.1e-7, "input_cost_per_token": 6e-7, "output_cost_per_token": 0.0000022, "litellm_provider": "zai", @@ -28299,6 +30599,8 @@ "max_output_tokens": 128000, "mode": "chat", "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, "supports_tool_choice": true, "source": "https://docs.z.ai/guides/overview/pricing" }, @@ -28408,6 +30710,15 @@ "supported_output_modalities": ["video"], "supported_resolutions": ["720x1280", "1280x720"] }, + "openai/sora-2-pro-high-res": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.5, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1024x1792", "1792x1024"] + }, "azure/sora-2": { "litellm_provider": "azure", "mode": "video_generation", @@ -31868,5 +34179,572 @@ "mode": "chat", "output_cost_per_token": 0, "supports_reasoning": true + }, + "tts-1-1106": { + "input_cost_per_character": 0.000015, + "litellm_provider": "openai", + "mode": "audio_speech", + "supported_endpoints": ["/v1/audio/speech"] + }, + "tts-1-hd-1106": { + "input_cost_per_character": 0.00003, + "litellm_provider": "openai", + "mode": "audio_speech", + "supported_endpoints": 
["/v1/audio/speech"] + }, + "gpt-4o-mini-tts-2025-03-20": { + "input_cost_per_token": 0.0000025, + "litellm_provider": "openai", + "mode": "audio_speech", + "output_cost_per_audio_token": 0.000012, + "output_cost_per_second": 0.00025, + "output_cost_per_token": 0.00001, + "supported_endpoints": ["/v1/audio/speech"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["audio"] + }, + "gpt-4o-mini-tts-2025-12-15": { + "input_cost_per_token": 0.0000025, + "litellm_provider": "openai", + "mode": "audio_speech", + "output_cost_per_audio_token": 0.000012, + "output_cost_per_second": 0.00025, + "output_cost_per_token": 0.00001, + "supported_endpoints": ["/v1/audio/speech"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["audio"] + }, + "gpt-4o-mini-transcribe-2025-03-20": { + "input_cost_per_audio_token": 0.000003, + "input_cost_per_token": 0.00000125, + "litellm_provider": "openai", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 0.000005, + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "gpt-4o-mini-transcribe-2025-12-15": { + "input_cost_per_audio_token": 0.000003, + "input_cost_per_token": 0.00000125, + "litellm_provider": "openai", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 0.000005, + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "gpt-5-search-api": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 0.00000125, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": 
true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-5-search-api-2025-10-14": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 0.00000125, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-realtime-mini-2025-10-06": { + "cache_creation_input_audio_token_cost": 3e-7, + "cache_read_input_audio_token_cost": 3e-7, + "cache_read_input_token_cost": 6e-8, + "input_cost_per_audio_token": 0.00001, + "input_cost_per_image": 8e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-realtime-mini-2025-12-15": { + "cache_creation_input_audio_token_cost": 3e-7, + "cache_read_input_audio_token_cost": 3e-7, + "cache_read_input_token_cost": 6e-8, + "input_cost_per_audio_token": 0.00001, + "input_cost_per_image": 8e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 
0.0000024, + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "sora-2": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.1, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] + }, + "sora-2-pro": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.3, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] + }, + "sora-2-pro-high-res": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.5, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1024x1792", "1792x1024"] + }, + "chatgpt-image-latest": { + "cache_read_input_image_token_cost": 0.0000025, + "cache_read_input_token_cost": 0.00000125, + "input_cost_per_image_token": 0.00001, + "input_cost_per_token": 0.000005, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_image_token": 0.00004, + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] + }, + "gemini-2.0-flash-exp-image-generation": { + "input_cost_per_token": 0, + "litellm_provider": "gemini", + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 
32768, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_token": 0, + "source": "https://ai.google.dev/pricing", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_vision": true + }, + "gemini/gemini-2.0-flash-exp-image-generation": { + "input_cost_per_token": 0, + "litellm_provider": "gemini", + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_token": 0, + "source": "https://ai.google.dev/pricing", + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_vision": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini/gemini-2.0-flash-lite-001": { + "cache_read_input_token_cost": 1.875e-8, + "deprecation_date": "2026-03-31", + "input_cost_per_audio_token": 7.5e-8, + "input_cost_per_token": 7.5e-8, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 50, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-7, + "rpm": 4000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite", + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 4000000 + }, + "gemini-2.5-flash-native-audio-latest": { + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + 
"max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true + }, + "gemini-2.5-flash-native-audio-preview-09-2025": { + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true + }, + "gemini-2.5-flash-native-audio-preview-12-2025": { + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true + }, + "gemini/gemini-2.5-flash-native-audio-latest": { + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": 
true, + "tpm": 250000, + "rpm": 10 + }, + "gemini/gemini-2.5-flash-native-audio-preview-09-2025": { + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini/gemini-2.5-flash-native-audio-preview-12-2025": { + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini-2.5-flash-preview-tts": { + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "mode": "audio_speech", + "output_cost_per_token": 0.0000025, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": ["/v1/audio/speech"] + }, + "gemini-flash-latest": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 0.0000025, + 
"output_cost_per_token": 0.0000025, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000 + }, + "gemini-flash-lite-latest": { + "cache_read_input_token_cost": 1e-8, + "input_cost_per_audio_token": 3e-7, + "input_cost_per_token": 1e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-7, + "output_cost_per_token": 4e-7, + "rpm": 15, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000 + }, + 
"gemini-pro-latest": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, + "rpm": 2000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 + }, + "gemini/gemini-pro-latest": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, + "rpm": 2000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": 
["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 + }, + "gemini-exp-1206": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 0.0000025, + "output_cost_per_token": 0.0000025, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000 + }, + "vertex_ai/claude-sonnet-4-6@default": { + "cache_creation_input_token_cost": 0.00000375, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost": 3e-7, + 
"cache_read_input_token_cost_above_200k_tokens": 6e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + } + }, + "duckduckgo/search": { + "litellm_provider": "duckduckgo", + "mode": "search", + "input_cost_per_query": 0, + "metadata": { + "notes": "DuckDuckGo Instant Answer API is free and does not require an API key." 
+ } } } From fab0f4d20f54fff783de8242042b440d860d991a Mon Sep 17 00:00:00 2001 From: Ammar Date: Mon, 23 Feb 2026 13:47:26 -0600 Subject: [PATCH 3/5] tests: pin compaction UI flows to Sonnet model --- tests/ui/compaction/compaction.test.ts | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/ui/compaction/compaction.test.ts b/tests/ui/compaction/compaction.test.ts index 557286e3a1..e2640b6b8d 100644 --- a/tests/ui/compaction/compaction.test.ts +++ b/tests/ui/compaction/compaction.test.ts @@ -16,6 +16,7 @@ import { BackgroundProcessManager } from "@/node/services/backgroundProcessManag import { fireEvent } from "@testing-library/react"; import { createAppHarness } from "../harness"; import { WORKSPACE_DEFAULTS } from "@/constants/workspaceDefaults"; +import { KNOWN_MODELS } from "@/common/constants/knownModels"; interface ServiceContainerPrivates { backgroundProcessManager: BackgroundProcessManager; @@ -25,6 +26,10 @@ function getBackgroundProcessManager(env: TestEnvironment): BackgroundProcessMan return (env.services as unknown as ServiceContainerPrivates).backgroundProcessManager; } +// Keep compaction UI tests deterministic even when the default workspace model changes. +// Sonnet has a smaller context window than Opus, so auto-compaction still has a higher-context fallback. 
+const COMPACTION_TEST_MODEL = KNOWN_MODELS.SONNET.id; + async function waitForForegroundToolCallId( env: TestEnvironment, workspaceId: string, @@ -104,7 +109,12 @@ describe("Compaction UI (mock AI router)", () => { const triggerMessage = "Trigger context error"; const userDraft = "My draft message that should be preserved"; - await app.chat.send(triggerMessage); + const triggerResult = await app.env.orpc.workspace.sendMessage({ + workspaceId: app.workspaceId, + message: triggerMessage, + options: { model: COMPACTION_TEST_MODEL, agentId: WORKSPACE_DEFAULTS.agentId }, + }); + expect(triggerResult.success).toBe(true); // User starts typing while auto-compaction is in progress await app.chat.typeWithoutSending(userDraft); @@ -132,7 +142,7 @@ describe("Compaction UI (mock AI router)", () => { const seedResult = await app.env.orpc.workspace.sendMessage({ workspaceId: app.workspaceId, message: seedMessage, - options: { model: WORKSPACE_DEFAULTS.model, agentId: WORKSPACE_DEFAULTS.agentId }, + options: { model: COMPACTION_TEST_MODEL, agentId: WORKSPACE_DEFAULTS.agentId }, }); expect(seedResult.success).toBe(true); await app.chat.expectTranscriptContains(`Mock response: ${seedMessage}`); @@ -140,7 +150,7 @@ describe("Compaction UI (mock AI router)", () => { const triggerResult = await app.env.orpc.workspace.sendMessage({ workspaceId: app.workspaceId, message: triggerMessage, - options: { model: WORKSPACE_DEFAULTS.model, agentId: WORKSPACE_DEFAULTS.agentId }, + options: { model: COMPACTION_TEST_MODEL, agentId: WORKSPACE_DEFAULTS.agentId }, }); expect(triggerResult.success).toBe(true); @@ -196,7 +206,7 @@ describe("Compaction UI (mock AI router)", () => { const seedResult = await app.env.orpc.workspace.sendMessage({ workspaceId: app.workspaceId, message: seedMessage, - options: { model: WORKSPACE_DEFAULTS.model, agentId: WORKSPACE_DEFAULTS.agentId }, + options: { model: COMPACTION_TEST_MODEL, agentId: WORKSPACE_DEFAULTS.agentId }, }); expect(seedResult.success).toBe(true); 
await app.chat.expectTranscriptContains(`Mock response: ${seedMessage}`); @@ -253,7 +263,7 @@ describe("Compaction UI (mock AI router)", () => { const seedResult = await app.env.orpc.workspace.sendMessage({ workspaceId: app.workspaceId, message: seedMessage, - options: { model: WORKSPACE_DEFAULTS.model, agentId: WORKSPACE_DEFAULTS.agentId }, + options: { model: COMPACTION_TEST_MODEL, agentId: WORKSPACE_DEFAULTS.agentId }, }); expect(seedResult.success).toBe(true); await app.chat.expectTranscriptContains(`Mock response: ${seedMessage}`); From 8b5999184e2bc890d0311c875cd7e5814df16775 Mon Sep 17 00:00:00 2001 From: Ammar Date: Mon, 23 Feb 2026 13:55:52 -0600 Subject: [PATCH 4/5] tests: preserve compaction draft race coverage --- tests/ui/compaction/compaction.test.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/ui/compaction/compaction.test.ts b/tests/ui/compaction/compaction.test.ts index e2640b6b8d..50b0e97def 100644 --- a/tests/ui/compaction/compaction.test.ts +++ b/tests/ui/compaction/compaction.test.ts @@ -109,16 +109,18 @@ describe("Compaction UI (mock AI router)", () => { const triggerMessage = "Trigger context error"; const userDraft = "My draft message that should be preserved"; - const triggerResult = await app.env.orpc.workspace.sendMessage({ + const triggerPromise = app.env.orpc.workspace.sendMessage({ workspaceId: app.workspaceId, message: triggerMessage, options: { model: COMPACTION_TEST_MODEL, agentId: WORKSPACE_DEFAULTS.agentId }, }); - expect(triggerResult.success).toBe(true); - // User starts typing while auto-compaction is in progress + // User starts typing while auto-compaction is in progress. 
await app.chat.typeWithoutSending(userDraft); + const triggerResult = await triggerPromise; + expect(triggerResult.success).toBe(true); + await app.chat.expectTranscriptContains("Mock compaction summary:", 60_000); await app.chat.expectTranscriptContains(`Continue with: ${triggerMessage}`, 60_000); await app.chat.expectTranscriptContains(`Mock response: ${triggerMessage}`, 60_000); From d4a05062c6f5eeb0faf084791ded332cf8064057 Mon Sep 17 00:00:00 2001 From: Ammar Date: Mon, 23 Feb 2026 14:12:11 -0600 Subject: [PATCH 5/5] tests: stabilize compaction UI auto-retry scenario --- tests/ui/compaction/compaction.test.ts | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/ui/compaction/compaction.test.ts b/tests/ui/compaction/compaction.test.ts index 50b0e97def..b949d24b6b 100644 --- a/tests/ui/compaction/compaction.test.ts +++ b/tests/ui/compaction/compaction.test.ts @@ -17,6 +17,8 @@ import { fireEvent } from "@testing-library/react"; import { createAppHarness } from "../harness"; import { WORKSPACE_DEFAULTS } from "@/constants/workspaceDefaults"; import { KNOWN_MODELS } from "@/common/constants/knownModels"; +import { updatePersistedState } from "@/browser/hooks/usePersistedState"; +import { PREFERRED_COMPACTION_MODEL_KEY } from "@/common/constants/storage"; interface ServiceContainerPrivates { backgroundProcessManager: BackgroundProcessManager; @@ -103,24 +105,23 @@ describe("Compaction UI (mock AI router)", () => { }, 60_000); test("auto-compacts after context_exceeded and resumes", async () => { - const app = await createAppHarness({ branchPrefix: "compaction-ui" }); + const app = await createAppHarness({ + branchPrefix: "compaction-ui", + // Keep auto-compaction deterministic even when default model context windows change. 
+ beforeRender: () => { + updatePersistedState(PREFERRED_COMPACTION_MODEL_KEY, COMPACTION_TEST_MODEL); + }, + }); try { const triggerMessage = "Trigger context error"; const userDraft = "My draft message that should be preserved"; - const triggerPromise = app.env.orpc.workspace.sendMessage({ - workspaceId: app.workspaceId, - message: triggerMessage, - options: { model: COMPACTION_TEST_MODEL, agentId: WORKSPACE_DEFAULTS.agentId }, - }); + await app.chat.send(triggerMessage); // User starts typing while auto-compaction is in progress. await app.chat.typeWithoutSending(userDraft); - const triggerResult = await triggerPromise; - expect(triggerResult.success).toBe(true); - await app.chat.expectTranscriptContains("Mock compaction summary:", 60_000); await app.chat.expectTranscriptContains(`Continue with: ${triggerMessage}`, 60_000); await app.chat.expectTranscriptContains(`Mock response: ${triggerMessage}`, 60_000);