diff --git a/CLAUDE.md b/CLAUDE.md index 15e4b1c..e6d0622 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -117,16 +117,17 @@ Model name prefixes determine routing: - **Anthropic**: claude-sonnet-4-0/4-5/4-6, claude-haiku-4-5, claude-opus-4-5/4-6/4-7/4-8, claude-fable-5, claude-3-7-sonnet, claude-3-5-haiku - **Google**: gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.5-pro, gemini-3-pro-preview, gemini-3-flash-preview, gemini-3.1-pro-preview, gemini-3.1-flash-lite-preview, gemini-3.5-flash; image generation: gemini-2.5-flash-image, gemini-3.1-flash-image - **xAI**: grok-2, grok-3, grok-3-mini, grok-4, grok-4-fast, grok-4-1-fast; image generation: grok-2-image -- **ByteDance** (BytePlus ModelArk, OpenAI-compatible, ap-southeast): seed-1.6, seed-1.8, seed-2.0-lite, deepseek-v4-flash, deepseek-v4-pro; image generation: seedream-4.0, seedance-4.5 +- **ByteDance** (BytePlus ModelArk, OpenAI-compatible, ap-southeast): seed-1.6, seed-1.8, seed-2.0-lite, deepseek-v4-flash, deepseek-v4-pro; image generation: seedream-4.0, seedream-5.0-lite, seedance-4.5 - **Nous Research** (Nous Portal, OpenAI-compatible): hermes-4-405b, hermes-4-70b - **Z.ai** (Model API, OpenAI-compatible): glm-5.2; image generation: glm-image -Image generation via xAI (grok-2-image), ByteDance (seedream-4.0, seedance-4.5), and Z.ai -(glm-image) is served through a provider `/images/generations` endpoint rather -than the chat path, but is surfaced on `/v1/chat/completions` exactly like -Gemini's inline-image models (images returned out-of-band under the message -`images` key). These models are billed a flat per-image price (see -`per_image_price_usd` in `model_registry.py`), not per token. 
+Image generation via xAI (grok-2-image), ByteDance (seedream-4.0, +seedream-5.0-lite, seedance-4.5), and Z.ai (glm-image) is served through a +provider `/images/generations` endpoint rather than the chat path, but is +surfaced on `/v1/chat/completions` exactly like Gemini's inline-image models +(images returned out-of-band under the message `images` key). These models are +billed a flat per-image price (see `per_image_price_usd` in +`model_registry.py`), not per token. ## Verification Examples diff --git a/tee_gateway/model_registry.py b/tee_gateway/model_registry.py index e2098d7..4f18b2e 100644 --- a/tee_gateway/model_registry.py +++ b/tee_gateway/model_registry.py @@ -353,6 +353,14 @@ class SupportedModel(Enum): input_price_usd=Decimal("0.0000004"), output_price_usd=Decimal("0.0000016"), ) + # Dola Seed 2.0 Mini uncensored deployment. The 128K context endpoint uses the + # lower billing tier: $0.0001 per 1K input tokens, $0.0004 per 1K output tokens. + DOLA_SEED_2_0_MINI = ModelConfig( + provider="bytedance", + api_name="ep-20260624214211-j4vhk", + input_price_usd=Decimal("0.0000001"), + output_price_usd=Decimal("0.0000004"), + ) DEEPSEEK_V4_FLASH = ModelConfig( provider="bytedance", api_name="deepseek-v4-flash-260425", @@ -376,6 +384,15 @@ class SupportedModel(Enum): image_generation=True, per_image_price_usd=Decimal("0.03"), ) + # Seedream 5.0 Lite image generation via a ModelArk deployment endpoint. + SEEDREAM_5_0_LITE = ModelConfig( + provider="bytedance", + api_name="ep-20260624213657-7zc5n", + input_price_usd=Decimal("0"), + output_price_usd=Decimal("0"), + image_generation=True, + per_image_price_usd=Decimal("0.035"), + ) # Seedance 4.5 image generation via a ModelArk deployment endpoint. # Uses URL response format and seedance-specific request params # (sequential_image_generation, watermark, size). Billed per image. 
@@ -501,6 +518,8 @@ class SupportedModel(Enum): "seed-1.8": SupportedModel.SEED_1_8, "seed-2-0-lite-260228": SupportedModel.SEED_2_0_LITE, "seed-2.0-lite": SupportedModel.SEED_2_0_LITE, + "dola-seed-2.0-mini": SupportedModel.DOLA_SEED_2_0_MINI, + "dola-seed-2-0-mini": SupportedModel.DOLA_SEED_2_0_MINI, "deepseek-v4-flash-260425": SupportedModel.DEEPSEEK_V4_FLASH, "deepseek-v4-flash": SupportedModel.DEEPSEEK_V4_FLASH, "deepseek-v4-pro-260425": SupportedModel.DEEPSEEK_V4_PRO, @@ -508,6 +527,8 @@ class SupportedModel(Enum): "seedream-4-0-250828": SupportedModel.SEEDREAM_4_0, "seedream-4.0": SupportedModel.SEEDREAM_4_0, "seedream-4-0": SupportedModel.SEEDREAM_4_0, + "seedream-5.0-lite": SupportedModel.SEEDREAM_5_0_LITE, + "seedream-5-0-lite": SupportedModel.SEEDREAM_5_0_LITE, "ep-20260624042612-7dxcv": SupportedModel.SEEDANCE_4_5, "seedance-4.5": SupportedModel.SEEDANCE_4_5, "seedance-4-5": SupportedModel.SEEDANCE_4_5, diff --git a/tests/test_pricing.py b/tests/test_pricing.py index 36f0cb0..aaf02b6 100644 --- a/tests/test_pricing.py +++ b/tests/test_pricing.py @@ -338,6 +338,19 @@ def test_seed_2_0_lite_dated_alias_resolves(self): cfg = get_model_config("seed-2-0-lite-260228") self.assertEqual(cfg, get_model_config("seed-2.0-lite")) + def test_dola_seed_2_0_mini_resolves(self): + cfg = get_model_config("dola-seed-2.0-mini") + self.assertEqual(cfg.provider, "bytedance") + self.assertEqual(cfg.api_name, "ep-20260624214211-j4vhk") + self.assertEqual(cfg.input_price_usd, Decimal("0.0000001")) + self.assertEqual(cfg.output_price_usd, Decimal("0.0000004")) + + def test_dola_seed_2_0_mini_aliases_resolve(self): + self.assertEqual( + get_model_config("dola-seed-2-0-mini"), + get_model_config("dola-seed-2.0-mini"), + ) + def test_deepseek_v4_flash_resolves(self): cfg = get_model_config("deepseek-v4-flash") self.assertEqual(cfg.provider, "bytedance") @@ -362,6 +375,19 @@ def test_deepseek_v4_pro_aliases_resolve(self): get_model_config("deepseek-v4-pro"), ) + def 
test_seedream_5_0_lite_resolves(self): + cfg = get_model_config("seedream-5.0-lite") + self.assertEqual(cfg.provider, "bytedance") + self.assertEqual(cfg.api_name, "ep-20260624213657-7zc5n") + self.assertTrue(cfg.image_generation) + self.assertEqual(cfg.per_image_price_usd, Decimal("0.035")) + + def test_seedream_5_0_lite_aliases_resolve(self): + self.assertEqual( + get_model_config("seedream-5-0-lite"), + get_model_config("seedream-5.0-lite"), + ) + # ── Nous Research (Nous Portal) ───────────────────────────────────────── def test_hermes_4_405b_resolves(self):