Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,16 +117,17 @@ Model name prefixes determine routing:
- **Anthropic**: claude-sonnet-4-0/4-5/4-6, claude-haiku-4-5, claude-opus-4-5/4-6/4-7/4-8, claude-fable-5, claude-3-7-sonnet, claude-3-5-haiku
- **Google**: gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.5-pro, gemini-3-pro-preview, gemini-3-flash-preview, gemini-3.1-pro-preview, gemini-3.1-flash-lite-preview, gemini-3.5-flash; image generation: gemini-2.5-flash-image, gemini-3.1-flash-image
- **xAI**: grok-2, grok-3, grok-3-mini, grok-4, grok-4-fast, grok-4-1-fast; image generation: grok-2-image
- **ByteDance** (BytePlus ModelArk, OpenAI-compatible, ap-southeast): seed-1.6, seed-1.8, seed-2.0-lite, deepseek-v4-flash, deepseek-v4-pro; image generation: seedream-4.0, seedance-4.5
- **ByteDance** (BytePlus ModelArk, OpenAI-compatible, ap-southeast): seed-1.6, seed-1.8, seed-2.0-lite, dola-seed-2.0-mini, deepseek-v4-flash, deepseek-v4-pro; image generation: seedream-4.0, seedream-5.0-lite, seedance-4.5
- **Nous Research** (Nous Portal, OpenAI-compatible): hermes-4-405b, hermes-4-70b
- **Z.ai** (Model API, OpenAI-compatible): glm-5.2; image generation: glm-image

Image generation via xAI (grok-2-image), ByteDance (seedream-4.0, seedance-4.5), and Z.ai
(glm-image) is served through a provider `/images/generations` endpoint rather
than the chat path, but is surfaced on `/v1/chat/completions` exactly like
Gemini's inline-image models (images returned out-of-band under the message
`images` key). These models are billed a flat per-image price (see
`per_image_price_usd` in `model_registry.py`), not per token.
Image generation via xAI (grok-2-image), ByteDance (seedream-4.0,
seedream-5.0-lite, seedance-4.5), and Z.ai (glm-image) is served through a
provider `/images/generations` endpoint rather than the chat path, but is
surfaced on `/v1/chat/completions` exactly like Gemini's inline-image models
(images returned out-of-band under the message `images` key). These models are
billed a flat per-image price (see `per_image_price_usd` in
`model_registry.py`), not per token.

## Verification Examples

Expand Down
21 changes: 21 additions & 0 deletions tee_gateway/model_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,14 @@ class SupportedModel(Enum):
input_price_usd=Decimal("0.0000004"),
output_price_usd=Decimal("0.0000016"),
)
# Dola Seed 2.0 Mini uncensored deployment. The 128K context endpoint uses
# the lower billing tier: $0.0001 per 1K input tokens and $0.0004 per 1K
# output tokens. The prices below are the per-token equivalents in USD
# (0.0001 / 1000 = 0.0000001 and 0.0004 / 1000 = 0.0000004).
DOLA_SEED_2_0_MINI = ModelConfig(
provider="bytedance",
api_name="ep-20260624214211-j4vhk",
input_price_usd=Decimal("0.0000001"),
output_price_usd=Decimal("0.0000004"),
)
DEEPSEEK_V4_FLASH = ModelConfig(
provider="bytedance",
api_name="deepseek-v4-flash-260425",
Expand All @@ -376,6 +384,15 @@ class SupportedModel(Enum):
image_generation=True,
per_image_price_usd=Decimal("0.03"),
)
# Seedream 5.0 Lite image generation via a ModelArk deployment endpoint.
# Billed a flat per-image price (per_image_price_usd) rather than per token;
# the input/output token prices are set to zero.
SEEDREAM_5_0_LITE = ModelConfig(
provider="bytedance",
api_name="ep-20260624213657-7zc5n",
input_price_usd=Decimal("0"),
output_price_usd=Decimal("0"),
image_generation=True,
per_image_price_usd=Decimal("0.035"),
)
# Seedance 4.5 image generation via a ModelArk deployment endpoint.
# Uses URL response format and seedance-specific request params
# (sequential_image_generation, watermark, size). Billed per image.
Expand Down Expand Up @@ -501,13 +518,17 @@ class SupportedModel(Enum):
"seed-1.8": SupportedModel.SEED_1_8,
"seed-2-0-lite-260228": SupportedModel.SEED_2_0_LITE,
"seed-2.0-lite": SupportedModel.SEED_2_0_LITE,
"dola-seed-2.0-mini": SupportedModel.DOLA_SEED_2_0_MINI,
"dola-seed-2-0-mini": SupportedModel.DOLA_SEED_2_0_MINI,
"deepseek-v4-flash-260425": SupportedModel.DEEPSEEK_V4_FLASH,
"deepseek-v4-flash": SupportedModel.DEEPSEEK_V4_FLASH,
"deepseek-v4-pro-260425": SupportedModel.DEEPSEEK_V4_PRO,
"deepseek-v4-pro": SupportedModel.DEEPSEEK_V4_PRO,
"seedream-4-0-250828": SupportedModel.SEEDREAM_4_0,
"seedream-4.0": SupportedModel.SEEDREAM_4_0,
"seedream-4-0": SupportedModel.SEEDREAM_4_0,
"seedream-5.0-lite": SupportedModel.SEEDREAM_5_0_LITE,
"seedream-5-0-lite": SupportedModel.SEEDREAM_5_0_LITE,
"ep-20260624042612-7dxcv": SupportedModel.SEEDANCE_4_5,
"seedance-4.5": SupportedModel.SEEDANCE_4_5,
"seedance-4-5": SupportedModel.SEEDANCE_4_5,
Expand Down
26 changes: 26 additions & 0 deletions tests/test_pricing.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,19 @@ def test_seed_2_0_lite_dated_alias_resolves(self):
cfg = get_model_config("seed-2-0-lite-260228")
self.assertEqual(cfg, get_model_config("seed-2.0-lite"))

def test_dola_seed_2_0_mini_resolves(self):
    """The dotted name resolves to the ByteDance endpoint with its per-token prices."""
    resolved = get_model_config("dola-seed-2.0-mini")

    # Routing: provider and upstream deployment endpoint.
    self.assertEqual(resolved.provider, "bytedance")
    self.assertEqual(resolved.api_name, "ep-20260624214211-j4vhk")

    # Billing: per-token USD prices for input and output.
    self.assertEqual(resolved.input_price_usd, Decimal("0.0000001"))
    self.assertEqual(resolved.output_price_usd, Decimal("0.0000004"))

def test_dola_seed_2_0_mini_aliases_resolve(self):
    """The hyphenated alias resolves to the same config as the dotted name."""
    via_hyphenated = get_model_config("dola-seed-2-0-mini")
    via_dotted = get_model_config("dola-seed-2.0-mini")
    self.assertEqual(via_hyphenated, via_dotted)

def test_deepseek_v4_flash_resolves(self):
cfg = get_model_config("deepseek-v4-flash")
self.assertEqual(cfg.provider, "bytedance")
Expand All @@ -362,6 +375,19 @@ def test_deepseek_v4_pro_aliases_resolve(self):
get_model_config("deepseek-v4-pro"),
)

def test_seedream_5_0_lite_resolves(self):
    """The dotted name resolves to the ByteDance image endpoint and its per-image price."""
    resolved = get_model_config("seedream-5.0-lite")

    # Routing: provider and upstream deployment endpoint.
    self.assertEqual(resolved.provider, "bytedance")
    self.assertEqual(resolved.api_name, "ep-20260624213657-7zc5n")

    # Image models are flagged as such and carry a flat per-image price.
    self.assertTrue(resolved.image_generation)
    self.assertEqual(resolved.per_image_price_usd, Decimal("0.035"))

def test_seedream_5_0_lite_aliases_resolve(self):
    """The hyphenated alias resolves to the same config as the dotted name."""
    via_hyphenated = get_model_config("seedream-5-0-lite")
    via_dotted = get_model_config("seedream-5.0-lite")
    self.assertEqual(via_hyphenated, via_dotted)

# ── Nous Research (Nous Portal) ─────────────────────────────────────────

def test_hermes_4_405b_resolves(self):
Expand Down
Loading