Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,11 @@ Model name prefixes determine routing:
- **Anthropic**: claude-sonnet-4-0/4-5/4-6, claude-haiku-4-5, claude-opus-4-5/4-6/4-7/4-8, claude-fable-5, claude-3-7-sonnet, claude-3-5-haiku
- **Google**: gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.5-pro, gemini-3-pro-preview, gemini-3-flash-preview, gemini-3.1-pro-preview, gemini-3.1-flash-lite-preview, gemini-3.5-flash; image generation: gemini-2.5-flash-image, gemini-3.1-flash-image
- **xAI**: grok-2, grok-3, grok-3-mini, grok-4, grok-4-fast, grok-4-1-fast; image generation: grok-2-image
- **ByteDance** (BytePlus ModelArk, OpenAI-compatible, ap-southeast): seed-1.6, seed-1.8, seed-2.0-lite, deepseek-v4-flash, deepseek-v4-pro; image generation: seedream-4.0
- **ByteDance** (BytePlus ModelArk, OpenAI-compatible, ap-southeast): seed-1.6, seed-1.8, seed-2.0-lite, deepseek-v4-flash, deepseek-v4-pro; image generation: seedream-4.0, seedance-4.5
- **Nous Research** (Nous Portal, OpenAI-compatible): hermes-4-405b, hermes-4-70b
- **Z.ai** (Model API, OpenAI-compatible): glm-5.2; image generation: glm-image

Image generation via xAI (grok-2-image), ByteDance (seedream-4.0), and Z.ai
Image generation via xAI (grok-2-image), ByteDance (seedream-4.0, seedance-4.5), and Z.ai
(glm-image) is served through a provider `/images/generations` endpoint rather
than the chat path, but is surfaced on `/v1/chat/completions` exactly like
Gemini's inline-image models (images returned out-of-band under the message
Expand Down
12 changes: 9 additions & 3 deletions tee_gateway/llm_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,16 +364,22 @@ def generate_images(model: str, prompt: str, n: int = 1) -> tuple[list[str], int
raise RuntimeError(f"{provider} HTTP client has not been initialized")

# n is clamped to the OpenAI-compatible providers' documented 1..10 range.
# Z.ai's GLM-Image endpoint currently returns exactly one image and does not
# document n/response_format support, so keep its payload to the documented
# fields.
# Z.ai's GLM-Image and ByteDance Seedance endpoints don't document n
# support, so n is omitted from their payloads. GLM-Image also omits
# response_format, while Seedance deployment endpoints set it to "url".
count = max(1, min(int(n), 10))
payload: dict[str, Any] = {
"model": cfg.api_name,
"prompt": prompt,
}
if provider == "zai":
payload["size"] = "1280x1280"
elif provider == "bytedance" and cfg.api_name.startswith("ep-"):
# Seedance deployment endpoints use URL format and require extra params.
payload["response_format"] = "url"
payload["sequential_image_generation"] = "disabled"
payload["watermark"] = False
payload["size"] = "2K"
payload["stream"] = False
else:
payload["n"] = count
payload["response_format"] = "b64_json"
Expand Down
14 changes: 14 additions & 0 deletions tee_gateway/model_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,17 @@ class SupportedModel(Enum):
image_generation=True,
per_image_price_usd=Decimal("0.03"),
)
# Seedance 4.5 image generation via a ModelArk deployment endpoint.
# Uses URL response format and Seedance-specific request params
# (sequential_image_generation, watermark, size, stream). Billed per image.
SEEDANCE_4_5 = ModelConfig(
provider="bytedance",
api_name="ep-20260624042612-7dxcv",
input_price_usd=Decimal("0"),
output_price_usd=Decimal("0"),
image_generation=True,
per_image_price_usd=Decimal("0.05"),
)

# ── Nous Research (Nous Portal, OpenAI-compatible) ──────────────────
# Hermes 4 family, served via Nous's OpenAI-compatible inference API.
Expand Down Expand Up @@ -497,6 +508,9 @@ class SupportedModel(Enum):
"seedream-4-0-250828": SupportedModel.SEEDREAM_4_0,
"seedream-4.0": SupportedModel.SEEDREAM_4_0,
"seedream-4-0": SupportedModel.SEEDREAM_4_0,
"ep-20260624042612-7dxcv": SupportedModel.SEEDANCE_4_5,
"seedance-4.5": SupportedModel.SEEDANCE_4_5,
"seedance-4-5": SupportedModel.SEEDANCE_4_5,
# Nous Research
"hermes-4-405b": SupportedModel.HERMES_4_405B,
"hermes-4-70b": SupportedModel.HERMES_4_70B,
Expand Down
25 changes: 23 additions & 2 deletions tee_gateway/test/test_image_generation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Tests for endpoint-based image generation (xAI Grok, ByteDance Seedream,
Z.ai GLM-Image).
ByteDance Seedance, Z.ai GLM-Image).

Unlike Gemini's inline-image chat models (see test_image_billing.py), these
models are served via a dedicated OpenAI-compatible ``/images/generations``
Expand All @@ -25,6 +25,7 @@

GROK_IMAGE = "grok-2-image"
SEEDREAM = "seedream-4.0"
SEEDANCE = "seedance-4.5"
GLM_IMAGE = "glm-image"


Expand Down Expand Up @@ -89,6 +90,26 @@ def test_zai_glm_image_uses_documented_payload_and_url_response(self):
self.assertNotIn("n", payload)
self.assertNotIn("response_format", payload)

def test_seedance_uses_url_format_and_extra_params(self):
client = MagicMock()
client.post.return_value = _mock_response([{"url": "https://cdn/img.jpg"}])
with patch.object(llm_backend, "bytedance_http_client", client):
images, count = generate_images(SEEDANCE, "a black hole", n=1)

self.assertEqual(count, 1)
self.assertEqual(images, ["https://cdn/img.jpg"])

_, kwargs = client.post.call_args
payload = kwargs["json"]
self.assertEqual(payload["model"], get_model_config(SEEDANCE).api_name)
self.assertEqual(payload["prompt"], "a black hole")
self.assertEqual(payload["response_format"], "url")
self.assertEqual(payload["sequential_image_generation"], "disabled")
self.assertFalse(payload["watermark"])
self.assertEqual(payload["size"], "2K")
self.assertFalse(payload["stream"])
self.assertNotIn("n", payload)

def test_n_is_clamped_to_provider_range(self):
client = MagicMock()
client.post.return_value = _mock_response([{"b64_json": "x"}])
Expand Down Expand Up @@ -128,7 +149,7 @@ def _zero_usage() -> dict:
return {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}

def test_single_image_charged_flat_price(self):
for model in (GROK_IMAGE, SEEDREAM, GLM_IMAGE):
for model in (GROK_IMAGE, SEEDREAM, SEEDANCE, GLM_IMAGE):
with self.subTest(model=model):
cfg = get_model_config(model)
cost = compute_session_cost(model, self._zero_usage(), image_count=1)
Expand Down
Loading