diff --git a/CLAUDE.md b/CLAUDE.md index a14a1f3..15e4b1c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -117,11 +117,11 @@ Model name prefixes determine routing: - **Anthropic**: claude-sonnet-4-0/4-5/4-6, claude-haiku-4-5, claude-opus-4-5/4-6/4-7/4-8, claude-fable-5, claude-3-7-sonnet, claude-3-5-haiku - **Google**: gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.5-pro, gemini-3-pro-preview, gemini-3-flash-preview, gemini-3.1-pro-preview, gemini-3.1-flash-lite-preview, gemini-3.5-flash; image generation: gemini-2.5-flash-image, gemini-3.1-flash-image - **xAI**: grok-2, grok-3, grok-3-mini, grok-4, grok-4-fast, grok-4-1-fast; image generation: grok-2-image -- **ByteDance** (BytePlus ModelArk, OpenAI-compatible, ap-southeast): seed-1.6, seed-1.8, seed-2.0-lite, deepseek-v4-flash, deepseek-v4-pro; image generation: seedream-4.0 +- **ByteDance** (BytePlus ModelArk, OpenAI-compatible, ap-southeast): seed-1.6, seed-1.8, seed-2.0-lite, deepseek-v4-flash, deepseek-v4-pro; image generation: seedream-4.0, seedance-4.5 - **Nous Research** (Nous Portal, OpenAI-compatible): hermes-4-405b, hermes-4-70b - **Z.ai** (Model API, OpenAI-compatible): glm-5.2; image generation: glm-image -Image generation via xAI (grok-2-image), ByteDance (seedream-4.0), and Z.ai +Image generation via xAI (grok-2-image), ByteDance (seedream-4.0, seedance-4.5), and Z.ai (glm-image) is served through a provider `/images/generations` endpoint rather than the chat path, but is surfaced on `/v1/chat/completions` exactly like Gemini's inline-image models (images returned out-of-band under the message diff --git a/tee_gateway/llm_backend.py b/tee_gateway/llm_backend.py index c790a0d..3a34e84 100644 --- a/tee_gateway/llm_backend.py +++ b/tee_gateway/llm_backend.py @@ -364,9 +364,8 @@ def generate_images(model: str, prompt: str, n: int = 1) -> tuple[list[str], int raise RuntimeError(f"{provider} HTTP client has not been initialized") # n is clamped to the OpenAI-compatible providers' documented 1..10 range. - # Z.ai's GLM-Image endpoint currently returns exactly one image and does not - # document n/response_format support, so keep its payload to the documented - # fields. + # Z.ai's GLM-Image and ByteDance Seedance endpoints don't document n/ + # response_format support, so keep their payloads to documented fields. count = max(1, min(int(n), 10)) payload: dict[str, Any] = { "model": cfg.api_name, @@ -374,6 +373,13 @@ def generate_images(model: str, prompt: str, n: int = 1) -> tuple[list[str], int } if provider == "zai": payload["size"] = "1280x1280" + elif provider == "bytedance" and cfg.api_name.startswith("ep-"): + # Seedance deployment endpoints use URL format and require extra params. + payload["response_format"] = "url" + payload["sequential_image_generation"] = "disabled" + payload["watermark"] = False + payload["size"] = "2K" + payload["stream"] = False else: payload["n"] = count payload["response_format"] = "b64_json" diff --git a/tee_gateway/model_registry.py b/tee_gateway/model_registry.py index e8eb608..e2098d7 100644 --- a/tee_gateway/model_registry.py +++ b/tee_gateway/model_registry.py @@ -376,6 +376,17 @@ class SupportedModel(Enum): image_generation=True, per_image_price_usd=Decimal("0.03"), ) + # Seedance 4.5 image generation via a ModelArk deployment endpoint. + # Uses URL response format and seedance-specific request params + # (sequential_image_generation, watermark, size). Billed per image. + SEEDANCE_4_5 = ModelConfig( + provider="bytedance", + api_name="ep-20260624042612-7dxcv", + input_price_usd=Decimal("0"), + output_price_usd=Decimal("0"), + image_generation=True, + per_image_price_usd=Decimal("0.05"), + ) # ── Nous Research (Nous Portal, OpenAI-compatible) ────────────────── # Hermes 4 family, served via Nous's OpenAI-compatible inference API. @@ -497,6 +508,9 @@ class SupportedModel(Enum): "seedream-4-0-250828": SupportedModel.SEEDREAM_4_0, "seedream-4.0": SupportedModel.SEEDREAM_4_0, "seedream-4-0": SupportedModel.SEEDREAM_4_0, + "ep-20260624042612-7dxcv": SupportedModel.SEEDANCE_4_5, + "seedance-4.5": SupportedModel.SEEDANCE_4_5, + "seedance-4-5": SupportedModel.SEEDANCE_4_5, # Nous Research "hermes-4-405b": SupportedModel.HERMES_4_405B, "hermes-4-70b": SupportedModel.HERMES_4_70B, diff --git a/tee_gateway/test/test_image_generation.py b/tee_gateway/test/test_image_generation.py index b5e47db..2dc5353 100644 --- a/tee_gateway/test/test_image_generation.py +++ b/tee_gateway/test/test_image_generation.py @@ -1,5 +1,5 @@ """Tests for endpoint-based image generation (xAI Grok, ByteDance Seedream, -Z.ai GLM-Image). +ByteDance Seedance, Z.ai GLM-Image). Unlike Gemini's inline-image chat models (see test_image_billing.py), these models are served via a dedicated OpenAI-compatible ``/images/generations`` @@ -25,6 +25,7 @@ GROK_IMAGE = "grok-2-image" SEEDREAM = "seedream-4.0" +SEEDANCE = "seedance-4.5" GLM_IMAGE = "glm-image" @@ -89,6 +90,26 @@ def test_zai_glm_image_uses_documented_payload_and_url_response(self): self.assertNotIn("n", payload) self.assertNotIn("response_format", payload) + def test_seedance_uses_url_format_and_extra_params(self): + client = MagicMock() + client.post.return_value = _mock_response([{"url": "https://cdn/img.jpg"}]) + with patch.object(llm_backend, "bytedance_http_client", client): + images, count = generate_images(SEEDANCE, "a black hole", n=1) + + self.assertEqual(count, 1) + self.assertEqual(images, ["https://cdn/img.jpg"]) + + _, kwargs = client.post.call_args + payload = kwargs["json"] + self.assertEqual(payload["model"], get_model_config(SEEDANCE).api_name) + self.assertEqual(payload["prompt"], "a black hole") + self.assertEqual(payload["response_format"], "url") + self.assertEqual(payload["sequential_image_generation"], "disabled") + self.assertFalse(payload["watermark"]) + self.assertEqual(payload["size"], "2K") + self.assertFalse(payload["stream"]) + self.assertNotIn("n", payload) + def test_n_is_clamped_to_provider_range(self): client = MagicMock() client.post.return_value = _mock_response([{"b64_json": "x"}]) @@ -128,7 +149,7 @@ def _zero_usage() -> dict: return {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} def test_single_image_charged_flat_price(self): - for model in (GROK_IMAGE, SEEDREAM, GLM_IMAGE): + for model in (GROK_IMAGE, SEEDREAM, SEEDANCE, GLM_IMAGE): with self.subTest(model=model): cfg = get_model_config(model) cost = compute_session_cost(model, self._zero_usage(), image_count=1)