From d59ac2b66d9138ef7cb2a2e158ec1e9dd9b17460 Mon Sep 17 00:00:00 2001 From: Aniket Dixit Date: Mon, 22 Jun 2026 21:36:41 +0530 Subject: [PATCH 1/2] glm model support --- CLAUDE.md | 16 +++--- scripts/run-enclave.sh | 6 ++- tee_gateway/__main__.py | 5 ++ tee_gateway/config.py | 3 ++ tee_gateway/llm_backend.py | 60 ++++++++++++++++++++--- tee_gateway/model_registry.py | 23 ++++++++- tee_gateway/test/test_image_generation.py | 25 ++++++++-- tests/test_pricing.py | 16 ++++++ 8 files changed, 134 insertions(+), 20 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index fe4d319..7bf7c5e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -67,6 +67,7 @@ API keys (injected at runtime via POST /v1/keys — do NOT bake into the image): - `XAI_API_KEY` - `ARK_API_KEY` (BytePlus / ByteDance ModelArk; injected as `bytedance_api_key`) - `NOUS_API_KEY` (Nous Research / Nous Portal; injected as `nous_api_key`) +- `ZAI_API_KEY` (Z.ai Model API; injected as `zai_api_key`) Server configuration: - `API_SERVER_PORT` (default: 8000) @@ -118,13 +119,14 @@ Model name prefixes determine routing: - **xAI**: grok-2, grok-3, grok-3-mini, grok-4, grok-4-fast, grok-4-1-fast; image generation: grok-2-image - **ByteDance** (BytePlus ModelArk, OpenAI-compatible, ap-southeast): seed-1.6, seed-1.8, seed-2.0-lite; image generation: seedream-4.0 - **Nous Research** (Nous Portal, OpenAI-compatible): hermes-4-405b, hermes-4-70b - -Image generation via xAI (grok-2-image) and ByteDance (seedream-4.0) is served -through a provider `/images/generations` endpoint rather than the chat path, but -is surfaced on `/v1/chat/completions` exactly like Gemini's inline-image models -(images returned out-of-band under the message `images` key). These models are -billed a flat per-image price (see `per_image_price_usd` in `model_registry.py`), -not per token. +- **Z.ai** (Model API, OpenAI-compatible): glm-5.2; image generation: glm-image + +Image generation via xAI (grok-2-image), ByteDance (seedream-4.0), and Z.ai +(glm-image) is served through a provider `/images/generations` endpoint rather +than the chat path, but is surfaced on `/v1/chat/completions` exactly like +Gemini's inline-image models (images returned out-of-band under the message +`images` key). These models are billed a flat per-image price (see +`per_image_price_usd` in `model_registry.py`), not per token. ## Verification Examples diff --git a/scripts/run-enclave.sh b/scripts/run-enclave.sh index 04b1ff8..3928511 100755 --- a/scripts/run-enclave.sh +++ b/scripts/run-enclave.sh @@ -91,6 +91,7 @@ if [ -f "$ENV_FILE" ]; then XAI_API_KEY="$(grep -E '^XAI_API_KEY=' "$ENV_FILE" | cut -d'=' -f2-)" ARK_API_KEY="$(grep -E '^ARK_API_KEY=' "$ENV_FILE" | cut -d'=' -f2-)" NOUS_API_KEY="$(grep -E '^NOUS_API_KEY=' "$ENV_FILE" | cut -d'=' -f2-)" + ZAI_API_KEY="$(grep -E '^ZAI_API_KEY=' "$ENV_FILE" | cut -d'=' -f2-)" # FACILITATOR_URL is used for both x402 payment verification and the heartbeat relay. # HEARTBEAT_CONTRACT_ADDRESS and TEE_HEARTBEAT_INTERVAL are optional heartbeat parameters. @@ -108,6 +109,7 @@ if [ -f "$ENV_FILE" ]; then --arg xai "$XAI_API_KEY" \ --arg bytedance "$ARK_API_KEY" \ --arg nous "$NOUS_API_KEY" \ + --arg zai "$ZAI_API_KEY" \ --arg hb_contract "$HEARTBEAT_CONTRACT_ADDRESS" \ --arg facilitator "$FACILITATOR_URL" \ --arg hb_interval "$TEE_HEARTBEAT_INTERVAL" \ @@ -117,7 +119,8 @@ if [ -f "$ENV_FILE" ]; then anthropic_api_key: $anthropic, xai_api_key: $xai, bytedance_api_key: $bytedance, - nous_api_key: $nous + nous_api_key: $nous, + zai_api_key: $zai } + if $hb_contract != "" then {heartbeat_contract_address: $hb_contract} else {} end + if $facilitator != "" then {facilitator_url: $facilitator} else {} end @@ -148,6 +151,7 @@ if [ -f "$ENV_FILE" ]; then # Clear key variables from this shell immediately after use unset OPENAI_API_KEY GOOGLE_API_KEY ANTHROPIC_API_KEY XAI_API_KEY ARK_API_KEY + unset NOUS_API_KEY ZAI_API_KEY unset HEARTBEAT_CONTRACT_ADDRESS FACILITATOR_URL TEE_HEARTBEAT_INTERVAL fi else diff --git a/tee_gateway/__main__.py b/tee_gateway/__main__.py index 70086f9..3b317db 100644 --- a/tee_gateway/__main__.py +++ b/tee_gateway/__main__.py @@ -394,6 +394,7 @@ def set_provider_keys(): xai_api_key=body.get("xai_api_key") or None, bytedance_api_key=body.get("bytedance_api_key") or None, nous_api_key=body.get("nous_api_key") or None, + zai_api_key=body.get("zai_api_key") or None, ) set_provider_config(provider_config) @@ -456,6 +457,9 @@ def _set(val: str | None) -> str: logger.info( " nous_api_key : %s", _set(provider_config.nous_api_key) ) + logger.info( + " zai_api_key : %s", _set(provider_config.zai_api_key) + ) logger.info(" facilitator_url : %s", facilitator_url) logger.info( " heartbeat_contract_address : %s", @@ -489,6 +493,7 @@ def _set(val: str | None) -> str: "xai": provider_config.xai_api_key, "bytedance": provider_config.bytedance_api_key, "nous": provider_config.nous_api_key, + "zai": provider_config.zai_api_key, }.items() if k ] diff --git a/tee_gateway/config.py b/tee_gateway/config.py index 61d313e..18f977e 100644 --- a/tee_gateway/config.py +++ b/tee_gateway/config.py @@ -29,6 +29,7 @@ class ProviderConfig: xai_api_key: Optional[str] = None bytedance_api_key: Optional[str] = None nous_api_key: Optional[str] = None + zai_api_key: Optional[str] = None def initialized_providers(self) -> list[str]: """Return provider names whose API key is set (non-empty).""" @@ -45,6 +46,8 @@ def initialized_providers(self) -> list[str]: providers.append("bytedance") if self.nous_api_key: providers.append("nous") + if self.zai_api_key: + providers.append("zai") return providers diff --git a/tee_gateway/llm_backend.py b/tee_gateway/llm_backend.py index 3d2435a..c790a0d 100644 --- a/tee_gateway/llm_backend.py +++ b/tee_gateway/llm_backend.py @@ -53,12 +53,19 @@ # Nous Research OpenAI-compatible inference endpoint (Nous Portal). NOUS_BASE_URL = "https://inference-api.nousresearch.com/v1" +# Z.ai Model API OpenAI-compatible endpoint. The full chat URL is +# https://api.z.ai/api/paas/v4/chat/completions; ChatOpenAI appends +# /chat/completions to this base URL. Do not confuse this paid Model API with +# the subscription Coding Plan endpoint at /api/coding/paas/v4. +ZAI_BASE_URL = "https://api.z.ai/api/paas/v4" + # Shared synchronous HTTP clients for each provider. # Initialized to None; built by set_provider_config() after key injection. openai_http_client: Optional[httpx.Client] = None xai_http_client: Optional[httpx.Client] = None bytedance_http_client: Optional[httpx.Client] = None nous_http_client: Optional[httpx.Client] = None +zai_http_client: Optional[httpx.Client] = None _provider_config: Optional[ProviderConfig] = None @@ -67,12 +74,13 @@ def set_provider_config(config: ProviderConfig) -> None: """Store the provider config and rebuild HTTP clients. Called once after key injection.""" global _provider_config, openai_http_client, xai_http_client, bytedance_http_client - global nous_http_client + global nous_http_client, zai_http_client old_openai = openai_http_client old_xai = xai_http_client old_bytedance = bytedance_http_client old_nous = nous_http_client + old_zai = zai_http_client openai_http_client = httpx.Client( base_url="https://api.openai.com/v1", @@ -106,6 +114,14 @@ def set_provider_config(config: ProviderConfig) -> None: http2=True, follow_redirects=False, ) + zai_http_client = httpx.Client( + base_url=ZAI_BASE_URL, + headers={"Authorization": f"Bearer {config.zai_api_key or ''}"}, + timeout=_TIMEOUT, + limits=_LIMITS, + http2=True, + follow_redirects=False, + ) get_chat_model_cached.cache_clear() _provider_config = config @@ -118,6 +134,8 @@ def set_provider_config(config: ProviderConfig) -> None: old_bytedance.close() if old_nous is not None: old_nous.close() + if old_zai is not None: + old_zai.close() def get_provider_config() -> Optional[ProviderConfig]: @@ -289,6 +307,24 @@ def get_chat_model_cached( stream_usage=True, ) # type: ignore [call-arg] + elif provider == "zai": + if not config.zai_api_key: + raise ValueError("zai_api_key not set in ProviderConfig") + + if zai_http_client is None: + raise RuntimeError("Z.ai HTTP client has not been initialized") + + return ChatOpenAI( + model=api_name, + temperature=effective_temp, + max_tokens=max_tokens, + http_client=zai_http_client, + api_key=SecretStr(config.zai_api_key), + base_url=ZAI_BASE_URL, + streaming=True, + stream_usage=True, + ) # type: ignore [call-arg] + else: raise ValueError(f"Unsupported provider: {provider}") @@ -300,11 +336,11 @@ def get_chat_model_cached( def generate_images(model: str, prompt: str, n: int = 1) -> tuple[list[str], int]: """Generate images via a provider's OpenAI-compatible images endpoint. - Unlike Gemini's inline-image chat models, xAI (Aurora) and ByteDance - (Seedream) expose image generation through a dedicated + Unlike Gemini's inline-image chat models, xAI (Aurora), ByteDance + (Seedream), and Z.ai (GLM-Image) expose image generation through a dedicated ``POST /images/generations`` endpoint. We request ``b64_json`` so the image - bytes ride inline inside the OHTTP/TEE envelope rather than as external URLs - that leak the content and expire. + bytes ride inline inside the OHTTP/TEE envelope for providers that support + it. Z.ai returns temporary image URLs only. Returns ``(data_uris, image_count)`` where each entry is a ``data:`` URI. The count is used for per-image billing. Falls back to provider-returned URLs if @@ -317,6 +353,8 @@ def generate_images(model: str, prompt: str, n: int = 1) -> tuple[list[str], int client = xai_http_client elif provider == "bytedance": client = bytedance_http_client + elif provider == "zai": + client = zai_http_client else: raise ValueError( f"Provider {provider!r} does not support the image-generation endpoint" @@ -325,14 +363,20 @@ def generate_images(model: str, prompt: str, n: int = 1) -> tuple[list[str], int if client is None: raise RuntimeError(f"{provider} HTTP client has not been initialized") - # n is clamped to the providers' documented 1..10 range. + # n is clamped to the OpenAI-compatible providers' documented 1..10 range. + # Z.ai's GLM-Image endpoint currently returns exactly one image and does not + # document n/response_format support, so keep its payload to the documented + # fields. count = max(1, min(int(n), 10)) payload: dict[str, Any] = { "model": cfg.api_name, "prompt": prompt, - "n": count, - "response_format": "b64_json", } + if provider == "zai": + payload["size"] = "1280x1280" + else: + payload["n"] = count + payload["response_format"] = "b64_json" logger.info( "Generating %d image(s) - Provider: %s, Model: %s", diff --git a/tee_gateway/model_registry.py b/tee_gateway/model_registry.py index 36612f0..b30ac10 100644 --- a/tee_gateway/model_registry.py +++ b/tee_gateway/model_registry.py @@ -13,7 +13,7 @@ @dataclass(frozen=True) class ModelConfig: - provider: str # "openai" | "anthropic" | "google" | "x-ai" | "bytedance" | "nous" + provider: str # "openai" | "anthropic" | "google" | "x-ai" | "bytedance" | "nous" | "zai" api_name: str # model name sent to provider API input_price_usd: Decimal # USD per token output_price_usd: Decimal # USD per token @@ -385,6 +385,24 @@ class SupportedModel(Enum): output_price_usd=Decimal("0.0000004"), ) + # ── Z.ai (Model API, OpenAI-compatible) ───────────────────────────── + # Z.ai publishes GLM-5.2 prices per 1M tokens: $1.40 input, $4.40 output. + GLM_5_2 = ModelConfig( + provider="zai", + api_name="glm-5.2", + input_price_usd=Decimal("0.0000014"), + output_price_usd=Decimal("0.0000044"), + ) + # GLM-Image uses Z.ai's image endpoint and is billed per generated image. + GLM_IMAGE = ModelConfig( + provider="zai", + api_name="glm-image", + input_price_usd=Decimal("0"), + output_price_usd=Decimal("0"), + image_generation=True, + per_image_price_usd=Decimal("0.015"), + ) + # ── Legacy models (not in current SDK — retained for older SDK versions) ── GROK_3_MINI = ModelConfig( provider="x-ai", @@ -465,6 +483,9 @@ class SupportedModel(Enum): # Nous Research "hermes-4-405b": SupportedModel.HERMES_4_405B, "hermes-4-70b": SupportedModel.HERMES_4_70B, + # Z.ai + "glm-5.2": SupportedModel.GLM_5_2, + "glm-image": SupportedModel.GLM_IMAGE, # Legacy — not in current SDK, retained for older SDK versions "grok-3-mini-beta": SupportedModel.GROK_3_MINI, # old beta alias "grok-3-mini": SupportedModel.GROK_3_MINI, diff --git a/tee_gateway/test/test_image_generation.py b/tee_gateway/test/test_image_generation.py index efd3621..b5e47db 100644 --- a/tee_gateway/test/test_image_generation.py +++ b/tee_gateway/test/test_image_generation.py @@ -1,11 +1,12 @@ -"""Tests for endpoint-based image generation (xAI Grok, ByteDance Seedream). +"""Tests for endpoint-based image generation (xAI Grok, ByteDance Seedream, +Z.ai GLM-Image). Unlike Gemini's inline-image chat models (see test_image_billing.py), these models are served via a dedicated OpenAI-compatible ``/images/generations`` endpoint and billed a flat price per generated image. These tests pin: 1. The request/response handling in ``generate_images`` (b64_json -> data URI, - n clamping, url fallback). + n clamping, url fallback, provider-specific payloads). 2. The flat per-image billing in ``compute_session_cost``. No network or API key required — the provider HTTP client is mocked and a stub @@ -24,6 +25,7 @@ GROK_IMAGE = "grok-2-image" SEEDREAM = "seedream-4.0" +GLM_IMAGE = "glm-image" def _mock_response(data: list[dict]) -> MagicMock: @@ -70,6 +72,23 @@ def test_url_fallback_when_no_b64(self): self.assertEqual(count, 1) self.assertEqual(images, ["https://img/1.jpg"]) + def test_zai_glm_image_uses_documented_payload_and_url_response(self): + client = MagicMock() + client.post.return_value = _mock_response([{"url": "https://z.ai/img.png"}]) + with patch.object(llm_backend, "zai_http_client", client): + images, count = generate_images(GLM_IMAGE, "a poster", n=3) + + self.assertEqual(count, 1) + self.assertEqual(images, ["https://z.ai/img.png"]) + + _, kwargs = client.post.call_args + payload = kwargs["json"] + self.assertEqual(payload["model"], "glm-image") + self.assertEqual(payload["prompt"], "a poster") + self.assertEqual(payload["size"], "1280x1280") + self.assertNotIn("n", payload) + self.assertNotIn("response_format", payload) + def test_n_is_clamped_to_provider_range(self): client = MagicMock() client.post.return_value = _mock_response([{"b64_json": "x"}]) @@ -109,7 +128,7 @@ def _zero_usage() -> dict: return {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} def test_single_image_charged_flat_price(self): - for model in (GROK_IMAGE, SEEDREAM): + for model in (GROK_IMAGE, SEEDREAM, GLM_IMAGE): with self.subTest(model=model): cfg = get_model_config(model) cost = compute_session_cost(model, self._zero_usage(), image_count=1) diff --git a/tests/test_pricing.py b/tests/test_pricing.py index 123ab92..31e0729 100644 --- a/tests/test_pricing.py +++ b/tests/test_pricing.py @@ -354,6 +354,22 @@ def test_hermes_4_70b_resolves(self): self.assertEqual(cfg.input_price_usd, Decimal("0.00000013")) self.assertEqual(cfg.output_price_usd, Decimal("0.0000004")) + # ── Z.ai (Model API) ─────────────────────────────────────────────────── + + def test_glm_5_2_resolves(self): + cfg = get_model_config("glm-5.2") + self.assertEqual(cfg.provider, "zai") + self.assertEqual(cfg.api_name, "glm-5.2") + self.assertEqual(cfg.input_price_usd, Decimal("0.0000014")) + self.assertEqual(cfg.output_price_usd, Decimal("0.0000044")) + + def test_glm_image_resolves(self): + cfg = get_model_config("glm-image") + self.assertEqual(cfg.provider, "zai") + self.assertEqual(cfg.api_name, "glm-image") + self.assertTrue(cfg.image_generation) + self.assertEqual(cfg.per_image_price_usd, Decimal("0.015")) + # ── Errors ─────────────────────────────────────────────────────────────── def test_unknown_model_raises(self): From e8b8ca7206f04e07bfb4359186b66fe7f773a21e Mon Sep 17 00:00:00 2001 From: Aniket Dixit Date: Mon, 22 Jun 2026 21:57:20 +0530 Subject: [PATCH 2/2] lint fix --- tee_gateway/model_registry.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tee_gateway/model_registry.py b/tee_gateway/model_registry.py index b30ac10..ff5d85a 100644 --- a/tee_gateway/model_registry.py +++ b/tee_gateway/model_registry.py @@ -13,7 +13,8 @@ @dataclass(frozen=True) class ModelConfig: - provider: str # "openai" | "anthropic" | "google" | "x-ai" | "bytedance" | "nous" | "zai" + # "openai" | "anthropic" | "google" | "x-ai" | "bytedance" | "nous" | "zai" + provider: str api_name: str # model name sent to provider API input_price_usd: Decimal # USD per token output_price_usd: Decimal # USD per token