Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ Model name prefixes determine routing:
- **Anthropic**: claude-sonnet-4-0/4-5/4-6, claude-haiku-4-5, claude-opus-4-5/4-6/4-7/4-8, claude-fable-5, claude-3-7-sonnet, claude-3-5-haiku
- **Google**: gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.5-pro, gemini-3-pro-preview, gemini-3-flash-preview, gemini-3.1-pro-preview, gemini-3.1-flash-lite-preview, gemini-3.5-flash; image generation: gemini-2.5-flash-image, gemini-3.1-flash-image
- **xAI**: grok-2, grok-3, grok-3-mini, grok-4, grok-4-fast, grok-4-1-fast; image generation: grok-2-image
- **ByteDance** (BytePlus ModelArk, OpenAI-compatible, ap-southeast): seed-1.6, seed-1.8, seed-2.0-lite; image generation: seedream-4.0
- **ByteDance** (BytePlus ModelArk, OpenAI-compatible, ap-southeast): seed-1.6, seed-1.8, seed-2.0-lite, deepseek-v4-flash, deepseek-v4-pro; image generation: seedream-4.0
- **Nous Research** (Nous Portal, OpenAI-compatible): hermes-4-405b, hermes-4-70b
- **Z.ai** (Model API, OpenAI-compatible): glm-5.2; image generation: glm-image

Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ The gateway solves this by running inside a hardware-isolated Nitro Enclave wher
| Anthropic | claude-sonnet-4-5, claude-sonnet-4-6, claude-haiku-4-5, claude-opus-4-5, claude-opus-4-6 |
| Google | gemini-2.5-flash, gemini-2.5-flash-lite, gemini-2.5-pro, gemini-3-pro-preview, gemini-3-flash-preview |
| xAI | grok-4, grok-4-fast, grok-4-1-fast, grok-4-1-fast-non-reasoning |
| ByteDance | seed-1.6, seed-1.8, seed-2.0-lite |
| ByteDance | seed-1.6, seed-1.8, seed-2.0-lite, deepseek-v4-flash, deepseek-v4-pro |

## Quick Start

Expand All @@ -52,6 +52,8 @@ export ANTHROPIC_API_KEY=sk-ant-...
export GOOGLE_API_KEY=...
export XAI_API_KEY=...
export ARK_API_KEY=... # BytePlus / ByteDance ModelArk
export NOUS_API_KEY=... # Nous Research / Nous Portal
export ZAI_API_KEY=... # Z.ai Model API

# Run server (starts the Flask/connexion app on port 8000)
make test-local
Expand Down Expand Up @@ -394,6 +396,8 @@ Clients use an x402-compatible client (e.g., the [x402 SDK](https://github.com/o
| `GOOGLE_API_KEY` | - | Google AI API key |
| `XAI_API_KEY` | - | xAI API key |
| `ARK_API_KEY` | - | BytePlus / ByteDance ModelArk API key (injected as `bytedance_api_key`) |
| `NOUS_API_KEY` | - | Nous Research / Nous Portal API key (injected as `nous_api_key`) |
| `ZAI_API_KEY` | - | Z.ai Model API key (injected as `zai_api_key`) |
| `EVM_PAYMENT_ADDRESS` | - | Wallet address to receive x402 payments |
| `FACILITATOR_URL` | see `tee_gateway/__main__.py` | x402 payment facilitator endpoint |

Expand Down
16 changes: 16 additions & 0 deletions tee_gateway/model_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,18 @@ class SupportedModel(Enum):
input_price_usd=Decimal("0.0000004"),
output_price_usd=Decimal("0.0000016"),
)
# DeepSeek V4 Flash, registered under the "bytedance" provider.
# api_name is the dated identifier; the short name "deepseek-v4-flash" is an
# alias for this entry in the name lookup table.
# Prices are Decimal USD amounts per token (input and output priced separately).
DEEPSEEK_V4_FLASH = ModelConfig(
provider="bytedance",
api_name="deepseek-v4-flash-260425",
input_price_usd=Decimal("0.00000014"),
output_price_usd=Decimal("0.00000028"),
)
# DeepSeek V4 Pro, registered under the "bytedance" provider.
# api_name is the dated identifier; the short name "deepseek-v4-pro" is an
# alias for this entry in the name lookup table.
# Prices are Decimal USD amounts per token; here the output price is exactly
# twice the input price.
DEEPSEEK_V4_PRO = ModelConfig(
provider="bytedance",
api_name="deepseek-v4-pro-260425",
input_price_usd=Decimal("0.00000174"),
output_price_usd=Decimal("0.00000348"),
)
# Seedream 4.0 image generation via ModelArk's OpenAI-compatible
# /images/generations endpoint. Billed at a flat $0.03 per generated image;
# token prices unused.
Expand Down Expand Up @@ -478,6 +490,10 @@ class SupportedModel(Enum):
"seed-1.8": SupportedModel.SEED_1_8,
"seed-2-0-lite-260228": SupportedModel.SEED_2_0_LITE,
"seed-2.0-lite": SupportedModel.SEED_2_0_LITE,
"deepseek-v4-flash-260425": SupportedModel.DEEPSEEK_V4_FLASH,
"deepseek-v4-flash": SupportedModel.DEEPSEEK_V4_FLASH,
"deepseek-v4-pro-260425": SupportedModel.DEEPSEEK_V4_PRO,
"deepseek-v4-pro": SupportedModel.DEEPSEEK_V4_PRO,
"seedream-4-0-250828": SupportedModel.SEEDREAM_4_0,
"seedream-4.0": SupportedModel.SEEDREAM_4_0,
"seedream-4-0": SupportedModel.SEEDREAM_4_0,
Expand Down
38 changes: 38 additions & 0 deletions tests/test_pricing.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,30 @@ def test_seed_2_0_lite_dated_alias_resolves(self):
cfg = get_model_config("seed-2-0-lite-260228")
self.assertEqual(cfg, get_model_config("seed-2.0-lite"))

def test_deepseek_v4_flash_resolves(self):
    """The short name ``deepseek-v4-flash`` resolves to the expected config.

    Checks the provider, the dated API name, and both per-token USD prices.
    """
    resolved = get_model_config("deepseek-v4-flash")

    # Identity fields: which provider serves it and under what upstream name.
    self.assertEqual(resolved.provider, "bytedance")
    self.assertEqual(resolved.api_name, "deepseek-v4-flash-260425")

    # Pricing fields, compared as exact Decimals.
    self.assertEqual(resolved.input_price_usd, Decimal("0.00000014"))
    self.assertEqual(resolved.output_price_usd, Decimal("0.00000028"))

def test_deepseek_v4_flash_dated_alias_resolves(self):
    """The dated name and the short name resolve to equal configs."""
    # The dated alias is looked up first, then the short alias.
    self.assertEqual(
        get_model_config("deepseek-v4-flash-260425"),
        get_model_config("deepseek-v4-flash"),
    )

def test_deepseek_v4_pro_resolves(self):
    """The short name ``deepseek-v4-pro`` resolves to the expected config.

    Checks the provider, the dated API name, and both per-token USD prices.
    """
    resolved = get_model_config("deepseek-v4-pro")

    # Identity fields: which provider serves it and under what upstream name.
    self.assertEqual(resolved.provider, "bytedance")
    self.assertEqual(resolved.api_name, "deepseek-v4-pro-260425")

    # Pricing fields, compared as exact Decimals.
    self.assertEqual(resolved.input_price_usd, Decimal("0.00000174"))
    self.assertEqual(resolved.output_price_usd, Decimal("0.00000348"))

def test_deepseek_v4_pro_aliases_resolve(self):
    """The dated name and the short name resolve to equal configs."""
    # The dated alias is looked up first, then the short alias.
    via_dated_name = get_model_config("deepseek-v4-pro-260425")
    via_short_name = get_model_config("deepseek-v4-pro")
    self.assertEqual(via_dated_name, via_short_name)

# ── Nous Research (Nous Portal) ─────────────────────────────────────────

def test_hermes_4_405b_resolves(self):
Expand Down Expand Up @@ -618,6 +642,20 @@ def test_seed_2_0_lite_cheaper_than_seed_1_6(self):
full = self._calc("seed-1.6", 1000, 1000)
self.assertLess(lite, full)

def test_deepseek_v4_flash_cost(self):
    """Cost for 1000 input and 500 output tokens on deepseek-v4-flash.

    The computed cost must match both the reference helper and a
    hand-calculated literal.
    """
    model = "deepseek-v4-flash"
    input_tokens, output_tokens = 1000, 500

    charged = self._calc(model, input_tokens, output_tokens)
    reference = _expected_cost_opg(model, input_tokens, output_tokens)
    self.assertEqual(charged, reference)

    # 1000*0.00000014 + 500*0.00000028 = 0.00014 + 0.00014 = 0.00028 USD
    # NOTE(review): the literal below is that USD figure times 10**18 —
    # confirm the unit against _expected_cost_opg.
    self.assertEqual(charged, 280_000_000_000_000)

def test_deepseek_v4_pro_cost(self):
    """Cost for 1000 input and 500 output tokens on deepseek-v4-pro.

    The computed cost must match both the reference helper and a
    hand-calculated literal.
    """
    model = "deepseek-v4-pro"
    input_tokens, output_tokens = 1000, 500

    charged = self._calc(model, input_tokens, output_tokens)
    reference = _expected_cost_opg(model, input_tokens, output_tokens)
    self.assertEqual(charged, reference)

    # 1000*0.00000174 + 500*0.00000348 = 0.00174 + 0.00174 = 0.00348 USD
    # NOTE(review): the literal below is that USD figure times 10**18 —
    # confirm the unit against _expected_cost_opg.
    self.assertEqual(charged, 3_480_000_000_000_000)

# ── Haiku is cheaper than Sonnet ────────────────────────────────────────

def test_haiku_cheaper_than_sonnet(self):
Expand Down
Loading