Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ MIDDLE_MODEL="gpt-4o"
# Used for Claude sonnet requests
SMALL_MODEL="gpt-4o-mini"
# Used for Claude haiku requests
MODEL_ALIASES_FILE=./aliases.json
# Optional: JSON file of exact-match model aliases, applied before the BIG/MIDDLE/SMALL mapping

# Optional: Server settings
HOST="0.0.0.0"
Expand Down
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ The application automatically loads environment variables from a `.env` file in
- `BIG_MODEL` - Model for Claude opus requests (default: `gpt-4o`)
- `MIDDLE_MODEL` - Model for Claude sonnet requests (default: `gpt-4o`)
- `SMALL_MODEL` - Model for Claude haiku requests (default: `gpt-4o-mini`)
- `MODEL_ALIASES_FILE` - Optional path to a JSON file of exact-match aliases
(e.g. `{"kimi-k2.6_v1": "kimi-k2.6"}`). See [Model Aliases](#model-aliases-exact-match-overrides).

**API Configuration:**

Expand Down Expand Up @@ -164,6 +166,36 @@ The proxy maps Claude model requests to your configured models:
| Models with "sonnet" | `MIDDLE_MODEL`| Default: `BIG_MODEL` |
| Models with "opus" | `BIG_MODEL` | Default: `gpt-4o` |

### Model Aliases (exact-match overrides)

For any case the built-in BIG/MIDDLE/SMALL pattern cannot express (e.g. passing
`kimi-k2.6_v1` and wanting it routed to `kimi-k2.6`), define an aliases file and
point `MODEL_ALIASES_FILE` at it:

```bash
MODEL_ALIASES_FILE=./aliases.json
```

`aliases.json`:

```json
{
"kimi-k2.6_v1": "kimi-k2.6",
"my-fast": "gpt-4o-mini"
}
```

Rules:

- Exact, case-sensitive match on the incoming model name.
- Aliases run **before** all built-in rules — an alias can override prefix
passthrough and the haiku/sonnet/opus keyword mapping.
- No chaining: the target is used verbatim; it is not looked up again.
- A missing or malformed file is reported to stderr and treated as "no aliases";
the proxy still starts.

See `aliases.example.json` for a ready-to-copy sample.

### Provider Examples

#### OpenAI
Expand Down
5 changes: 5 additions & 0 deletions aliases.example.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"claude-opus-4-6-v1": "claude-opus-4-6-v1",
"my-fast": "gpt-4o-mini",
"claude-3-5-sonnet-latest": "gpt-4o"
}
5 changes: 5 additions & 0 deletions aliases.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"claude-opus-4-7": "claude-opus-4-7",
"claude-opus-4-6-v1": "claude-opus-4-6-v1",
"kimi-k2.6": "kimi-k2.6"
}
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ dependencies = [
"pydantic>=2.0.0",
"python-dotenv>=1.0.0",
"openai>=1.54.0",
"httpx[http2]>=0.25.0",
"orjson>=3.9.0",
]

[project.optional-dependencies]
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ uvicorn>=0.34.0
pydantic>=2.0.0
python-dotenv>=1.0.0
openai>=1.54.0
httpx[http2]>=0.25.0
orjson>=3.9.0
# Dev dependencies
pytest>=7.0.0
pytest-asyncio>=0.21.0
httpx>=0.25.0
black>=23.0.0
isort>=5.12.0
mypy>=1.0.0
8 changes: 7 additions & 1 deletion src/api/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
config.request_timeout,
api_version=config.azure_api_version,
custom_headers=custom_headers,
max_connections=config.max_connections,
max_keepalive_connections=config.max_keepalive_connections,
keepalive_expiry=config.keepalive_expiry,
)

async def validate_api_key(x_api_key: Optional[str] = Header(None), authorization: Optional[str] = Header(None)):
Expand Down Expand Up @@ -63,6 +66,10 @@ async def create_message(request: ClaudeMessagesRequest, http_request: Request,
# Convert Claude request to OpenAI format
openai_request = convert_claude_to_openai(request, model_manager)

logger.info(
f"[{request_id[:8]}] {request.model} -> {openai_request['model']} (stream={request.stream})"
)

# Check if client disconnected before processing
if await http_request.is_disconnected():
raise HTTPException(status_code=499, detail="Client disconnected")
Expand Down Expand Up @@ -219,7 +226,6 @@ async def root():
"status": "running",
"config": {
"openai_base_url": config.openai_base_url,
"max_tokens_limit": config.max_tokens_limit,
"api_key_configured": bool(config.openai_api_key),
"client_api_key_validation": bool(config.anthropic_api_key),
"big_model": config.big_model,
Expand Down
61 changes: 49 additions & 12 deletions src/conversion/request_converter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
import orjson
from typing import Dict, Any, List
from venv import logger
from src.core.constants import Constants
from src.models.claude import ClaudeMessagesRequest, ClaudeMessage
from src.core.config import config
Expand Down Expand Up @@ -77,15 +76,12 @@ def convert_claude_to_openai(
openai_request = {
"model": openai_model,
"messages": openai_messages,
"max_tokens": min(
max(claude_request.max_tokens, config.min_tokens_limit),
config.max_tokens_limit,
),
"max_tokens": claude_request.max_tokens,
"temperature": claude_request.temperature,
"stream": claude_request.stream,
}
logger.debug(
f"Converted Claude request to OpenAI format: {json.dumps(openai_request, indent=2, ensure_ascii=False)}"
f"Converted Claude request to OpenAI format: {orjson.dumps(openai_request, option=orjson.OPT_INDENT_2).decode('utf-8')}"
)
# Add optional parameters
if claude_request.stop_sequences:
Expand Down Expand Up @@ -117,7 +113,9 @@ def convert_claude_to_openai(
if choice_type == "auto":
openai_request["tool_choice"] = "auto"
elif choice_type == "any":
openai_request["tool_choice"] = "auto"
openai_request["tool_choice"] = "required"
elif choice_type == "none":
openai_request["tool_choice"] = "none"
elif choice_type == "tool" and "name" in claude_request.tool_choice:
openai_request["tool_choice"] = {
"type": Constants.TOOL_FUNCTION,
Expand All @@ -126,6 +124,35 @@ def convert_claude_to_openai(
else:
openai_request["tool_choice"] = "auto"

# Convert thinking config to reasoning_effort
thinking_enabled = False
if claude_request.thinking:
thinking_type = getattr(claude_request.thinking, "type", None)
if thinking_type == "enabled" or claude_request.thinking.enabled:
thinking_enabled = True
openai_request["reasoning_effort"] = "high"
# Upstream requires max_tokens > thinking.budget_tokens.
# When thinking is enabled, enforce a minimum of 16000 to avoid 400 errors.
_THINKING_MIN_TOKENS = 16000
if openai_request["max_tokens"] < _THINKING_MIN_TOKENS:
logger.info(
f"Raising max_tokens from {openai_request['max_tokens']} to {_THINKING_MIN_TOKENS} for thinking mode"
)
openai_request["max_tokens"] = _THINKING_MIN_TOKENS

# When thinking is enabled, some models (e.g. kimi) require reasoning_content
# on all assistant messages with tool_calls. Claude may omit thinking blocks
# in history (redacted, compacted, or simply not generated for simple tool calls).
# Fill empty reasoning_content as placeholder for those messages.
if thinking_enabled:
for msg in openai_messages:
if (
msg.get("role") == Constants.ROLE_ASSISTANT
and msg.get("tool_calls")
and "reasoning_content" not in msg
):
msg["reasoning_content"] = ""

return openai_request


Expand Down Expand Up @@ -169,10 +196,11 @@ def convert_claude_assistant_message(msg: ClaudeMessage) -> Dict[str, Any]:
"""Convert Claude assistant message to OpenAI format."""
text_parts = []
tool_calls = []
reasoning_parts = []

if msg.content is None:
return {"role": Constants.ROLE_ASSISTANT, "content": None}

if isinstance(msg.content, str):
return {"role": Constants.ROLE_ASSISTANT, "content": msg.content}

Expand All @@ -186,10 +214,15 @@ def convert_claude_assistant_message(msg: ClaudeMessage) -> Dict[str, Any]:
"type": Constants.TOOL_FUNCTION,
Constants.TOOL_FUNCTION: {
"name": block.name,
"arguments": json.dumps(block.input, ensure_ascii=False),
"arguments": orjson.dumps(block.input).decode("utf-8"),
},
}
)
elif block.type == Constants.CONTENT_THINKING:
reasoning_parts.append(block.thinking)
elif block.type == Constants.CONTENT_REDACTED_THINKING:
# Redacted thinking cannot be forwarded, skip
pass

openai_message = {"role": Constants.ROLE_ASSISTANT}

Expand All @@ -199,6 +232,10 @@ def convert_claude_assistant_message(msg: ClaudeMessage) -> Dict[str, Any]:
else:
openai_message["content"] = None

# Set reasoning_content from thinking blocks
if reasoning_parts:
openai_message["reasoning_content"] = "\n".join(reasoning_parts)

# Set tool calls
if tool_calls:
openai_message["tool_calls"] = tool_calls
Expand Down Expand Up @@ -245,7 +282,7 @@ def parse_tool_result_content(content):
result_parts.append(item.get("text", ""))
else:
try:
result_parts.append(json.dumps(item, ensure_ascii=False))
result_parts.append(orjson.dumps(item).decode("utf-8"))
except:
result_parts.append(str(item))
return "\n".join(result_parts).strip()
Expand All @@ -254,7 +291,7 @@ def parse_tool_result_content(content):
if content.get("type") == Constants.CONTENT_TEXT:
return content.get("text", "")
try:
return json.dumps(content, ensure_ascii=False)
return orjson.dumps(content).decode("utf-8")
except:
return str(content)

Expand Down
Loading