diff --git a/README.md b/README.md index 7c834c4..2f7e43d 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ This project comes packed with features designed for a robust and intelligent ag - [Contributing](#contributing) - [License](#license) - [References](#references) + - [LM Studio (Local) Usage](#lm-studio-local-usage) ## Quick Start @@ -247,3 +248,33 @@ This project is licensed under the [MIT License](LICENSE). --- **⭐ If this project helps you, please give it a Star!** +## LM Studio (Local) Usage + +You can run Mini Agent against an OpenAI-compatible local server like LM Studio. + +Steps: +- In LM Studio, start the local server (default base URL: `http://localhost:1234/v1`) and load your model. +- Update your config to use the OpenAI-compatible provider. + +Example config (same file as above): + +```yaml +provider: "openai-compatible" +api_key: "lm-studio" # LM Studio accepts any non-empty key +api_base: "http://localhost:1234/v1" # LM Studio local server default +model: "YOUR_LOADED_MODEL_NAME" # Must match the loaded model in LM Studio +``` + +Notes: +- Tool/function calling is supported via the OpenAI tools schema. +- Reasoning/Thinking with LM Studio: + - If you enable LM Studio → Settings → Developer → "When applicable, separate reasoning_content and content in API responses", + Mini Agent will display the model’s reasoning in the "🧠 Thinking" panel. + - If the model emits thoughts as `<think>...</think>` tags inside normal content, Mini Agent will extract and move those + to the "🧠 Thinking" panel automatically and show only the visible answer under "🤖 Assistant". + +Interleaved thinking (MiniMax vs LM Studio): +- MiniMax M2 (Anthropic-compatible): returns structured content blocks, including `{"type":"thinking","thinking":"..."}` and + tool use blocks. Mini Agent preserves and resubmits these blocks to support true interleaved thinking across steps. +- LM Studio (OpenAI-compatible): returns standard Chat Completions. Reasoning may be provided via `reasoning_content` (when the + setting is enabled) or inline `<think>...</think>` tags. Mini Agent supports both and renders them in the "🧠 Thinking" panel. 
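Before pointing Mini Agent at LM Studio, it can help to confirm the local server is reachable and that the configured model name matches a loaded model. The snippet below is an illustrative sketch, not part of the patch; it assumes LM Studio's OpenAI-compatible `GET /v1/models` and `POST /v1/chat/completions` routes at the default base URL shown in the config example above.

```python
# Minimal sanity check for an LM Studio local server (illustrative sketch).
# Assumes the defaults from the README config example; adjust BASE and the model id to your setup.
import httpx

BASE = "http://localhost:1234/v1"                # LM Studio local server default
HEADERS = {"Authorization": "Bearer lm-studio"}  # any non-empty key is accepted

# The `model` field in config.yaml must match one of the ids reported here.
models = httpx.get(f"{BASE}/models", headers=HEADERS, timeout=30.0).json()
print("loaded models:", [m["id"] for m in models.get("data", [])])

# One-shot chat completion to confirm the server answers before running Mini Agent.
reply = httpx.post(
    f"{BASE}/chat/completions",
    headers=HEADERS,
    json={
        "model": "YOUR_LOADED_MODEL_NAME",       # placeholder: replace with a loaded model id
        "messages": [{"role": "user", "content": "ping"}],
        "max_tokens": 64,
    },
    timeout=60.0,
)
print(reply.json()["choices"][0]["message"]["content"])
```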
diff --git a/mini_agent/agent.py b/mini_agent/agent.py index 6d4a946..c7d4828 100644 --- a/mini_agent/agent.py +++ b/mini_agent/agent.py @@ -1,6 +1,8 @@ """Core Agent implementation.""" import json +import re +import unicodedata from pathlib import Path import tiktoken @@ -267,12 +269,38 @@ async def run(self) -> str: # Check and summarize message history to prevent context overflow await self._summarize_messages() - # Step header - print(f"\n{Colors.DIM}╭{'─' * 58}╮{Colors.RESET}") - print( - f"{Colors.DIM}│{Colors.RESET} {Colors.BOLD}{Colors.BRIGHT_CYAN}💭 Step {step + 1}/{self.max_steps}{Colors.RESET}{' ' * (49 - len(f'Step {step + 1}/{self.max_steps}'))}{Colors.DIM}│{Colors.RESET}" - ) - print(f"{Colors.DIM}╰{'─' * 58}╯{Colors.RESET}") + # Step header (ANSI/emoji aware padding) + INNER_WIDTH = 58 + + ansi_re = re.compile(r"\x1b\[[0-9;]*m") + + def col_width(s: str) -> int: + s = ansi_re.sub("", s) + total = 0 + for ch in s: + if unicodedata.combining(ch): + continue + code = ord(ch) + if 0x1F300 <= code <= 0x1FAFF: + total += 2 + continue + eaw = unicodedata.east_asian_width(ch) + total += 2 if eaw in ("W", "F") else 1 + return total + + def box_line(text: str): + max_content = INNER_WIDTH - 1 + # Ellipsize overly long + if col_width(text) > max_content: + plain = ansi_re.sub("", text) + text = plain[: max_content - 1] + "…" if max_content > 1 else plain[:max_content] + pad = max(0, INNER_WIDTH - 1 - col_width(text)) + print(f"{Colors.DIM}│{Colors.RESET} {text}{' ' * pad}{Colors.DIM}│{Colors.RESET}") + + print(f"\n{Colors.DIM}╭{'─' * INNER_WIDTH}╮{Colors.RESET}") + header = f"{Colors.BOLD}{Colors.BRIGHT_CYAN}💭 Step {step + 1}/{self.max_steps}{Colors.RESET}" + box_line(header) + print(f"{Colors.DIM}╰{'─' * INNER_WIDTH}╯{Colors.RESET}") # Get tool schemas tool_schemas = [tool.to_schema() for tool in self.tools.values()] diff --git a/mini_agent/cli.py b/mini_agent/cli.py index 53322eb..2021771 100644 --- a/mini_agent/cli.py +++ b/mini_agent/cli.py @@ -11,6 +11,8 @@ import argparse import asyncio +import re +import unicodedata from datetime import datetime from pathlib import Path from typing import List @@ -68,14 +70,38 @@ class Colors: BG_BLUE = "\033[44m" +ansi_re = re.compile(r"\x1b\[[0-9;]*m") + + +def _col_width(s: str) -> int: + """Approximate terminal column width (ANSI-stripped, emoji/EAW-aware).""" + s = ansi_re.sub("", s) + total = 0 + for ch in s: + if unicodedata.combining(ch): + continue + code = ord(ch) + # Treat most emoji as width 2 + if 0x1F300 <= code <= 0x1FAFF: + total += 2 + continue + eaw = unicodedata.east_asian_width(ch) + total += 2 if eaw in ("W", "F") else 1 + return total + + def print_banner(): - """Print welcome banner""" + """Print welcome banner with robust centering.""" + INNER_WIDTH = 58 print() - print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}╔{'═' * 58}╗{Colors.RESET}") - print( - f"{Colors.BOLD}{Colors.BRIGHT_CYAN}║{Colors.RESET} {Colors.BOLD}🤖 Mini Agent - Multi-turn Interactive Session{Colors.RESET} {Colors.BOLD}{Colors.BRIGHT_CYAN}║{Colors.RESET}" - ) - print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}╚{'═' * 58}╝{Colors.RESET}") + print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}╔{'═' * INNER_WIDTH}╗{Colors.RESET}") + text = f"{Colors.BOLD}🤖 Mini Agent - Multi-turn Interactive Session{Colors.RESET}" + w = _col_width(text) + # Two spaces of left margin inside the box looks nice + left = max(0, (INNER_WIDTH - w) // 2) + right = max(0, INNER_WIDTH - w - left) + print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}║{Colors.RESET}{' ' * left}{text}{' ' * 
right}{Colors.BOLD}{Colors.BRIGHT_CYAN}║{Colors.RESET}") + print(f"{Colors.BOLD}{Colors.BRIGHT_CYAN}╚{'═' * INNER_WIDTH}╝{Colors.RESET}") print() @@ -108,29 +134,48 @@ def print_help(): def print_session_info(agent: Agent, workspace_dir: Path, model: str): - """Print session information""" - print(f"{Colors.DIM}┌{'─' * 58}┐{Colors.RESET}") - print( - f"{Colors.DIM}│{Colors.RESET} {Colors.BRIGHT_CYAN}Session Info{Colors.RESET} {Colors.DIM}│{Colors.RESET}" - ) - print(f"{Colors.DIM}├{'─' * 58}┤{Colors.RESET}") - print(f"{Colors.DIM}│{Colors.RESET} Model: {model}{' ' * max(0, 49 - len(str(model)))} {Colors.DIM}│{Colors.RESET}") - print( - f"{Colors.DIM}│{Colors.RESET} Workspace: {workspace_dir}{' ' * max(0, 45 - len(str(workspace_dir)))} {Colors.DIM}│{Colors.RESET}" - ) - msg_text = f"{len(agent.messages)} messages" - print( - f"{Colors.DIM}│{Colors.RESET} Message History: {msg_text}{' ' * max(0, 38 - len(msg_text))} {Colors.DIM}│{Colors.RESET}" - ) - tools_text = f"{len(agent.tools)} tools" - print( - f"{Colors.DIM}│{Colors.RESET} Available Tools: {tools_text}{' ' * max(0, 41 - len(tools_text))} {Colors.DIM}│{Colors.RESET}" - ) - print(f"{Colors.DIM}└{'─' * 58}┘{Colors.RESET}") + """Print session information with robust padding. + + Uses a fixed inner width and computes padding based on visible length + (ANSI color codes are ignored for width calculation). + """ + + INNER_WIDTH = 58 + + def box_line(text: str): + # Fit text into INNER_WIDTH with a leading single space and trailing padding + max_content = INNER_WIDTH - 1 # account for the single leading space + # Ellipsize if too long (no color inside these labels by default) + if _col_width(text) > max_content: + plain = ansi_re.sub("", text) + # Leave 1 char for ellipsis + if max_content > 1: + text = plain[: max_content - 1] + "…" + else: + text = plain[: max_content] + pad = max(0, INNER_WIDTH - 1 - _col_width(text)) + print(f"{Colors.DIM}│{Colors.RESET} {text}{' ' * pad}{Colors.DIM}│{Colors.RESET}") + + # Top border + print(f"{Colors.DIM}┌{'─' * INNER_WIDTH}┐{Colors.RESET}") + # Header centered + header = f"{Colors.BRIGHT_CYAN}Session Info{Colors.RESET}" + # Center by adding left padding inside text + free_space = INNER_WIDTH - _col_width(header) + left_pad = max(0, (free_space - 1) // 2) # subtract 1 for the fixed leading space + box_line(" " * left_pad + header) + # Divider + print(f"{Colors.DIM}├{'─' * INNER_WIDTH}┤{Colors.RESET}") + + box_line(f"Model: {model}") + box_line(f"Workspace: {workspace_dir}") + box_line(f"Message History: {len(agent.messages)} messages") + box_line(f"Available Tools: {len(agent.tools)} tools") + + # Bottom border and helper hint + print(f"{Colors.DIM}└{'─' * INNER_WIDTH}┘{Colors.RESET}") print() - print( - f"{Colors.DIM}Type {Colors.BRIGHT_GREEN}/help{Colors.DIM} for help, {Colors.BRIGHT_GREEN}/exit{Colors.DIM} to quit{Colors.RESET}" - ) + print(f"{Colors.DIM}Type {Colors.BRIGHT_GREEN}/help{Colors.DIM} for help, {Colors.BRIGHT_GREEN}/exit{Colors.DIM} to quit{Colors.RESET}") print() @@ -376,6 +421,7 @@ def on_retry(exception: Exception, attempt: int): api_key=config.llm.api_key, api_base=config.llm.api_base, model=config.llm.model, + provider=getattr(config.llm, "provider", "anthropic"), retry_config=retry_config if config.llm.retry.enabled else None, ) diff --git a/mini_agent/config.py b/mini_agent/config.py index b7e36f2..cdc9fcd 100644 --- a/mini_agent/config.py +++ b/mini_agent/config.py @@ -25,6 +25,10 @@ class LLMConfig(BaseModel): api_key: str api_base: str = "https://api.minimax.io/anthropic" model: 
str = "MiniMax-M2" + # Provider for the API. Supported values: + # - "anthropic" (default, MiniMax Anthropic-compatible endpoint) + # - "openai-compatible" (e.g., LM Studio local server or any OpenAI-compatible API) + provider: str = "anthropic" retry: RetryConfig = Field(default_factory=RetryConfig) @@ -106,6 +110,7 @@ def from_yaml(cls, config_path: str | Path) -> "Config": api_key=data["api_key"], api_base=data.get("api_base", "https://api.minimax.io/anthropic"), model=data.get("model", "MiniMax-M2"), + provider=data.get("provider", "anthropic"), retry=retry_config, ) diff --git a/mini_agent/config/config-example.yaml b/mini_agent/config/config-example.yaml index 074ac7e..7427296 100644 --- a/mini_agent/config/config-example.yaml +++ b/mini_agent/config/config-example.yaml @@ -11,7 +11,12 @@ # - All config files (config.yaml, mcp.json, system_prompt.md) are in the same directory # ===== LLM Configuration ===== -# MiniMax API Configuration +# Provider selection: +# - anthropic (default): MiniMax Anthropic-compatible API +# - openai-compatible: OpenAI-compatible servers (e.g., LM Studio local API) +provider: "anthropic" + +# MiniMax API Configuration (anthropic) # MiniMax provides both global and China platforms: # - Global: https://platform.minimax.io -> api_base: https://api.minimax.io/anthropic # - China: https://platform.minimaxi.com -> api_base: https://api.minimaxi.com/anthropic @@ -21,6 +26,13 @@ api_base: "https://api.minimax.io/anthropic" # Global users # api_base: "https://api.minimaxi.com/anthropic" # China users model: "MiniMax-M2" +# --- LM Studio (OpenAI-compatible) Example --- +# To use LM Studio, change these fields (uncomment and adjust): +# provider: "openai-compatible" +# api_key: "lm-studio" # LM Studio accepts any non-empty key +# api_base: "http://localhost:1234/v1" # LM Studio local server default +# model: "YOUR_LOADED_MODEL_NAME" # Must match the model loaded in LM Studio + # ===== Retry Configuration ===== retry: enabled: true # Enable retry mechanism diff --git a/mini_agent/llm.py b/mini_agent/llm.py index 80af28e..3211a28 100644 --- a/mini_agent/llm.py +++ b/mini_agent/llm.py @@ -1,9 +1,19 @@ -"""LLM client for MiniMax M2 via Anthropic-compatible API.""" +"""Unified LLM client supporting Anthropic-compatible (MiniMax) and +OpenAI-compatible (e.g., LM Studio) APIs. + +This client keeps the Mini-Agent internal schema stable while enabling +different backends via a simple `provider` switch. + +Providers: +- "anthropic" (default): MiniMax M2 via Anthropic-compatible endpoint +- "openai-compatible": e.g., LM Studio local server (http://localhost:1234/v1) +""" import logging from typing import Any import httpx +import json from .retry import RetryConfig as RetryConfigBase from .retry import async_retry @@ -13,29 +23,27 @@ class LLMClient: - """MiniMax M2 LLM Client via Anthropic-compatible endpoint. 
- - Supported models: - - MiniMax-M2 - """ + """LLM Client with pluggable providers (Anthropic or OpenAI-compatible).""" def __init__( self, api_key: str, api_base: str = "https://api.minimax.io/anthropic", model: str = "MiniMax-M2", + provider: str = "anthropic", retry_config: RetryConfigBase | None = None, ): self.api_key = api_key self.api_base = api_base self.model = model + self.provider = provider or "anthropic" self.retry_config = retry_config or RetryConfigBase() # Callback for tracking retry count self.retry_callback = None - async def _make_api_request(self, payload: dict[str, Any]) -> dict[str, Any]: - """Execute API request (core method that can be retried) + async def _make_api_request_anthropic(self, payload: dict[str, Any]) -> dict[str, Any]: + """Execute Anthropic-compatible API request (core method that can be retried) Args: payload: Request payload @@ -81,101 +89,128 @@ async def _make_api_request(self, payload: dict[str, Any]) -> dict[str, Any]: return result - async def generate( - self, - messages: list[Message], - tools: list[dict[str, Any]] | None = None, - ) -> LLMResponse: - """Generate response from LLM.""" - # Extract system message (Anthropic requires it separately) - system_message = None - api_messages = [] + async def _make_api_request_openai(self, payload: dict[str, Any]) -> dict[str, Any]: + """Execute OpenAI-compatible API request (e.g., LM Studio).""" + async with httpx.AsyncClient(timeout=120.0) as client: + response = await client.post( + f"{self.api_base}/chat/completions", + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + }, + json=payload, + ) + + # Try to provide rich error information on non-2xx + status = response.status_code + try: + result = response.json() + except Exception: + result = {"raw": response.text} + + if status >= 400: + # Extract error message from common OpenAI-compatible formats + err = None + if isinstance(result, dict): + if "error" in result: + err = result.get("error") + if isinstance(err, dict): + err = err.get("message") or err.get("type") + if not err and "message" in result: + err = result.get("message") + if not err: + err = response.text + raise Exception(f"OpenAI-compatible API Error {status}: {err}") + + return result + + def _convert_messages_to_openai(self, messages: list[Message]) -> list[dict[str, Any]]: + """Convert internal messages to OpenAI Chat Completions format.""" + oa_messages: list[dict[str, Any]] = [] for msg in messages: if msg.role == "system": - system_message = msg.content - continue - - # For user and assistant messages - if msg.role in ["user", "assistant"]: - # Handle assistant messages with thinking or tool calls - if msg.role == "assistant" and (msg.thinking or msg.tool_calls): - # Build content blocks for assistant with thinking and/or tool calls - content_blocks = [] - - # Add thinking block if present - if msg.thinking: - content_blocks.append({"type": "thinking", "thinking": msg.thinking}) - - # Add text content if present - if msg.content: - content_blocks.append({"type": "text", "text": msg.content}) - - # Add tool use blocks - if msg.tool_calls: - for tool_call in msg.tool_calls: - content_blocks.append( - { - "type": "tool_use", - "id": tool_call.id, - "name": tool_call.function.name, - "input": tool_call.function.arguments, - } - ) - - api_messages.append({"role": "assistant", "content": content_blocks}) - else: - api_messages.append({"role": msg.role, "content": msg.content}) - - # For tool result messages - elif msg.role == "tool": - # 
Anthropic uses user role with tool_result content blocks - api_messages.append( - { - "role": "user", - "content": [ + oa_messages.append({"role": "system", "content": msg.content}) + elif msg.role == "user": + oa_messages.append({"role": "user", "content": msg.content}) + elif msg.role == "assistant": + entry: dict[str, Any] = {"role": "assistant"} + # OpenAI format doesn't support separate "thinking" blocks; omit. + entry["content"] = msg.content or "" + + # Translate tool_calls if present + if msg.tool_calls: + tool_calls = [] + for tc in msg.tool_calls: + func_args = tc.function.arguments + # OpenAI expects function.arguments as a JSON string + if isinstance(func_args, str): + args_str = func_args + else: + args_str = json.dumps(func_args, ensure_ascii=False) + + tool_calls.append( { - "type": "tool_result", - "tool_use_id": msg.tool_call_id, - "content": msg.content, + "id": tc.id, + "type": tc.type, + "function": { + "name": tc.function.name, + "arguments": args_str, + }, } - ], - } - ) + ) + entry["tool_calls"] = tool_calls - # Build request payload - payload = { - "model": self.model, - "messages": api_messages, - "max_tokens": 16384, # Increased to handle longer outputs - } - - # Add system message if present - if system_message: - payload["system"] = system_message - - # Add tools if provided - if tools: - payload["tools"] = tools - - # Make API request with retry logic - if self.retry_config.enabled: - # Apply retry logic - retry_decorator = async_retry(config=self.retry_config, on_retry=self.retry_callback) - api_call = retry_decorator(self._make_api_request) - result = await api_call(payload) - else: - # Don't use retry - result = await self._make_api_request(payload) + oa_messages.append(entry) + elif msg.role == "tool": + # OpenAI uses role "tool" with tool_call_id and content + entry = { + "role": "tool", + "tool_call_id": msg.tool_call_id, + "content": msg.content, + } + # Name is optional in OpenAI schema for tool role + if msg.name: + entry["name"] = msg.name + oa_messages.append(entry) + + return oa_messages + + def _convert_tools_to_openai(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Convert Anthropic-style tool schemas to OpenAI tools schema. 
+ + Input (Anthropic-like): + {"name": str, "description": str, "input_schema": {...}} + + Output (OpenAI): + {"type": "function", "function": {"name": str, "description": str, "parameters": {...}}} + """ + converted: list[dict[str, Any]] = [] + for t in tools: + name = t.get("name") + desc = t.get("description") + params = t.get("input_schema") or {} + converted.append( + { + "type": "function", + "function": { + "name": name, + "description": desc, + "parameters": params, + }, + } + ) + return converted - # Parse Anthropic response format + def _parse_anthropic_response(self, result: dict[str, Any]) -> LLMResponse: + """Parse Anthropic-compatible response into internal schema.""" content_blocks = result.get("content", []) stop_reason = result.get("stop_reason", "stop") # Extract text content, thinking, and tool calls text_content = "" thinking_content = "" - tool_calls = [] + tool_calls: list[ToolCall] = [] for block in content_blocks: if block.get("type") == "text": @@ -183,7 +218,6 @@ async def generate( elif block.get("type") == "thinking": thinking_content += block.get("thinking", "") elif block.get("type") == "tool_use": - # Parse Anthropic tool_use block tool_calls.append( ToolCall( id=block.get("id"), @@ -201,3 +235,138 @@ async def generate( tool_calls=tool_calls if tool_calls else None, finish_reason=stop_reason, ) + + def _parse_openai_response(self, result: dict[str, Any]) -> LLMResponse: + """Parse OpenAI-compatible Chat Completions response.""" + choices = result.get("choices", []) + if not choices: + raise Exception("OpenAI-compatible response missing 'choices'") + + choice = choices[0] + msg = choice.get("message", {}) + finish_reason = choice.get("finish_reason", "stop") + + text_content = msg.get("content") or "" + tool_calls_raw = msg.get("tool_calls") or [] + tool_calls: list[ToolCall] = [] + for tc in tool_calls_raw: + func = tc.get("function", {}) + args_raw = func.get("arguments") + # Convert JSON string to dict if needed + try: + args_parsed = json.loads(args_raw) if isinstance(args_raw, str) else (args_raw or {}) + except Exception: + # If not valid JSON, pass-through as string under a reserved key + args_parsed = {"_raw": args_raw} + + tool_calls.append( + ToolCall( + id=tc.get("id"), + type=tc.get("type", "function"), + function=FunctionCall( + name=func.get("name"), + arguments=args_parsed, + ), + ) + ) + + return LLMResponse( + content=text_content, + thinking=None, # OpenAI-compatible APIs don't return structured "thinking" + tool_calls=tool_calls if tool_calls else None, + finish_reason=finish_reason, + ) + + async def generate( + self, + messages: list[Message], + tools: list[dict[str, Any]] | None = None, + ) -> LLMResponse: + """Generate response from LLM.""" + provider = (self.provider or "anthropic").lower() + + if provider == "anthropic": + # Extract system message (Anthropic requires it separately) + system_message = None + api_messages: list[dict[str, Any]] = [] + + for msg in messages: + if msg.role == "system": + system_message = msg.content + continue + + if msg.role in ["user", "assistant"]: + if msg.role == "assistant" and (msg.thinking or msg.tool_calls): + content_blocks = [] + if msg.thinking: + content_blocks.append({"type": "thinking", "thinking": msg.thinking}) + if msg.content: + content_blocks.append({"type": "text", "text": msg.content}) + if msg.tool_calls: + for tool_call in msg.tool_calls: + content_blocks.append( + { + "type": "tool_use", + "id": tool_call.id, + "name": tool_call.function.name, + "input": 
tool_call.function.arguments, + } + ) + api_messages.append({"role": "assistant", "content": content_blocks}) + else: + api_messages.append({"role": msg.role, "content": msg.content}) + elif msg.role == "tool": + api_messages.append( + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": msg.tool_call_id, + "content": msg.content, + } + ], + } + ) + + payload = { + "model": self.model, + "messages": api_messages, + "max_tokens": 16384, + } + + if system_message: + payload["system"] = system_message + if tools: + payload["tools"] = tools + + # Make API request with retry logic + if self.retry_config.enabled: + retry_decorator = async_retry(config=self.retry_config, on_retry=self.retry_callback) + api_call = retry_decorator(self._make_api_request_anthropic) + result = await api_call(payload) + else: + result = await self._make_api_request_anthropic(payload) + + return self._parse_anthropic_response(result) + + # OpenAI-compatible (e.g., LM Studio) + oa_messages = self._convert_messages_to_openai(messages) + payload = { + "model": self.model, + "messages": oa_messages, + # Keep a reasonable default for local models + "max_tokens": 2048, + } + if tools: + payload["tools"] = self._convert_tools_to_openai(tools) + payload["tool_choice"] = "auto" + + if self.retry_config.enabled: + retry_decorator = async_retry(config=self.retry_config, on_retry=self.retry_callback) + api_call = retry_decorator(self._make_api_request_openai) + result = await api_call(payload) + else: + result = await self._make_api_request_openai(payload) + + return self._parse_openai_response(result) diff --git a/mini_agent/llm/__init__.py b/mini_agent/llm/__init__.py new file mode 100644 index 0000000..ca7bb65 --- /dev/null +++ b/mini_agent/llm/__init__.py @@ -0,0 +1,45 @@ +"""Provider factory for LLMs and backwards-compatible client wrapper. + +To address maintainability, providers are implemented separately under +mini_agent.llm.providers.* and this module exposes a thin LLMClient that +constructs the right provider based on configuration. 
+""" + +from typing import Any + +from ..retry import RetryConfig as RetryConfigBase +from ..schema import LLMResponse, Message + + +class LLMClient: + """Backwards-compatible facade that dispatches to a provider instance.""" + + def __init__( + self, + api_key: str, + api_base: str = "https://api.minimax.io/anthropic", + model: str = "MiniMax-M2", + provider: str = "anthropic", + retry_config: RetryConfigBase | None = None, + ): + self.provider_name = (provider or "anthropic").lower() + self.retry_config = retry_config + + # Lazy import to avoid circulars + if self.provider_name == "anthropic": + from .providers.anthropic import AnthropicLLM + + self._impl = AnthropicLLM(api_key, api_base, model, retry_config) + else: + from .providers.openai_compat import OpenAILLM + + self._impl = OpenAILLM(api_key, api_base, model, retry_config) + + # Expose a retry callback passthrough for CLI printing + self.retry_callback = None + if hasattr(self._impl, "set_retry_callback"): + self._impl.set_retry_callback(lambda exc, attempt: self.retry_callback and self.retry_callback(exc, attempt)) + + async def generate(self, messages: list[Message], tools: list[dict[str, Any]] | None = None) -> LLMResponse: + return await self._impl.generate(messages, tools) + diff --git a/mini_agent/llm/providers/__init__.py b/mini_agent/llm/providers/__init__.py new file mode 100644 index 0000000..395ee42 --- /dev/null +++ b/mini_agent/llm/providers/__init__.py @@ -0,0 +1,7 @@ +"""Provider implementations for LLM backends.""" + +__all__ = [ + "AnthropicLLM", + "OpenAILLM", +] + diff --git a/mini_agent/llm/providers/anthropic.py b/mini_agent/llm/providers/anthropic.py new file mode 100644 index 0000000..f5ba7ef --- /dev/null +++ b/mini_agent/llm/providers/anthropic.py @@ -0,0 +1,139 @@ +"""Anthropic-compatible (MiniMax M2) provider implementation.""" + +from __future__ import annotations + +from typing import Any + +import httpx + +from ...retry import async_retry, RetryConfig as RetryConfigBase +from ...schema import FunctionCall, LLMResponse, Message, ToolCall +from .base import BaseLLM + + +class AnthropicLLM(BaseLLM): + async def _post(self, payload: dict[str, Any]) -> dict[str, Any]: + async with httpx.AsyncClient(timeout=120.0) as client: + response = await client.post( + f"{self.api_base}/v1/messages", + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + "anthropic-version": "2023-06-01", + }, + json=payload, + ) + return response.json() + + def _parse_response(self, result: dict[str, Any]) -> LLMResponse: + # Check for errors (Anthropic format) + if result.get("type") == "error": + error_info = result.get("error", {}) + error_msg = f"API Error ({error_info.get('type')}): {error_info.get('message')}" + raise Exception(error_msg) + + # Check for MiniMax base_resp errors + if "base_resp" in result: + base_resp = result["base_resp"] + status_code = base_resp.get("status_code") + status_msg = base_resp.get("status_msg") + if status_code not in [0, 1000, None]: + error_msg = f"MiniMax API Error (code {status_code}): {status_msg}" + if status_code == 1008: + error_msg += "\n\n⚠️ Insufficient account balance, please recharge on MiniMax platform" + elif status_code == 2013: + error_msg += f"\n\n⚠️ Model '{self.model}' is not supported" + raise Exception(error_msg) + + content_blocks = result.get("content", []) + stop_reason = result.get("stop_reason", "stop") + + text_content = "" + thinking_content = "" + tool_calls: list[ToolCall] = [] + for block in content_blocks: + if 
block.get("type") == "text": + text_content += block.get("text", "") + elif block.get("type") == "thinking": + thinking_content += block.get("thinking", "") + elif block.get("type") == "tool_use": + tool_calls.append( + ToolCall( + id=block.get("id"), + type="function", + function=FunctionCall( + name=block.get("name"), + arguments=block.get("input", {}), + ), + ) + ) + + return LLMResponse( + content=text_content, + thinking=thinking_content if thinking_content else None, + tool_calls=tool_calls if tool_calls else None, + finish_reason=stop_reason, + ) + + async def generate(self, messages: list[Message], tools: list[dict[str, Any]] | None = None) -> LLMResponse: + # Extract system message separately + system_message = None + api_messages: list[dict[str, Any]] = [] + for msg in messages: + if msg.role == "system": + system_message = msg.content + continue + if msg.role in ["user", "assistant"]: + if msg.role == "assistant" and (msg.thinking or msg.tool_calls): + content_blocks = [] + if msg.thinking: + content_blocks.append({"type": "thinking", "thinking": msg.thinking}) + if msg.content: + content_blocks.append({"type": "text", "text": msg.content}) + if msg.tool_calls: + for tool_call in msg.tool_calls: + content_blocks.append( + { + "type": "tool_use", + "id": tool_call.id, + "name": tool_call.function.name, + "input": tool_call.function.arguments, + } + ) + api_messages.append({"role": "assistant", "content": content_blocks}) + else: + api_messages.append({"role": msg.role, "content": msg.content}) + elif msg.role == "tool": + api_messages.append( + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": msg.tool_call_id, + "content": msg.content, + } + ], + } + ) + + payload = { + "model": self.model, + "messages": api_messages, + "max_tokens": 16384, + } + if system_message: + payload["system"] = system_message + if tools: + payload["tools"] = tools + + # Retry wrapper + if self.retry_config and self.retry_config.enabled: + retry_decorator = async_retry(config=self.retry_config, on_retry=self._retry_callback) + api_call = retry_decorator(self._post) + result = await api_call(payload) + else: + result = await self._post(payload) + + return self._parse_response(result) + diff --git a/mini_agent/llm/providers/base.py b/mini_agent/llm/providers/base.py new file mode 100644 index 0000000..9241cdb --- /dev/null +++ b/mini_agent/llm/providers/base.py @@ -0,0 +1,32 @@ +"""Abstract base class for LLM providers (Strategy pattern).""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any + +from ...retry import RetryConfig as RetryConfigBase +from ...schema import LLMResponse, Message + + +class BaseLLM(ABC): + def __init__( + self, + api_key: str, + api_base: str, + model: str, + retry_config: RetryConfigBase | None = None, + ) -> None: + self.api_key = api_key + self.api_base = api_base + self.model = model + self.retry_config = retry_config + self._retry_callback = None + + def set_retry_callback(self, cb): + self._retry_callback = cb + + @abstractmethod + async def generate(self, messages: list[Message], tools: list[dict[str, Any]] | None = None) -> LLMResponse: + raise NotImplementedError + diff --git a/mini_agent/llm/providers/openai_compat.py b/mini_agent/llm/providers/openai_compat.py new file mode 100644 index 0000000..d11f467 --- /dev/null +++ b/mini_agent/llm/providers/openai_compat.py @@ -0,0 +1,175 @@ +"""OpenAI-compatible provider (LM Studio, etc.).""" + +from __future__ import annotations + +import json 
+import re +from typing import Any + +import httpx + +from ...retry import async_retry, RetryConfig as RetryConfigBase +from ...schema import FunctionCall, LLMResponse, Message, ToolCall +from .base import BaseLLM + + +class OpenAILLM(BaseLLM): + async def _post(self, payload: dict[str, Any]) -> dict[str, Any]: + async with httpx.AsyncClient(timeout=120.0) as client: + response = await client.post( + f"{self.api_base}/chat/completions", + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + }, + json=payload, + ) + + status = response.status_code + try: + result = response.json() + except Exception: + result = {"raw": response.text} + + if status >= 400: + err = None + if isinstance(result, dict): + if "error" in result: + err = result.get("error") + if isinstance(err, dict): + err = err.get("message") or err.get("type") + if not err and "message" in result: + err = result.get("message") + if not err: + err = response.text + raise Exception(f"OpenAI-compatible API Error {status}: {err}") + + return result + + def _convert_messages(self, messages: list[Message]) -> list[dict[str, Any]]: + oa_messages: list[dict[str, Any]] = [] + for msg in messages: + if msg.role == "system": + oa_messages.append({"role": "system", "content": msg.content}) + elif msg.role == "user": + oa_messages.append({"role": "user", "content": msg.content}) + elif msg.role == "assistant": + entry: dict[str, Any] = {"role": "assistant"} + entry["content"] = msg.content or "" + if msg.tool_calls: + tool_calls = [] + for tc in msg.tool_calls: + func_args = tc.function.arguments + if isinstance(func_args, str): + args_str = func_args + else: + args_str = json.dumps(func_args, ensure_ascii=False) + tool_calls.append( + { + "id": tc.id, + "type": tc.type, + "function": { + "name": tc.function.name, + "arguments": args_str, + }, + } + ) + entry["tool_calls"] = tool_calls + oa_messages.append(entry) + elif msg.role == "tool": + entry = { + "role": "tool", + "tool_call_id": msg.tool_call_id, + "content": msg.content, + } + if msg.name: + entry["name"] = msg.name + oa_messages.append(entry) + return oa_messages + + def _convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]: + converted: list[dict[str, Any]] = [] + for t in tools: + name = t.get("name") + desc = t.get("description") + params = t.get("input_schema") or {} + converted.append( + { + "type": "function", + "function": { + "name": name, + "description": desc, + "parameters": params, + }, + } + ) + return converted + + def _parse_response(self, result: dict[str, Any]) -> LLMResponse: + choices = result.get("choices", []) + if not choices: + raise Exception("OpenAI-compatible response missing 'choices'") + + choice = choices[0] + msg = choice.get("message", {}) + finish_reason = choice.get("finish_reason", "stop") + + text_content = msg.get("content") or "" + reasoning_content = msg.get("reasoning_content") + + tool_calls_raw = msg.get("tool_calls") or [] + tool_calls: list[ToolCall] = [] + for tc in tool_calls_raw: + func = tc.get("function", {}) + args_raw = func.get("arguments") + try: + args_parsed = json.loads(args_raw) if isinstance(args_raw, str) else (args_raw or {}) + except Exception: + args_parsed = {"_raw": args_raw} + + tool_calls.append( + ToolCall( + id=tc.get("id"), + type=tc.get("type", "function"), + function=FunctionCall( + name=func.get("name"), + arguments=args_parsed, + ), + ) + ) + + extracted_thinking = None + if not reasoning_content and text_content: + pattern = r"<think>([\s\S]*?)</think>" + match = re.search(pattern, text_content, re.IGNORECASE) + if match: + extracted_thinking = match.group(1).strip() + text_content = re.sub(pattern, "", text_content, flags=re.IGNORECASE).strip() + + return LLMResponse( + content=text_content, + thinking=(reasoning_content or extracted_thinking), + tool_calls=tool_calls if tool_calls else None, + finish_reason=finish_reason, + ) + + async def generate(self, messages: list[Message], tools: list[dict[str, Any]] | None = None) -> LLMResponse: + oa_messages = self._convert_messages(messages) + payload: dict[str, Any] = { + "model": self.model, + "messages": oa_messages, + "max_tokens": 2048, + } + if tools: + payload["tools"] = self._convert_tools(tools) + payload["tool_choice"] = "auto" + + if self.retry_config and self.retry_config.enabled: + retry_decorator = async_retry(config=self.retry_config, on_retry=self._retry_callback) + api_call = retry_decorator(self._post) + result = await api_call(payload) + else: + result = await self._post(payload) + + return self._parse_response(result) + diff --git a/tests/test_openai_provider.py b/tests/test_openai_provider.py new file mode 100644 index 0000000..b687f88 --- /dev/null +++ b/tests/test_openai_provider.py @@ -0,0 +1,96 @@ +import pytest + +from mini_agent.llm.providers.openai_compat import OpenAILLM +from mini_agent.schema import Message, ToolCall, FunctionCall + + +def make_openai_llm(): + # Dummy credentials/base; will not be used for network in these tests + return OpenAILLM(api_key="test", api_base="http://localhost:1234/v1", model="test-model", retry_config=None) + + +def test_tool_schema_conversion(): + llm = make_openai_llm() + tools = [ + { + "name": "calculator", + "description": "calc", + "input_schema": { + "type": "object", + "properties": {"a": {"type": "number"}}, + }, + } + ] + converted = llm._convert_tools(tools) + assert converted and converted[0]["type"] == "function" + assert converted[0]["function"]["name"] == "calculator" + + +def test_reasoning_from_reasoning_content(): + llm = make_openai_llm() + result = { + "choices": [ + { + "finish_reason": "stop", + "message": { + "content": "final answer", + "reasoning_content": "hidden thoughts", + "tool_calls": [], + }, + } + ] + } + resp = llm._parse_response(result) + assert resp.content == "final answer" + assert resp.thinking == "hidden thoughts" + + +def test_reasoning_from_think_tags(): + llm = make_openai_llm() + result = { + "choices": [ + { + "finish_reason": "stop", + "message": {"content": "<think>t1</think> visible", "tool_calls": []}, + } + ] + } + resp = llm._parse_response(result) + assert resp.content == "visible" + assert resp.thinking == "t1" + + +def test_tool_calls_parse(): + llm = make_openai_llm() + result = { + "choices": [ + { + "finish_reason": "tool_calls", + "message": { + "content": None, + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": {"name": "calc", "arguments": "{\"a\":1}"}, + } + ], + }, + } + ] + } + resp = llm._parse_response(result) + assert resp.tool_calls and resp.tool_calls[0].function.name == "calc" + assert resp.tool_calls[0].function.arguments == {"a": 1} + + +def test_message_conversion_basic(): + llm = make_openai_llm() + msgs = [ + Message(role="system", content="sys"), + Message(role="user", content="u1"), + Message(role="assistant", content="a1"), + ] + converted = llm._convert_messages(msgs) + assert [m["role"] for m in converted] == ["system", "user", "assistant"] +
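For completeness, a usage sketch of the provider switch introduced in this diff. This is illustrative only and rests on assumptions: the `mini_agent` package from this branch is importable, an LM Studio server is running at the default URL, and `YOUR_LOADED_MODEL_NAME` is a placeholder for whatever model is actually loaded.

```python
# Usage sketch (assumptions: mini_agent from this branch is importable,
# LM Studio is serving at the default base URL, model name is a placeholder).
import asyncio

from mini_agent.llm import LLMClient
from mini_agent.schema import Message


async def main() -> None:
    client = LLMClient(
        api_key="lm-studio",                  # LM Studio accepts any non-empty key
        api_base="http://localhost:1234/v1",  # LM Studio local server default
        model="YOUR_LOADED_MODEL_NAME",       # must match the model loaded in LM Studio
        provider="openai-compatible",         # routes to the OpenAI-compatible provider
    )
    resp = await client.generate([Message(role="user", content="Say hello in one sentence.")])
    if resp.thinking:  # populated from reasoning_content or extracted <think> tags
        print("🧠 Thinking:", resp.thinking)
    print("🤖 Assistant:", resp.content)


asyncio.run(main())
```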