From 51f8f662b93a527f7cd1c7e3827eae92cd90ae88 Mon Sep 17 00:00:00 2001 From: Albertchamberlain <47343901+Albertchamberlain@users.noreply.github.com> Date: Thu, 18 Jun 2026 14:37:40 +0800 Subject: [PATCH] Add configurable LLM provider layer Introduce a provider adapter so FsExplorer can use Google Gemini or OpenAI-compatible APIs (SiliconFlow, OpenAI) via FS_EXPLORER_LLM_PROVIDER and related env vars. Google remains the default with native JSON schema output. OpenAI-compatible providers use response_format=json_object plus flexible action parsing for imperfect model output. Includes PROVIDERS.md, updated tests, and optional live integration test gated on SILICONFLOW_API_KEY. --- .env.example | 21 ++- PROVIDERS.md | 64 +++++++++ README.md | 17 ++- pyproject.toml | 1 + src/fs_explorer/agent.py | 202 ++++++++++++++++----------- src/fs_explorer/llm/__init__.py | 14 ++ src/fs_explorer/llm/action_parser.py | 109 +++++++++++++++ src/fs_explorer/llm/base.py | 42 ++++++ src/fs_explorer/llm/config.py | 87 ++++++++++++ src/fs_explorer/llm/factory.py | 41 ++++++ src/fs_explorer/llm/google_client.py | 63 +++++++++ src/fs_explorer/llm/openai_client.py | 75 ++++++++++ src/fs_explorer/llm/prompts.py | 28 ++++ src/fs_explorer/ui.html | 2 +- tests/conftest.py | 83 ++++------- tests/test_agent.py | 113 ++++++++------- tests/test_llm_config.py | 51 +++++++ tests/test_llm_integration.py | 29 ++++ 18 files changed, 842 insertions(+), 200 deletions(-) create mode 100644 PROVIDERS.md create mode 100644 src/fs_explorer/llm/__init__.py create mode 100644 src/fs_explorer/llm/action_parser.py create mode 100644 src/fs_explorer/llm/base.py create mode 100644 src/fs_explorer/llm/config.py create mode 100644 src/fs_explorer/llm/factory.py create mode 100644 src/fs_explorer/llm/google_client.py create mode 100644 src/fs_explorer/llm/openai_client.py create mode 100644 src/fs_explorer/llm/prompts.py create mode 100644 tests/test_llm_config.py create mode 100644 tests/test_llm_integration.py diff 
--git a/.env.example b/.env.example index a68315a..9740d4c 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,23 @@ -# Google Gemini API Key +# LLM provider selection: google | siliconflow | openai +FS_EXPLORER_LLM_PROVIDER=google + +# Optional model override (provider-specific default if unset) +# FS_EXPLORER_LLM_MODEL=gemini-3-flash-preview +# FS_EXPLORER_LLM_MODEL=Qwen/Qwen2.5-72B-Instruct + +# Optional base URL override for OpenAI-compatible providers +# FS_EXPLORER_LLM_BASE_URL=https://api.siliconflow.cn/v1 + +# --- Google Gemini --- # Get yours at: https://aistudio.google.com/apikey -GOOGLE_API_KEY=your_api_key_here +GOOGLE_API_KEY=your_google_api_key_here + +# --- SiliconFlow (OpenAI-compatible) --- +# Get yours at: https://cloud.siliconflow.cn/account/ak +SILICONFLOW_API_KEY=your_siliconflow_api_key_here + +# --- OpenAI (optional) --- +# OPENAI_API_KEY=your_openai_api_key_here # Optional: dedicated key for langextract metadata mode. # If unset, indexing will fall back to GOOGLE_API_KEY. diff --git a/PROVIDERS.md b/PROVIDERS.md new file mode 100644 index 0000000..b8448b3 --- /dev/null +++ b/PROVIDERS.md @@ -0,0 +1,64 @@ +# LLM Provider Configuration + +FsExplorer supports multiple LLM backends through a small provider adapter layer. 
+ +## Quick Start + +### Google Gemini (default) + +```bash +export FS_EXPLORER_LLM_PROVIDER=google +export GOOGLE_API_KEY=your_google_api_key +``` + +### SiliconFlow (OpenAI-compatible) + +```bash +export FS_EXPLORER_LLM_PROVIDER=siliconflow +export SILICONFLOW_API_KEY=your_siliconflow_api_key +# Optional overrides +export FS_EXPLORER_LLM_MODEL=Qwen/Qwen2.5-72B-Instruct +export FS_EXPLORER_LLM_BASE_URL=https://api.siliconflow.cn/v1 +# International endpoint: https://api.siliconflow.com/v1 +``` + +Get a SiliconFlow API key at https://cloud.siliconflow.cn/account/ak + +### OpenAI + +```bash +export FS_EXPLORER_LLM_PROVIDER=openai +export OPENAI_API_KEY=your_openai_api_key +export FS_EXPLORER_LLM_MODEL=gpt-4o-mini +export FS_EXPLORER_LLM_BASE_URL=https://api.openai.com/v1 +``` + +## Environment Variables + +| Variable | Description | +|----------|-------------| +| `FS_EXPLORER_LLM_PROVIDER` | `google`, `siliconflow`, or `openai` (default: `google`) | +| `FS_EXPLORER_LLM_MODEL` | Model id override | +| `FS_EXPLORER_LLM_BASE_URL` | Base URL for OpenAI-compatible providers | +| `GOOGLE_API_KEY` | Google Gemini API key | +| `SILICONFLOW_API_KEY` | SiliconFlow API key | +| `OPENAI_API_KEY` | OpenAI API key | + +## Architecture + +``` +FsExplorerAgent + -> llm.create_llm_client() + -> GoogleGeminiClient (native JSON schema) + -> OpenAICompatibleClient (SiliconFlow, OpenAI, ...) +``` + +Google Gemini uses native structured JSON output. OpenAI-compatible providers use `response_format=json_object` plus the Action JSON schema embedded in the system prompt, with flexible parsing for imperfect model output. + +## Embeddings / Indexing + +Vector indexing (`explore index --with-embeddings`) still uses Google Gemini embeddings by default via `GOOGLE_API_KEY`. Chat provider selection is independent of embedding configuration. + +## Security + +Never commit `.env` or real API keys. Use `.env.example` as a template only. 
diff --git a/README.md b/README.md index a04f857..af0aa3f 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ This video explains the architecture of the project and how to run it. - 🔍 **6 Tools**: `scan_folder`, `preview_file`, `parse_file`, `read`, `grep`, `glob` - 📄 **Document Support**: PDF, DOCX, PPTX, XLSX, HTML, Markdown (via Docling) -- 🤖 **Powered by**: Google Gemini 3 Flash with structured JSON output +- 🤖 **Multi-LLM**: Google Gemini, SiliconFlow, OpenAI-compatible APIs - 💰 **Cost Efficient**: ~$0.001 per query with token tracking - 🌐 **Web UI**: Real-time WebSocket streaming interface - 📊 **Citations**: Answers include source references @@ -45,13 +45,19 @@ pip install . ## Configuration -Create a `.env` file in the project root: +Create a `.env` file in the project root (see `.env.example`): ```bash +# Google Gemini (default) +FS_EXPLORER_LLM_PROVIDER=google GOOGLE_API_KEY=your_api_key_here + +# SiliconFlow (OpenAI-compatible) +# FS_EXPLORER_LLM_PROVIDER=siliconflow +# SILICONFLOW_API_KEY=your_api_key_here ``` -Get your API key from [Google AI Studio](https://aistudio.google.com/apikey). +See [PROVIDERS.md](PROVIDERS.md) for all supported backends and model overrides. ## Usage @@ -125,7 +131,7 @@ uv run explore --task "Look in data/large_acquisition/. What happens to employee | Component | Technology | |-----------|------------| -| LLM | Google Gemini 3 Flash | +| LLM | Google Gemini / SiliconFlow / OpenAI-compatible | | Document Parsing | Docling (local, open-source) | | Orchestration | LlamaIndex Workflows | | CLI | Typer + Rich | @@ -136,7 +142,8 @@ uv run explore --task "Look in data/large_acquisition/. 
What happens to employee ``` src/fs_explorer/ -├── agent.py # Gemini client, token tracking +├── agent.py # Agent + tool registry +├── llm/ # Multi-provider LLM adapters ├── workflow.py # LlamaIndex workflow engine ├── fs.py # File tools: scan, parse, grep ├── models.py # Pydantic models for actions diff --git a/pyproject.toml b/pyproject.toml index 9dea965..8488dd4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ dependencies = [ "duckdb>=1.0.0", "fastapi>=0.115.0", "google-genai>=1.55.0", + "openai>=1.60.0", "langextract>=1.0.0", "llama-index-workflows>=2.11.5", "python-dotenv>=1.0.0", diff --git a/src/fs_explorer/agent.py b/src/fs_explorer/agent.py index ce0e25a..850d68a 100644 --- a/src/fs_explorer/agent.py +++ b/src/fs_explorer/agent.py @@ -1,8 +1,7 @@ """ -FsExplorer Agent for filesystem exploration using Google Gemini. +FsExplorer Agent for filesystem exploration using configurable LLM providers. -This module contains the agent that interacts with the Gemini AI model -to make decisions about filesystem exploration actions. +Supports Google Gemini, SiliconFlow, and other OpenAI-compatible APIs. """ import os @@ -12,9 +11,9 @@ from dataclasses import dataclass from dotenv import load_dotenv -from google.genai.types import Content, HttpOptions, Part -from google.genai import Client as GenAIClient +from .llm import ChatMessage, LLMClient, create_llm_client, load_llm_config +from .llm.action_parser import parse_action_json from .models import Action, ActionType, ToolCallAction, Tools from .fs import ( read_file, @@ -43,9 +42,12 @@ # Token Usage Tracking # ============================================================================= -# Gemini Flash pricing (per million tokens) -GEMINI_FLASH_INPUT_COST_PER_MILLION = 0.075 -GEMINI_FLASH_OUTPUT_COST_PER_MILLION = 0.30 +# Reference pricing (USD per million tokens) for optional cost estimates. 
+_PROVIDER_PRICING: dict[str, tuple[float, float]] = { + "google": (0.075, 0.30), + "siliconflow": (0.59, 0.59), + "openai": (0.15, 0.60), +} @dataclass @@ -53,26 +55,39 @@ class TokenUsage: """ Track token usage and costs across the session. - Maintains running totals of API calls, token counts, and provides - cost estimates based on Gemini Flash pricing. + Maintains running totals of API calls and token counts. Cost estimates + are best-effort and depend on the active provider. """ prompt_tokens: int = 0 completion_tokens: int = 0 total_tokens: int = 0 api_calls: int = 0 + provider_name: str = "google" + model_name: str = "" # Track content sizes tool_result_chars: int = 0 documents_parsed: int = 0 documents_scanned: int = 0 - def add_api_call(self, prompt_tokens: int, completion_tokens: int) -> None: + def add_api_call( + self, + prompt_tokens: int, + completion_tokens: int, + *, + provider_name: str | None = None, + model_name: str | None = None, + ) -> None: """Record token usage from an API call.""" self.prompt_tokens += prompt_tokens self.completion_tokens += completion_tokens self.total_tokens += prompt_tokens + completion_tokens self.api_calls += 1 + if provider_name: + self.provider_name = provider_name + if model_name: + self.model_name = model_name def add_tool_result(self, result: str, tool_name: str) -> None: """Record metrics from a tool execution.""" @@ -85,24 +100,36 @@ def add_tool_result(self, result: str, tool_name: str) -> None: elif tool_name == "preview_file": self.documents_parsed += 1 - def _calculate_cost(self) -> tuple[float, float, float]: - """Calculate estimated costs based on Gemini Flash pricing.""" - input_cost = ( - self.prompt_tokens / 1_000_000 - ) * GEMINI_FLASH_INPUT_COST_PER_MILLION - output_cost = ( - self.completion_tokens / 1_000_000 - ) * GEMINI_FLASH_OUTPUT_COST_PER_MILLION + def _calculate_cost(self) -> tuple[float, float, float] | None: + """Calculate estimated costs when provider pricing is known.""" + pricing = 
_PROVIDER_PRICING.get(self.provider_name) + if pricing is None: + return None + input_rate, output_rate = pricing + input_cost = (self.prompt_tokens / 1_000_000) * input_rate + output_cost = (self.completion_tokens / 1_000_000) * output_rate return input_cost, output_cost, input_cost + output_cost def summary(self) -> str: """Generate a formatted summary of token usage and costs.""" - input_cost, output_cost, total_cost = self._calculate_cost() + costs = self._calculate_cost() + cost_block = "" + if costs is not None: + input_cost, output_cost, total_cost = costs + cost_block = ( + f"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n" + f" Est. Cost ({self.provider_name} / {self.model_name or 'default'}):\n" + f" Input: ${input_cost:.4f}\n" + f" Output: ${output_cost:.4f}\n" + f" Total: ${total_cost:.4f}\n" + ) return f""" โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• TOKEN USAGE SUMMARY โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• + Provider: {self.provider_name} + Model: {self.model_name or "n/a"} API Calls: {self.api_calls} Prompt Tokens: {self.prompt_tokens:,} Completion Tokens: {self.completion_tokens:,} @@ -111,12 +138,7 @@ def summary(self) -> str: Documents Scanned: {self.documents_scanned} Documents Parsed: {self.documents_parsed} Tool Result Chars: {self.tool_result_chars:,} -โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ - Est. 
Cost (Gemini Flash): - Input: ${input_cost:.4f} - Output: ${output_cost:.4f} - Total: ${total_cost:.4f} -โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• +{cost_block}โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• """ @@ -540,7 +562,7 @@ def _build_system_prompt(enable_semantic: bool, enable_metadata: bool) -> str: class FsExplorerAgent: """ - AI agent for exploring filesystems using Google Gemini. + AI agent for exploring filesystems using a configurable LLM provider. The agent maintains a conversation history with the LLM and uses structured JSON output to make decisions about which actions to take. @@ -549,31 +571,42 @@ class FsExplorerAgent: token_usage: Tracks API call statistics and costs. """ - def __init__(self, api_key: str | None = None) -> None: + def __init__( + self, + api_key: str | None = None, + *, + llm_client: LLMClient | None = None, + ) -> None: """ - Initialize the agent with Google API credentials. + Initialize the agent with provider credentials from env or args. Args: - api_key: Google API key. If not provided, reads from - GOOGLE_API_KEY environment variable. + api_key: Optional API key override for the active provider. + llm_client: Optional pre-built LLM client (used in tests). Raises: - ValueError: If no API key is available. + ValueError: If provider configuration or API key is missing. """ - if api_key is None: - api_key = os.getenv("GOOGLE_API_KEY") - if api_key is None: - raise ValueError( - "GOOGLE_API_KEY not found within the current environment: " - "please export it or provide it to the class constructor." 
- ) - - self._client = GenAIClient( - api_key=api_key, - http_options=HttpOptions(api_version="v1beta"), + if llm_client is not None: + self._llm = llm_client + else: + self._llm = create_llm_client(api_key=api_key) + + config = ( + load_llm_config(api_key=api_key) + if llm_client is None + else None + ) + self._chat_history: list[ChatMessage] = [] + self.token_usage = TokenUsage( + provider_name=self._llm.provider_name, + model_name=config.model if config else self._llm.model_name, ) - self._chat_history: list[Content] = [] - self.token_usage = TokenUsage() + + @property + def llm_client(self) -> LLMClient: + """Return the active LLM backend.""" + return self._llm def configure_task(self, task: str) -> None: """ @@ -582,51 +615,53 @@ def configure_task(self, task: str) -> None: Args: task: The task or context to add to the conversation. """ - self._chat_history.append( - Content(role="user", parts=[Part.from_text(text=task)]) - ) + self._chat_history.append(ChatMessage(role="user", content=task)) async def take_action(self) -> tuple[Action, ActionType] | None: """ Request the next action from the AI model. - Sends the current conversation history to Gemini and receives - a structured JSON response indicating the next action to take. + Sends the current conversation history to the configured provider and + receives a structured JSON response indicating the next action. Returns: A tuple of (Action, ActionType) if successful, None otherwise. 
""" - response = await self._client.aio.models.generate_content( - model="gemini-3-flash-preview", - contents=self._chat_history, # type: ignore - config={ - "system_instruction": _build_system_prompt(_ENABLE_SEMANTIC, _ENABLE_METADATA), - "response_mime_type": "application/json", - "response_schema": Action, - }, - ) - - # Track token usage from response metadata - if response.usage_metadata: - self.token_usage.add_api_call( - prompt_tokens=response.usage_metadata.prompt_token_count or 0, - completion_tokens=response.usage_metadata.candidates_token_count or 0, + try: + response_text, usage = await self._llm.generate_action_json( + messages=self._chat_history, + system_instruction=_build_system_prompt( + _ENABLE_SEMANTIC, _ENABLE_METADATA + ), ) + except Exception as exc: + print(f"LLM request failed ({self._llm.provider_name}): {exc}") + return None + + self.token_usage.add_api_call( + prompt_tokens=usage.prompt_tokens, + completion_tokens=usage.completion_tokens, + provider_name=self._llm.provider_name, + model_name=self._llm.model_name, + ) - if response.candidates is not None: - if response.candidates[0].content is not None: - self._chat_history.append(response.candidates[0].content) - if response.text is not None: - action = Action.model_validate_json(response.text) - if action.to_action_type() == "toolcall": - toolcall = cast(ToolCallAction, action.action) - self.call_tool( - tool_name=toolcall.tool_name, - tool_input=toolcall.to_fn_args(), - ) - return action, action.to_action_type() + self._chat_history.append( + ChatMessage(role="assistant", content=response_text) + ) - return None + try: + action, action_type = parse_action_json(response_text) + except ValueError as exc: + print(f"Failed to parse LLM response: {exc}") + return None + + if action_type == "toolcall": + toolcall = cast(ToolCallAction, action.action) + self.call_tool( + tool_name=toolcall.tool_name, + tool_input=toolcall.to_fn_args(), + ) + return action, action_type def call_tool(self, 
tool_name: Tools, tool_input: dict[str, Any]) -> None: """ @@ -648,15 +683,16 @@ def call_tool(self, tool_name: Tools, tool_input: dict[str, Any]) -> None: self.token_usage.add_tool_result(result, tool_name) self._chat_history.append( - Content( + ChatMessage( role="user", - parts=[ - Part.from_text(text=f"Tool result for {tool_name}:\n\n{result}") - ], + content=f"Tool result for {tool_name}:\n\n{result}", ) ) def reset(self) -> None: """Reset the agent's conversation history and token tracking.""" self._chat_history.clear() - self.token_usage = TokenUsage() + self.token_usage = TokenUsage( + provider_name=self._llm.provider_name, + model_name=self._llm.model_name, + ) diff --git a/src/fs_explorer/llm/__init__.py b/src/fs_explorer/llm/__init__.py new file mode 100644 index 0000000..47fda1d --- /dev/null +++ b/src/fs_explorer/llm/__init__.py @@ -0,0 +1,14 @@ +"""LLM provider adapters for FsExplorer.""" + +from .config import LLMConfig, load_llm_config +from .factory import create_llm_client +from .base import ChatMessage, LLMClient, LLMUsage + +__all__ = [ + "ChatMessage", + "LLMClient", + "LLMUsage", + "LLMConfig", + "load_llm_config", + "create_llm_client", +] diff --git a/src/fs_explorer/llm/action_parser.py b/src/fs_explorer/llm/action_parser.py new file mode 100644 index 0000000..6624ac1 --- /dev/null +++ b/src/fs_explorer/llm/action_parser.py @@ -0,0 +1,109 @@ +"""Parse agent actions from LLM JSON with flexible recovery.""" + +from __future__ import annotations + +import json +from typing import cast + +from ..models import ( + Action, + ActionType, + AskHumanAction, + GoDeeperAction, + StopAction, + ToolCallAction, + ToolCallArg, + Tools, +) + + +def parse_action_json(json_str: str) -> tuple[Action, ActionType]: + """Parse an action from JSON, with flexible recovery for weaker models.""" + try: + action = Action.model_validate_json(json_str) + return action, action.to_action_type() + except Exception: + pass + + try: + raw = json.loads(json_str) + except 
json.JSONDecodeError as exc: + raise ValueError(f"Invalid JSON: {json_str[:200]}") from exc + + action_data = raw.get("action", {}) + reason = raw.get("reason", "") + + if isinstance(action_data, dict) and "final_result" in action_data: + return Action( + action=StopAction(final_result=str(action_data["final_result"])), + reason=str(reason), + ), "stop" + + if isinstance(action_data, dict) and action_data.get("tool_name") == "final_result": + answer = reason or str(action_data.get("tool_input", "")) + return Action( + action=StopAction(final_result=answer), + reason="Recovered: tool_name was final_result", + ), "stop" + + if isinstance(action_data, dict) and "directory" in action_data and "tool_name" not in action_data: + return Action( + action=GoDeeperAction(directory=str(action_data["directory"])), + reason=str(reason), + ), "godeeper" + + if isinstance(action_data, dict) and "question" in action_data: + return Action( + action=AskHumanAction(question=str(action_data["question"])), + reason=str(reason), + ), "askhuman" + + if not isinstance(action_data, dict): + raise ValueError(f"Unsupported action payload: {json_str[:200]}") + + tool_name = action_data.get("tool_name") + tool_input_raw = action_data.get("tool_input", []) + if not tool_name: + raise ValueError(f"Missing tool_name in action: {json_str[:200]}") + + args = _normalize_tool_args(tool_input_raw) + tool = cast(Tools, tool_name) + return Action( + action=ToolCallAction(tool_name=tool, tool_input=args), + reason=str(reason), + ), "toolcall" + + +def _normalize_tool_args(tool_input_raw: object) -> list[ToolCallArg]: + if isinstance(tool_input_raw, dict): + return [ + ToolCallArg(parameter_name=str(key), parameter_value=value) + for key, value in tool_input_raw.items() + ] + + if not isinstance(tool_input_raw, list): + return [] + + args: list[ToolCallArg] = [] + for item in tool_input_raw: + if isinstance(item, ToolCallArg): + args.append(item) + elif isinstance(item, dict): + if "parameter_name" in item 
and "parameter_value" in item: + args.append( + ToolCallArg( + parameter_name=str(item["parameter_name"]), + parameter_value=item["parameter_value"], + ) + ) + elif len(item) == 1: + key, value = next(iter(item.items())) + args.append( + ToolCallArg(parameter_name=str(key), parameter_value=value) + ) + else: + for key, value in item.items(): + args.append( + ToolCallArg(parameter_name=str(key), parameter_value=value) + ) + return args diff --git a/src/fs_explorer/llm/base.py b/src/fs_explorer/llm/base.py new file mode 100644 index 0000000..51d7ef9 --- /dev/null +++ b/src/fs_explorer/llm/base.py @@ -0,0 +1,42 @@ +"""Shared types for LLM provider adapters.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Protocol + + +@dataclass(frozen=True) +class ChatMessage: + """Provider-neutral chat message.""" + + role: str + content: str + + +@dataclass(frozen=True) +class LLMUsage: + """Token usage from a single LLM call.""" + + prompt_tokens: int = 0 + completion_tokens: int = 0 + + +class LLMClient(Protocol): + """Interface implemented by all LLM backends.""" + + @property + def provider_name(self) -> str: + """Human-readable provider id, e.g. 
google or siliconflow.""" + + @property + def model_name(self) -> str: + """Model id sent to the provider API.""" + + async def generate_action_json( + self, + *, + messages: list[ChatMessage], + system_instruction: str, + ) -> tuple[str, LLMUsage]: + """Return structured action JSON and token usage.""" diff --git a/src/fs_explorer/llm/config.py b/src/fs_explorer/llm/config.py new file mode 100644 index 0000000..10fd1da --- /dev/null +++ b/src/fs_explorer/llm/config.py @@ -0,0 +1,87 @@ +"""Environment-based LLM provider configuration.""" + +from __future__ import annotations + +import os +from dataclasses import dataclass +from typing import Literal + +LLMProviderName = Literal["google", "siliconflow", "openai"] + +_DEFAULT_MODELS: dict[LLMProviderName, str] = { + "google": "gemini-3-flash-preview", + "siliconflow": "Qwen/Qwen2.5-72B-Instruct", + "openai": "gpt-4o-mini", +} + +_DEFAULT_BASE_URLS: dict[LLMProviderName, str] = { + "siliconflow": "https://api.siliconflow.cn/v1", + "openai": "https://api.openai.com/v1", +} + + +@dataclass(frozen=True) +class LLMConfig: + """Resolved LLM settings for the active provider.""" + + provider: LLMProviderName + api_key: str + model: str + base_url: str | None = None + + +def _resolve_provider() -> LLMProviderName: + raw = os.getenv("FS_EXPLORER_LLM_PROVIDER", "google").strip().lower() + aliases = { + "gemini": "google", + "google": "google", + "siliconflow": "siliconflow", + "silicon-flow": "siliconflow", + "sf": "siliconflow", + "openai": "openai", + } + provider = aliases.get(raw) + if provider is None: + supported = ", ".join(sorted(set(aliases.values()))) + raise ValueError( + f"Unsupported FS_EXPLORER_LLM_PROVIDER={raw!r}. 
" + f"Supported values: {supported}" + ) + return provider # type: ignore[return-value] + + +def _resolve_api_key(provider: LLMProviderName, explicit_key: str | None) -> str: + if explicit_key: + return explicit_key + + env_keys: dict[LLMProviderName, tuple[str, ...]] = { + "google": ("GOOGLE_API_KEY",), + "siliconflow": ("SILICONFLOW_API_KEY", "OPENAI_API_KEY"), + "openai": ("OPENAI_API_KEY",), + } + for env_name in env_keys[provider]: + value = os.getenv(env_name) + if value: + return value + + expected = " or ".join(env_keys[provider]) + raise ValueError( + f"No API key found for provider {provider!r}. " + f"Set {expected}, or pass api_key to FsExplorerAgent." + ) + + +def load_llm_config(*, api_key: str | None = None) -> LLMConfig: + """Load provider settings from environment variables.""" + provider = _resolve_provider() + resolved_key = _resolve_api_key(provider, api_key) + model = os.getenv("FS_EXPLORER_LLM_MODEL", _DEFAULT_MODELS[provider]) + base_url = os.getenv("FS_EXPLORER_LLM_BASE_URL") + if base_url is None and provider in _DEFAULT_BASE_URLS: + base_url = _DEFAULT_BASE_URLS[provider] + return LLMConfig( + provider=provider, + api_key=resolved_key, + model=model, + base_url=base_url, + ) diff --git a/src/fs_explorer/llm/factory.py b/src/fs_explorer/llm/factory.py new file mode 100644 index 0000000..4712d32 --- /dev/null +++ b/src/fs_explorer/llm/factory.py @@ -0,0 +1,41 @@ +"""Factory for LLM provider clients.""" + +from __future__ import annotations + +from .base import LLMClient +from .config import LLMConfig, load_llm_config +from .google_client import GoogleGeminiClient +from .openai_client import OpenAICompatibleClient + + +from .prompts import action_schema_instructions + + +def create_llm_client(*, api_key: str | None = None, config: LLMConfig | None = None) -> LLMClient: + """Instantiate the configured LLM backend.""" + resolved = config or load_llm_config(api_key=api_key) + + if resolved.provider == "google": + return 
GoogleGeminiClient(api_key=resolved.api_key, model=resolved.model) + + if resolved.provider == "siliconflow": + if not resolved.base_url: + raise ValueError("FS_EXPLORER_LLM_BASE_URL is required for siliconflow") + return OpenAICompatibleClient( + api_key=resolved.api_key, + model=resolved.model, + base_url=resolved.base_url, + provider_name="siliconflow", + ) + + if resolved.provider == "openai": + if not resolved.base_url: + raise ValueError("FS_EXPLORER_LLM_BASE_URL is required for openai") + return OpenAICompatibleClient( + api_key=resolved.api_key, + model=resolved.model, + base_url=resolved.base_url, + provider_name="openai", + ) + + raise ValueError(f"Unsupported provider: {resolved.provider}") diff --git a/src/fs_explorer/llm/google_client.py b/src/fs_explorer/llm/google_client.py new file mode 100644 index 0000000..47d1d3d --- /dev/null +++ b/src/fs_explorer/llm/google_client.py @@ -0,0 +1,63 @@ +"""Google Gemini LLM client.""" + +from __future__ import annotations + +from google.genai import Client as GenAIClient +from google.genai.types import Content, HttpOptions, Part + +from ..models import Action +from .base import ChatMessage, LLMUsage + + +class GoogleGeminiClient: + """Gemini backend with native JSON schema support.""" + + def __init__(self, *, api_key: str, model: str) -> None: + self._client = GenAIClient( + api_key=api_key, + http_options=HttpOptions(api_version="v1beta"), + ) + self._model = model + + @property + def provider_name(self) -> str: + return "google" + + @property + def model_name(self) -> str: + return self._model + + async def generate_action_json( + self, + *, + messages: list[ChatMessage], + system_instruction: str, + ) -> tuple[str, LLMUsage]: + contents = [ + Content( + role="user" if message.role == "user" else "model", + parts=[Part.from_text(text=message.content)], + ) + for message in messages + ] + response = await self._client.aio.models.generate_content( + model=self._model, + contents=contents, # type: 
ignore[arg-type] + config={ + "system_instruction": system_instruction, + "response_mime_type": "application/json", + "response_schema": Action, + }, + ) + + usage = LLMUsage() + if response.usage_metadata: + usage = LLMUsage( + prompt_tokens=response.usage_metadata.prompt_token_count or 0, + completion_tokens=response.usage_metadata.candidates_token_count or 0, + ) + + if response.text is None: + raise RuntimeError("Gemini returned an empty response") + + return response.text, usage diff --git a/src/fs_explorer/llm/openai_client.py b/src/fs_explorer/llm/openai_client.py new file mode 100644 index 0000000..d4a02cc --- /dev/null +++ b/src/fs_explorer/llm/openai_client.py @@ -0,0 +1,75 @@ +"""OpenAI-compatible LLM client (SiliconFlow, OpenAI, etc.).""" + +from __future__ import annotations + +from openai import AsyncOpenAI + +from .base import ChatMessage, LLMUsage +from .prompts import action_schema_instructions + + +class OpenAICompatibleClient: + """Chat-completions backend for OpenAI-compatible APIs.""" + + def __init__( + self, + *, + api_key: str, + model: str, + base_url: str, + provider_name: str, + ) -> None: + self._client = AsyncOpenAI(api_key=api_key, base_url=base_url) + self._model = model + self._provider_name = provider_name + + @property + def provider_name(self) -> str: + return self._provider_name + + @property + def model_name(self) -> str: + return self._model + + async def generate_action_json( + self, + *, + messages: list[ChatMessage], + system_instruction: str, + ) -> tuple[str, LLMUsage]: + schema_hint = ( + f"{system_instruction}\n\n{action_schema_instructions()}" + ) + payload = [ + {"role": "system", "content": schema_hint}, + *[ + {"role": message.role, "content": message.content} + for message in messages + ], + ] + response = await self._client.chat.completions.create( + model=self._model, + messages=payload, # type: ignore[arg-type] + response_format={"type": "json_object"}, + temperature=0.2, + ) + + content = 
response.choices[0].message.content or "" + usage = LLMUsage() + if response.usage is not None: + usage = LLMUsage( + prompt_tokens=response.usage.prompt_tokens or 0, + completion_tokens=response.usage.completion_tokens or 0, + ) + + json_str = content.strip() + if not json_str.startswith("{"): + start = json_str.find("{") + end = json_str.rfind("}") + 1 + if start >= 0 and end > start: + json_str = json_str[start:end] + + if not json_str: + raise RuntimeError(f"{self._provider_name} returned an empty response") + + return json_str, usage diff --git a/src/fs_explorer/llm/prompts.py b/src/fs_explorer/llm/prompts.py new file mode 100644 index 0000000..1b07261 --- /dev/null +++ b/src/fs_explorer/llm/prompts.py @@ -0,0 +1,28 @@ +"""Compact action-format instructions for OpenAI-compatible models.""" + + +def action_schema_instructions() -> str: + """Return JSON action format guidance for chat-completions backends.""" + return """ +You MUST respond with exactly one JSON object containing `action` and `reason`. + +Stop when you have the final answer: +{"action": {"final_result": "Your answer with citations"}, "reason": "Why you are done"} + +Call a tool: +{"action": {"tool_name": "scan_folder", "tool_input": [{"parameter_name": "directory", "parameter_value": "/path"}]}, "reason": "Why this tool"} + +Navigate into a subdirectory: +{"action": {"directory": "/path/to/subdir"}, "reason": "Why go deeper"} + +Ask the user a clarifying question: +{"action": {"question": "Your question"}, "reason": "Why you need input"} + +Allowed tool_name values: +read, grep, glob, scan_folder, preview_file, parse_file, semantic_search, get_document, list_indexed_documents + +Rules: +- Output JSON only. No markdown fences. +- tool_input must be a list of {"parameter_name": ..., "parameter_value": ...} objects. +- Use stop only when you can answer the user's task. 
+""" diff --git a/src/fs_explorer/ui.html b/src/fs_explorer/ui.html index 3143c23..ed28d34 100644 --- a/src/fs_explorer/ui.html +++ b/src/fs_explorer/ui.html @@ -1050,7 +1050,7 @@

fs-explorer

diff --git a/tests/conftest.py b/tests/conftest.py index fd5b048..5c12b2d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,73 +1,38 @@ """ Pytest fixtures and mocks for FsExplorer tests. -Provides mock implementations of the Google GenAI client for unit testing +Provides mock implementations of the LLM client for unit testing without making actual API calls. """ -from google.genai.types import ( - HttpOptions, - Content, - GenerateContentResponse, - Candidate, - Part, - GenerateContentResponseUsageMetadata, -) +from fs_explorer.llm.base import ChatMessage, LLMUsage from fs_explorer.models import StopAction, Action -class MockModels: - """Mock implementation of the GenAI models interface.""" - - async def generate_content(self, *args, **kwargs) -> GenerateContentResponse: - """Return a mock response with a stop action.""" - return GenerateContentResponse( - candidates=[ - Candidate( - content=Content( - role="model", - parts=[ - Part.from_text( - text=Action( - action=StopAction( - final_result="this is a final result" - ), - reason="I am done", - ).model_dump_json() - ) - ], - ) - ) - ], - usage_metadata=GenerateContentResponseUsageMetadata( - prompt_token_count=100, - candidates_token_count=50, - total_token_count=150, - ), - ) +class MockLLMClient: + """Mock LLM backend that always returns a stop action.""" + def __init__(self, provider_name: str = "google", model_name: str = "mock-model") -> None: + self._provider_name = provider_name + self._model_name = model_name -class MockAio: - """Mock implementation of the async GenAI interface.""" - @property - def models(self) -> MockModels: - """Return mock models interface.""" - return MockModels() - - -class MockGenAIClient: - """ - Mock implementation of the Google GenAI client. - - Provides predictable responses for testing without API calls. 
- """ - - def __init__(self, api_key: str, http_options: HttpOptions) -> None: - """Initialize mock client (ignores parameters).""" - pass + def provider_name(self) -> str: + return self._provider_name @property - def aio(self) -> MockAio: - """Return mock async interface.""" - return MockAio() + def model_name(self) -> str: + return self._model_name + + async def generate_action_json( + self, + *, + messages: list[ChatMessage], + system_instruction: str, + ) -> tuple[str, LLMUsage]: + del messages, system_instruction + payload = Action( + action=StopAction(final_result="this is a final result"), + reason="I am done", + ).model_dump_json() + return payload, LLMUsage(prompt_tokens=100, completion_tokens=50) diff --git a/tests/test_agent.py b/tests/test_agent.py index cc9e9fa..be183bc 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -4,8 +4,6 @@ import os from unittest.mock import patch -from google.genai import Client as GenAIClient -from google.genai.types import HttpOptions from fs_explorer.agent import ( FsExplorerAgent, @@ -17,78 +15,95 @@ clear_index_context, ) from fs_explorer.models import Action, StopAction -from .conftest import MockGenAIClient +from .conftest import MockLLMClient class TestAgentInitialization: """Tests for agent initialization.""" - - @patch.dict(os.environ, {"GOOGLE_API_KEY": "test-api-key"}) + + @patch.dict(os.environ, {"GOOGLE_API_KEY": "test-api-key", "FS_EXPLORER_LLM_PROVIDER": "google"}) def test_agent_init_with_env_key(self) -> None: """Test agent initialization with API key from environment.""" agent = FsExplorerAgent() - assert isinstance(agent._client, GenAIClient) - assert len(agent._chat_history) == 0 # No system prompt in history + assert agent.llm_client.provider_name == "google" + assert len(agent._chat_history) == 0 assert isinstance(agent.token_usage, TokenUsage) + @patch.dict(os.environ, {"FS_EXPLORER_LLM_PROVIDER": "google"}) def test_agent_init_with_explicit_key(self) -> None: """Test agent initialization 
with explicit API key.""" agent = FsExplorerAgent(api_key="explicit-test-key") - assert isinstance(agent._client, GenAIClient) + assert agent.llm_client.provider_name == "google" + + @patch.dict( + os.environ, + { + "FS_EXPLORER_LLM_PROVIDER": "siliconflow", + "SILICONFLOW_API_KEY": "sf-test-key", + }, + clear=False, + ) + def test_agent_init_with_siliconflow(self) -> None: + """Test siliconflow provider selection from environment.""" + agent = FsExplorerAgent() + assert agent.llm_client.provider_name == "siliconflow" def test_agent_init_without_key_raises(self) -> None: """Test that initialization without API key raises ValueError.""" - # Ensure no key in environment env = os.environ.copy() - if "GOOGLE_API_KEY" in env: - del env["GOOGLE_API_KEY"] - + for key in ( + "GOOGLE_API_KEY", + "SILICONFLOW_API_KEY", + "OPENAI_API_KEY", + "FS_EXPLORER_LLM_PROVIDER", + ): + env.pop(key, None) + with patch.dict(os.environ, env, clear=True): - with pytest.raises(ValueError, match="GOOGLE_API_KEY not found"): + with pytest.raises(ValueError, match="No API key found"): FsExplorerAgent() + def test_agent_init_with_injected_client(self) -> None: + """Test initialization with an injected mock client.""" + client = MockLLMClient(provider_name="mock") + agent = FsExplorerAgent(llm_client=client) + assert agent.llm_client is client + class TestAgentConfiguration: """Tests for agent task configuration.""" - - @patch.dict(os.environ, {"GOOGLE_API_KEY": "test-api-key"}) + def test_configure_task_adds_to_history(self) -> None: """Test that configure_task adds message to chat history.""" - agent = FsExplorerAgent() + agent = FsExplorerAgent(llm_client=MockLLMClient()) agent.configure_task("this is a task") - + assert len(agent._chat_history) == 1 assert agent._chat_history[0].role == "user" - assert agent._chat_history[0].parts[0].text == "this is a task" + assert agent._chat_history[0].content == "this is a task" - @patch.dict(os.environ, {"GOOGLE_API_KEY": "test-api-key"}) def 
test_multiple_configure_task_calls(self) -> None: """Test that multiple configure_task calls accumulate.""" - agent = FsExplorerAgent() + agent = FsExplorerAgent(llm_client=MockLLMClient()) agent.configure_task("task 1") agent.configure_task("task 2") - + assert len(agent._chat_history) == 2 - assert agent._chat_history[0].parts[0].text == "task 1" - assert agent._chat_history[1].parts[0].text == "task 2" + assert agent._chat_history[0].content == "task 1" + assert agent._chat_history[1].content == "task 2" class TestAgentActions: """Tests for agent action handling.""" - + @pytest.mark.asyncio - @patch.dict(os.environ, {"GOOGLE_API_KEY": "test-api-key"}) async def test_take_action_returns_action(self) -> None: """Test that take_action returns an action from the model.""" - agent = FsExplorerAgent() + agent = FsExplorerAgent(llm_client=MockLLMClient()) agent.configure_task("this is a task") - agent._client = MockGenAIClient( - api_key="test", - http_options=HttpOptions(api_version="v1beta") - ) - + result = await agent.take_action() - + assert result is not None action, action_type = result assert isinstance(action, Action) @@ -97,65 +112,64 @@ async def test_take_action_returns_action(self) -> None: assert action.reason == "I am done" assert action_type == "stop" - @patch.dict(os.environ, {"GOOGLE_API_KEY": "test-api-key"}) def test_reset_clears_history(self) -> None: """Test that reset clears chat history and token usage.""" - agent = FsExplorerAgent() + agent = FsExplorerAgent(llm_client=MockLLMClient()) agent.configure_task("task 1") agent.token_usage.api_calls = 5 - + agent.reset() - + assert len(agent._chat_history) == 0 assert agent.token_usage.api_calls == 0 class TestTokenUsage: """Tests for TokenUsage tracking.""" - + def test_add_api_call(self) -> None: """Test adding API call metrics.""" usage = TokenUsage() - usage.add_api_call(100, 50) - + usage.add_api_call(100, 50, provider_name="google", model_name="gemini") + assert usage.prompt_tokens == 100 
assert usage.completion_tokens == 50 assert usage.total_tokens == 150 assert usage.api_calls == 1 + assert usage.provider_name == "google" def test_add_tool_result_parse_file(self) -> None: """Test tracking parse_file tool usage.""" usage = TokenUsage() usage.add_tool_result("document content here", "parse_file") - + assert usage.documents_parsed == 1 assert usage.tool_result_chars == len("document content here") def test_add_tool_result_scan_folder(self) -> None: """Test tracking scan_folder tool usage.""" usage = TokenUsage() - # Simulating scan output with document markers result = "│ [1/3] doc1.pdf\n│ [2/3] doc2.pdf\n│ [3/3] doc3.pdf" usage.add_tool_result(result, "scan_folder") - + assert usage.documents_scanned == 3 def test_summary_format(self) -> None: """Test that summary produces formatted output.""" - usage = TokenUsage() + usage = TokenUsage(provider_name="google", model_name="gemini") usage.add_api_call(1000, 500) - + summary = usage.summary() - + assert "TOKEN USAGE SUMMARY" in summary - assert "1,000" in summary # Formatted prompt tokens + assert "1,000" in summary assert "API Calls:" in summary assert "Est. 
Cost" in summary class TestSystemPrompt: """Tests for system prompt configuration.""" - + def test_system_prompt_contains_tools(self) -> None: """Test that system prompt documents all tools.""" assert "scan_folder" in SYSTEM_PROMPT @@ -217,13 +231,12 @@ def test_build_system_prompt_both(self) -> None: prompt = _build_system_prompt(True, True) assert "Semantic + Metadata" in prompt - @patch.dict(os.environ, {"GOOGLE_API_KEY": "test-api-key"}) def test_all_tools_always_available(self) -> None: """Filesystem and indexed tools are never blocked.""" set_search_flags(enable_semantic=False, enable_metadata=False) - agent = FsExplorerAgent() + agent = FsExplorerAgent(llm_client=MockLLMClient()) agent.configure_task("test") agent.call_tool("glob", {"directory": "/tmp", "pattern": "*.md"}) last = agent._chat_history[-1] - assert "not available" not in last.parts[0].text + assert "not available" not in last.content diff --git a/tests/test_llm_config.py b/tests/test_llm_config.py new file mode 100644 index 0000000..bb564e8 --- /dev/null +++ b/tests/test_llm_config.py @@ -0,0 +1,51 @@ +"""Tests for LLM provider configuration.""" + +import os + +import pytest + +from fs_explorer.llm.config import load_llm_config + + +def test_load_google_config(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("FS_EXPLORER_LLM_PROVIDER", "google") + monkeypatch.setenv("GOOGLE_API_KEY", "g-key") + monkeypatch.delenv("FS_EXPLORER_LLM_MODEL", raising=False) + config = load_llm_config() + assert config.provider == "google" + assert config.api_key == "g-key" + assert config.model == "gemini-3-flash-preview" + + +def test_load_siliconflow_config(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("FS_EXPLORER_LLM_PROVIDER", "siliconflow") + monkeypatch.setenv("SILICONFLOW_API_KEY", "sf-key") + config = load_llm_config() + assert config.provider == "siliconflow" + assert config.api_key == "sf-key" + assert config.base_url == "https://api.siliconflow.cn/v1" + + +def 
test_load_openai_config(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("FS_EXPLORER_LLM_PROVIDER", "openai") + monkeypatch.setenv("OPENAI_API_KEY", "o-key") + monkeypatch.delenv("FS_EXPLORER_LLM_BASE_URL", raising=False) + config = load_llm_config() + assert config.provider == "openai" + assert config.api_key == "o-key" + assert config.base_url == "https://api.openai.com/v1" + + +def test_explicit_api_key_override(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("FS_EXPLORER_LLM_PROVIDER", "google") + monkeypatch.delenv("GOOGLE_API_KEY", raising=False) + config = load_llm_config(api_key="override") + assert config.api_key == "override" + + +def test_missing_api_key_raises(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("FS_EXPLORER_LLM_PROVIDER", "siliconflow") + monkeypatch.delenv("SILICONFLOW_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + with pytest.raises(ValueError, match="No API key found"): + load_llm_config() diff --git a/tests/test_llm_integration.py b/tests/test_llm_integration.py new file mode 100644 index 0000000..6a56660 --- /dev/null +++ b/tests/test_llm_integration.py @@ -0,0 +1,29 @@ +"""Optional live integration tests for configured LLM providers.""" + +from __future__ import annotations + +import os + +import pytest + +from fs_explorer.agent import FsExplorerAgent +from fs_explorer.models import StopAction + + +@pytest.mark.asyncio +@pytest.mark.skipif( + not os.getenv("SILICONFLOW_API_KEY"), + reason="SILICONFLOW_API_KEY not set", +) +async def test_siliconflow_take_action_stop() -> None: + os.environ["FS_EXPLORER_LLM_PROVIDER"] = "siliconflow" + agent = FsExplorerAgent() + agent.configure_task( + "Respond with a stop action. 
final_result should be exactly: siliconflow-ok" + ) + result = await agent.take_action() + assert result is not None + action, action_type = result + assert action_type == "stop" + assert isinstance(action.action, StopAction) + assert "siliconflow-ok" in action.action.final_result.lower()