diff --git a/agentflow/.env.template b/agentflow/.env.template index 01bf02c..37b9f55 100644 --- a/agentflow/.env.template +++ b/agentflow/.env.template @@ -23,6 +23,8 @@ GOOGLE_API_KEY= ANTHROPIC_API_KEY= # Anthropic LLM DEEPSEEK_API_KEY= # DeepSeek LLM XAI_API_KEY= # Grok (xAI) LLM +FORGE_API_KEY= # Forge LLM router +FORGE_API_BASE=https://api.forge.tensorblock.co/v1 # Optional Forge base URL override # === Azure OpenAI Configuration === AZURE_OPENAI_API_KEY= # Azure OpenAI API Key diff --git a/agentflow/agentflow/engine/factory.py b/agentflow/agentflow/engine/factory.py index ae0a504..62c6111 100644 --- a/agentflow/agentflow/engine/factory.py +++ b/agentflow/agentflow/engine/factory.py @@ -19,6 +19,29 @@ def create_llm_engine(model_string: str, use_cache: bool = False, is_multimodal: print(f"creating llm engine {model_string} with: is_multimodal: {is_multimodal}, kwargs: {kwargs}") + model_string_lower = model_string.lower() + + # === Forge (OpenAI-compatible router) === + if model_string_lower.startswith("forge-"): + from .openai import ChatOpenAI + + if "FORGE_API_KEY" not in os.environ: + raise ValueError("Please set the FORGE_API_KEY environment variable.") + + model_string = model_string[len("forge-"):] + config = { + "model_string": model_string, + "use_cache": use_cache, + "is_multimodal": is_multimodal, + "temperature": kwargs.get("temperature", 0.7), + "top_p": kwargs.get("top_p", 0.9), + "frequency_penalty": kwargs.get("frequency_penalty", 0.5), + "presence_penalty": kwargs.get("presence_penalty", 0.5), + "api_key": os.getenv("FORGE_API_KEY"), + "base_url": os.getenv("FORGE_API_BASE") or "https://api.forge.tensorblock.co/v1", + } + return ChatOpenAI(**config) + # === Azure OpenAI === if "azure" in model_string: from .azure import ChatAzureOpenAI @@ -196,4 +219,4 @@ def create_llm_engine(model_string: str, use_cache: bool = False, is_multimodal: "For Ollama models, use 'ollama-'. 
" "For other custom engines, you can edit the factory.py file and add its interface file. " "Your pull request will be warmly welcomed!" - ) \ No newline at end of file + ) diff --git a/agentflow/agentflow/engine/openai.py b/agentflow/agentflow/engine/openai.py index 7a73726..75bba43 100644 --- a/agentflow/agentflow/engine/openai.py +++ b/agentflow/agentflow/engine/openai.py @@ -63,6 +63,8 @@ def __init__( system_prompt=DEFAULT_SYSTEM_PROMPT, is_multimodal: bool=False, use_cache: bool=True, # disable cache for now + api_key: str | None = None, + base_url: str | None = None, **kwargs): """ :param model_string: @@ -87,12 +89,15 @@ def __init__( os.makedirs(self.image_cache_dir, exist_ok=True) super().__init__(cache_path=cache_path) - if os.getenv("OPENAI_API_KEY") is None: + resolved_api_key = api_key or os.getenv("OPENAI_API_KEY") + if not resolved_api_key: raise ValueError("Please set the OPENAI_API_KEY environment variable if you'd like to use OpenAI models.") - - self.client = OpenAI( - api_key=os.getenv("OPENAI_API_KEY"), - ) + + client_kwargs = {"api_key": resolved_api_key} + if base_url: + client_kwargs["base_url"] = base_url + + self.client = OpenAI(**client_kwargs) @retry(wait=wait_random_exponential(min=1, max=5), stop=stop_after_attempt(5)) diff --git a/assets/doc/api_key.md b/assets/doc/api_key.md index 4e5568e..a561dfb 100644 --- a/assets/doc/api_key.md +++ b/assets/doc/api_key.md @@ -73,6 +73,16 @@ This guide provides detailed instructions on how to obtain API keys for all LLM > **Important Note for Qwen Models**: Together AI offers both Turbo (quantized) and standard (non-quantized) versions of Qwen models. For best performance and accuracy, we recommend using the **non-quantized versions** (e.g., `Qwen/Qwen2.5-7B-Instruct` instead of `Qwen/Qwen2.5-7B-Instruct-Turbo`). The Turbo versions are faster but may have reduced quality due to quantization. +--- + +## 5. 
Forge API Key + +**Purpose**: Access Forge's OpenAI-compatible router; in AgentFlow use the `forge-` prefix, e.g. model string `forge-OpenAI/gpt-4o-mini`. + +**Environment variables**: +- `FORGE_API_KEY` +- `FORGE_API_BASE` (optional, defaults to `https://api.forge.tensorblock.co/v1`) + ## Important Notes @@ -94,3 +104,4 @@ This guide provides detailed instructions on how to obtain API keys for all LLM | GOOGLE_API_KEY | Gemini models | [aistudio.google.com](https://aistudio.google.com/) | [Docs](https://ai.google.dev/gemini-api/docs/models/gemini) | | DASHSCOPE_API_KEY | Qwen models | [dashscope.console.aliyun.com](https://dashscope.console.aliyun.com/) | [Docs](https://help.aliyun.com/zh/dashscope/developer-reference/model-square) | | TOGETHER_API_KEY | Qwen & open-source models | [together.ai](https://www.together.ai/) | [Docs](https://docs.together.ai/docs/inference-models) | +| FORGE_API_KEY | Forge router | [forge.tensorblock.co](https://forge.tensorblock.co) | [Docs](https://forge.tensorblock.co) | diff --git a/assets/doc/llm_engine.md b/assets/doc/llm_engine.md index 1a185a2..2f76e31 100644 --- a/assets/doc/llm_engine.md +++ b/assets/doc/llm_engine.md @@ -7,6 +7,7 @@ We support a broad range of LLM engines for agents and tools in [`factory.py`](. | vLLM | `vllm-Qwen/Qwen2.5-7B-Instruct` | Various vLLM-supported models (e.g., `Qwen2.5-7B-Instruct`, `Qwen2.5-VL-3B-Instruct`). Supports local checkpoint models for customization and local inference. 
| [vLLM Models](https://docs.vllm.ai/en/latest/models/supported_models.html) | | DashScope (Qwen) | `dashscope-qwen2.5-7b-instruct` | Qwen models via Alibaba Cloud DashScope API | [DashScope Models](https://help.aliyun.com/zh/model-studio/getting-started/models) | | OpenAI | `gpt-4o`, `o1-mini` | `gpt-4-turbo`, `gpt-4o`, `gpt-4o-mini`, `gpt-4.1`, `gpt-4.1-mini`, `gpt-4.1-nano`, `gpt-3.5-turbo`, `gpt-4`, `o1`, `o1-mini`, `o3`, `o3-mini`, `o1-pro`, `o4-mini` | [OpenAI Models](https://platform.openai.com/docs/models) | +| Forge | `forge-OpenAI/gpt-4o-mini` | Any OpenAI-compatible model via Forge router (`Provider/model-name`) | [Forge](https://forge.tensorblock.co) | | Azure OpenAI | `azure-gpt-4o` | `gpt-4o`, `gpt-4o-mini`, `gpt-4.1`, `gpt-4.1-mini`, `gpt-4.1-nano`, `gpt-3.5-turbo`, `gpt-4`, `o1`, `o1-mini`, `o3`, `o3-mini`, `o1-pro`, `o4-mini` | [Azure OpenAI Models](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#models) | | Anthropic | `claude-3-5-sonnet-20241022` | `claude-3-haiku-20240307`, `claude-3-sonnet-20240229`, `claude-3-opus-20240229`, `claude-3-5-sonnet-20240620`, `claude-3-5-sonnet-20241022`, `claude-3-5-haiku-20241022`, `claude-3-7-sonnet-20250219` | [Anthropic Models](https://docs.anthropic.com/en/docs/about-claude/models/all-models) | | TogetherAI | `together-meta-llama/Llama-3-70b-chat-hf` | Most models including `meta-llama/Llama-4-Scout-17B-16E-Instruct`, `Qwen/QwQ-32B`, `Qwen/Qwen2-VL-72B-Instruct`, `meta-llama/Llama-3-70b-chat-hf`, `Qwen/Qwen2-72B-Instruct` | [TogetherAI Models](https://api.together.ai/models) |