From 8316ecb056728cf0c2b1eb6f45096bd09a7d71df Mon Sep 17 00:00:00 2001 From: Nick Lotz Date: Thu, 11 Jun 2026 11:51:55 -0500 Subject: [PATCH 1/2] Migrate Agentspan docs and examples --- .../agentspan/concepts/agents.mdx | 360 ++++++++++++ .../agentspan/concepts/guardrails.mdx | 209 +++++++ .../agentspan/concepts/memory.mdx | 193 +++++++ .../agentspan/concepts/multi-agent.mdx | 546 ++++++++++++++++++ .../agentspan/concepts/plan-execute.mdx | 482 ++++++++++++++++ .../agentspan/concepts/scheduling.mdx | 328 +++++++++++ .../agentspan/concepts/skills.mdx | 244 ++++++++ .../agentspan/concepts/streaming.mdx | 170 ++++++ .../agentspan/concepts/testing.mdx | 349 +++++++++++ .../agentspan/concepts/tools.mdx | 302 ++++++++++ .../agentspan/examples/crash-resume.mdx | 265 +++++++++ .../agentspan/examples/document-processor.mdx | 278 +++++++++ .../agentspan/examples/google-adk.mdx | 351 +++++++++++ .../agentspan/examples/human-in-the-loop.mdx | 219 +++++++ .../agentspan/examples/langgraph.mdx | 287 +++++++++ .../agentspan/examples/openai-agents-sdk.mdx | 281 +++++++++ .../agentspan/examples/research-pipeline.mdx | 137 +++++ .../agentspan/examples/support-triage.mdx | 222 +++++++ docs/developer-guides/agentspan/overview.mdx | 50 ++ .../developer-guides/agentspan/quickstart.mdx | 124 ++++ .../agentspan/reference/ai-models.mdx | 222 +++++++ .../agentspan/reference/cli.mdx | 168 ++++++ .../agentspan/reference/deployment.mdx | 184 ++++++ .../agentspan/reference/integrations.mdx | 144 +++++ .../agentspan/reference/providers.mdx | 207 +++++++ .../agentspan/reference/self-hosting.mdx | 168 ++++++ .../agentspan/reference/worker-types.mdx | 89 +++ .../agentspan/why-agentspan.mdx | 89 +++ sidebars.js | 76 ++- 29 files changed, 6743 insertions(+), 1 deletion(-) create mode 100644 docs/developer-guides/agentspan/concepts/agents.mdx create mode 100644 docs/developer-guides/agentspan/concepts/guardrails.mdx create mode 100644 docs/developer-guides/agentspan/concepts/memory.mdx create mode 
100644 docs/developer-guides/agentspan/concepts/multi-agent.mdx create mode 100644 docs/developer-guides/agentspan/concepts/plan-execute.mdx create mode 100644 docs/developer-guides/agentspan/concepts/scheduling.mdx create mode 100644 docs/developer-guides/agentspan/concepts/skills.mdx create mode 100644 docs/developer-guides/agentspan/concepts/streaming.mdx create mode 100644 docs/developer-guides/agentspan/concepts/testing.mdx create mode 100644 docs/developer-guides/agentspan/concepts/tools.mdx create mode 100644 docs/developer-guides/agentspan/examples/crash-resume.mdx create mode 100644 docs/developer-guides/agentspan/examples/document-processor.mdx create mode 100644 docs/developer-guides/agentspan/examples/google-adk.mdx create mode 100644 docs/developer-guides/agentspan/examples/human-in-the-loop.mdx create mode 100644 docs/developer-guides/agentspan/examples/langgraph.mdx create mode 100644 docs/developer-guides/agentspan/examples/openai-agents-sdk.mdx create mode 100644 docs/developer-guides/agentspan/examples/research-pipeline.mdx create mode 100644 docs/developer-guides/agentspan/examples/support-triage.mdx create mode 100644 docs/developer-guides/agentspan/overview.mdx create mode 100644 docs/developer-guides/agentspan/quickstart.mdx create mode 100644 docs/developer-guides/agentspan/reference/ai-models.mdx create mode 100644 docs/developer-guides/agentspan/reference/cli.mdx create mode 100644 docs/developer-guides/agentspan/reference/deployment.mdx create mode 100644 docs/developer-guides/agentspan/reference/integrations.mdx create mode 100644 docs/developer-guides/agentspan/reference/providers.mdx create mode 100644 docs/developer-guides/agentspan/reference/self-hosting.mdx create mode 100644 docs/developer-guides/agentspan/reference/worker-types.mdx create mode 100644 docs/developer-guides/agentspan/why-agentspan.mdx diff --git a/docs/developer-guides/agentspan/concepts/agents.mdx b/docs/developer-guides/agentspan/concepts/agents.mdx new file mode 
100644 index 00000000..6b83e0a6 --- /dev/null +++ b/docs/developer-guides/agentspan/concepts/agents.mdx @@ -0,0 +1,360 @@ +--- +slug: "/developer-guides/agentspan/concepts/agents" +title: "Agents" +description: "The Agent class — constructor, parameters, results, handles, and the compilation model" +--- + +# Agents + +`Agent` is the single orchestration primitive in Agentspan. A single agent wraps an LLM with tools. An agent with sub-agents IS a multi-agent system. There are no separate Team, Network, or Crew classes. + +## Agent execution at runtime + +The following diagrams show how the Agentspan server orchestrates different runtime behaviors — guardrail validation with retry and escalation, and human-in-the-loop approval. + +**1. Retry** — the guardrail fails and the server re-invokes the same tool automatically. +```mermaid +sequenceDiagram + participant Code as Your Code + participant Server as Agentspan Server + participant LLM + participant Tool as Order Search Tool + participant Guard as Guardrail + Code->>Server: run(agent, prompt) + activate Server + Server->>LLM: llm_call + activate LLM + LLM-->>Server: tool_calls: [order_search] + deactivate LLM + Server->>Tool: order_search + activate Tool + Tool-->>Server: result + deactivate Tool + Server->>Guard: validate result + activate Guard + Guard-->>Server: pass / fail + deactivate Guard + opt guardrail failed + Server->>Tool: order_search (retry) + activate Tool + Tool-->>Server: result + deactivate Tool + end + opt guardrail passed + Server->>LLM: llm_call + activate LLM + LLM-->>Server: finish: STOP + deactivate LLM + Server-->>Code: AgentResult + end + deactivate Server +``` +**2. Escalation** — the guardrail fails and the server escalates by invoking a notification tool. 
+```mermaid +sequenceDiagram + participant Code as Your Code + participant Server as Agentspan Server + participant LLM + participant Tool as Order Search Tool + participant Guard as Guardrail + participant Notify as Send Slack Message + Code->>Server: run(agent, prompt) + activate Server + Server->>LLM: llm_call + activate LLM + LLM-->>Server: tool_calls: [order_search] + deactivate LLM + Server->>Tool: order_search + activate Tool + Tool-->>Server: result + deactivate Tool + Server->>Guard: validate result + activate Guard + Guard-->>Server: pass / fail + deactivate Guard + opt guardrail failed + Server->>Notify: send_slack_message + activate Notify + Notify-->>Server: notification sent + deactivate Notify + Server-->>Code: AgentResult (status: FAILED) + end + opt guardrail passed + Server->>LLM: llm_call + activate LLM + LLM-->>Server: finish: STOP + deactivate LLM + Server-->>Code: AgentResult + end + deactivate Server +``` +**3. Human-in-the-loop** — a tool marked `approval_required=True` pauses execution until a human approves or rejects. 
+```mermaid +sequenceDiagram + participant Code as Your Code + participant Server as Agentspan Server + participant LLM + participant Tool as Process Refund + participant Human as HITL (Human-in-the-Loop) + Code->>Server: run(agent, prompt) + activate Server + Server->>LLM: llm_call + activate LLM + LLM-->>Server: tool_calls: [process_refund] + deactivate LLM + Note over Server,Human: approval_required — paused, state held on server + Server->>Human: approval_required + activate Human + Human-->>Server: approved + deactivate Human + Server->>Tool: process_refund + activate Tool + Tool-->>Server: result + deactivate Tool + Server->>LLM: llm_call + activate LLM + LLM-->>Server: finish: STOP + deactivate LLM + Server-->>Code: AgentResult + deactivate Server +``` + +## Import + +```python +from agentspan.agents import Agent, AgentRuntime, run, start, stream +``` + +## Constructor + +```python +Agent( + name: str, # Unique name (becomes workflow name) + model: str, # "provider/model" format + instructions: Union[str, Callable] = "", # System prompt + tools: Optional[List] = None, # @tool functions or ToolDef + agents: Optional[List[Agent]] = None, # Sub-agents + strategy: str = "handoff", # Multi-agent strategy + router: Optional[Union[Agent, Callable]] = None, # For "router" strategy + output_type: Optional[type] = None, # Pydantic model for structured output + guardrails: Optional[List[Guardrail]] = None, # Input/output validation + memory: Optional[ConversationMemory] = None, # Session management + dependencies: Optional[Dict[str, Any]] = None, # Injected into ToolContext + max_turns: int = 25, # Maximum agent loop iterations + max_tokens: Optional[int] = None, # LLM max tokens + temperature: Optional[float] = None, # LLM temperature + stop_when: Optional[Callable] = None, # Early termination condition + metadata: Optional[Dict[str, Any]] = None, # Arbitrary metadata +) +``` + +## Parameters + +**`name`** — Unique identifier for this agent. 
Used in the execution UI, history queries, and `runtime.run("agent_name", prompt)` invocations. Required. + +**`model`** — LLM in `"provider/model"` format. See [Providers](/developer-guides/agentspan/reference/providers) for all options. + +```python +agent = Agent(name="bot", model="openai/gpt-4o") +agent = Agent(name="bot", model="anthropic/claude-sonnet-4-6") +agent = Agent(name="bot", model="google_gemini/gemini-2.0-flash") +``` + +**`instructions`** — System prompt. Can be a string or a callable that returns a string: + +```python +# Static +Agent(name="bot", model="openai/gpt-4o", instructions="You are a helpful assistant.") + +# Dynamic — evaluated at run time +from datetime import date +Agent(name="bot", model="openai/gpt-4o", + instructions=lambda: f"Today is {date.today()}. You are a helpful assistant.") +``` + +**`tools`** — List of `@tool`-decorated functions, `http_tool()`, `mcp_tool()`, or `api_tool()` results. See [Tools](/developer-guides/agentspan/concepts/tools). + +**`agents`** — Sub-agents for multi-agent orchestration. See [Multi-Agent](/developer-guides/agentspan/concepts/multi-agent). + +**`strategy`** — How sub-agents are coordinated. Default: `"handoff"`. See [Multi-Agent](/developer-guides/agentspan/concepts/multi-agent). + +**`output_type`** — A Pydantic `BaseModel` subclass for structured output: + +```python +from pydantic import BaseModel +from agentspan.agents import Agent, AgentRuntime + +class Report(BaseModel): + title: str + summary: str + confidence: float + +agent = Agent(name="analyst", model="openai/gpt-4o", output_type=Report) + +with AgentRuntime() as runtime: + result = runtime.run(agent, "Summarize the Q4 results") + report: Report = result.output # Fully typed +``` + +**`max_turns`** — Maximum iterations of the think-act-observe loop. Prevents runaway agents. Default: 25. + +**`stop_when`** — Optional callable `(context: dict) -> bool`. Evaluated after each tool call. If it returns `True`, the agent stops early. 
+ +**`dependencies`** — Dict injected into tools via `ToolContext`: + +```python +agent = Agent( + name="bot", model="openai/gpt-4o", + tools=[query_db], + dependencies={"db": my_database, "user_id": "u-123"}, +) +``` + +## Running Agents + +### `AgentRuntime` context manager (recommended) + +```python +from agentspan.agents import Agent, AgentRuntime + +agent = Agent(name="assistant", model="openai/gpt-4o") + +with AgentRuntime() as runtime: + # Blocking — waits for result + result = runtime.run(agent, "What is quantum computing?") + result.print_result() + + # Fire-and-forget — returns immediately + handle = runtime.start(agent, "Analyze this large dataset") + + # Streaming — yields events as they happen + for event in runtime.stream(agent, "Write a poem"): + print(event) +``` + +### Module-level functions + +```python +from agentspan.agents import run, start, stream + +result = run(agent, "Hello") # Uses a shared singleton runtime +handle = start(agent, "Hello") +for event in stream(agent, "Hi"): ... +``` + +### Async variants + +```python +from agentspan.agents import run_async, start_async, stream_async + +result = await run_async(agent, "Hello") +handle = await start_async(agent, "Hello") +async for event in stream_async(agent, "Hi"): ... 
+``` + +## AgentResult + +Returned by `run()`: + +| Field | Type | Description | +|---|---|---| +| `output` | `Any` | Final answer (or Pydantic model if `output_type` set) | +| `workflow_id` | `str` | Execution ID — use to track in UI or reconnect | +| `status` | `str` | `"COMPLETED"`, `"FAILED"`, `"TERMINATED"`, `"TIMED_OUT"` | +| `messages` | `List[Dict]` | Full conversation history | +| `tool_calls` | `List[Dict]` | All tool invocations with inputs/outputs | +| `token_usage` | `Optional[TokenUsage]` | Aggregated token usage (populated via `AgentRuntime`) | +| `is_success` | `bool` | `True` if status is COMPLETED | +| `is_failed` | `bool` | `True` if status is FAILED | + +```python +with AgentRuntime() as runtime: + result = runtime.run(agent, "Summarize this") + +print(result.output) # The answer +print(result.workflow_id) # Track in the Agentspan UI at http://localhost:6767 +print(result.status) # "COMPLETED" +print(result.token_usage) # TokenUsage(prompt_tokens=..., completion_tokens=..., total_tokens=...) +``` + +> **Note:** `result.output` is the direct output value (string or Pydantic model). When using module-level `run()` without an `AgentRuntime`, `token_usage` is `None`. + +## AgentHandle + +Returned by `start()`. 
A handle to a running (or paused) execution: + +| Method | Description | +|---|---| +| `get_status()` | Fetch current status → `AgentStatus` | +| `stream().get_result()` | Wait for the result | +| `approve()` | Approve a paused human-in-the-loop task | +| `reject(reason)` | Reject a HITL task with a reason | +| `send(message)` | Send a message to the agent (multi-turn) | +| `pause()` | Pause the execution | +| `resume()` | Resume a paused execution | +| `cancel(reason)` | Cancel the execution | +| `workflow_id` | The execution ID (attribute) | + +```python +with AgentRuntime() as runtime: + handle = runtime.start(agent, "Analyze Q4 reports") + +print(handle.workflow_id) # Store this to reconnect later + +# Poll status +status = handle.get_status() +if status.is_waiting: + handle.approve() +elif status.is_complete: + print(status.output) +``` + +### Reconnect to an existing execution + +```python +from agentspan.agents import AgentHandle, AgentRuntime + +runtime = AgentRuntime() +runtime.serve(agent, blocking=False) # Start workers for @tool functions + +handle = AgentHandle(workflow_id="exec-abc123", runtime=runtime) +status = handle.get_status() +``` + +> **Critical:** When reconnecting to a run that uses `@tool` functions, call `runtime.serve(agent, blocking=False)` **before** creating the `AgentHandle`. Otherwise tool tasks will hang. 
+ +## Pipeline Composition + +The `>>` operator creates sequential pipelines: + +```python +researcher = Agent(name="researcher", model="openai/gpt-4o", + instructions="Research the topic.") +writer = Agent(name="writer", model="openai/gpt-4o", + instructions="Write an article from the research.") +editor = Agent(name="editor", model="openai/gpt-4o", + instructions="Polish the article for publication.") + +pipeline = researcher >> writer >> editor + +with AgentRuntime() as runtime: + result = runtime.run(pipeline, "AI agents in 2025") + result.print_result() +``` + +## Dry-run / Plan + +Compile the agent without executing it: + +```python +from agentspan.agents import plan + +workflow = plan(agent) +print(workflow) # Compiled workflow definition (server-side execution graph) +``` + +--- + +## Execution engine + +Agentspan compiles agent definitions into [Conductor](https://conductor-oss.org/) workflows — an open-source orchestration engine that has run billions of executions in production at Netflix, LinkedIn, and Tesla. Durable state, per-step retries, replay, and full execution history are Conductor primitives. `AgentRuntime`, `Agent`, and `@tool` are the Agentspan API on top of that foundation. diff --git a/docs/developer-guides/agentspan/concepts/guardrails.mdx b/docs/developer-guides/agentspan/concepts/guardrails.mdx new file mode 100644 index 00000000..35d1618f --- /dev/null +++ b/docs/developer-guides/agentspan/concepts/guardrails.mdx @@ -0,0 +1,209 @@ +--- +slug: "/developer-guides/agentspan/concepts/guardrails" +title: "Guardrails" +description: "Input and output validation — custom functions, regex, LLM judges, four on_fail modes" +--- + +# Guardrails + +Guardrails validate agent input or output. On failure, you choose how to respond: retry with feedback, raise an error, auto-fix, or escalate to a human. 
+ +## Import + +```python +from agentspan.agents import ( + Agent, AgentRuntime, Guardrail, GuardrailResult, guardrail, + OnFail, Position, RegexGuardrail, LLMGuardrail, +) +``` + +## Basic Usage + +```python +from agentspan.agents import Agent, AgentRuntime, Guardrail, GuardrailResult, OnFail, guardrail + +@guardrail +def word_limit(content: str) -> GuardrailResult: + """Keep responses concise.""" + if len(content.split()) > 500: + return GuardrailResult(passed=False, message="Too long. Be more concise.") + return GuardrailResult(passed=True) + +agent = Agent( + name="concise_bot", + model="openai/gpt-4o", + guardrails=[Guardrail(word_limit, on_fail=OnFail.RETRY)], +) + +with AgentRuntime() as runtime: + result = runtime.run(agent, "Explain quantum computing.") + result.print_result() +``` + +## GuardrailResult + +```python +GuardrailResult( + passed: bool, # True if content passes + message: str = "", # Feedback for the LLM on retry + fixed_output: Optional[str] = None, # Corrected output for on_fail="fix" +) +``` + +## OnFail Modes + +| Mode | Behavior | +|---|---| +| `OnFail.RETRY` | Append feedback message and re-run the LLM (up to `max_retries` times) | +| `OnFail.RAISE` | Fail the execution immediately | +| `OnFail.FIX` | Replace output with `GuardrailResult.fixed_output` | +| `OnFail.HUMAN` | Pause for human review (creates a WaitTask) | + +String values (`"retry"`, `"raise"`, `"fix"`, `"human"`) also work. 
+ +## Guardrail Constructor + +```python +Guardrail( + func: Optional[Callable[[str], GuardrailResult]] = None, + position: Union[str, Position] = Position.OUTPUT, # "input" or "output" + on_fail: Union[str, OnFail] = OnFail.RETRY, + name: Optional[str] = None, + max_retries: int = 3, +) +``` + +## Position + +```python +class Position(str, Enum): + INPUT = "input" # Run before the LLM call (validate the user's prompt) + OUTPUT = "output" # Run after the LLM call (validate the response) +``` + +## Custom Guardrail + +```python +@guardrail +def no_pii(content: str) -> GuardrailResult: + """Reject responses containing email addresses.""" + import re + if re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", content): + return GuardrailResult(passed=False, message="Response contains PII (email). Remove it.") + return GuardrailResult(passed=True) + +agent = Agent( + name="safe_bot", + model="openai/gpt-4o", + guardrails=[Guardrail(no_pii, on_fail=OnFail.RETRY, max_retries=3)], +) +``` + +## RegexGuardrail + +Block or allow responses based on regex patterns: + +```python +from agentspan.agents import RegexGuardrail + +# Block responses containing profanity +agent = Agent( + name="safe_bot", + model="openai/gpt-4o", + guardrails=[ + RegexGuardrail(patterns=r"\b(badword1|badword2)\b", mode="block", on_fail=OnFail.RETRY), + ], +) + +# Only allow responses matching a pattern +agent = Agent( + name="structured_bot", + model="openai/gpt-4o", + guardrails=[ + RegexGuardrail(patterns=r"^\d{4}-\d{2}-\d{2}$", mode="allow", on_fail=OnFail.RETRY), + ], +) +``` + +## LLMGuardrail + +Use a second LLM as a judge: + +```python +from agentspan.agents import LLMGuardrail + +factual_check = LLMGuardrail( + model="openai/gpt-4o-mini", + policy="Is this response factually accurate and helpful? 
Reply YES or NO with a brief explanation.", + on_fail=OnFail.RETRY, + max_retries=2, +) + +agent = Agent( + name="research_bot", + model="openai/gpt-4o", + guardrails=[factual_check], +) +``` + +## Input Guardrails + +Validate the user's prompt before it reaches the LLM: + +```python +@guardrail +def no_jailbreak(content: str) -> GuardrailResult: + """Block jailbreak attempts.""" + red_flags = ["ignore previous instructions", "act as", "jailbreak"] + if any(flag in content.lower() for flag in red_flags): + return GuardrailResult(passed=False, message="Request blocked.") + return GuardrailResult(passed=True) + +agent = Agent( + name="safe_bot", + model="openai/gpt-4o", + guardrails=[Guardrail(no_jailbreak, position=Position.INPUT, on_fail=OnFail.RAISE)], +) +``` + +## Auto-Fix + +Use `OnFail.FIX` to replace the output automatically: + +```python +@guardrail +def ensure_json(content: str) -> GuardrailResult: + """Ensure the output is valid JSON.""" + import json + try: + json.loads(content) + return GuardrailResult(passed=True) + except json.JSONDecodeError: + return GuardrailResult( + passed=False, + message="Output must be valid JSON.", + fixed_output='{"error": "Could not generate valid JSON"}', + ) + +agent = Agent( + name="json_bot", + model="openai/gpt-4o", + guardrails=[Guardrail(ensure_json, on_fail=OnFail.FIX)], +) +``` + +## Multiple Guardrails + +Chain multiple guardrails — they run in order: + +```python +agent = Agent( + name="safe_bot", + model="openai/gpt-4o", + guardrails=[ + Guardrail(no_jailbreak, position=Position.INPUT, on_fail=OnFail.RAISE), + Guardrail(no_pii, on_fail=OnFail.RETRY, max_retries=3), + Guardrail(word_limit, on_fail=OnFail.RETRY), + ], +) +``` diff --git a/docs/developer-guides/agentspan/concepts/memory.mdx b/docs/developer-guides/agentspan/concepts/memory.mdx new file mode 100644 index 00000000..adb4c6a5 --- /dev/null +++ b/docs/developer-guides/agentspan/concepts/memory.mdx @@ -0,0 +1,193 @@ +--- +slug: 
"/developer-guides/agentspan/concepts/memory" +title: "Memory" +description: "ConversationMemory for chat history, SemanticMemory for long-term knowledge retrieval" +--- + +# Memory + +Agentspan provides two memory systems: **ConversationMemory** for managing chat history and **SemanticMemory** for long-term knowledge retrieval. They serve different purposes and can be used together. + +## Import + +```python +from agentspan.agents import Agent, ConversationMemory +from agentspan.agents.semantic_memory import SemanticMemory +``` + +## ConversationMemory + +Manages chat history as a list of messages. Messages are prepended to the LLM's message list at compile time, giving the LLM context from previous interactions. + +```python +from agentspan.agents import Agent, AgentRuntime, ConversationMemory + +memory = ConversationMemory(max_messages=100) + +agent = Agent( + name="assistant", + model="openai/gpt-4o", + instructions="You are a helpful assistant.", + memory=memory, +) + +with AgentRuntime() as runtime: + result = runtime.run(agent, "My name is Alice.") + memory.add_user_message("My name is Alice.") + memory.add_assistant_message(result.output['result']) + + result2 = runtime.run(agent, "What's my name?") + result2.print_result() # "Your name is Alice." +``` + +### Parameters + +| Field | Type | Default | Description | +|---|---|---|---| +| `messages` | `list[dict]` | `[]` | Accumulated conversation messages | +| `max_messages` | `int` | `None` | Maximum messages to retain. 
`None` = unlimited | + +### Methods + +| Method | Description | +|---|---| +| `add_user_message(content)` | Append a user message | +| `add_assistant_message(content)` | Append an assistant message | +| `add_system_message(content)` | Append a system message | +| `add_tool_call(tool_name, args, task_reference_name)` | Record a tool invocation | +| `add_tool_result(tool_name, result, task_reference_name)` | Record a tool result | +| `clear()` | Clear all history | + +### Trimming Behavior + +When `max_messages` is set and the message count exceeds it: +1. System messages are preserved in their original positions. +2. The oldest non-system messages are removed first. +3. The newest `max_messages - system_count` non-system messages are kept. + +## SemanticMemory + +Long-term memory with similarity-based retrieval. It stores facts, preferences, and knowledge, and recalls them by relevance to the current query. + +```python +from agentspan.agents.semantic_memory import SemanticMemory + +memory = SemanticMemory(max_results=3) + +memory.add("Customer prefers email communication.") +memory.add("Account is on the Enterprise plan since March 2021.") +memory.add("Last issue: billing discrepancy on invoice #1042.") + +context = memory.get_context("What plan am I on?") +# "Relevant context from memory:\n 1. Account is on the Enterprise plan..." 
+``` + +### Parameters + +| Parameter | Type | Default | Description | +|---|---|---|---| +| `store` | `MemoryStore` | `InMemoryStore()` | Storage backend | +| `max_results` | `int` | `5` | Maximum memories to retrieve per query | +| `session_id` | `str` | `None` | Optional session scope | + +### Methods + +| Method | Returns | Description | +|---|---|---| +| `add(content, metadata)` | `str` (entry ID) | Store a memory | +| `search(query, top_k)` | `list[str]` | Return relevant memory content strings | +| `search_entries(query, top_k)` | `list[MemoryEntry]` | Return full `MemoryEntry` objects | +| `get_context(query)` | `str` | Get memories formatted for prompt injection | +| `delete(memory_id)` | `bool` | Delete a memory by ID | +| `clear()` | — | Delete all memories | +| `list_all()` | `list[MemoryEntry]` | Return all stored memories | + +### Usage: Expose as a Tool (recommended) + +The agent decides when to search and what to query: + +```python +from agentspan.agents import Agent, AgentRuntime, tool +from agentspan.agents.semantic_memory import SemanticMemory + +memory = SemanticMemory(max_results=3) +memory.add("User prefers Python over JavaScript") +memory.add("User is a senior engineer with 10 years experience") + +@tool +def get_context(query: str) -> str: + """Retrieve relevant context from memory.""" + return memory.get_context(query) + +agent = Agent( + name="assistant", + model="openai/gpt-4o", + tools=[get_context], +) + +with AgentRuntime() as runtime: + result = runtime.run(agent, "What language should I use?") + result.print_result() +``` + +### Usage: Inject into System Prompt + +```python +def build_instructions() -> str: + context = memory.get_context("relevant query") + return f"You are a support agent.\n\n{context}" + +agent = Agent( + name="support", + model="openai/gpt-4o", + instructions=build_instructions, # callable +) +``` + +## Storage Backends + +The default `InMemoryStore` uses Jaccard similarity. 
Non-persistent — suitable for development. + +For production, implement `MemoryStore`: + +```python +from agentspan.agents.semantic_memory import MemoryStore, MemoryEntry, SemanticMemory + +class PineconeStore(MemoryStore): + def __init__(self, index_name: str, api_key: str): + self.index = pinecone.Index(index_name, api_key=api_key) + + def add(self, entry: MemoryEntry) -> str: + embedding = get_embedding(entry.content) + self.index.upsert([(entry.id, embedding, {"content": entry.content})]) + return entry.id + + def search(self, query: str, top_k: int = 5) -> list[MemoryEntry]: + embedding = get_embedding(query) + results = self.index.query(embedding, top_k=top_k) + return [MemoryEntry(id=r.id, content=r.metadata["content"]) for r in results.matches] + + def delete(self, memory_id: str) -> bool: + self.index.delete(ids=[memory_id]) + return True + + def clear(self) -> None: + self.index.delete(delete_all=True) + + def list_all(self) -> list[MemoryEntry]: + ... + +memory = SemanticMemory(store=PineconeStore("my-index", api_key="...")) +``` + +Compatible backends: Pinecone, Weaviate, ChromaDB, Qdrant, Mem0, or any vector search service. 
+ +## Comparison + +| | ConversationMemory | SemanticMemory | +|---|---|---| +| **Purpose** | Chat history (messages) | Long-term knowledge (facts) | +| **Retrieval** | All messages, FIFO trimmed | Similarity search | +| **Injection** | Prepended as LLM messages | Formatted text via tool or instructions | +| **Persistence** | In-process (lost on restart) | Pluggable backend (can persist) | +| **Best for** | Multi-turn conversations in a session | Cross-session preferences, user facts | diff --git a/docs/developer-guides/agentspan/concepts/multi-agent.mdx b/docs/developer-guides/agentspan/concepts/multi-agent.mdx new file mode 100644 index 00000000..9314f744 --- /dev/null +++ b/docs/developer-guides/agentspan/concepts/multi-agent.mdx @@ -0,0 +1,546 @@ +--- +slug: "/developer-guides/agentspan/concepts/multi-agent" +title: "Multi-Agent Strategies" +description: "All 8 coordination strategies — sequential, parallel, handoff, router, swarm, round_robin, random, manual" +--- + +# Multi-Agent Strategies + +Every multi-agent system in Agentspan is built from one primitive: `Agent`. Set `agents=[...]` and choose a `strategy` to coordinate them. + +## Overview + +| Strategy | Description | +|---|---| +| `handoff` (default) | LLM chooses which sub-agent handles the request | +| `sequential` | Sub-agents run in order, output feeds forward | +| `parallel` | All sub-agents run concurrently, results aggregated | +| `router` | A router agent or function selects which sub-agent runs | +| `swarm` | Condition-based handoffs between agents | +| `round_robin` | Agents take turns in a fixed rotation | +| `random` | Random sub-agent selection each turn | +| `manual` | Human selects which agent speaks each turn | + +## Sequential — `a >> b >> c` + +Sub-agents run in order. Each agent's output becomes the next agent's input. 
+ +```mermaid +sequenceDiagram + participant Code as Your Code + participant Server as Agentspan Server + participant R as Researcher + participant W as Writer + participant E as Editor + Code->>Server: run(researcher >> writer >> editor, prompt) + activate Server + Server->>R: run + activate R + R-->>Server: result + deactivate R + Server->>W: run (receives researcher output) + activate W + W-->>Server: result + deactivate W + Server->>E: run (receives writer output) + activate E + E-->>Server: result + deactivate E + Server-->>Code: AgentResult + deactivate Server +``` + +```python +from agentspan.agents import Agent, AgentRuntime + +researcher = Agent(name="researcher", model="openai/gpt-4o", + instructions="Research the topic and provide key facts.") +writer = Agent(name="writer", model="openai/gpt-4o", + instructions="Write an engaging article from the research.") +editor = Agent(name="editor", model="openai/gpt-4o", + instructions="Polish the article for publication.") + +# Operator syntax +pipeline = researcher >> writer >> editor + +# Equivalent constructor syntax +pipeline = Agent( + name="pipeline", + model="openai/gpt-4o", + agents=[researcher, writer, editor], + strategy="sequential", +) + +with AgentRuntime() as runtime: + result = runtime.run(pipeline, "AI agents in software development") + result.print_result() +``` + +## Parallel + +All sub-agents run concurrently. Results are aggregated into `result.sub_results`. 
+ +```mermaid +sequenceDiagram + participant Code as Your Code + participant Server as Agentspan Server + participant M as Market Analyst + participant R as Risk Analyst + participant F as Financial Analyst + Code->>Server: run(due_diligence, prompt) + activate Server + par run concurrently + Server->>M: run + activate M + and + Server->>R: run + activate R + and + Server->>F: run + activate F + end + M-->>Server: result + deactivate M + R-->>Server: result + deactivate R + F-->>Server: result + deactivate F + Note over Server: synthesize all results + Server-->>Code: AgentResult + deactivate Server +``` + +```python +from agentspan.agents import Agent, AgentRuntime + +market = Agent(name="market", model="openai/gpt-4o", + instructions="Analyze market size, growth, and key players.") +risk = Agent(name="risk", model="openai/gpt-4o", + instructions="Analyze regulatory, technical, and competitive risks.") +financial = Agent(name="financial", model="openai/gpt-4o", + instructions="Analyze financial projections and metrics.") + +analysis = Agent( + name="analysis", + model="openai/gpt-4o", + agents=[market, risk, financial], + strategy="parallel", +) + +with AgentRuntime() as runtime: + result = runtime.run(analysis, "Launching an AI healthcare tool in the US") + print(result.sub_results["market"]) + print(result.sub_results["risk"]) + print(result.sub_results["financial"]) +``` + +## Handoff (default) + +The orchestrator LLM decides which sub-agent handles the request. Sub-agents can also hand off to each other. 
+ +```mermaid +sequenceDiagram + participant Code as Your Code + participant Server as Agentspan Server + participant LLM + participant B as Billing + participant T as Technical + participant G as General + Code->>Server: run(support, prompt) + activate Server + Server->>LLM: route decision + activate LLM + LLM-->>Server: "Billing" + deactivate LLM + Server->>B: run + activate B + B-->>Server: result + deactivate B + Server-->>Code: AgentResult + deactivate Server +``` + +```python +from agentspan.agents import Agent, AgentRuntime, tool + +@tool +def check_balance(account_id: str) -> dict: + """Check account balance.""" + return {"account_id": account_id, "balance": 5432.10} + +billing = Agent(name="billing", model="openai/gpt-4o", + instructions="Handle billing inquiries.", tools=[check_balance]) +technical = Agent(name="technical", model="openai/gpt-4o", + instructions="Handle technical issues.") + +support = Agent( + name="support", + model="openai/gpt-4o", + instructions="Route customer requests to the right team.", + agents=[billing, technical], + strategy="handoff", # This is the default +) + +with AgentRuntime() as runtime: + result = runtime.run(support, "What's the balance on account ACC-123?") + result.print_result() +``` + +## Router + +A dedicated router agent or function selects which sub-agent runs: + +```mermaid +sequenceDiagram + participant Code as Your Code + participant Server as Agentspan Server + participant Cls as Classifier + participant B as Billing + participant T as Technical + participant G as General + Code->>Server: run(support, prompt) + activate Server + Server->>Cls: classify intent + activate Cls + Cls-->>Server: "Billing" + deactivate Cls + Server->>B: run + activate B + B-->>Server: result + deactivate B + Server-->>Code: AgentResult + deactivate Server +``` + +```python +from agentspan.agents import Agent, AgentRuntime + +classifier = Agent( + name="classifier", + model="openai/gpt-4o-mini", + instructions="Classify the request 
as 'billing', 'technical', or 'general'. Reply with just the category.", +) + +billing = Agent(name="billing", model="openai/gpt-4o", + instructions="Handle billing inquiries.") +technical = Agent(name="technical", model="openai/gpt-4o", + instructions="Handle technical issues.") +general = Agent(name="general", model="openai/gpt-4o", + instructions="Handle general questions.") + +support = Agent( + name="support", + model="openai/gpt-4o", + agents=[billing, technical, general], + strategy="router", + router=classifier, +) + +with AgentRuntime() as runtime: + result = runtime.run(support, "My invoice has a wrong charge") + result.print_result() +``` + +You can also use a Python function as the router: + +```python +def route(prompt: str) -> str: + """Return the name of the agent to route to.""" + if "bill" in prompt.lower() or "invoice" in prompt.lower(): + return "billing" + elif "error" in prompt.lower() or "bug" in prompt.lower(): + return "technical" + return "general" + +support = Agent( + name="support", + model="openai/gpt-4o", + agents=[billing, technical, general], + strategy="router", + router=route, +) +``` + +## Swarm + +Condition-based handoffs between agents. 
Each agent can trigger a handoff based on text patterns or other conditions: + +```mermaid +sequenceDiagram + participant Code as Your Code + participant Server as Agentspan Server + participant T as Triage + participant Ref as Refund Specialist + participant Esc as Escalation + Code->>Server: run(support, prompt) + activate Server + Server->>T: run + activate T + T-->>Server: "customer wants a refund" + deactivate T + Note over Server: OnTextMention("refund") triggers + Server->>Ref: run + activate Ref + Ref-->>Server: result + deactivate Ref + Server-->>Code: AgentResult + deactivate Server +``` + +```python +from agentspan.agents import Agent, AgentRuntime, Strategy +from agentspan.agents import TextMentionTermination + +triage = Agent(name="triage", model="openai/gpt-4o", + instructions="Triage support requests. Say 'BILLING' for billing, 'TECH' for technical.") +billing = Agent(name="billing", model="openai/gpt-4o", + instructions="Handle billing inquiries.") +technical = Agent(name="technical", model="openai/gpt-4o", + instructions="Handle technical issues.") + +team = Agent( + name="support_team", + model="openai/gpt-4o", + agents=[triage, billing, technical], + strategy=Strategy.SWARM, + handoffs=[ + TextMentionTermination("BILLING", target="billing"), + TextMentionTermination("TECH", target="technical"), + ], +) +``` + +## Round Robin + +Agents take turns in a fixed rotation: + +```mermaid +sequenceDiagram + participant Code as Your Code + participant Server as Agentspan Server + participant O as Optimist + participant P as Pessimist + participant R as Realist + Code->>Server: run(debate, prompt) + activate Server + loop 3 rounds + Server->>O: run + activate O + O-->>Server: result + deactivate O + Server->>P: run + activate P + P-->>Server: result + deactivate P + Server->>R: run + activate R + R-->>Server: result + deactivate R + end + Server-->>Code: AgentResult + deactivate Server +``` + +```python +agent1 = Agent(name="agent1", model="openai/gpt-4o", 
+ instructions="You are the first debater. Argue for AI regulation.") +agent2 = Agent(name="agent2", model="openai/gpt-4o", + instructions="You are the second debater. Argue against AI regulation.") + +debate = Agent( + name="debate", + model="openai/gpt-4o", + agents=[agent1, agent2], + strategy="round_robin", + max_turns=6, # 3 rounds each +) + +with AgentRuntime() as runtime: + result = runtime.run(debate, "Begin the debate.") + result.print_result() +``` + +## Random + +A random sub-agent is selected each turn. Useful for load balancing across models or creating diverse output ensembles. + +```mermaid +sequenceDiagram + participant Code as Your Code + participant Server as Agentspan Server + participant G as GPT-4o + participant C as Claude + participant Ge as Gemini + Code->>Server: run(ensemble, prompt) + activate Server + Note over Server: random selection + Server->>C: run (randomly chosen) + activate C + C-->>Server: result + deactivate C + Server-->>Code: AgentResult + deactivate Server +``` + +```python +ensemble = Agent( + name="diverse_writers", + agents=[ + Agent(name="gpt4", model="openai/gpt-4o", instructions="Write concisely."), + Agent(name="claude", model="anthropic/claude-sonnet-4-6", instructions="Write creatively."), + Agent(name="gemini", model="google_gemini/gemini-2.0-flash", instructions="Write with examples."), + ], + strategy="random", +) + +with AgentRuntime() as runtime: + result = runtime.run(ensemble, "Explain why consistency matters in distributed systems") + result.print_result() +``` + +## Manual + +Execution pauses between turns waiting for explicit human selection of the next agent. Useful for human-directed workflows. 
+ +```mermaid +sequenceDiagram + participant Code as Your Code + participant Server as Agentspan Server + participant A as Agent A + participant B as Agent B + participant C as Agent C + Code->>Server: start(workflow, prompt) + activate Server + Note over Server,C: paused — waiting for human to pick next agent + Code->>Server: handle.send("agent_a") + Server->>A: run + activate A + A-->>Server: result + deactivate A + Note over Server,C: paused — waiting again + Code->>Server: handle.send("agent_b") + Server->>B: run + activate B + B-->>Server: result + deactivate B + Server-->>Code: AgentResult + deactivate Server +``` + +```python +from agentspan.agents import Agent, AgentRuntime, start + +workflow = Agent( + name="manual_workflow", + agents=[agent_a, agent_b, agent_c], + strategy="manual", +) + +with AgentRuntime() as runtime: + handle = runtime.start(workflow, "initial prompt") + # Manual strategy pauses at each turn waiting for input. + # Use handle.send(agent_name) to select which agent runs next. 
+ status = handle.get_status() + if status.is_waiting: + handle.send("agent_a") # send the name of the agent to invoke +``` + +## Termination Conditions + +Control when multi-agent loops stop: + +```python +from agentspan.agents import ( + MaxMessageTermination, + TextMentionTermination, + StopMessageTermination, + TokenUsageTermination, +) + +# Stop after 20 messages +MaxMessageTermination(max_messages=20) + +# Stop when an agent says "DONE" +TextMentionTermination("DONE") + +# Stop on StopMessage events +StopMessageTermination() + +# Stop when token budget is exceeded +TokenUsageTermination(max_total_tokens=10000) +``` + +Combine multiple conditions: + +```python +from agentspan.agents import Agent + +agent = Agent( + name="team", + model="openai/gpt-4o", + agents=[agent1, agent2], + strategy="round_robin", + stop_when=MaxMessageTermination(20) | TextMentionTermination("DONE"), +) +``` + +## Nested Strategies + +Strategies compose freely — a parallel agent can contain sequential pipelines: + +```mermaid +sequenceDiagram + participant Code as Your Code + participant Server as Agentspan Server + participant M as Market Agent + participant T as Tech Agent + participant Reg as Regulatory Agent + participant W as Writer + participant E as Editor + participant F as Formatter + Code->>Server: run(report_pipeline, prompt) + activate Server + Note over Server: Stage 1 — Parallel Research + par run concurrently + Server->>M: run + activate M + and + Server->>T: run + activate T + and + Server->>Reg: run + activate Reg + end + M-->>Server: result + deactivate M + T-->>Server: result + deactivate T + Reg-->>Server: result + deactivate Reg + Note over Server: Stage 2 — Sequential Publish + Server->>W: run + activate W + W-->>Server: result + deactivate W + Server->>E: run (receives Writer output) + activate E + E-->>Server: result + deactivate E + Server->>F: run (receives Editor output) + activate F + F-->>Server: result + deactivate F + Server-->>Code: AgentResult + 
deactivate Server +``` + +```python +research_pipeline = researcher >> writer + +analysis = Agent( + name="analysis", + model="openai/gpt-4o", + agents=[research_pipeline, financial_agent, risk_agent], + strategy="parallel", +) +``` diff --git a/docs/developer-guides/agentspan/concepts/plan-execute.mdx b/docs/developer-guides/agentspan/concepts/plan-execute.mdx new file mode 100644 index 00000000..37ddd98d --- /dev/null +++ b/docs/developer-guides/agentspan/concepts/plan-execute.mdx @@ -0,0 +1,482 @@ +--- +slug: "/developer-guides/agentspan/concepts/plan-execute" +title: "Plan-Execute Strategy" +description: "PLAN_EXECUTE compiles LLM-generated (or static) plans into deterministic Conductor sub-workflows — the planner reasons, the executor runs." +--- + +# Plan-Execute Strategy + +`Strategy.PLAN_EXECUTE` (also called PAE; the server-side compiler is PAC, "PLAN_AND_COMPILE") splits a task into two phases: + +1. **Plan** — a planner agent emits a JSON DAG of operations. +2. **Execute** — the server compiles that JSON into a Conductor sub-workflow and runs it deterministically. + +The LLM is only invoked where it adds value (planning, per-op content generation). Orchestration, retries, parallelism, and validation are pure Conductor primitives — no token cost, no nondeterminism. + +## The deterministic boundary + +The whole point of PAC/PAE is to draw a hard line between the **non-deterministic part** (the planner LLM reasoning about *what to do*) and the **deterministic part** (Conductor running the compiled DAG). Once the plan is compiled, the executor is replay-safe, branch-stable, and free of LLM randomness. + +```mermaid +flowchart TB + subgraph ND["LLM (non-deterministic)"] + direction LR + Planner["planner agent<br/>emits JSON plan"] + end + + subgraph PAC["PAC compile step (server, pure function)"] + direction LR + ExtractJSON["extract_json<br/>(static_plan → markdown_plan → planSource)"] + Compile["compile to<br/>WorkflowDef"] + ExtractJSON --> Compile + end + + subgraph DET["Conductor sub-workflow (deterministic)"] + direction LR + Setup["SET_VARIABLE<br/>_ctx_init"] + Fork["FORK_JOIN<br/>(parallel steps)"] + Join["JOIN<br/>(aggregate)"] + Validate["validation +<br/>SWITCH gate"] + Setup --> Fork --> Join --> Validate + end + + Prompt[["user prompt"]] --> Planner + Planner -- "JSON plan in ```json fence```" --> ExtractJSON + Compile -- "workflowDef (Conductor JSON)" --> Setup + + StaticPlan[["static_plan=<br/>(skip planner)"]] -.->|"Case 0:<br/>overrides LLM"| ExtractJSON + Validate -- pass --> Done(["COMPLETED"]) + Validate -- fail --> Fallback{{"fallback agent?"}} + Fallback -- yes --> FallbackRun["LLM-loop recovery"] + Fallback -- no --> Failed(["FAILED"]) + + classDef llm fill:#fff3e0,stroke:#e65100,stroke-width:2px; + classDef pure fill:#e8f5e9,stroke:#1b5e20,stroke-width:2px; + classDef det fill:#e3f2fd,stroke:#0d47a1,stroke-width:2px; + class Planner,FallbackRun llm; + class ExtractJSON,Compile pure; + class Setup,Fork,Join,Validate det; +``` + +**Why this shape gives you determinism:** + +- **One planner call, then we're done with the LLM.** The plan is a value; everything downstream is a function of that value. Two identical plans produce two identical workflow defs and two identical executions (modulo tool side effects). +- **`Ref("step_id")` is resolved at compile time**, not at run time — there is no runtime "interpret-the-plan" loop that could diverge. The wire form (`{"$ref": "fetch"}`) becomes a Conductor template (`${fetch.output.result}`) once, in PAC. +- **Branching is a SWITCH, not a re-prompt.** `success_condition` is a JS expression evaluated by Conductor's JavaScript engine — same input, same branch, every time. +- **Parallelism is FORK_JOIN, not "ask the LLM to fan out".** A 5-section parallel report has exactly 5 branches, deterministically. +- **`plan=` (static plan) bypasses the LLM entirely.** Workflow shape and execution are now fully determined by your code. Use this for tests, replays, or any pipeline where planning lives outside the agent. 
+ +## When to use it + +PLAN_EXECUTE wins when the work has **fixed structure but variable content**: + +- Generate a research report (3 sections, parallel writes, then assemble + validate) +- Process a batch of records with conditional branches +- Multi-stage refactor where each stage is the same shape but the inputs differ +- Anywhere you'd otherwise hand-write 20 turns of LLM tool-calling and hope it doesn't loop + +If you need fully agentic exploration with no fixed shape, use `Strategy.HANDOFF` instead. If you have a fully fixed pipeline, use `Strategy.SEQUENTIAL`. PLAN_EXECUTE is the middle ground. + +## The shape + +```python +from agentspan.agents import Strategy, Agent, plan_execute + +# One-call construction (recommended): +harness = plan_execute( + name="report_generator", + tools=[create_directory, write_file, assemble_files, check_word_count], + planner_instructions="Plan a research report on the user's topic. Use 3 sections, then assemble.", + fallback_instructions="The deterministic plan failed — recover agentically.", +) + +# Or assemble manually if you need every knob: +planner = Agent(name="planner", instructions=PLANNER_INSTRUCTIONS, model=...) +fallback = Agent(name="fb", instructions=FALLBACK_INSTRUCTIONS, tools=[...], model=...) +harness = Agent( + name="report_generator", + strategy=Strategy.PLAN_EXECUTE, + planner=planner, + fallback=fallback, + tools=[...], # canonical plan-executable set; PAC validates against this + fallback_max_turns=5, +) +``` + +The **planner**, **fallback**, and **tools** slots are the three first-class fields. `agents=[...]` is **not** valid for PLAN_EXECUTE — set the named slots. + +## Plan schema + +The server auto-appends a `## Plan schema` block to the planner's user prompt (along with `## Available tools` derived from `harness.tools`). Your `planner_instructions` only needs to cover **domain-level guidance** — what to plan, not how to format JSON. 
+ +The schema PAC consumes: + +```json +{ + "steps": [ + { + "id": "<step_id>", + "depends_on": ["<upstream_step_id>"], + "parallel": false, + "operations": [ + {"tool": "<tool_name>", "args": {}}, + {"tool": "<tool_name>", "generate": { + "instructions": "<instructions for the per-op LLM call>", + "output_schema": "<JSON shape of the tool's args>", + "max_tokens": 4096 + }} + ] + } + ], + "validation": [ + {"tool": "<tool_name>", "args": {...}, + "success_condition": "$.passed === true"} + ], + "on_success": [{"tool": "<tool_name>", "args": {...}}], + "on_failure": [{"tool": "<tool_name>", "args": {...}}] +} +``` + +**Key concepts:** + +- **`args` vs `generate`** — `args` runs the tool with literal values you decide at plan time. `generate` defers arg construction to a per-op LLM call at run time. +- **`depends_on`** — cross-step concurrency. A step starts when *all* listed deps complete. Defaults to the previous step. +- **`parallel`** — when true, the step's own `operations` run concurrently (FORK_JOIN). Without it, operations run in order within the step. +- **`success_condition`** — JS expression evaluated against the validator's output (`$` = parsed output map). Returns truthy on pass. +- **`on_success` / `on_failure`** — tools to run after validation. Optional. + +## Typed plans (no JSON soup) + +For static plans (or plans you build programmatically), import the typed builders: + +```python +from agentspan.agents import Plan, Step, Op, Generate, Validation, Action + +plan = Plan( + steps=[ + Step("setup", operations=[Op("create_directory", args={"path": "out"})]), + Step( + "write", + depends_on=["setup"], + parallel=True, + operations=[ + Op("write_file", generate=Generate( + instructions="Write the introduction.", + output_schema='{"path": "out/intro.md", "content": "..."}', + )), + ], + ), + ], + validation=[ + Validation("check_word_count", args={"path": "out/intro.md", "min_words": 200}), + ], +) +``` + +IDE autocomplete, Pylance type-checks, no escaping nightmares. 
+ +## Output → input across steps with `Ref` + +Wire the **whole output** of one step into the args of a later step with `Ref("step_id")`. No JSON path, no field selection, no Conductor task-ref naming to memorise. + +```python +from agentspan.agents import Op, Plan, Ref, Step + +plan = Plan(steps=[ + Step("fetch", operations=[Op("fetch_data", args={"url": URL})]), + Step( + "summarize", + depends_on=["fetch"], + operations=[ + # The whole dict returned by `fetch_data` becomes the value of + # the `document` arg passed to `summarize`. No `.result` suffix, + # no JSONPath — the SDK serialises Ref(...) to {"$ref": "fetch"} + # and the server rewrites it to the right Conductor template + # against an INLINE wrapper that normalises dict vs. wrapped + # worker returns. + Op("summarize", args={"document": Ref("fetch")}), + ], + ), +]) +``` + +Rules: + +- The referenced step must be declared in this step's `depends_on` — explicit beats implicit. The server's PAC compile step rejects plans that Ref a step they don't depend on (the typed-Plan builders ship the Ref to the wire as-is; the failure surfaces at workflow start, not in your IDE). +- The referenced step must exist in the plan. +- Self-Refs (`Ref(stepId)` from inside `stepId`) are a compile error. +- A step can Ref multiple upstream steps independently — `Op("report", args={"src": Ref("fetch"), "summary": Ref("summarize")})` works. +- For a `parallel=True` step, `Ref("step_id")` resolves to the **array of branch results** (the FORK_JOIN aggregator's payload). +- Refs work inside nested args (lists, nested dicts) — the serialiser walks the whole arg tree. + +See `examples/108_plan_execute_refs.py` for a three-step pipeline that pipes one step's record dict through two downstream steps without ever spelling out a JSONPath. 
+ +## Static plans — skip the planner LLM + +Pass a `Plan` (or a raw dict in the same shape) to `runtime.run` and PAC uses it directly: + +```python +result = runtime.run(harness, "anything", plan=plan, cwd=work_dir) +``` + +The planner LLM still runs (the workflow shape is fixed at compile time) but its output is discarded — PAC's `extract_json` reads `workflow.input.static_plan` as Case 0, which wins over planner output. Use this for: + +- Tests (deterministic plan, no LLM nondeterminism) +- Replays of a previously-emitted plan +- Pipelines where planning lives outside the agent (a separate service or a code path that builds the `Plan` object) + +## Tool guardrails propagate + +`@tool(guardrails=[...])` works inside PLAN_EXECUTE the same way it works in the LLM-loop: + +```python +no_pii = RegexGuardrail(patterns=[r"\b\d{16}\b"], on_fail=OnFail.RAISE, ...) + +@tool(guardrails=[no_pii]) +def send_email(to: str, body: str) -> str: ... +``` + +PAC wraps every emitted SIMPLE for `send_email` in a guardrail SWITCH gate. The bare SIMPLE only runs from the gate's `pass` branch. If the guardrail trips: + +- `on_fail=raise` — TERMINATE the dynamic plan; harness's `fallback` agent recovers +- `on_fail=retry` / `fix` / `human` — collapse to TERMINATE in plan mode; same fallback path. (See `OnFail` docstring for full semantics — there's no LLM loop in plan mode to feed retry feedback into; the fallback IS the retry loop.) + +The compiler emits **only the SWITCH cases that are reachable** for the configured `on_fail`. An `on_fail=raise` guardrail produces one `raise` case, not four dead branches. + +## Fallback — the recovery agent + +Configure `fallback=` on the harness for adaptive recovery when: + +- The planner emits a malformed plan (PAC validation fails) +- A guardrail trips on a deterministic step +- A plan step itself fails at run time + +The fallback runs as a normal LLM-loop agent with the harness's `tools`. 
It receives the original prompt + the failure context (planner output, error message). `fallback_max_turns` caps its turn count during recovery. + +Without a fallback, any failure terminates the workflow. Acceptable for fail-loud pipelines; surprising otherwise — PAC **refuses to compile** when guardrails with `on_fail=retry|fix|human` are configured but no fallback exists, forcing you to either configure a fallback or explicitly set `on_fail=raise` to acknowledge fail-closed semantics. + +## What PAC actually emits + +For a plan with N parallel steps + 1 validator, the compiled WorkflowDef looks roughly like: + +``` +SET_VARIABLE _ctx_init +FORK_JOIN (per-step branches) + LLM_CHAT_COMPLETE (per generate op) + INLINE (parse LLM JSON output) + SWITCH (parse-error gate) + SIMPLE (the tool call) +JOIN +INLINE (aggregate parallel branch results — only if downstream reads it) +SIMPLE (validator) +INLINE (val_eval — emits "passed"/"failed") +SWITCH vsw ("passed" → on_success, default → TERMINATE/on_failure) +``` + +Visually, for a 3-section parallel-write plan with one validator: + +```mermaid +flowchart TB + Start([start]) --> Init["SET_VARIABLE<br/>_ctx_init"] + Init --> Fork{{"FORK_JOIN"}} + + Fork --> S1L["LLM_CHAT_COMPLETE<br/>section_1 generate"] + S1L --> S1P["INLINE<br/>parse JSON"] + S1P --> S1S{"SWITCH<br/>parse ok?"} + S1S -- ok --> S1T["SIMPLE<br/>write_file"] + S1S -- fail --> S1F["TERMINATE"] + + Fork --> S2L["LLM_CHAT_COMPLETE<br/>section_2 generate"] + S2L --> S2P["INLINE<br/>parse JSON"] + S2P --> S2S{"SWITCH<br/>parse ok?"} + S2S -- ok --> S2T["SIMPLE<br/>write_file"] + S2S -- fail --> S2F["TERMINATE"] + + Fork --> S3L["LLM_CHAT_COMPLETE<br/>section_3 generate"] + S3L --> S3P["INLINE<br/>parse JSON"] + S3P --> S3S{"SWITCH<br/>parse ok?"} + S3S -- ok --> S3T["SIMPLE<br/>write_file"] + S3S -- fail --> S3F["TERMINATE"] + + S1T --> Join((JOIN)) + S2T --> Join + S3T --> Join + + Join --> Agg["INLINE<br/>step_output_write_all<br/>(Ref normaliser)"] + Agg --> Val["SIMPLE<br/>check_word_count"] + Val --> VEval["INLINE<br/>val_eval"] + VEval --> VSW{"SWITCH<br/>passed?"} + VSW -- passed --> OK([COMPLETED]) + VSW -- failed --> Bad([TERMINATE / on_failure]) + + classDef llm fill:#fff3e0,stroke:#e65100; + classDef pure fill:#e8f5e9,stroke:#1b5e20; + classDef tool fill:#e3f2fd,stroke:#0d47a1; + classDef gate fill:#fce4ec,stroke:#880e4f; + class S1L,S2L,S3L llm; + class S1P,S2P,S3P,Agg,VEval,Init pure; + class S1T,S2T,S3T,Val tool; + class S1S,S2S,S3S,VSW,Fork,Join gate; +``` + +Only the orange `LLM_CHAT_COMPLETE` nodes are non-deterministic. Everything else — parse, gate, tool call, aggregate, validate, branch — is pure Conductor and replay-safe. With a **static plan** (`plan=` argument), the planner LLM call up-front is elided too, leaving a fully deterministic pipeline. + +The `## Available tools` block in the planner prompt and PAC's validator share the same source: `harness.tools`. A planner can't emit a tool name that PAC will reject (and PAC will reject anything not in the harness's set — closes the hallucinated-tool-name bug). + +## Common patterns + +### Research report (LLM-driven planning) + +```python +harness = plan_execute( + name="report", + tools=[create_directory, write_file, assemble_files, check_word_count], + planner_instructions="Plan a research report on the user's topic. 
Use 3 sections.", + fallback_instructions="Fix what the deterministic plan couldn't.", +) +result = runtime.run(harness, "AI agents in 2025") +``` + +### Static pipeline (no planner reasoning needed) + +```python +harness = plan_execute(name="ingest", tools=[fetch, transform, store]) +plan = Plan(steps=[ + Step("fetch", operations=[Op("fetch", args={"url": url})]), + Step("transform", depends_on=["fetch"], operations=[Op("transform", args={"path": "raw.json"})]), + Step("store", depends_on=["transform"], operations=[Op("store", args={"key": "result"})]), +]) +result = runtime.run(harness, "ingest job", plan=plan) +``` + +### Parallel work + validation + +```python +plan = Plan( + steps=[ + Step("setup", operations=[Op("create_directory", args={"path": "out"})]), + Step("write_all", depends_on=["setup"], parallel=True, operations=[ + Op("write_file", generate=Generate( + instructions=f"Write section {i}.", + output_schema=f'{{"path": "out/{i}.md", "content": "..."}}', + )) + for i in range(5) + ]), + Step("assemble", depends_on=["write_all"], operations=[ + Op("assemble_files", args={"output_path": "report.md", "input_paths": "..."}) + ]), + ], + validation=[Validation("check_word_count", args={"path": "report.md", "min_words": 1000})], +) +``` + +## Planner context — ground the planner in your domain rules + +The planner's `instructions` are fine for "how to emit a plan." They're a poor fit for the *domain-specific rules* a real plan depends on: KYC tier thresholds, onboarding phase ordering, compliance escalation paths, region-specific exceptions. Those live in docs that change weekly — not in code that ships quarterly. + +`planner_context` injects those rules into the planner's user prompt at runtime, as a `## Reference Context` block. 
Two entry shapes: + +```python +from agentspan.agents import Agent, Context, Strategy + +harness = Agent( + name="onboarding_harness", + strategy=Strategy.PLAN_EXECUTE, + tools=[validate_kyc, create_account, send_welcome_email], + planner=planner, + fallback=fallback, + planner_context=[ + # 1) Inline text — short, stable, hand-edited in code. + "Onboarding has 3 mandatory phases in order: validate_kyc → create_account → send_welcome_email.", + "Tier 'enterprise' customers ADDITIONALLY require schedule_kickoff_call.", + + # 2) Live doc — fetched per planner invocation, no compile-time fetch, no cache. + # Authorization placeholders use the same `${CRED}` shape as ToolConfig.headers. + Context( + url="https://confluence.example.com/onboarding-rules", + headers={"Authorization": "Bearer ${CONFLUENCE_TOKEN}"}, + required=True, # fetch failure → workflow fails (default) + max_bytes=8192, # truncate at 8KB + add a [doc truncated] marker + ), + ], +) +``` + +**How it compiles.** Each URL entry emits a `PLANNER_CONTEXT_FETCH` system task inside the planner-route's *live* branch (the static-plan path skips it for free). With ≥2 URLs the fetches are wrapped in a `FORK_JOIN` so they run in parallel. A small in-process TTL cache (default 60 s) plus `If-None-Match`/ETag means repeat fetches for the same doc within the TTL return the cached body without touching the wire — and 304 responses refresh the TTL without re-downloading. + +**Cache scope.** Cache key is `(url, sorted-headers)`, so different `Authorization` headers (different principals) never share a cache entry. Bounded LRU at ~1024 entries. + +**Credential placeholders.** `${CRED_NAME}` in headers gets escaped server-side to `#{CRED_NAME}` so Conductor's templater leaves it alone; the runtime credential resolver fills the value at request time — same pipeline as HTTP tool headers. Headers containing `CR`/`LF` are rejected at compile time to close the HTTP-response-splitting injection vector. 
+ +**Failure handling.** `required=True` (default) hard-fails the workflow on fetch error. `required=False` substitutes a `[doc unavailable]` marker in the planner prompt so the planner runs on partial context — for "nice-to-have" docs (glossaries, FAQs). + +End-to-end demo: `examples/115_plan_execute_planner_context.py` (Python; mirrored to TS / Java / C#). + +## Inspecting compiled plans + +`POST /api/agent/inspect-plan` compiles a plan against a PLAN_EXECUTE harness config and returns the resulting Conductor `WorkflowDef` + error string + warnings + stats — **without dispatching the SUB_WORKFLOW**. Useful for: + +* IDE tooling validating that a plan compiles cleanly against a fixed agent config before deploy +* Plan-debug REPLs visualizing the compiled DAG +* CI checks that verify a static plan still compiles after agent-config or tool-schema changes + +Request shape: + +```json +{ + "agentConfig": { /* same shape as POST /agent/start */ }, + "plan": { "steps": [ { "id": "...", "operations": [ ... ] } ] } +} +``` + +Response includes the same fields PAC sets on `output` at execution time (`workflowDef`, `error`, `warnings`, `stats`) — uses the production compile path, so the inspected output is byte-equal to what a real run would produce for that plan. + +## Knobs reference + +| Field | Purpose | +|---|---| +| `planner=` | Required. The agent that emits the JSON plan. | +| `fallback=` | Optional. Agentic recovery when a plan can't compile/exec. | +| `tools=` | Required. Plan-executable tool set. PAC validates `op.tool` names against this list and propagates each tool's guardrails. | +| `planner_context=` | Optional. List of `Context(text=…)` / `Context(url=…)` entries appended to the planner's user prompt as `## Reference Context`. URLs fetched per-planner-invocation with TTL cache + ETag revalidation. See "Planner context" above. | +| `fallback_max_turns=` | Caps the fallback agent's turn count during recovery. | +| `plan_source=` | Optional. 
Reads a fixed plan from a deterministic tool call after the planner sub-workflow runs. When the planner's text output fails extraction, this source is tried as a fallback. The newer run-time `plan=` argument (see "Static plans" above) is the simpler path for most cases. | + +| Run-time kwarg | Purpose | +|---|---| +| `plan=` | Skip the planner LLM's output; use this `Plan`/dict directly. | +| `cwd=` | Working directory for filesystem-bound tools. | + +## Examples + +- `examples/85_plan_execute_harness.py` — research report with LLM planner + fallback recovery +- `examples/103_plan_and_compile.py` — minimal PAC demo with `args` + `generate` ops + validation +- `examples/104_plan_execute_guardrails.py` — guardrail propagation in plan mode +- `examples/100_issue_fixer_agent.py` — production-shape pipeline with PLAN_EXECUTE coder + agentic fallback +- `examples/108_plan_execute_refs.py` — cross-step output piping via `Ref("step_id")` +- `examples/109_plan_execute_replan.py` — outer-loop replan pattern: run plan, inspect result, build the next plan with feedback baked into the per-op `generate.instructions` +- `examples/110_plan_execute_replan_solve.py` — adaptive goal-seeking loop: K parallel proposers + deterministic verifier per iteration; the replanner threads each candidate's exact failure modes back into the next iteration's prompt and loops until any candidate clears all constraints +- `examples/111_plan_execute_replan_binsearch.py` — many-iteration binary-search loop. The verifier holds a secret integer and each iteration's verdict reveals only one bit (too_low / too_high), so the loop *must* iterate ~log₂ N times. Use this when you want to see the plan-execute-replan cycle visibly converge over many iterations +- `examples/112_dowhile_loop_inside_workflow.py` — the loop *inside* a single Conductor workflow via a hand-built `DO_WHILE` task. Body of the loop: planner LLM → INLINE verify → reviewer LLM → SET_VARIABLE update. 
One workflow ID for the whole run; iterations show up as `planner_llm__1`, `planner_llm__2`, etc. in the same workflow's task list. This is the shape of the future `Strategy.PLAN_EXECUTE_REPLAN` (recommendation #2 from the design review) +- `examples/113_aml_sar_investigation_loop.py` — AML/SAR investigation as a DO_WHILE-inside-workflow with real PAC sub-workflows per iteration. The planner emits red-flag tool calls, the loop checks for "needs more evidence", and the cycle continues until the case is dispositioned. Mirrors finance compliance workflows +- `examples/114_portfolio_rebalance_loop.py` — multi-constraint portfolio rebalancing with wash-sale / concentration / drift checks. Each iteration refines the trade list to satisfy more constraints; the loop terminates when all checks pass +- `examples/115_plan_execute_planner_context.py` — customer onboarding with `planner_context` grounding the planner in tier rules. Mixed inline-text + commented Confluence-URL with `${CONFLUENCE_TOKEN}` reference for the credentialed-URL pattern. Mirrored to TS/Java/C# + +## Plan → execute → replan + +PAE itself is single-shot: plan-once, execute-once, fallback-once on hard failure. For tasks that need iterative refinement — run, check the output, decide to continue or replan, repeat — wrap the harness in your own loop. `examples/109_plan_execute_replan.py` shows the simple shape: each iteration calls `runtime.run(harness, prompt, plan=plan_N)`, the host code reads the artifacts the run produced, a decider returns `done | replan`, and a builder constructs `plan_{N+1}` with the prior iteration's measurements baked into the LLM instructions. The inner per-iteration run stays deterministic; the outer loop carries the adaptive control flow. + +`examples/110_plan_execute_replan_solve.py` shows the *adaptive goal-seeking* variant. 
Each iteration's plan emits **K parallel proposers** (generate ops in a FORK_JOIN step) feeding a deterministic verifier that produces a precise **per-candidate, per-constraint failure breakdown** (e.g. `word_count_off (got 21, expected 25)`). The outer loop reads the verdict JSON, terminates the moment any candidate clears all constraints, and otherwise threads each prior candidate's exact failures into the next iteration's `generate.instructions`. The result is a real plan → execute → replan → execute cycle that converges by *fixing what the previous attempt got wrong*, not by retrying the same prompt with a different seed. The pattern generalises to any LLM-generator + deterministic-verifier loop — swap the verifier for `run_pytest`, `check_proof`, `query_db`, etc., and the outer loop is identical. + +## Failure modes + +| Symptom | Cause | Fix | +|---|---|---| +| Workflow FAILED with "uses unknown tool" in PAC error | Planner emitted a tool name not in `harness.tools` | Add the tool, or fix the planner prompt; the auto-injected `## Available tools` block already constrains the planner — check it appears in your prompt | +| Workflow FAILED, no fallback ran | `plan_exec` SUB_WORKFLOW failure not caught | Confirm `harness.fallback` is set; failures route through `exec_route` SWITCH to fallback | +| Compile fails with "guardrails with on_fail=retry\|fix\|human but no fallback" | PAC blocks compile to prevent the silent degrade-to-terminate footgun | Configure a fallback or set `on_fail=raise` | +| Compile fails with "uses unsupported JSON Schema keyword '$ref' (or `oneOf`/`allOf`/`format`/etc.)" | The runtime input-schema validator implements a Draft-07 subset — keywords like `$ref`/`allOf`/`oneOf`/`format` would silently pass at runtime, producing *permissive validation*. 
PAC rejects at compile time instead | Restrict the tool's `inputSchema` to the supported subset (`type`, `properties`, `required`, `additionalProperties`, `enum`, `minLength`, `maxLength`, `pattern`, `minimum`, `maximum`, `items`, `minItems`, `maxItems`) or remove the misleading constraint | +| Compile fails with "plannerContext header '...' contains CR/LF" | A `Context(url=…, headers=…)` value contained a newline — would smuggle a fake HTTP header (response-splitting vector) | Sanitize the credential value; CR/LF in HTTP header values is never legitimate | +| `[doc unavailable]` markers in the planner's `## Reference Context` block | `Context(url=…, required=False)` doc fetch returned non-2xx | If the doc IS required, set `required=True` (default) so the workflow fails loudly instead. If it's truly optional, the marker is the intended behaviour | +| Plan compiled but did wrong thing | Planner LLM produced a syntactically-valid but semantically-wrong plan | Improve `planner_instructions`; consider switching to `plan=` static plan for deterministic flows. For domain rules, lift them into `planner_context` so the planner re-reads them on every run instead of relying on the static `instructions` | +| Need to see what PAC will compile a plan to without running it | Use the `POST /api/agent/inspect-plan` endpoint — same compile path PAC uses at runtime, no SUB_WORKFLOW dispatch | See "Inspecting compiled plans" above | diff --git a/docs/developer-guides/agentspan/concepts/scheduling.mdx b/docs/developer-guides/agentspan/concepts/scheduling.mdx new file mode 100644 index 00000000..2919188b --- /dev/null +++ b/docs/developer-guides/agentspan/concepts/scheduling.mdx @@ -0,0 +1,328 @@ +--- +slug: "/developer-guides/agentspan/concepts/scheduling" +title: "Agent Scheduling" +--- + +# Agent Scheduling + +Run any agent on a cron schedule. 
The agentspan SDK lets you attach one or +more crons to a deployed agent in a single declarative call; the runtime's +scheduler fires the agent on cadence and you watch the executions roll in. + +This page covers the user-facing API. For the design rationale see +[`docs/design/scheduling.md`](https://github.com/agentspan-ai/agentspan/blob/c873e60bc7eff73e61f568ec68e28cb1f121fe39/docs/design/scheduling.md). For the implementation +plan see [`docs/design/plans/2026-05-27-agent-scheduling.md`](https://github.com/agentspan-ai/agentspan/blob/c873e60bc7eff73e61f568ec68e28cb1f121fe39/docs/design/plans/2026-05-27-agent-scheduling.md). + +## What you get + +- **One agent, many schedules.** Each schedule is a named cron expression + with its own input, timezone, and pause/resume state. +- **Declarative deploy.** `deploy(agent, schedules=[...])` reconciles whatever + you pass against what's on the server — added, modified, removed. +- **Pause/resume/run-now** via a wire-name-keyed module API. +- **Tri-state semantics**: omit `schedules` → leave alone. Pass `[]` → purge. + Pass `[…]` → upsert listed, prune the rest. +- **Identical surface in all four SDKs** (Python, TypeScript, Java, C#). + +## Requirements + +- agentspan-runtime ≥ 0.x with the scheduler module enabled + (`conductor.scheduler.enabled=true`, on by default). +- Cron expressions use the **6-field Quartz format** (`sec min hour day mon dow`) + that Conductor's scheduler expects, e.g. `0 0 9 * * ?` for "every day at 9am". 
+ +## Quickstart + +### Python + +```python +from agentspan.agents import Agent, deploy, schedules +from agentspan.agents.schedule import Schedule + +agent = Agent(name="daily_digest", model="openai/gpt-4o-mini", + instructions="Summarize today's eng activity.") + +deploy( + agent, + schedules=[ + Schedule(name="weekday-9am", + cron="0 0 9 * * MON-FRI", + timezone="America/Los_Angeles", + input={"channel": "#eng"}), + Schedule(name="friday-5pm", + cron="0 0 17 * * FRI", + input={"channel": "#all-hands", "mode": "weekly"}), + ], +) + +# Lifecycle (key by wire name returned from list()) +infos = schedules.list(agent="daily_digest") +schedules.pause("daily_digest-weekday-9am", reason="rate limit cooldown") +schedules.resume("daily_digest-weekday-9am") +schedules.delete("daily_digest-weekday-9am") + +# Fire ad-hoc; returns execution id immediately. +exec_id = schedules.run_now("daily_digest-friday-5pm") + +# Preview cron fire times in the UI. +next_times = schedules.preview_next("0 0 9 * * MON-FRI", n=5) +``` + +A full working example is in `sdk/python/examples/hello_world_agent_schedule.py`. 
+ +### TypeScript + +```ts +import { + Agent, AgentRuntime, Schedule, schedules, +} from "@agentspan-ai/sdk"; + +const agent = new Agent({ + name: "dailyDigest", + model: "openai/gpt-4o-mini", + instructions: "Summarize today's eng activity.", +}); + +await using runtime = new AgentRuntime(); +await runtime.deploy(agent, { + schedules: [ + new Schedule({ + name: "weekday-9am", + cron: "0 0 9 * * MON-FRI", + timezone: "America/Los_Angeles", + input: { channel: "#eng" }, + }), + new Schedule({ + name: "friday-5pm", + cron: "0 0 17 * * FRI", + input: { channel: "#all-hands", mode: "weekly" }, + }), + ], +}); + +await schedules.list({ agent: "dailyDigest" }); +await schedules.pause("dailyDigest-weekday-9am", { reason: "rate limit cooldown" }); +await schedules.resume("dailyDigest-weekday-9am"); +await schedules.delete("dailyDigest-weekday-9am"); +const execId = await schedules.runNow("dailyDigest-friday-5pm"); +const nextTimes = await schedules.previewNext("0 0 9 * * MON-FRI", { n: 5 }); +``` + +### Java + +```java +import ai.agentspan.Agent; +import ai.agentspan.AgentRuntime; +import ai.agentspan.schedule.Schedule; + +Agent agent = Agent.builder() + .name("daily_digest") + .model("openai/gpt-4o-mini") + .instructions("Summarize today's eng activity.") + .build(); + +try (AgentRuntime runtime = new AgentRuntime()) { + runtime.deploy(agent, List.of( + Schedule.builder() + .name("weekday-9am") + .cron("0 0 9 * * MON-FRI") + .timezone("America/Los_Angeles") + .input(Map.of("channel", "#eng")) + .build(), + Schedule.builder() + .name("friday-5pm") + .cron("0 0 17 * * FRI") + .input(Map.of("channel", "#all-hands", "mode", "weekly")) + .build() + )); + + runtime.schedules().list("daily_digest"); + runtime.schedules().pause("daily_digest-weekday-9am", "rate limit cooldown"); + runtime.schedules().resume("daily_digest-weekday-9am"); + String execId = runtime.schedules().runNow(runtime.schedules().get("daily_digest-friday-5pm")); + List nextTimes = 
runtime.schedules().previewNext("0 0 9 * * MON-FRI", 5); +} +``` + +### C# + +```csharp +using Agentspan; +using Agentspan.Scheduling; + +var agent = new Agent +{ + Name = "daily_digest", + Model = "openai/gpt-4o-mini", + Instructions = "Summarize today's eng activity.", +}; + +await using var runtime = new AgentRuntime(); +await runtime.DeployAsync(agent, new[] +{ + new Schedule + { + Name = "weekday-9am", + Cron = "0 0 9 * * MON-FRI", + Timezone = "America/Los_Angeles", + Input = new Dictionary<string, object> { ["channel"] = "#eng" }, + }, + new Schedule + { + Name = "friday-5pm", + Cron = "0 0 17 * * FRI", + Input = new Dictionary<string, object> + { + ["channel"] = "#all-hands", + ["mode"] = "weekly", + }, + }, +}); + +await runtime.Schedules.ListAsync("daily_digest"); +await runtime.Schedules.PauseAsync("daily_digest-weekday-9am", reason: "rate limit cooldown"); +await runtime.Schedules.ResumeAsync("daily_digest-weekday-9am"); +var info = await runtime.Schedules.GetAsync("daily_digest-friday-5pm"); +string execId = await runtime.Schedules.RunNowAsync(info); +var nextTimes = await runtime.Schedules.PreviewNextAsync("0 0 9 * * MON-FRI", n: 5); +``` + +## Concepts + +### Wire-name prefix + +You construct schedules with a short, agent-scoped `name` (e.g. `"daily"`). +The SDK transparently prefixes the wire name to `{agent.name}-{name}` so +Conductor's org-wide uniqueness requirement is satisfied. The lifecycle +calls — `pause`, `resume`, `delete`, `get`, `runNow` — take the **wire +name**, which is what `list()` returns. `ScheduleInfo` exposes both: + +- `name`: the prefixed wire name (`"daily_digest-daily"`) +- `shortName` (`short_name` in Python): the user's original (`"daily"`) + +### Declarative reconciliation on deploy + +The `schedules=` argument to `deploy()` is **declarative**: + +| Argument | Effect | +|---|---| +| omitted / `None` / `null` | Leave existing schedules untouched. | +| `[]` (empty list) | Delete every schedule for this agent. 
| +| `[Schedule(...), ...]` | Upsert listed, **prune** any other schedule for this agent. | + +Reconciliation is scoped by `startWorkflowRequest.name == agent.name`, so +deploys never touch schedules belonging to other agents. + +### Schedule fields + +| Field | Required | Default | Notes | +|---|:---:|---|---| +| `name` | ✅ | — | Short identifier, unique within this agent. | +| `cron` | ✅ | — | 6-field Quartz cron (seconds-precision). | +| `timezone` (`zoneId` on the wire) | | `"UTC"` | IANA zone id. | +| `input` | | `{}` | Workflow input passed when the cron fires. | +| `catchup` (`runCatchupScheduleInstances` on the wire) | | `false` | Replay missed fires on resume. | +| `paused` | | `false` | Create in paused state (still computes `nextRunTime`). | +| `startAt`, `endAt` | | `null` | Optional epoch-ms execution window. | +| `description` | | `null` | Human-readable note. | + +### Errors + +All four SDKs surface the same typed errors: + +| Error | When | +|---|---| +| `ScheduleNameConflict` | Two schedules in the same agent share a `name`. Raised before any wire call. | +| `ScheduleNotFound` | `get`/`pause`/`resume`/`delete` on a missing wire name. | +| `InvalidCronExpression` | Server rejects the cron syntax (400 + body mentions "cron"). | +| `ScheduleError` (base) | Other scheduler API failures. | + +### `runNow` is non-blocking by default + +`runNow` fires the agent once with the schedule's stored input and returns +the workflow execution id **immediately**. To wait for completion, poll the +runtime for the workflow status, or use the language-specific opt-in: + +- Python: `schedules.run_now(name, wait=True)` returns the completed workflow. +- TS/Java/C#: poll `runtime`/Conductor workflow status using the returned id. + +This default exists because agents can run for minutes; blocking would be +the wrong behavior for buttons, scripts, and serverless contexts. 
+ +## UI + +The Scheduler page (`/scheduler`) lists all schedules across agents with an +**Agent** column and is filterable by query string: + +``` +/scheduler?workflowName=daily_digest +``` + +…shows only schedules belonging to `daily_digest`. Open a row to edit it +in the drawer; new schedules get the standard form (cron, timezone, input +JSON, window, *catch-up missed runs on resume*, *start paused*). + +## What's not covered yet + +These are future-phase items, not blockers: + +- **Skip-if-running** / **queue** overlap policies. Today Conductor fires + every tick — if your agent runs longer than the cadence, executions + overlap. If that's a concern, gate the agent with a workflow-level + idempotency check, or set a wider cadence. +- **Event / webhook / file / stream triggers.** Separate trigger types + under the same `triggers=[...]` umbrella; spec'd but not built. +- **Per-schedule retry / timeout overrides.** Inherits from the agent. +- **Optimistic concurrency on edits.** Conductor uses last-write-wins; no + ETag is exposed. + +## Recipes + +### Run a hello-world agent every 5 seconds + +```python +import time + +from agentspan.agents import Agent, AgentRuntime +from agentspan.agents.schedule import Schedule + +agent = Agent(name="hello", model="openai/gpt-4o-mini", + instructions="Say 'Hello, world!' and the current UTC time.") + +with AgentRuntime() as rt: + rt.deploy(agent, schedules=[Schedule(name="every-5s", cron="0/5 * * * * ?")]) + rt.serve(agent, blocking=False) + time.sleep(20) # let the scheduler fire 3–4 times +``` + +### Schedule the same agent on weekday and weekend cadences + +```python +deploy(agent, schedules=[ + Schedule(name="weekday", cron="0 0 9 * * MON-FRI", input={"mode": "daily"}), + Schedule(name="weekend", cron="0 0 11 * * SAT,SUN", input={"mode": "leisurely"}), +]) +``` + +Each fire gets its own input, its own execution history, and its own +pause/resume state. Pause the weekend cadence without touching weekday. 
+ +### Snapshot-style redeploy: replace whatever's there + +```python +deploy(agent, schedules=desired_list) # any missing schedules get pruned +``` + +### Leave schedules alone on redeploy (CI/CD pattern) + +```python +deploy(agent) # schedules untouched +# or, explicitly: +deploy(agent, schedules=None) +``` + +### Purge all schedules for an agent + +```python +deploy(agent, schedules=[]) +``` diff --git a/docs/developer-guides/agentspan/concepts/skills.mdx b/docs/developer-guides/agentspan/concepts/skills.mdx new file mode 100644 index 00000000..1b769fc1 --- /dev/null +++ b/docs/developer-guides/agentspan/concepts/skills.mdx @@ -0,0 +1,244 @@ +--- +slug: "/developer-guides/agentspan/concepts/skills" +title: "Skills" +description: "Load, register, run, and test agentskills.io skill folders with Agentspan" +--- + +# Skills + +Skills let you package a reusable agent as a folder. A skill can include +instructions, sub-agents, scripts, examples, references, and assets. Agentspan +loads that folder as a durable `Agent`, compiles it to Conductor, and makes each +component visible in the execution graph. + +## Skill Folder Layout + +At minimum, a skill contains `SKILL.md` with YAML frontmatter and markdown +instructions: + +```text +my-skill/ ++-- SKILL.md ++-- reviewer-agent.md ++-- scripts/ +| +-- lint.sh ++-- references/ +| +-- rubric.md ++-- examples/ ++-- assets/ +``` + +Agentspan discovers files by convention: + +| File or directory | Runtime behavior | +|---|---| +| `SKILL.md` | Main skill instructions and metadata | +| `*-agent.md` | Sub-agent workflow definitions | +| `scripts/*` | Worker tools executed by the local SDK or CLI worker process | +| `references/*`, `examples/*`, `assets/*` | Read on demand through `read_skill_file` | +| Other root files | Read on demand through `read_skill_file` | + +Script and file-read tools are real Conductor `SIMPLE` tasks. They are not +inlined into the prompt, and their input/output is visible in the UI. 
+ +## CLI Workflow + +Run a local skill folder: + +```bash +agentspan skill run ~/.claude/skills/dg "Review auth.py" --model openai/gpt-4o +``` + +Register a skill package on the server: + +```bash +agentspan skill register ~/.claude/skills/dg --model openai/gpt-4o --version 2026.05.21 +agentspan skill list --all-versions +agentspan skill get dg --version 2026.05.21 +``` + +Run a registered skill by name: + +```bash +agentspan skill run dg "Review auth.py" --model openai/gpt-4o --version 2026.05.21 +``` + +When the CLI runs a registered skill, it downloads the package into +`~/.agentspan/skills/<name>/<version>/files` and reuses that cached copy until +the server checksum changes. Registered cross-skill references are resolved +from the server registry at compile time. The CLI also downloads referenced +packages before execution so their script tools and packaged resources have +local workers. Dependency versions are pinned when the parent skill is +registered, so `parent@v1` continues to use the same child version even if that +child skill is updated later. + +Use `skill load` when you want to deploy a skill as an agent definition and run +it later through regular agent commands: + +```bash +agentspan skill load ~/.claude/skills/dg --model openai/gpt-4o +agentspan skill serve dg +agentspan agent run --name dg "Review the current PR" +``` + +## Workspace Context + +Code review and development skills often need project context. `skill run` and +`skill serve` expose the current directory as a read-only `workspace` root by +default. + +```bash +agentspan skill run code-review "Review the entire codebase" \ + --model openai/gpt-4o \ + --workspace . +``` + +Add more read-only roots with `--filesystem`: + +```bash +agentspan skill run code-review "Review code and docs" \ + --model openai/gpt-4o \ + --workspace . \ + --filesystem docs=./docs +``` + +The generated workspace tools can list files, read files, search text, and read +git status/diff. 
The CLI enforces configured root boundaries. + +## SDK Usage + +### Python + +```python +from agentspan.agents import Agent, AgentRuntime, agent_tool, skill + +reviewer = skill("~/.claude/skills/dg", model="openai/gpt-4o") + +with AgentRuntime() as runtime: + result = runtime.run(reviewer, "Review auth.py") + result.print_result() + +lead = Agent( + name="tech_lead", + model="openai/gpt-4o", + instructions="Use the review skill for code review requests.", + tools=[agent_tool(reviewer, description="Run code review")], +) +``` + +### TypeScript + +```typescript +import { Agent, AgentRuntime, agentTool, skill } from "@agentspan-ai/sdk"; + +const reviewer = skill("~/.claude/skills/dg", { + model: "openai/gpt-4o", +}); + +const runtime = new AgentRuntime(); +const result = await runtime.run(reviewer, "Review auth.py"); +result.printResult(); + +const lead = new Agent({ + name: "tech_lead", + model: "openai/gpt-4o", + instructions: "Use the review skill for code review requests.", + tools: [agentTool(reviewer, { description: "Run code review" })], +}); +``` + +### Java + +```java +import ai.agentspan.Agent; +import ai.agentspan.AgentTool; +import ai.agentspan.Agentspan; +import ai.agentspan.model.AgentResult; +import ai.agentspan.skill.Skill; + +import java.nio.file.Paths; +import java.util.List; + +Agent reviewer = Skill.skill(Paths.get(System.getProperty("user.home"), ".claude", "skills", "dg"), + "openai/gpt-4o"); + +AgentResult direct = Agentspan.run(reviewer, "Review auth.py"); +direct.printResult(); + +Agent lead = Agent.builder() + .name("tech_lead") + .model("openai/gpt-4o") + .instructions("Use the review skill for code review requests.") + .tools(List.of(AgentTool.from(reviewer, "Run code review"))) + .build(); +``` + +### .NET + +```csharp +using Agentspan; + +var reviewer = Skill.Load("~/.claude/skills/dg", "openai/gpt-4o"); + +await using var runtime = new AgentRuntime(); +var result = await runtime.RunAsync(reviewer, "Review auth.py"); 
+result.PrintResult(); + +var lead = new Agent("tech_lead") +{ + Model = "openai/gpt-4o", + Instructions = "Use the review skill for code review requests.", + Tools = [AgentTool.Create(reviewer, description: "Run code review")], +}; +``` + +## Server Registry And UI + +`agentspan skill register` uploads the skill folder as an immutable server-side +package. The CLI excludes generated directories, common secret files such as +`.env` and private keys, and any paths matched by `.agentspanignore`. The server +stores owner-scoped metadata separately from the package blob, validates size +and file-count limits, derives the runtime skill config from package contents, +and exposes the package in the UI under Skills. Registered package downloads are +verified against the server checksum before the CLI caches or executes them. + +Package storage is configurable: + +| Property | Default | Description | +|---|---|---| +| `agentspan.skills.package-store.type` | `filesystem` | `filesystem` for local deployments or `conductor-payload` for Conductor external payload storage | +| `agentspan.skills.storage.directory` | `${java.io.tmpdir}/agentspan/skills` | Owner-scoped metadata root and default filesystem package root parent | +| `agentspan.skills.package-store.filesystem.directory` | `${agentspan.skills.storage.directory}/packages` | Filesystem package blob directory | +| `agentspan.skills.max-package-bytes` | `52428800` | Maximum compressed upload size | +| `agentspan.skills.max-uncompressed-bytes` | `209715200` | Maximum expanded package size | +| `agentspan.skills.max-file-count` | `2000` | Maximum files per package | + +## Testing + +Skills should have deterministic end-to-end tests against a real Agentspan +server. 
The repository includes skill e2e suites for Python, TypeScript, Java, +and .NET: + +```bash +cd sdk/python && AGENTSPAN_SERVER_URL=http://localhost:6767/api \ + python3 -m pytest e2e/test_suite15_skills.py -q + +cd sdk/typescript && AGENTSPAN_SERVER_URL=http://localhost:6767/api \ + npm test -- tests/e2e/test_suite15_skills.test.ts + +cd sdk/java && AGENTSPAN_SERVER_URL=http://localhost:6767/api \ + ./gradlew :test --tests Suite15Skills -Pe2e + +cd sdk/csharp && AGENTSPAN_SERVER_URL=http://localhost:6767/api \ + dotnet test tests/AgentspanE2eTests/AgentspanE2eTests.csproj --filter Suite16_Skills +``` + +These tests prove skill loading, script workers, resource-file reads, +multi-agent skill files, `agent_tool` nesting, and real worker execution. + +## Security Boundaries + +`read_skill_file` can only read files that are part of the skill package. +Workspace tools can only read configured filesystem roots. Script workers run +locally, so skill authors should validate script arguments and avoid assuming +unbounded filesystem access. diff --git a/docs/developer-guides/agentspan/concepts/streaming.mdx b/docs/developer-guides/agentspan/concepts/streaming.mdx new file mode 100644 index 00000000..d3c1a388 --- /dev/null +++ b/docs/developer-guides/agentspan/concepts/streaming.mdx @@ -0,0 +1,170 @@ +--- +slug: "/developer-guides/agentspan/concepts/streaming" +title: "Streaming" +description: "stream(), stream_async(), AgentEvent types, and streaming with human-in-the-loop" +--- + +# Streaming + +Stream events from an agent execution as they happen — tool calls, thinking steps, handoffs, and the final output. 
+ +## Basic Streaming + +```python +from agentspan.agents import Agent, AgentRuntime + +agent = Agent(name="writer", model="openai/gpt-4o") + +with AgentRuntime() as runtime: + for event in runtime.stream(agent, "Write a haiku about Python"): + match event.type: + case "thinking": print(f"Thinking: {event.content}") + case "tool_call": print(f"Calling {event.tool_name}({event.args})") + case "tool_result": print(f"Result: {event.result}") + case "handoff": print(f"Delegating to {event.target}") + case "waiting": print("Waiting for human approval...") + case "guardrail_pass": print(f"Guardrail passed: {event.guardrail_name}") + case "guardrail_fail": print(f"Guardrail failed: {event.guardrail_name}") + case "message": print(f"Message: {event.content}") + case "error": print(f"Error: {event.content}") + case "done": print(f"\nFinal: {event.output}") +``` + +## Module-level stream() + +```python +from agentspan.agents import Agent, stream + +agent = Agent(name="writer", model="openai/gpt-4o") +for event in stream(agent, "Write a poem"): + if event.type == "done": + print(event.output) +``` + +## Async Streaming + +```python +from agentspan.agents import Agent, AgentRuntime + +agent = Agent(name="writer", model="openai/gpt-4o") + +async def main(): + with AgentRuntime() as runtime: + async for event in runtime.stream_async(agent, "Write a haiku"): + if event.type == "done": + print(event.output) +``` + +## AgentEvent Fields + +| Field | Type | Description | +|---|---|---| +| `type` | `str` | Event type (see below) | +| `content` | `Optional[str]` | Text content (thinking, message, error) | +| `tool_name` | `Optional[str]` | Tool name (tool_call, tool_result) | +| `args` | `Optional[Dict]` | Tool arguments (tool_call) | +| `result` | `Any` | Tool result (tool_result) | +| `target` | `Optional[str]` | Agent name for handoff events | +| `output` | `Any` | Final output (done event only) | +| `workflow_id` | `str` | Execution ID | +| `guardrail_name` | `Optional[str]` | 
Guardrail name (guardrail_pass, guardrail_fail) | + +## Event Types + +| Type | When | +|---|---| +| `thinking` | Agent internal reasoning step | +| `tool_call` | LLM is calling a tool | +| `tool_result` | Tool returned a result | +| `handoff` | Control passes to a sub-agent | +| `waiting` | Agent paused for human approval | +| `guardrail_pass` | A guardrail passed | +| `guardrail_fail` | A guardrail failed | +| `message` | Agent sent an intermediate message | +| `error` | An error occurred | +| `done` | Agent completed — `event.output` has the final result | + +## Streaming with Tools + +```python +from agentspan.agents import Agent, AgentRuntime, tool + +@tool +def search_web(query: str) -> str: + """Search the web for information.""" + return f"Results for: {query}" + +agent = Agent( + name="researcher", + model="openai/gpt-4o", + tools=[search_web], +) + +with AgentRuntime() as runtime: + for event in runtime.stream(agent, "What is agentspan?"): + match event.type: + case "tool_call": + print(f" → Calling {event.tool_name}({event.args})") + case "tool_result": + print(f" ← {event.result}") + case "done": + print(f"\nAnswer: {event.output}") +``` + +## Streaming with Human-in-the-Loop + +When an agent is waiting for approval, the `waiting` event fires. Handle it from a separate process using the execution ID: + +```python +from agentspan.agents import Agent, AgentRuntime, AgentHandle, tool + +@tool(approval_required=True) +def send_email(to: str, subject: str, body: str) -> dict: + """Send an email. Requires approval.""" + return {"sent": True} + +agent = Agent(name="emailer", model="openai/gpt-4o", tools=[send_email]) + +with AgentRuntime() as runtime: + workflow_id = None + for event in runtime.stream(agent, "Send a welcome email to alice@example.com"): + if event.type == "waiting": + workflow_id = event.workflow_id + print(f"Agent waiting for approval. 
Execution ID: {workflow_id}") + break + elif event.type == "done": + print(f"Done: {event.output}") + +# Later, from anywhere — approve or reject: +if workflow_id: + runtime2 = AgentRuntime() + runtime2.serve(agent, blocking=False) # start workers before reconnecting + handle = AgentHandle(workflow_id=workflow_id, runtime=runtime2) + handle.approve() +``` + +## Get Result from Stream + +To get the final `AgentResult` after streaming: + +```python +from agentspan.agents import Agent, AgentRuntime, start + +agent = Agent(name="writer", model="openai/gpt-4o") + +with AgentRuntime() as runtime: + handle = runtime.start(agent, "Write a report") + result = handle.stream().get_result() # Wait for completion + print(result.output) +``` + +## Filtering Events + +```python +with AgentRuntime() as runtime: + tool_events = [ + event + for event in runtime.stream(agent, "Research AI agents") + if event.type in ("tool_call", "tool_result") + ] +``` diff --git a/docs/developer-guides/agentspan/concepts/testing.mdx b/docs/developer-guides/agentspan/concepts/testing.mdx new file mode 100644 index 00000000..0ac133cc --- /dev/null +++ b/docs/developer-guides/agentspan/concepts/testing.mdx @@ -0,0 +1,349 @@ +--- +slug: "/developer-guides/agentspan/concepts/testing" +title: "Testing" +description: "Test agents without an LLM — mock_run, MockEvent, expect, record/replay, pytest" +--- + +# Testing + +Agentspan has a first-class testing module that lets you test agent behavior without making LLM API calls or running a server. Tests are deterministic, fast, and runnable in CI. 
+ +## Import + +```python +from agentspan.agents.testing import mock_run, MockEvent, expect, record, replay +``` + +## mock_run + +`mock_run` runs an agent with a scripted sequence of events instead of calling an LLM: + +```python +from agentspan.agents import Agent, tool +from agentspan.agents.testing import mock_run, MockEvent, expect + +@tool +def search_web(query: str) -> str: + """Search the web.""" + return "results" + +agent = Agent( + name="research_bot", + model="openai/gpt-4o", + tools=[search_web], +) + +result = mock_run( + agent, + "What is agentspan?", + events=[ + MockEvent.thinking("I should search for information about agentspan."), + MockEvent.tool_call("search_web", {"query": "agentspan Python agent runtime"}), + MockEvent.tool_result("search_web", "Agentspan is an open source Python runtime for AI agents."), + MockEvent.done("Agentspan is an open source Python runtime for building AI agents."), + ] +) + +expect(result).completed().output_contains("Agentspan").used_tool("search_web") +``` + +`mock_run` signature: + +```python +mock_run( + agent: Agent, + prompt: str, + events: list[MockEvent], + context_state: dict | None = None, # initial ToolContext.state +) -> AgentResult +``` + +## MockEvent factory methods + +| Method | Description | +|--------|-------------| +| `MockEvent.thinking(content)` | Agent internal reasoning step | +| `MockEvent.tool_call(tool_name, args)` | LLM calls a tool with given arguments | +| `MockEvent.tool_result(tool_name, result)` | Tool returns a result | +| `MockEvent.message(content)` | Agent sends an intermediate message | +| `MockEvent.handoff(target)` | Control passes to a named agent | +| `MockEvent.guardrail_pass(name)` | A guardrail passes | +| `MockEvent.guardrail_fail(name, message)` | A guardrail fails (triggers retry/fix/etc.) 
| +| `MockEvent.waiting(content)` | Agent pauses for human approval | +| `MockEvent.error(message)` | An error occurs | +| `MockEvent.done(output)` | Agent completes with this output | + +## expect() fluent assertions + +Chain assertions on the result: + +```python +expect(result) + .completed() # status is COMPLETED + .output_contains("Paris") # output contains substring + .output_matches(r"capital.*France") # output matches regex + .used_tool("search_web") # tool was called at least once + .used_tool("search_web", args={"query": "capital of France"}) # with specific args + .max_turns(5) # used at most 5 turns + .no_errors() # no error events + .guardrail_passed("no_pii") # named guardrail passed +``` + +All assertions raise `AssertionError` with descriptive messages on failure. You can also assert failure cases: + +```python +expect(result).failed() # status is FAILED +expect(result).guardrail_failed("safety_check") # named guardrail failed +``` + +## Function-based assertions + +For more complex checks, inspect the result directly: + +```python +result = mock_run(agent, "prompt", events=[...]) + +# Check output — result.output is always a dict {'result': str, 'finishReason': str} +assert "expected phrase" in result.output['result'] +assert result.status == "COMPLETED" + +# Check tool calls +tool_calls = [t for t in result.tool_calls if t['name'] == "search_web"] +assert len(tool_calls) == 1 +assert tool_calls[0]['args']["query"] == "expected query" + +# Check structured output (parse JSON from result.output['result']) +import json +data = json.loads(result.output['result']) +assert data["city"] == "San Francisco" +``` + +## Testing tool side effects + +Test that tools are called with correct arguments and that state is managed properly: + +```python +from agentspan.agents import Agent, tool +from agentspan.agents.testing import mock_run, MockEvent, expect + +@tool +def send_email(to: str, subject: str, body: str) -> dict: + """Send an email.""" + return {"sent": 
True, "to": to} + +agent = Agent( + name="email_bot", + model="openai/gpt-4o", + tools=[send_email], +) + +result = mock_run( + agent, + "Send a welcome email to alice@example.com", + events=[ + MockEvent.tool_call("send_email", { + "to": "alice@example.com", + "subject": "Welcome!", + "body": "Welcome to our platform...", + }), + MockEvent.tool_result("send_email", {"sent": True, "to": "alice@example.com"}), + MockEvent.done("Email sent successfully to alice@example.com."), + ] +) + +expect(result).completed().used_tool("send_email", args={ + "to": "alice@example.com", + "subject": "Welcome!", +}) +``` + +## Testing HITL flows + +Test human-in-the-loop interactions: + +```python +from agentspan.agents import Agent, tool +from agentspan.agents.testing import mock_run, MockEvent, expect + +@tool(approval_required=True) +def delete_file(path: str) -> dict: + """Delete a file. Requires approval.""" + return {"deleted": True, "path": path} + +agent = Agent(name="file_manager", model="openai/gpt-4o", tools=[delete_file]) + +# Test the approval path +result = mock_run( + agent, + "Delete /tmp/old-logs.txt", + events=[ + MockEvent.tool_call("delete_file", {"path": "/tmp/old-logs.txt"}), + MockEvent.waiting("Agent wants to delete /tmp/old-logs.txt. Approve?"), + MockEvent.tool_result("delete_file", {"deleted": True, "path": "/tmp/old-logs.txt"}), + MockEvent.done("File /tmp/old-logs.txt has been deleted."), + ] +) +expect(result).completed().used_tool("delete_file") + +# Test the rejection path +result = mock_run( + agent, + "Delete /etc/hosts", + events=[ + MockEvent.tool_call("delete_file", {"path": "/etc/hosts"}), + MockEvent.waiting("Agent wants to delete /etc/hosts. 
Approve?"), + MockEvent.done("I was not able to delete /etc/hosts — the action was denied."), + ] +) +expect(result).completed().output_contains("denied") +``` + +## Testing multi-agent pipelines + +Test sequential pipelines: + +```python +researcher = Agent(name="researcher", model="openai/gpt-4o", + tools=[search_web], instructions="Research the topic.") +writer = Agent(name="writer", model="openai/gpt-4o", + instructions="Write an article.") + +pipeline = researcher >> writer + +result = mock_run( + pipeline, + "Write about Python asyncio", + events=[ + # Researcher turn + MockEvent.tool_call("search_web", {"query": "Python asyncio overview"}), + MockEvent.tool_result("search_web", "asyncio is Python's async I/O framework..."), + MockEvent.handoff("writer"), + # Writer turn + MockEvent.done("# Python asyncio\nasyncio enables concurrent code using async/await..."), + ] +) + +expect(result).completed().used_tool("search_web") +``` + +## Record and replay + +Record a real execution (with an actual LLM) and replay it deterministically in tests: + +```python +from agentspan.agents.testing import record, replay + +# Record a real run (calls LLM) +recording = record(agent, "What's the capital of France?") +recording.save("tests/fixtures/capital_query.json") + +# Replay it (no LLM, no server) +result = replay("tests/fixtures/capital_query.json") +expect(result).completed().output_contains("Paris") +``` + +This is useful for: +- Capturing known-good behavior as regression tests +- Running existing test cases against new model versions +- Debugging: record a failing production execution, replay locally + +## pytest integration + +```python +import pytest +from agentspan.agents.testing import mock_run, MockEvent, expect + +# Mark as unit test — no LLM, no server, fast +class TestWeatherAgent: + def test_weather_query(self, weather_agent): + result = mock_run( + weather_agent, + "Weather in NYC?", + events=[ + MockEvent.tool_call("get_weather", {"city": "New York"}), + 
MockEvent.tool_result("get_weather", {"temp_f": 65, "condition": "Cloudy"}), + MockEvent.done("New York is currently 65°F and cloudy."), + ] + ) + expect(result).completed().output_contains("65").used_tool("get_weather") + + def test_handles_unknown_city(self, weather_agent): + result = mock_run( + weather_agent, + "Weather in Atlantis?", + events=[ + MockEvent.tool_call("get_weather", {"city": "Atlantis"}), + MockEvent.tool_result("get_weather", {"error": "City not found"}), + MockEvent.done("I couldn't find weather data for Atlantis."), + ] + ) + expect(result).completed().output_contains("couldn't find") + +# Mark as integration test — calls real LLM, requires server +@pytest.mark.integration +class TestWeatherAgentIntegration: + def test_real_weather_query(self, weather_agent): + from agentspan.agents import run + result = run(weather_agent, "Weather in San Francisco?") + assert result.status == "COMPLETED" + assert len(result.output['result']) > 10 +``` + +Run unit tests only: +```bash +pytest tests/ -m "not integration" +``` + +Run integration tests: +```bash +pytest tests/ -m integration +``` + +## Real Server SDK E2E Tests + +Runtime features that cross the SDK/server boundary should have deterministic +e2e tests against a real Agentspan server. 
Skills are covered this way in every +SDK: + +```bash +cd sdk/python && AGENTSPAN_SERVER_URL=http://localhost:6767/api \ + python3 -m pytest e2e/test_suite15_skills.py -q + +cd sdk/typescript && AGENTSPAN_SERVER_URL=http://localhost:6767/api \ + npm test -- tests/e2e/test_suite15_skills.test.ts + +cd sdk/java && AGENTSPAN_SERVER_URL=http://localhost:6767/api \ + ./gradlew :test --tests Suite15Skills -Pe2e + +cd sdk/csharp && AGENTSPAN_SERVER_URL=http://localhost:6767/api \ + dotnet test tests/AgentspanE2eTests/AgentspanE2eTests.csproj --filter Suite16_Skills +``` + +Skill e2e tests assert deterministic script output, resource reads, +multi-agent skill files, `agent_tool` nesting, `workerNames` propagation, and +real worker execution. + +## Evaluating output correctness + +For evaluating LLM output quality (not just structure), use `CorrectnessEval`: + +```python +from agentspan.agents import AgentRuntime +from agentspan.agents.testing import CorrectnessEval, EvalCase + +eval_runner = CorrectnessEval(runtime=AgentRuntime()) + +cases = [ + EvalCase( + name="capital_of_france", + agent=agent, + prompt="What is the capital of France?", + expect_output_contains=["Paris"], + expect_status="COMPLETED", + ), +] + +suite = eval_runner.run(cases) +suite.print_summary() +assert suite.all_passed +``` diff --git a/docs/developer-guides/agentspan/concepts/tools.mdx b/docs/developer-guides/agentspan/concepts/tools.mdx new file mode 100644 index 00000000..ab5715f3 --- /dev/null +++ b/docs/developer-guides/agentspan/concepts/tools.mdx @@ -0,0 +1,302 @@ +--- +slug: "/developer-guides/agentspan/concepts/tools" +title: "Tools" +description: "Building tools with @tool, http_tool, mcp_tool, api_tool — plus credentials and code execution" +--- + +# Tools + +Tools are functions the LLM can call during execution. 
+ +**Agentspan ships built-in tools for the most common cases — HTTP endpoints, full OpenAPI specs, and MCP servers — so you don't have to write code for them.** When you need custom logic, decorate any Python function with `@tool` and it's available to the LLM automatically. + +| Type | Who writes the code | How it runs | Use when | +|---|---|---|---| +| `@tool` | You | Python worker process | Custom logic, any Python library | +| `http_tool()` | Nobody — configure only | Server-side HTTP task | Single API endpoint | +| `api_tool()` | Nobody — configure only | Server-side, auto-discovered | Full OpenAPI/Swagger/Postman spec | +| `mcp_tool()` | Nobody — configure only | Server-side MCP task | MCP server | + +For `http_tool`, `api_tool`, and `mcp_tool`, you provide a URL and optionally credentials. Agentspan handles execution entirely on the server — no worker process, no Python code, nothing to maintain. + +## `@tool` — Custom Python Functions + +Decorate any Python function to make it a tool: + +```python +from agentspan.agents import Agent, AgentRuntime, tool + +@tool +def get_weather(city: str) -> dict: + """Get current weather for a city.""" + return {"city": city, "temp": 72, "condition": "Sunny"} + +agent = Agent(name="assistant", model="openai/gpt-4o", tools=[get_weather]) + +with AgentRuntime() as runtime: + result = runtime.run(agent, "What's the weather in NYC?") + result.print_result() +``` + +The JSON schema for the LLM is auto-generated from type hints and the docstring. The function also works as a normal Python function: `get_weather("NYC")` returns the dict directly. 
+ +### Options + +```python +@tool(name="custom_name", approval_required=True, timeout_seconds=60) +def dangerous_action(target: str) -> dict: + """Do something that requires human approval.""" + return {"done": True} +``` + +| Parameter | Default | Description | +|---|---|---| +| `name` | function name | Override the tool name | +| `approval_required` | `False` | Insert a wait task for human approval before execution | +| `timeout_seconds` | `None` | Maximum execution time | + +### ToolContext — shared state and dependencies + +Add a `context: ToolContext` parameter to access execution context and shared state: + +```python +from agentspan.agents import tool, ToolContext + +@tool +def query_database(query: str, context: ToolContext) -> dict: + """Run a database query.""" + db = context.dependencies["db"] + user_id = context.dependencies["user_id"] + return db.execute(query, user=user_id) + +@tool +def add_item(item: str, context: ToolContext) -> str: + """Add an item to the shared list.""" + items = context.state.get("items", []) + items.append(item) + context.state["items"] = items + return f"Added '{item}'. List has {len(items)} items." +``` + +`ToolContext` fields: + +| Field | Type | Description | +|---|---|---| +| `workflow_id` | `str` | Execution ID | +| `session_id` | `str` | Session ID | +| `agent_name` | `str` | Name of the executing agent | +| `dependencies` | `Dict` | Dependencies injected via `Agent(dependencies=...)` | +| `state` | `Dict` | Mutable shared state across tool calls in the same execution | +| `metadata` | `Dict` | Metadata from the agent | + +The `context` parameter is excluded from the tool's JSON schema — the LLM never sees it. + +Pass dependencies at agent creation: + +```python +agent = Agent( + name="bot", + model="openai/gpt-4o", + tools=[query_database], + dependencies={"db": my_database, "user_id": "u-123"}, +) +``` + +## `http_tool()` — HTTP Endpoints + +Define a single HTTP endpoint as a tool. 
Executes entirely server-side — no worker process needed: + +```python +from agentspan.agents import http_tool + +weather_api = http_tool( + name="get_weather", + description="Get weather for a city", + url="https://api.weather.com/v1/current", + method="GET", + headers={"Authorization": "Bearer token"}, + input_schema={ + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, +) + +agent = Agent(name="assistant", model="openai/gpt-4o", tools=[weather_api]) +``` + +## `api_tool()` — OpenAPI Auto-Discovery + +Point to any OpenAPI, Swagger, or Postman spec. All endpoints are auto-discovered and exposed as tools. The LLM filters to the most relevant ones at runtime: + +```python +from agentspan.agents import api_tool + +stripe = api_tool( + url="https://api.stripe.com/openapi.json", + headers={"Authorization": "Bearer ${STRIPE_KEY}"}, + credentials=["STRIPE_KEY"], + max_tools=20, # LLM auto-filters 300+ ops to top 20 most relevant +) + +agent = Agent( + name="billing_assistant", + model="openai/gpt-4o", + tools=[stripe], +) + +with AgentRuntime() as runtime: + result = runtime.run(agent, "Create a Stripe customer for alice@example.com") + result.print_result() +``` + +| Parameter | Description | +|---|---| +| `url` | URL to an OpenAPI/Swagger JSON spec, or a Postman collection | +| `headers` | HTTP headers sent with every request (use `${CRED_KEY}` for credentials) | +| `credentials` | List of credential keys to inject (see below) | +| `max_tools` | Maximum tools to expose to the LLM (default: all) | + +## `mcp_tool()` — MCP Servers + +Connect to an MCP server. Tools are auto-discovered at runtime. 
Executes server-side: + +```python +from agentspan.agents import mcp_tool + +github = mcp_tool( + server_url="http://localhost:6767/mcp", + name="github", + description="GitHub operations", +) + +agent = Agent(name="dev_assistant", model="openai/gpt-4o", tools=[github]) +``` + +## Credential Management + +Store secrets on the server once. Tools resolve them automatically at runtime — no `.env` files, no hardcoded keys, no secrets in git. + +**Step 1: Store credentials** + +```bash +agentspan credentials set GITHUB_TOKEN ghp_xxxxxxxxxxxx +agentspan credentials set SEARCH_API_KEY xxx-your-key +``` + +Credentials are encrypted at rest (AES-256-GCM). + +**Step 2: Declare which credentials a tool needs** + +```python +from agentspan.agents import tool, get_credential + +# Option A: isolated subprocess (credentials available as env vars) +@tool(credentials=["GITHUB_TOKEN"]) +def list_repos(username: str) -> dict: + """List GitHub repos.""" + import os + token = os.environ["GITHUB_TOKEN"] # Auto-injected + return {"repos": ["repo1", "repo2"]} + +# Option B: in-process (use get_credential) +@tool(isolated=False, credentials=["SEARCH_API_KEY"]) +def search(query: str) -> dict: + """Search using API key.""" + key = get_credential("SEARCH_API_KEY") # Resolved from server + return {"results": ["result1"]} +``` + +**With HTTP and MCP tools:** + +```python +# HTTP tool: server substitutes ${KEY} in headers at runtime +api = http_tool( + name="weather_api", description="Get weather", + url="https://api.weather.com/v1/current", + headers={"Authorization": "Bearer ${WEATHER_KEY}"}, + credentials=["WEATHER_KEY"], +) + +# MCP tool: credentials passed to MCP server connection +github = mcp_tool( + server_url="http://localhost:3001/mcp", + credentials=["GITHUB_TOKEN"], +) +``` + +**Agent-level credentials (shared with all tools):** + +```python +agent = Agent( + name="github_helper", + model="openai/gpt-4o", + tools=[list_repos, search], + credentials=["GITHUB_TOKEN"], +) +``` + +## 
Code Execution + +Agents can execute code in a sandboxed environment: + +```python +from agentspan.agents import Agent, AgentRuntime +from agentspan.agents.code_executor import DockerCodeExecutor + +executor = DockerCodeExecutor(image="python:3.12-slim", timeout=30) + +agent = Agent( + name="coder", + model="openai/gpt-4o", + tools=[executor.as_tool()], + instructions="Write and execute Python code to solve problems.", +) + +with AgentRuntime() as runtime: + result = runtime.run(agent, "Calculate the first 20 Fibonacci numbers.") + result.print_result() +``` + +Four built-in execution environments: + +| Executor | Use case | +|---|---| +| `DockerCodeExecutor` | Docker container (isolated, recommended for production) | +| `LocalCodeExecutor` | Local subprocess (fast, no isolation) | +| `JupyterCodeExecutor` | Jupyter kernel (stateful, good for data science) | +| `ServerlessCodeExecutor` | Cloud function (scales to zero) | + +## Human Approval + +Mark any tool to require human approval before it runs: + +```python +@tool(approval_required=True) +def delete_production_data(table: str) -> dict: + """Delete data from a production table. Requires approval.""" + return {"deleted": True, "table": table} +``` + +The execution pauses when the LLM calls this tool. Use `handle.approve()` or `handle.reject(reason)` to resume. See [Human-in-the-Loop examples](/developer-guides/agentspan/examples/human-in-the-loop). + +## Circuit Breaker + +Tools that fail 3 consecutive times are automatically disabled. The LLM is told to try a different approach. On a successful call, the error counter resets. 
+ +## Mixing Tool Types + +Agents can use all tool types together: + +```python +agent = Agent( + name="assistant", + model="openai/gpt-4o", + tools=[ + get_weather, # @tool Python function + weather_api, # http_tool + stripe, # api_tool (OpenAPI) + github, # mcp_tool + ], +) +``` diff --git a/docs/developer-guides/agentspan/examples/crash-resume.mdx b/docs/developer-guides/agentspan/examples/crash-resume.mdx new file mode 100644 index 00000000..35598b37 --- /dev/null +++ b/docs/developer-guides/agentspan/examples/crash-resume.mdx @@ -0,0 +1,265 @@ +--- +slug: "/developer-guides/agentspan/examples/crash-resume" +title: "Crash and resume" +description: "How Agentspan keeps agents running through process crashes, restarts, and reconnects" +--- + +# Crash and resume + +**The problem:** Most agent frameworks run the agent loop inside your process. If your process crashes — or you deploy a new version, restart a pod, or lose a network connection — the agent's in-flight work is gone. + +**How Agentspan solves it:** The agent loop runs on the Agentspan server, not in your process. Your worker registers tools and polls for tasks. The agent state lives on the server. Your process can die and restart freely. + +--- + +## How it works + +``` +Your process Agentspan server +────────────── ──────────────────────────── +start(agent, prompt) ──► Creates workflow, starts agent loop + LLM call → tool scheduled → worker executes +Worker polls tasks ◄────── Dispatch: run_my_tool(input) +Worker returns result ────► Continue agent loop + ... +process crashes Agent loop continues on server + Next tool call is scheduled +Worker restarts ◄────── Task is still queued, picked up on reconnect + Agent loop resumes from where it was +``` + +The Conductor engine underlying Agentspan has durable execution built in — the same engine that powers workflows at Netflix, LinkedIn, and Tesla. 
+ +--- + +## Try it: two-script walkthrough + +This example uses two scripts to show the full crash-resume cycle. Run them in order. + +:::info Prerequisites +- A running Agentspan server: `agentspan server start` +- Environment variables set: + +```bash +export OPENAI_API_KEY=sk-... # or ANTHROPIC_API_KEY if using Anthropic +``` + +::: +### Step 1 — Start the agent (`start.py`) + +Run this script. It starts the agent, prints the execution ID, checks status, then exits. The process ending is intentional — this simulates the crash. + +```python +# start.py +from agentspan.agents import Agent, tool, start + +@tool +def analyze_chunk(chunk_id: int, data: str) -> dict: + """Analyze a data chunk and return metrics.""" + return {"chunk_id": chunk_id, "processed": True, "metrics": {"count": len(data)}} + +@tool +def aggregate_results(results: list) -> dict: + """Aggregate metrics from all chunks into a final report.""" + return {"total_chunks": len(results), "summary": "Analysis complete"} + +agent = Agent( + name="data_analysis_agent", + model="openai/gpt-4o-mini", + tools=[analyze_chunk, aggregate_results], + instructions="""Analyze data in chunks using analyze_chunk, then aggregate with aggregate_results. + Process each chunk sequentially. Report progress as you go.""", +) + +handle = start(agent, "Analyze customer feedback dataset: chunk 1, chunk 2, chunk 3") +print(f"execution_id: {handle.execution_id}") +# execution_id: ← copy this + +status = handle.get_status() +print(f"Status: {status.status}") # RUNNING +# Script exits here — workflow keeps running on the server +``` + +### Step 2 — Reconnect (`reconnect.py`) + +Paste the execution ID from Step 1 and run this script. It re-registers the tool workers, reconnects to the in-flight workflow, and streams the remaining events to completion. 
+ +```python +# reconnect.py +from agentspan.agents import Agent, tool, AgentRuntime, AgentHandle + +# Same agent and tools as start.py — workers need to be registered to handle tool calls +@tool +def analyze_chunk(chunk_id: int, data: str) -> dict: + """Analyze a data chunk.""" + return {"chunk_id": chunk_id, "processed": True, "metrics": {"count": len(data)}} + +@tool +def aggregate_results(results: list) -> dict: + """Aggregate metrics from all chunks into a final report.""" + return {"total_chunks": len(results), "summary": "Analysis complete"} + +agent = Agent( + name="data_analysis_agent", + model="openai/gpt-4o-mini", + tools=[analyze_chunk, aggregate_results], + instructions="...", +) + +EXECUTION_ID = "" # paste from Step 1 + +with AgentRuntime() as runtime: + # Register workers first — the server may already have tool tasks queued + runtime.serve(agent, blocking=False) + + handle = AgentHandle(execution_id=EXECUTION_ID, runtime=runtime) + print(f"Reconnected. Status: {handle.get_status().status}") # RUNNING + + # Stream remaining events to completion + for event in handle.stream(): + if event.type == "tool_call": + print(f"→ {event.tool_name}({event.args})") + elif event.type == "tool_result": + print(f"← {event.tool_name}: {event.result}") + elif event.type == "done": + print(f"\nResult: {event.output['result']}") + break + +# The agent never noticed the process crashed. +# It was running on the server the whole time. +``` + +--- + +## Reconnecting after a crash + +The execution ID is all you need to reconnect from any process, any machine. 
+ +**If your agent has no `@tool` functions** (LLM-only agent), reconnecting is straightforward: + +```python +from agentspan.agents import AgentRuntime, AgentHandle + +with AgentRuntime() as runtime: + handle = AgentHandle(execution_id="<execution-id>", runtime=runtime) + result = handle.stream().get_result() + print(result.output["result"]) # output is a dict: {"result": "...", "finishReason": "STOP", ...} +``` + +**If your agent has `@tool` functions**, the reconnecting process must also register those workers — otherwise the workflow will hang waiting for a worker that never arrives. See Step 2 above for the full pattern. + +--- + +## Checking status from the CLI + +```bash +agentspan agent status <execution-id> +``` + +--- + +## Production pattern: separate worker from invoker + +In production, keep the worker process (which handles tool calls) separate from the invoker (which starts runs): + +```python +# worker.py — runs continuously, handles tool execution +from agentspan.agents import Agent, tool, AgentRuntime + +@tool +def analyze_chunk(chunk_id: int, data: str) -> dict: + """Analyze a data chunk and return metrics.""" + return {"chunk_id": chunk_id, "processed": True} + +agent = Agent( + name="data_analysis_agent", + model="openai/gpt-4o-mini", + tools=[analyze_chunk], + instructions="...", +) + +with AgentRuntime() as runtime: + runtime.serve(agent) # registers workers and blocks +``` + +```python +# invoker.py — runs once per job (REST endpoint, cron, CLI, etc.) 
+from agentspan.agents import Agent, tool, start + +@tool +def analyze_chunk(chunk_id: int, data: str) -> dict: + """Analyze a data chunk and return metrics.""" + return {"chunk_id": chunk_id, "processed": True} + +agent = Agent( + name="data_analysis_agent", + model="openai/gpt-4o-mini", + tools=[analyze_chunk], + instructions="...", +) + +handle = start(agent, "Analyze the dataset") +print(f"Job ID: {handle.execution_id}") +# Store this ID — use it to reconnect or check status later +``` + +--- + +## Idempotency: never re-process completed work + +Use `get_status()` to skip work that's already done before starting a new run: + +```python +from agentspan.agents import Agent, start, AgentRuntime, AgentHandle + +def ensure_analysis_running(execution_id: str | None, agent, prompt: str): + """Start a new run or reconnect to an existing one.""" + if execution_id: + with AgentRuntime() as runtime: + handle = AgentHandle(execution_id=execution_id, runtime=runtime) + status = handle.get_status() + if status.is_complete: + print("Already done") + return handle + if status.is_running or status.is_waiting: + print(f"Still running: {status.status}") + return handle + # Start fresh + return start(agent, prompt) +``` + +--- + +## Full stream with reconnect + +Stream events from a run — whether it's new or already in progress: + +```python +from agentspan.agents import Agent, tool, AgentRuntime, AgentHandle + +# Re-define (or import) agent and tools so workers can be registered +@tool +def analyze_chunk(chunk_id: int, data: str) -> dict: + """Analyze a data chunk and return metrics.""" + return {"chunk_id": chunk_id, "processed": True} + +agent = Agent( + name="data_analysis_agent", + model="openai/gpt-4o-mini", + tools=[analyze_chunk], + instructions="...", +) + +with AgentRuntime() as runtime: + runtime.serve(agent, blocking=False) + handle = AgentHandle(execution_id="", runtime=runtime) + + for event in handle.stream(): + if event.type == "tool_call": + print(f"→ 
{event.tool_name}({event.args})") + elif event.type == "tool_result": + print(f"← {event.tool_name}: {event.result}") + elif event.type == "done": + print(f"\nResult: {event.output['result']}") # output is a dict: {"result": "...", "finishReason": "STOP", ...} + break +``` diff --git a/docs/developer-guides/agentspan/examples/document-processor.mdx b/docs/developer-guides/agentspan/examples/document-processor.mdx new file mode 100644 index 00000000..10b60ad5 --- /dev/null +++ b/docs/developer-guides/agentspan/examples/document-processor.mdx @@ -0,0 +1,278 @@ +--- +slug: "/developer-guides/agentspan/examples/document-processor" +title: "Batch document processor" +description: "Process thousands of documents with crash recovery and progress tracking" +--- + +# Build a Batch Document Processor + +Use this example to review a large set of contracts in parallel. Each contract is processed by its own agent run — extract key terms, identify risks, and save a structured review to disk. If the process crashes mid-run, restart it and it picks up exactly where it left off. + +## How it works + +For each contract: + +1. Agent reads the contract text +2. Extracts parties, dates, payment terms, liability, and IP ownership +3. Identifies specific risks and assigns a risk level +4. Saves a structured JSON review to disk + +Contracts run in parallel on the server. Already-completed ones are skipped on restart. + +:::info Prerequisites +- A running Agentspan server: `agentspan server start` +- Environment variables set: + +```bash +export AGENTSPAN_SERVER_URL=http://localhost:6767/api +export ANTHROPIC_API_KEY=sk-ant-... +``` + +::: +## Full code + +:::note +The `CONTRACTS` dict uses hardcoded text for demonstration. Replace it with file reads or database queries for production use. 
+ +::: +```python +from agentspan.agents import Agent, tool, start +from pydantic import BaseModel, Field +from pathlib import Path +from enum import Enum +import json +import re + +# ── Output schema ───────────────────────────────────────────────────────────── + +class RiskLevel(str, Enum): + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + +class ContractReview(BaseModel): + file: str + contract_type: str + parties: list[str] + effective_date: str | None + expiry_date: str | None + auto_renewal: bool + payment_terms: str | None + liability_cap: str | None + ip_ownership: str | None + risks: list[str] = Field(default_factory=list) + risk_level: RiskLevel + action_required: str | None + +# ── Contracts ───────────────────────────────────────────────────────────────── + +CONTRACTS = { + "acme-nda.txt": """ +MUTUAL NON-DISCLOSURE AGREEMENT +Parties: Acme Corp and Beta Inc. +Effective Date: 2026-01-15 +Term: 3 years. Auto-renews annually unless terminated with 30 days notice. +Liability: Each party's liability is capped at $50,000. +IP: All shared information remains the property of the disclosing party. +Governing Law: State of California. +""", + "vendor-msa.txt": """ +MASTER SERVICE AGREEMENT +Parties: TechCorp Ltd (Vendor) and GlobalCo Inc (Client) +Effective Date: 2026-02-01 +Term: 1 year. No auto-renewal. +Payment: Net 60 days. Late fees of 5% per month on overdue balances. +Liability: Vendor liability capped at 1x monthly fees paid. +IP: All work product is owned exclusively by Vendor unless explicitly transferred. +Termination: Either party may terminate with 90 days notice. +""", + "saas-agreement.txt": """ +SOFTWARE AS A SERVICE AGREEMENT +Parties: CloudSoft Inc (Provider) and StartupXYZ (Customer) +Effective Date: 2026-03-01 +Expiry: 2027-03-01. Auto-renews for successive 1-year terms. +Payment: $5,000/month, billed annually in advance. No refunds. +Liability: Provider liability capped at $500. No consequential damages. 
+IP: Provider retains all rights to the software and any improvements. +Data: Provider may use anonymized customer data for product improvement. +Governing Law: Delaware. +""", +} + +# ── Tool ────────────────────────────────────────────────────────────────────── + +@tool +def read_contract(filename: str) -> str: + """Read a contract by filename and return its text content.""" + if filename not in CONTRACTS: + return f"Error: contract '{filename}' not found." + return CONTRACTS[filename] + +# ── Agent ───────────────────────────────────────────────────────────────────── + +contract_reviewer = Agent( + name="contract_reviewer", + model="anthropic/claude-sonnet-4-6", + output_type=ContractReview, + tools=[read_contract], + instructions="""You are a paralegal specializing in technology contracts. + +For each contract: +1. Read the full text using read_contract +2. Extract all required fields into ContractReview +3. List specific risks (unusual clauses, missing protections, unfavorable terms) +4. Assign a risk level: low (standard terms), medium (some concerns), high (legal review needed) + +Be precise about dates and monetary amounts. 
+If a field is not present in the contract, use null — do not guess.""", +) + +# ── Output parsing ──────────────────────────────────────────────────────────── + +def extract_review(raw_output: dict) -> dict | None: + text = raw_output.get("result", "") + if isinstance(text, dict): + return text + match = re.search(r"```json\s*(\{.*?\})\s*```", text, re.DOTALL) + if match: + return json.loads(match.group(1)) + return None + +# ── Batch runner ────────────────────────────────────────────────────────────── + +def process_contracts(max_concurrent: int = 3): + filenames = list(CONTRACTS.keys()) + print(f"Found {len(filenames)} contracts to process") + + # Skip already-completed (idempotent restarts) + reviews_dir = Path("reviews") + completed = {p.stem for p in reviews_dir.glob("*.json")} if reviews_dir.exists() else set() + pending = [f for f in filenames if Path(f).stem not in completed] + print(f"{len(completed)} already done, {len(pending)} remaining\n") + + for i in range(0, len(pending), max_concurrent): + batch = pending[i:i + max_concurrent] + + handles = { + filename: start(contract_reviewer, filename) + for filename in batch + } + + for filename, handle in handles.items(): + try: + result = handle.stream().get_result() + review = extract_review(result.output) + + if review is None: + print(f" ✗ {filename} could not parse output") + continue + + reviews_dir.mkdir(exist_ok=True) + out = reviews_dir / (Path(filename).stem + ".json") + out.write_text(json.dumps(review, indent=2)) + + print(f" ✓ {filename} [risk: {review.get('risk_level', '?')}]") + except Exception as e: + print(f" ✗ {filename} FAILED: {e}") + +if __name__ == "__main__": + process_contracts() +``` + +## Run it + +Save the file as `batch_processor.py`, and run it: + +```bash +python batch_processor.py +``` + +Output: + +``` +Found 3 contracts to process +0 already done, 3 remaining + + ✓ acme-nda.txt [risk: medium] + ✓ vendor-msa.txt [risk: high] + ✓ saas-agreement.txt [risk: high] +``` + +Reviews 
are saved to `reviews/` as JSON files. Run it again — already-completed contracts are skipped: + +``` +Found 3 contracts to process +3 already done, 0 remaining +``` + +## What this demonstrates + +**Parallel execution**: `start()` launches each contract as a separate workflow. All contracts in a batch run concurrently on the server without blocking each other. + +**Idempotent restarts**: Before each run, completed reviews are checked in `reviews/`. If the process crashes or is killed, restart it and it picks up exactly where it left off. Nothing is re-processed. + +**Structured output**: `output_type=ContractReview` enforces a typed schema. Every review has the same fields regardless of contract length or format. + +**Per-contract history**: Every execution is stored on the server with its full trace. Open `http://localhost:6767` to inspect any run, see exactly what the agent read, and audit the reasoning. + +## Example modifications + +### Load from real files + +Replace the `CONTRACTS` dict and `read_contract` tool to load from disk: + +```python +@tool +def read_contract(filename: str) -> str: + """Read a contract file and return its text content.""" + return Path(filename).read_text(encoding="utf-8") + +def process_contracts(contract_dir: str = "./contracts", max_concurrent: int = 10): + paths = list(Path(contract_dir).glob("**/*.txt")) + \ + list(Path(contract_dir).glob("**/*.pdf")) + filenames = [str(p) for p in paths] + ... +``` + +### Resume a failed contract + +Find failed executions via CLI: + +```bash +agentspan agent execution --name contract_reviewer --status FAILED --since 1d +``` + +Or open `http://localhost:6767` to browse executions visually. Re-run any failed contract by passing the same filename to `start()` again — the idempotent skip logic ensures already-completed contracts are never re-processed. 
+ +### Stream progress per contract + +Use `stream()` instead of `start()` to log each tool call as it happens: + +```python +from agentspan.agents import stream + +for event in stream(contract_reviewer, filename): + if event.type == "tool_call": + print(f" → {filename}: {event.tool_name}") + elif event.type == "done": + print(f" ✓ {filename}: complete") +``` + +### Flag high-risk contracts + +After saving each review, route high-risk ones to a separate queue: + +```python +if review.get("risk_level") == "high": + flag_for_legal_review(filename, review.get("action_required")) +``` + +### Increase concurrency + +For large batches, increase `max_concurrent` to process more contracts at once: + +```python +process_contracts(max_concurrent=20) +``` + +The server handles the parallelism — each contract runs as an independent workflow. diff --git a/docs/developer-guides/agentspan/examples/google-adk.mdx b/docs/developer-guides/agentspan/examples/google-adk.mdx new file mode 100644 index 00000000..7ff4c763 --- /dev/null +++ b/docs/developer-guides/agentspan/examples/google-adk.mdx @@ -0,0 +1,351 @@ +--- +slug: "/developer-guides/agentspan/examples/google-adk" +title: "Google ADK — research assistant" +description: "Multi-agent research assistant built with Google ADK and Gemini, wrapped with Agentspan" +--- + +# Google ADK — Research Assistant + +This example shows how to wrap an existing Google ADK multi-agent pipeline with Agentspan. A sequential pipeline chains a web researcher, a data analyst, and a writer — with crash recovery and full step visibility added by replacing the runner setup with one line. + +## What Agentspan adds to Google ADK + +Google ADK handles your agent pipeline — `LlmAgent`, `SequentialAgent`, tools, and instructions. 
Agentspan adds a production execution layer without changing any of that: + +- **Crash recovery**: Pipeline execution runs on the Agentspan server; a process restart resumes from the current sub-agent +- **No session management**: Agentspan replaces `InMemorySessionService` and `Runner` setup; your pipeline definition is unchanged +- **Per-agent step visibility**: Every tool call and sub-agent handoff is logged and browsable in the UI at `http://localhost:6767` +- **Execution history**: Every research run is stored with inputs, outputs, and timing + +Your agent definitions, sub-agent structure, tools, and instructions stay exactly as written. + +:::info Prerequisites +- A running Agentspan server: `agentspan server start` +- Additional dependencies: `pip install google-adk httpx markdownify` +- Environment variables set: + +```bash +export GEMINI_API_KEY=... +``` + +::: +--- + +## Before: plain Google ADK + +Standard Google ADK code. Runs fine locally but all execution state lives in `InMemorySessionService` — a process crash loses everything. + +```python +import asyncio +from google.adk.agents import LlmAgent, SequentialAgent +from google.adk.tools import google_search, FunctionTool +from google.adk.runners import Runner +from google.adk.sessions import InMemorySessionService +from google.genai.types import Content, Part + +# ── Tools ──────────────────────────────────────────────────────────────────── + +def fetch_page(url: str) -> str: + """Fetch and return readable text content from a URL.""" + import httpx + from markdownify import markdownify + resp = httpx.get(url, timeout=10, follow_redirects=True) + return markdownify(resp.text)[:6000] + +def run_python(code: str) -> str: + """Execute Python code and return stdout. 
Use for data analysis.""" + import subprocess + result = subprocess.run( + ["python", "-c", code], + capture_output=True, text=True, timeout=30, + ) + return result.stdout or result.stderr + +fetch_tool = FunctionTool(func=fetch_page) +python_tool = FunctionTool(func=run_python) + +# ── Sub-agents ──────────────────────────────────────────────────────────────── + +researcher = LlmAgent( + name="researcher", + model="gemini-2.0-flash", + description="Web researcher. Searches and reads sources.", + instruction="""You are a research specialist. Given a topic: +1. Run 3–5 targeted Google searches +2. Fetch and read the most informative pages +3. Extract key facts, statistics, and direct quotes with source URLs +4. Return a structured research brief — facts only, no prose""", + tools=[google_search, fetch_tool], +) + +analyst = LlmAgent( + name="analyst", + model="gemini-2.0-flash", + description="Data analyst. Runs calculations and finds patterns.", + instruction="""You are a data analyst. Given research notes: +1. Identify any numerical claims or datasets worth verifying +2. Run Python calculations to check figures or derive insights +3. Summarize your findings with the code you ran +Return your analysis as structured notes.""", + tools=[python_tool], +) + +writer = LlmAgent( + name="writer", + model="gemini-2.5-pro", # stronger model for the final output + description="Technical writer. Produces the final report.", + instruction="""You are a technical writer. Given research and analysis: +1. Write a clear, well-structured report with an executive summary +2. Use specific numbers and quotes — never vague claims +3. Cite all sources inline +4. 
End with a 'Key Takeaways' section (3–5 bullet points)""", +) + +# ── Sequential pipeline ─────────────────────────────────────────────────────── + +pipeline = SequentialAgent( + name="research_pipeline", + description="Full research pipeline: search → analyse → write", + sub_agents=[researcher, analyst, writer], +) + +# ── Run (plain ADK — no durability) ────────────────────────────────────────── + +APP_NAME = "research_assistant" + +async def run_research(topic: str) -> str: + session_service = InMemorySessionService() + session = await session_service.create_session( + app_name=APP_NAME, + user_id="user1", + session_id="session1", + ) + runner = Runner( + agent=pipeline, + app_name=APP_NAME, + session_service=session_service, + ) + events = runner.run_async( + user_id="user1", + session_id="session1", + new_message=Content(role="user", parts=[Part(text=topic)]), + ) + final_response = "" + async for event in events: + if event.is_final_response(): + final_response = event.content.parts[0].text + return final_response + +result = asyncio.run(run_research("The current state of durable execution for AI agents")) +print(result) +``` + +--- + +## After: wrapped with Agentspan + +Three things change from the plain ADK version: `LlmAgent` → `Agent`, model strings use the `google_gemini/` provider prefix, and the runner setup is replaced with `runtime.run()`. Tools are passed as plain functions — no `FunctionTool` wrapper needed. + +```python +from google.adk.agents import Agent, SequentialAgent +from agentspan.agents import AgentRuntime + +# ── Tools (unchanged — no FunctionTool wrapper needed) ─────────────────────── + +def fetch_page(url: str) -> str: + """Fetch and return readable text content from a URL.""" + import httpx + from markdownify import markdownify + resp = httpx.get(url, timeout=10, follow_redirects=True) + return markdownify(resp.text)[:6000] + +def run_python(code: str) -> str: + """Execute Python code and return stdout. 
Use for data analysis.""" + import subprocess + result = subprocess.run( + ["python", "-c", code], + capture_output=True, text=True, timeout=30, + ) + return result.stdout or result.stderr + +def search_web(query: str) -> dict: + """Search the web and return a summary of results for the query.""" + return {"query": query, "results": f"Top results for: {query}"} + +# ── Sub-agents ──────────────────────────────────────────────────────────────── + +researcher = Agent( + name="researcher", + model="google_gemini/gemini-2.0-flash", + description="Web researcher. Searches and reads sources.", + instruction="""You are a research specialist. Given a topic: +1. Run 3–5 targeted Google searches +2. Fetch and read the most informative pages +3. Extract key facts, statistics, and direct quotes with source URLs +4. Return a structured research brief — facts only, no prose""", + tools=[search_web, fetch_page], +) + +analyst = Agent( + name="analyst", + model="google_gemini/gemini-2.0-flash", + description="Data analyst. Runs calculations and finds patterns.", + instruction="""You are a data analyst. Given research notes: +1. Identify any numerical claims or datasets worth verifying +2. Run Python calculations to check figures or derive insights +3. Summarize your findings with the code you ran +Return your analysis as structured notes.""", + tools=[run_python], +) + +writer = Agent( + name="writer", + model="google_gemini/gemini-2.5-pro", # stronger model for the final output + description="Technical writer. Produces the final report.", + instruction="""You are a technical writer. Given research and analysis: +1. Write a clear, well-structured report with an executive summary +2. Use specific numbers and quotes — never vague claims +3. Cite all sources inline +4. 
End with a 'Key Takeaways' section (3–5 bullet points)""", +) + +# ── Sequential pipeline ─────────────────────────────────────────────────────── + +pipeline = SequentialAgent( + name="research_pipeline", + description="Full research pipeline: search → analyse → write", + sub_agents=[researcher, analyst, writer], +) + +# was: the whole runner setup above (session_service, Runner, run_async, etc.) +with AgentRuntime() as runtime: + result = runtime.run(pipeline, "The current state of durable execution for AI agents") + +print(result.output) +print(f"Run ID: {result.execution_id}") +``` + +`runtime.run()` registers the full pipeline execution — including every sub-agent step and tool call — as a single managed run on the Agentspan server. + +--- + +## Run it + +Save all the code above into a single file called `research_assistant.py`, then run: + +```bash +python research_assistant.py +``` + +--- + +## What this demonstrates + +``` +topic → [research_pipeline] → [researcher] → [analyst] → [writer] → final report +``` + +**Sequential pipeline, Agentspan runtime**: The sub-agent chain (`researcher → analyst → writer`) runs exactly as defined. Replace the runner setup with `runtime.run` and the entire pipeline runs on the Agentspan server. + +**Per-agent step visibility**: Every tool call and sub-agent handoff is a logged step. Open `http://localhost:6767` to see which agent was active at each step, what tools it called, and what it produced. + +**Crash recovery**: If your process dies mid-pipeline (network timeout, OOM, deploy restart), Agentspan resumes from the current sub-agent when a new worker connects. The research run isn't dropped. + +**Run history**: Every execution is stored with inputs, outputs, token usage, and timing. 
+ +--- + +## Example modifications + +### Run asynchronously + +```python +import asyncio +from agentspan.agents import run_async + +async def run_research(topic: str) -> str: + result = await run_async(pipeline, topic) + return result.output + +asyncio.run(run_research("The current state of durable execution for AI agents")) +``` + +### Fire-and-forget for long research jobs + +Use `start` to submit a job and return immediately. Useful when research runs are slow and you don't want to block. + +```python +from agentspan.agents import start + +# Launch and return immediately — pipeline runs in the background on the server +handle = start(pipeline, topic) +print(f"Running: {handle.execution_id}") + +# Check status +status = handle.get_status() +print(status.status) # "RUNNING" | "COMPLETED" | "FAILED" + +# Or wait for result +result = handle.stream().get_result() +print(result.output) +``` + +### Run multiple topics concurrently + +`start` works in a loop — each call submits immediately without waiting for the previous one to finish. + +```python +from agentspan.agents import start + +topics = [ + "Durable execution frameworks for AI agents", + "LangGraph vs OpenAI Agents SDK comparison 2026", + "Serverless vs container deployments for AI agent workloads", +] + +# All three run concurrently on the Agentspan server +handles = [start(pipeline, t) for t in topics] +results = [h.stream().get_result() for h in handles] + +for r in results: + print(str(r.output)[:200], "\n---") +``` + +### Stream sub-agent progress + +```python +from agentspan.agents import stream + +for event in stream(pipeline, topic): + if event.type == "handoff": + print(f"\n── {event.target} ──") + elif event.type == "tool_call": + print(f" → {event.tool_name}({event.args})") + elif event.type == "done": + print(f"\n{event.output}") +``` + +--- + +## Testing + +Use `mock_run` to test the pipeline without a live server or real API calls. 
Supply the expected sequence of sub-agent handoffs and tool calls; `mock_run` drives the pipeline through them and returns an `AgentResult` you can assert against. + +```python +from agentspan.agents.testing import mock_run, MockEvent, expect + +result = mock_run( + pipeline, + "The current state of durable execution for AI agents", + events=[ + MockEvent.handoff("researcher"), + MockEvent.tool_call("search_web", {"query": "durable execution AI agents 2026"}), + MockEvent.tool_result("search_web", "Agentspan, LangGraph, and OpenAI Agents SDK lead..."), + MockEvent.handoff("analyst"), + MockEvent.handoff("writer"), + MockEvent.done("# Final Report\nDurable execution has become..."), + ] +) + +expect(result).completed().used_tool("search_web") +``` diff --git a/docs/developer-guides/agentspan/examples/human-in-the-loop.mdx b/docs/developer-guides/agentspan/examples/human-in-the-loop.mdx new file mode 100644 index 00000000..78b5fdba --- /dev/null +++ b/docs/developer-guides/agentspan/examples/human-in-the-loop.mdx @@ -0,0 +1,219 @@ +--- +slug: "/developer-guides/agentspan/examples/human-in-the-loop" +title: "Human-in-the-loop" +description: "Pause agents at risky steps, hold state indefinitely, and resume after human approval" +--- + +# Human-in-the-loop + +Agents are great at finding the right action. Humans are better at authorizing risky ones. Agentspan lets you pause an agent at any tool call, hold state indefinitely on the server (no timeouts, no data loss), and resume after a human approves or rejects. + +--- + +## The one-line change + +Add `approval_required=True` to any `@tool` decorator. That's it. + +```python +from agentspan.agents import tool + +@tool(approval_required=True) +def process_refund(order_id: str, amount: float) -> dict: + """Process a refund. Requires human approval before executing.""" + return billing_api.refund(order_id, amount) +``` + +When the LLM calls this tool, Agentspan automatically: +1. Pauses the agent workflow +2. 
Sets `handle.get_status().is_waiting = True` +3. Holds the full agent state on the server (no timeout) +4. Waits for `handle.approve()` or `handle.reject(reason)` + +--- + +## Complete example: refund agent + +:::info Prerequisites +- A running Agentspan server: `agentspan server start` +- Environment variables set: + +```bash +export OPENAI_API_KEY=sk-... # or ANTHROPIC_API_KEY if using Anthropic +``` + +::: +```python +import time +from agentspan.agents import Agent, tool, start + +# Tools that run automatically +@tool +def get_order(order_id: str) -> dict: + """Look up an order by ID.""" + return {"order_id": order_id, "amount": 29.99, "status": "delivered"} + +@tool +def get_customer(customer_id: str) -> dict: + """Get customer account details.""" + return {"customer_id": customer_id, "name": "Alex", "email": "alex@example.com"} + +# Tool that requires human approval before executing +@tool(approval_required=True) +def process_refund(order_id: str, amount: float) -> dict: + """Issue a refund. Requires human approval.""" + return {"refunded": True, "order_id": order_id, "amount": amount} + +agent = Agent( + name="refund_agent", + model="openai/gpt-4o-mini", + tools=[get_order, get_customer, process_refund], + instructions="""You handle refund requests. + 1. Look up the order + 2. Look up the customer + 3. Call process_refund — it will pause for human approval automatically + """, +) + +# Start the agent — returns immediately, workflow runs on the server +handle = start(agent, "Customer Alex (cust_001) wants a refund on order ORD-8821") +print(f"Run ID: {handle.execution_id}") + +# Poll until the agent reaches the approval checkpoint +for _ in range(60): + time.sleep(2) + status = handle.get_status() + + if status.is_waiting: + print("\n--- Approval required ---") + print(f"Agent wants to call: process_refund") + print(f"Order: ORD-8821 Amount: $29.99") + + decision = input("Approve? 
(y/n): ").strip().lower() + if decision == "y": + handle.approve() + print("Approved. Waiting for agent to complete...") + result = handle.stream().get_result() + print("\nResult:", result.output["result"]) + else: + reason = input("Rejection reason: ").strip() + handle.reject(reason) + print("Rejected.") + break + + if status.is_complete: + print("Completed:", status.output["result"]) + break +``` + +When it reaches human review, the terminal prompts like this: + +```bash +--- Approval required --- +Agent wants to call: process_refund +Order: ORD-8821 Amount: $29.99 +``` + +After `approve()`, the agent executes `process_refund` and continues normally. + +After `reject(reason)`, the agent receives the rejection in its context and can respond — for example, by escalating to a human queue or explaining the rejection to the user. + +--- + +## Connecting a webhook or Slack approval + +In production, you don't poll in a loop — you store the execution ID and trigger approval from a webhook or approval UI: + +```python +# When a run starts, store the execution_id +handle = start(agent, customer_message) +db.store_pending_approval( + execution_id=handle.execution_id, + context={"customer": customer_id, "action": "refund"}, +) +# Notify your approval channel (Slack, email, internal tool) +notify_approver(handle.execution_id) +``` + +```python +# Later — your approval endpoint (FastAPI, Flask, Lambda, etc.) +from agentspan.agents import AgentRuntime, AgentHandle + +# In a web app, the agent (with its tools) must already be served. +# Call runtime.serve(agent, blocking=False) at app startup, then reconnect here. 
+ +@app.post("/approvals/{execution_id}/approve") +def approve(execution_id: str): + handle = AgentHandle(execution_id=execution_id, runtime=app.state.runtime) + handle.approve() + return {"approved": True} + +@app.post("/approvals/{execution_id}/reject") +def reject(execution_id: str, reason: str): + handle = AgentHandle(execution_id=execution_id, runtime=app.state.runtime) + handle.reject(reason) + return {"rejected": True} +``` + +--- + +## Multiple approval points in one run + +You can have multiple `approval_required` tools — each one creates a separate approval checkpoint: + +```python +@tool(approval_required=True) +def send_email_blast(template_id: str, recipient_count: int) -> dict: + """Send a marketing email to all subscribers. Requires approval.""" + return email_service.send(template_id, recipient_count) + +@tool(approval_required=True) +def delete_account(customer_id: str, reason: str) -> dict: + """Permanently delete a customer account. Requires approval.""" + return accounts_db.delete(customer_id) +``` + +Each time the agent calls one of these tools, it pauses and waits for a fresh `handle.approve()` before continuing. 
+ +--- + +## Stream events including approval pauses + +```python +from agentspan.agents import stream + +agent_stream = stream(agent, customer_message) +for event in agent_stream: + if event.type == "tool_call": + print(f"→ calling {event.tool_name}") + elif event.type == "waiting": + print("paused — waiting for approval") + # In a real app: send notification, store execution_id, return + agent_stream.approve() # or agent_stream.reject("reason") + elif event.type == "done": + print(event.output["result"]) # output is a dict: {"result": "...", "finishReason": "STOP", ...} + break +``` + +--- + +## Testing HITL flows + +`MockEvent.waiting()` simulates the approval pause, then `MockEvent.done()` simulates the post-approval response: + +```python +from agentspan.agents.testing import mock_run, MockEvent, expect + +result = mock_run( + agent, + "Refund order ORD-8821", + events=[ + MockEvent.tool_call("get_order", {"order_id": "ORD-8821"}), + MockEvent.tool_result("get_order", {"amount": 29.99}), + MockEvent.tool_call("process_refund", {"order_id": "ORD-8821", "amount": 29.99}), + MockEvent.waiting("Waiting for refund approval"), + MockEvent.done("Refund of $29.99 for order ORD-8821 has been processed."), + ] +) + +expect(result).completed().used_tool("process_refund") +``` diff --git a/docs/developer-guides/agentspan/examples/langgraph.mdx b/docs/developer-guides/agentspan/examples/langgraph.mdx new file mode 100644 index 00000000..dd965ba3 --- /dev/null +++ b/docs/developer-guides/agentspan/examples/langgraph.mdx @@ -0,0 +1,287 @@ +--- +slug: "/developer-guides/agentspan/examples/langgraph" +title: "LangGraph — code review bot" +description: "Wrap an existing LangGraph agent with Agentspan for crash recovery, run history, and human-in-the-loop" +--- + +# LangGraph — Code Review Bot + +This example shows how to wrap an existing LangGraph agent with Agentspan. 
The agent reads a GitHub pull request diff, analyses it for bugs, security issues, and style problems, and posts inline review comments — with crash recovery and full run history added by changing one line. + +## What Agentspan adds to LangGraph + +LangGraph handles your graph — nodes, edges, conditional branching, typed state. Agentspan adds a production execution layer without changing any of that: + +- **Crash recovery**: Graph execution runs on the Agentspan server; if your process dies, the run is not lost — it restarts when a new worker connects +- **Human-in-the-loop**: Pause at any tool call for human approval, hold state indefinitely server-side, resume cleanly +- **Execution history**: Every run is logged with full inputs, outputs, and timing, browsable at `http://localhost:6767` or via CLI +- **Re-run from history**: Replay any past run with the same input from the UI + +Your graph definition, nodes, edges, and typed state schema stay exactly as written. + +:::info Prerequisites +- A running Agentspan server: `agentspan server start` +- Additional dependencies: `pip install langgraph langchain-anthropic httpx` +- Environment variables set: + +```bash +export ANTHROPIC_API_KEY=sk-ant-... +export GITHUB_TOKEN=ghp_... +``` + +To generate a GitHub token, go to **Settings → Developer settings → Personal access tokens → Tokens (classic)** and check the **`repo`** scope. This gives the bot read access to diffs and write access to post review comments. + +::: +--- + +## Before: plain LangGraph + +Standard LangGraph code. It works locally but has no durability — if the process dies mid-review, the run is gone.
+ +```python +import operator +from typing import TypedDict, Annotated +from langchain_anthropic import ChatAnthropic +from langchain_core.messages import HumanMessage, SystemMessage +from langchain_core.tools import tool +from langgraph.graph import StateGraph, END +from langgraph.prebuilt import ToolNode +import os + +# ── Tools ──────────────────────────────────────────────────────────────────── + +@tool +def read_file(path: str) -> str: + """Read a file from the repository.""" + return open(path).read() + +@tool +def get_pr_diff(pr_number: int, repo: str) -> str: + """Fetch the unified diff for a GitHub pull request.""" + import httpx + resp = httpx.get( + f"https://api.github.com/repos/{repo}/pulls/{pr_number}", + headers={"Accept": "application/vnd.github.v3.diff", + "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}"}, + ) + return resp.text + +@tool +def get_pr_commits(pr_number: int, repo: str) -> str: + """Get the commits for a GitHub pull request. Call this before post_review_comment to get a valid commit_id.""" + import httpx + resp = httpx.get( + f"https://api.github.com/repos/{repo}/pulls/{pr_number}/commits", + headers={"Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}"}, + ) + return resp.text + +@tool +def post_review_comment(pr_number: int, repo: str, body: str, commit_id: str, + path: str, line: int) -> dict: + """Post an inline review comment on a specific line of a PR.""" + import httpx + resp = httpx.post( + f"https://api.github.com/repos/{repo}/pulls/{pr_number}/comments", + headers={"Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}"}, + json={"body": body, "commit_id": commit_id, "path": path, "line": line}, + ) + return resp.json() + +tools = [read_file, get_pr_diff, get_pr_commits, post_review_comment] +tool_node = ToolNode(tools) + +# ── Model ───────────────────────────────────────────────────────────────────── + +model = ChatAnthropic(model="claude-sonnet-4-6").bind_tools(tools) + +SYSTEM = """You are an expert code 
reviewer. When reviewing a pull request: +1. Fetch the diff with get_pr_diff +2. Fetch the commits with get_pr_commits to get a valid commit_id +3. Read any relevant context files with read_file +4. Identify: bugs, security issues, missing error handling, style violations +5. Post inline comments with post_review_comment for each finding (using the commit_id from step 2) +6. End with a summary of findings and an overall verdict (approve / request changes)""" + +# ── Graph ───────────────────────────────────────────────────────────────────── + +class State(TypedDict): + messages: Annotated[list, operator.add] + +def agent_node(state: State): + messages = [SystemMessage(content=SYSTEM)] + state["messages"] + response = model.invoke(messages) + return {"messages": [response]} + +def should_continue(state: State): + last = state["messages"][-1] + return "tools" if last.tool_calls else END + +workflow = StateGraph(State) +workflow.add_node("agent", agent_node) +workflow.add_node("tools", tool_node) +workflow.set_entry_point("agent") +workflow.add_conditional_edges("agent", should_continue) +workflow.add_edge("tools", "agent") + +app = workflow.compile() + +# ── Run ─────────────────────────────────────────────────────────────────────── + +result = app.invoke({ + "messages": [HumanMessage(content="Review PR #142 in acme-corp/backend")] +}) +print(result["messages"][-1].content) +``` + +--- + +## After: wrapped with Agentspan + +Replace `app.invoke({...})` with `runtime.run(app, {...})`. That's the only change. Agentspan auto-detects LangGraph apps — no extra imports or graph modifications needed. 
+ +```python +from agentspan.agents import AgentRuntime + +with AgentRuntime() as runtime: + result = runtime.run(app, { + "messages": [HumanMessage(content="Review PR #142 in acme-corp/backend")] + }) + +print(result.output["messages"][-1].content) +print(f"Run ID: {result.execution_id}") +``` + +`runtime.run()` registers the graph execution as a managed run on the Agentspan server. The graph logic stays identical — Agentspan wraps the execution lifecycle around it. + +--- + +## What you gain + +**Crash recovery**: If your process dies mid-review (network timeout, OOM, deploy restart), Agentspan restarts the graph run when a new worker connects. The run is not lost. + +**Run history**: Every PR review is stored with its full input, output, tool calls, and timing. Open `http://localhost:6767` to browse executions and inspect what the model did on each run. + +**Re-run**: Replay any past run with the same input directly from the UI. Useful when you update your system prompt or swap models and want to compare outputs. + +--- + +## Run it + +Save all the code above (tools, graph, and runtime block) into a single file called `code_review_bot.py`, then run: + +```bash +python code_review_bot.py +``` + +:::note Placeholder values +`"Review PR #142 in acme-corp/backend"` is a placeholder. Replace it with a real PR number and repository you have access to, otherwise the GitHub API will return a 404. + +::: +--- + +## Example modifications + +### Run asynchronously + +Use `run_async` in async contexts, such as FastAPI route handlers or async worker loops. 
+ +```python +import asyncio +from agentspan.agents import run_async + +async def review_pr(pr_number: int, repo: str): + result = await run_async(app, { + "messages": [HumanMessage(content=f"Review PR #{pr_number} in {repo}")] + }) + return result.output["messages"][-1].content + +asyncio.run(review_pr(142, "acme-corp/backend")) +``` + +### Fire-and-forget for long reviews + +Use `start` to submit a review and return immediately. Useful when reviews are slow (large diffs, many tool calls) and you don't want to block. + +```python +from agentspan.agents import start + +# Returns immediately — graph runs in the background on the server +handle = start(app, { + "messages": [HumanMessage(content="Review PR #142 in acme-corp/backend")] +}) + +print(f"Started: {handle.execution_id}") + +# Collect the result whenever you're ready +result = handle.stream().get_result() +print(result.output["messages"][-1].content) +``` + +### Review multiple PRs concurrently + +`start` works in a loop — each call submits immediately without waiting for the previous one to finish. + +```python +from agentspan.agents import start + +prs = [(142, "acme-corp/backend"), (87, "acme-corp/frontend"), (23, "acme-corp/infra")] + +handles = [ + start(app, {"messages": [HumanMessage(content=f"Review PR #{n} in {repo}")]}) + for n, repo in prs +] + +# Block until all reviews are done +results = [h.stream().get_result() for h in handles] +``` + +--- + +## Checkpointing and LangSmith + +**LangGraph checkpointing** (`MemorySaver`, `PostgresSaver`) saves graph state after each node so a run can resume from where it left off if interrupted. 
When you wrap with Agentspan, do not use a checkpointer — Agentspan manages the execution lifecycle and the two mechanisms conflict: + +```python +# Correct: compile without a checkpointer +app = workflow.compile() + +# Do not do this — conflicts with AgentRuntime +# app = workflow.compile(checkpointer=MemorySaver()) +``` + +Agentspan handles crash recovery at the run level. If your worker dies, the graph run restarts from the beginning when a new worker connects. Use Agentspan's recovery instead of LangGraph's node-level checkpointing when the graph is wrapped. + +**LangSmith** continues to work as usual. LLM call traces (prompts, completions, token counts) still fire inside the wrapped graph. Agentspan adds run-level tracking on top — execution IDs, full input/output, timing, and status across all your agents — but does not replace per-call LLM traces. + +--- + +## Testing + +Use `mock_run` to test the graph without a live server or real API calls. You supply the expected sequence of tool calls and results; `mock_run` drives the graph through them and returns an `AgentResult` you can assert against. + +```python +from agentspan.agents.testing import mock_run, MockEvent, expect +from langchain_core.messages import HumanMessage + +result = mock_run( + app, + {"messages": [HumanMessage(content="Review PR #1 in test/repo")]}, + events=[ + MockEvent.tool_call("get_pr_diff", {"pr_number": 1, "repo": "test/repo"}), + MockEvent.tool_result("get_pr_diff", "- def foo():\n+ def foo(x: int):"), + MockEvent.tool_call("post_review_comment", { + "pr_number": 1, + "repo": "test/repo", + "body": "Consider adding a type hint", + "commit_id": "abc123", + "path": "main.py", + "line": 5, + }), + MockEvent.tool_result("post_review_comment", {"id": 1, "body": "Consider adding a type hint"}), + MockEvent.done("Review complete. 
Posted 1 comment."), + ] +) + +expect(result).completed().used_tool("get_pr_diff").used_tool("post_review_comment") +``` diff --git a/docs/developer-guides/agentspan/examples/openai-agents-sdk.mdx b/docs/developer-guides/agentspan/examples/openai-agents-sdk.mdx new file mode 100644 index 00000000..485f1332 --- /dev/null +++ b/docs/developer-guides/agentspan/examples/openai-agents-sdk.mdx @@ -0,0 +1,281 @@ +--- +slug: "/developer-guides/agentspan/examples/openai-agents-sdk" +title: "OpenAI Agents SDK — customer support" +description: "Multi-agent handoff system built with the OpenAI Agents SDK, wrapped with Agentspan" +--- + +# OpenAI Agents SDK — Customer Support + +This example shows how to wrap an existing OpenAI Agents SDK multi-agent system with Agentspan. A triage agent routes customer support tickets to billing, technical, or account specialists using the SDK's native handoff feature — with crash recovery and full handoff tracing added by changing one line. + +## What Agentspan adds to the OpenAI Agents SDK + +The OpenAI Agents SDK handles your agent definitions, handoffs, and tool routing. Agentspan adds a production execution layer without changing any of that: + +- **Crash recovery**: If your process dies during a multi-step resolution, Agentspan resumes when a worker reconnects +- **Full handoff trace**: Every handoff between agents is a logged step, visible in the UI at `http://localhost:6767` +- **Human approval on tools**: Add `approval_required=True` to any tool to pause execution for human sign-off +- **Execution history**: Every ticket run is stored with inputs, outputs, and timing + +Your agent definitions, handoff configurations, and tool implementations stay exactly as written. + +:::info Prerequisites +- A running Agentspan server: `agentspan server start` +- Additional dependencies: `pip install openai-agents` +- Environment variables set: + +```bash +export OPENAI_API_KEY=sk-... 
+``` + +::: +--- + +## Before: plain OpenAI Agents SDK + +Standard code using the OpenAI Agents SDK. The native handoff pattern works well, but runs have no history, no crash recovery, and no human-in-the-loop. + +```python +from agents import Agent, Runner, function_tool + +# ── Mock data ───────────────────────────────────────────────────────────────── + +ACCOUNTS = { + "CUST-001": {"id": "CUST-001", "name": "Alice Smith", "plan": "pro", "billing_status": "active", "region": "us-east"}, +} +INVOICES = { + "INV-8821": {"id": "INV-8821", "customer_id": "CUST-001", "amount": 99.00, "status": "paid", "date": "2024-12-01"}, +} +TICKETS = [ + {"id": "TKT-101", "customer_id": "CUST-001", "subject": "Login issue", "status": "resolved"}, +] + +# ── Tools ──────────────────────────────────────────────────────────────────── + +@function_tool +def get_account(customer_id: str) -> dict: + """Look up a customer's account: plan, billing status, usage.""" + return ACCOUNTS.get(customer_id, {"error": "Account not found"}) + +@function_tool +def get_invoice(invoice_id: str) -> dict: + """Fetch an invoice by ID.""" + return INVOICES.get(invoice_id, {"error": "Invoice not found"}) + +@function_tool +def process_refund(invoice_id: str, reason: str) -> dict: + """Issue a full refund for an invoice.""" + invoice = INVOICES.get(invoice_id) + if not invoice: + return {"error": "Invoice not found"} + return {"status": "refunded", "invoice_id": invoice_id, "amount": invoice["amount"]} + +@function_tool +def get_ticket_history(customer_id: str) -> list[dict]: + """Get the last 5 support tickets for a customer.""" + return [t for t in TICKETS if t["customer_id"] == customer_id][:5] + +@function_tool +def reset_password(customer_id: str) -> dict: + """Send a password reset email to the customer.""" + account = ACCOUNTS.get(customer_id) + if not account: + return {"error": "Account not found"} + return {"status": "reset_email_sent", "customer_id": customer_id} + +@function_tool +def 
check_service_status(region: str) -> dict: + """Check current service health for a region.""" + return {"region": region, "status": "operational", "latency_ms": 42} + +@function_tool +def escalate_to_human(ticket_id: str, reason: str, priority: str) -> dict: + """Escalate this ticket to a human agent.""" + return {"status": "escalated", "ticket_id": ticket_id, "priority": priority, "eta_minutes": 15} + +# ── Specialist agents ───────────────────────────────────────────────────────── + +billing_agent = Agent( + name="billing_specialist", + model="gpt-4o", + instructions="""You handle billing questions: invoices, charges, refunds, plan changes. + Always look up the account first. Process refunds only for clear billing errors. + For amounts over $200, escalate to a human.""", + tools=[get_account, get_invoice, process_refund, escalate_to_human], +) + +technical_agent = Agent( + name="technical_specialist", + model="gpt-4o", + instructions="""You handle technical issues: login problems, service outages, API errors. + Always check service status first. Reset passwords only after verifying the customer's identity.""", + tools=[check_service_status, reset_password], +) + +account_agent = Agent( + name="account_specialist", + model="gpt-4o", + instructions="""You handle account changes: upgrades, downgrades, cancellations, data exports. + Always look up ticket history before making changes. For cancellations, attempt retention first.""", + tools=[get_ticket_history], +) + +# ── Triage agent with handoffs ──────────────────────────────────────────────── + +triage_agent = Agent( + name="support_triage", + model="gpt-4o-mini", # fast, cheap — just routes + instructions="""You are a support triage agent. Understand the customer's issue + and hand off to the right specialist immediately. 
+ + - Billing, charges, invoices, refunds → billing_specialist + - Login, outages, API errors, technical issues → technical_specialist + - Plan changes, cancellations, account settings → account_specialist""", + handoffs=[billing_agent, technical_agent, account_agent], +) + +# ── Run (plain OpenAI Agents SDK — no durability) ───────────────────────────── + +result = Runner.run_sync( + triage_agent, + "Hi, I was charged $99 twice last month (invoice INV-8821). Can I get a refund?", +) +print(result.final_output) +``` + +--- + +## After: wrapped with Agentspan + +Replace `Runner.run_sync(triage_agent, message)` with `runtime.run(triage_agent, message)`. That's the only change. Agentspan auto-detects OpenAI Agents SDK agents — no extra imports or agent modifications needed. + +```python +from agentspan.agents import AgentRuntime + +message = "Hi, I was charged $99 twice last month (invoice INV-8821). Can I get a refund?" + +# was: result = Runner.run_sync(triage_agent, message) +with AgentRuntime() as runtime: + result = runtime.run(triage_agent, message) + +print(result.output) +print(f"Run ID: {result.execution_id}") +``` + +`runtime.run()` registers the full multi-agent execution — including every handoff — as a single managed run on the Agentspan server. + +--- + +## Run it + +Save all the code above (tools, agents, and runtime block) into a single file called `support_bot.py`, then run: + +```bash +python support_bot.py +``` + +--- + +## What this demonstrates + +``` +ticket → [support_triage] → handoff → [billing_specialist] → tools → final response +``` + +**Native handoffs, Agentspan runtime**: The triage agent, specialist agents, and handoff configuration stay exactly as written. Replace `Runner.run_sync` with `runtime.run` and the entire multi-agent execution runs on the Agentspan server. + +**Full handoff trace**: Every handoff is a logged step. 
Open `http://localhost:6767` to see exactly which specialist handled the ticket, what tools they called, and what they returned. + +**Crash recovery**: If your process dies during a complex multi-step billing resolution, Agentspan resumes when a worker reconnects. The customer's ticket isn't dropped. + +**Run history**: Every execution is stored with inputs, outputs, token usage, and timing. + +--- + +## Example modifications + +### Run asynchronously + +```python +import asyncio +from agentspan.agents import run_async + +async def handle_ticket(message: str): + result = await run_async(triage_agent, message) + return result.output + +asyncio.run(handle_ticket("I was charged twice last month")) +``` + +### Fire-and-forget for slow tickets + +Use `start` to submit a ticket and return immediately without blocking. + +```python +from agentspan.agents import start + +handle = start(triage_agent, customer_message) +print(f"Ticket queued: {handle.execution_id}") + +# Collect the result later +result = handle.stream().get_result() +print(result.output) +``` + +### Stream events as they happen + +Use `stream` to process handoffs and tool calls in real time as the agents work through the ticket. + +```python +from agentspan.agents import stream + +for event in stream(triage_agent, customer_message): + if event.type == "handoff": + print(f" → routed to {event.target}") + elif event.type == "tool_call": + print(f" → {event.tool_name}({event.args})") + elif event.type == "done": + print(f"\n{event.output}") +``` + +--- + +## Adding human approval for large refunds + +Wrap any sensitive tool with Agentspan's `@tool` decorator and set `approval_required=True`. Execution pauses at that tool call until a human approves or rejects it in the UI. + +```python +from agentspan.agents import tool + +@tool(approval_required=True) +def process_refund(invoice_id: str, reason: str) -> dict: + """Issue a full refund. 
Requires human approval.""" + return billing_api.refund(invoice_id, reason=reason) + +# Use it in your agent as normal +billing_agent = Agent( + name="billing_specialist", + tools=[get_account, get_invoice, process_refund, escalate_to_human], + ... +) +``` + +--- + +## Testing + +Use `mock_run` to test the multi-agent flow without a live server or real API calls. Supply the expected sequence of handoffs and tool calls; `mock_run` drives the agents through them and returns an `AgentResult` you can assert against. + +```python +from agentspan.agents.testing import mock_run, MockEvent, expect + +result = mock_run( + triage_agent, + "I was charged twice last month", + events=[ + MockEvent.handoff("billing_specialist"), + MockEvent.done("I've looked into your account. A refund has been initiated."), + ] +) +expect(result).completed() +``` diff --git a/docs/developer-guides/agentspan/examples/research-pipeline.mdx b/docs/developer-guides/agentspan/examples/research-pipeline.mdx new file mode 100644 index 00000000..051ee0df --- /dev/null +++ b/docs/developer-guides/agentspan/examples/research-pipeline.mdx @@ -0,0 +1,137 @@ +--- +slug: "/developer-guides/agentspan/examples/research-pipeline" +title: "Research pipeline" +description: "Multi-agent research → write → edit pipeline with crash recovery" +--- + +# Build a Research Pipeline + +Use this example to build a multi-agent research pipeline that takes a topic, gathers findings, writes an article, and edits it for publication — all in a single run. + +## How it works + +Three agents run in sequence: + +- **Researcher**: Takes the topic and returns key facts and data points +- **Writer**: Takes the research findings and writes a structured article +- **Editor**: Reviews the draft and outputs the final polished version + +Each agent's output becomes the next agent's input. 
+ +:::info Prerequisites +- A running Agentspan server: `agentspan server start` +- Environment variables set: + +```bash +export AGENTSPAN_SERVER_URL=http://localhost:6767/api +export OPENAI_API_KEY=sk-... +export AGENTSPAN_LLM_MODEL=openai/gpt-4o-mini +``` + +::: +## Full code + +```python +import os +from agentspan.agents import Agent, AgentRuntime + +researcher = Agent( + name="researcher", + model=os.environ["AGENTSPAN_LLM_MODEL"], + instructions=( + "You are a researcher. Given a topic, provide key facts and data points. " + "Be thorough but concise. Output raw research findings." + ), +) + +writer = Agent( + name="writer", + model=os.environ["AGENTSPAN_LLM_MODEL"], + instructions=( + "You are a writer. Take research findings and write a clear, engaging " + "article. Use headers and bullet points where appropriate." + ), +) + +editor = Agent( + name="editor", + model=os.environ["AGENTSPAN_LLM_MODEL"], + instructions=( + "You are an editor. Review the article for clarity, grammar, and tone. " + "Make improvements and output the final polished version." + ), +) + +pipeline = researcher >> writer >> editor + +with AgentRuntime() as runtime: + result = runtime.run(pipeline, "The impact of AI agents on software development in 2025") + result.print_result() +``` + +## Run it + +Save the file as `research_pipeline.py`, and run it: + +```bash +python research_pipeline.py +``` + +## What this demonstrates + +``` +topic → [researcher] → research brief → [writer] → draft article → [editor] → final article +``` + +**Multi-agent pipeline (`>>`)**: The three agents run sequentially. The researcher receives the raw prompt; each later agent sees only the output of the previous one. The researcher's output is the writer's input; the writer's output is the editor's input. + +**Crash recovery**: The pipeline runs on the Agentspan server. If your process dies mid-run, the server resumes from the current agent when you restart. Nothing reruns from scratch. 
+ +**Run history**: Every execution is stored with inputs, outputs, token usage, and timing. Open `http://localhost:6767` to browse execution history and replay past runs. + +## Example modifications + +### Swap models per stage + +Use a cheaper model for research and a stronger one for writing and editing. + +```python +researcher = Agent(name="researcher", model="google_gemini/gemini-2.0-flash", ...) +writer = Agent(name="writer", model="anthropic/claude-sonnet-4-6", ...) +editor = Agent(name="editor", model="openai/gpt-4o", ...) +``` + +### Run multiple topics concurrently + +Use `start` instead of `run` to kick off multiple pipelines without waiting for each to finish. + +```python +from agentspan.agents import start + +topics = [ + "Multi-agent frameworks reshaping software development", + "LangGraph 1.0 production deployments", + "CrewAI enterprise customer traction", +] + +handles = [start(researcher >> writer >> editor, t) for t in topics] +results = [h.stream().get_result() for h in handles] +``` + +### Schedule as a daily job + +`start` works outside of a `with AgentRuntime()` block — it lazily creates a runtime singleton and shuts it down when the process exits. 
+ +```python +import schedule, time +from agentspan.agents import start + +def run_daily(): + for topic in WATCH_LIST: + start(researcher >> writer >> editor, topic) + +schedule.every().day.at("07:00").do(run_daily) +while True: + schedule.run_pending() + time.sleep(60) +``` diff --git a/docs/developer-guides/agentspan/examples/support-triage.mdx b/docs/developer-guides/agentspan/examples/support-triage.mdx new file mode 100644 index 00000000..eb085a3c --- /dev/null +++ b/docs/developer-guides/agentspan/examples/support-triage.mdx @@ -0,0 +1,222 @@ +--- +slug: "/developer-guides/agentspan/examples/support-triage" +title: "Support ticket triage" +description: "Classify, auto-resolve, or escalate with human approval — plus full history per ticket" +--- + +# Build a Support Triage Agent + +Use this example to build an AI support agent that classifies incoming tickets, resolves simple issues automatically, and routes sensitive operations (refunds, credits, suspensions) through a human approval queue before executing. + +## How it works + +A single agent handles each ticket end-to-end: + +1. Looks up the customer's account and ticket history +2. Diagnoses the issue +3. Resolves general and technical questions immediately via `send_reply` +4. Pauses for human review before executing billing or account actions +5. 
Returns a structured `Resolution` with what happened + +:::info Prerequisites +- A running Agentspan server: `agentspan server start` +- Environment variables set: + +```bash +export AGENTSPAN_SERVER_URL=http://localhost:6767/api +export OPENAI_API_KEY=sk-... +export AGENTSPAN_LLM_MODEL=openai/gpt-4o-mini +``` + +::: +## Full code + +```python +from agentspan.agents import Agent, AgentHandle, AgentRuntime, tool, start +from pydantic import BaseModel +from enum import Enum + +# ── Data types ──────────────────────────────────────────────────────────────── + +class TicketCategory(str, Enum): + BILLING = "billing" + TECHNICAL = "technical" + ACCOUNT = "account" + GENERAL = "general" + +class Resolution(BaseModel): + category: TicketCategory + action_taken: str + response_to_customer: str + requires_followup: bool + +# ── Tools ───────────────────────────────────────────────────────────────────── + +@tool +def lookup_customer(email: str) -> dict: + """Fetch customer record: plan, billing status, open tickets, account age.""" + return {"id": "cust_123", "email": email, "plan": "pro", "billing_status": "active"} + +@tool +def lookup_ticket_history(customer_id: str) -> list[dict]: + """Fetch the last 10 support tickets for this customer.""" + return [{"id": "TKT-001", "subject": "Login issue", "status": "resolved"}] + +@tool +def send_reply(customer_id: str, message: str) -> dict: + """Send a reply to the customer and mark the ticket as resolved.""" + return {"status": "sent", "customer_id": customer_id} + +@tool(approval_required=True) +def issue_refund(customer_id: str, amount_usd: float, reason: str) -> dict: + """Issue a refund to the customer. Requires human approval.""" + return {"status": "refund_issued", "amount": amount_usd} + +@tool(approval_required=True) +def suspend_account(customer_id: str, reason: str) -> dict: + """Suspend a customer account. 
Requires human approval.""" + return {"status": "suspended", "customer_id": customer_id} + +@tool(approval_required=True) +def apply_credit(customer_id: str, amount_usd: float, note: str) -> dict: + """Apply account credit. Requires human approval.""" + return {"status": "credit_applied", "amount": amount_usd} + +# ── Agent ───────────────────────────────────────────────────────────────────── + +support_agent = Agent( + name="support_agent", + model="openai/gpt-4o-mini", + output_type=Resolution, + tools=[ + lookup_customer, + lookup_ticket_history, + send_reply, + issue_refund, + suspend_account, + apply_credit, + ], + instructions="""You are a support agent for a SaaS product. + +When a ticket arrives: +1. Look up the customer's account and ticket history. +2. Diagnose the issue based on context. +3. For general and technical questions: resolve directly with send_reply. +4. For billing actions (refunds, credits): use the appropriate tool — these will pause + for human review before executing. +5. Return a Resolution with what happened. + +Always be clear and empathetic in your response_to_customer. 
+Never invent facts about the customer's account.""", +) + +# ── Ticket handler ──────────────────────────────────────────────────────────── + +def handle_ticket(ticket_id: str, customer_email: str, message: str, runtime: AgentRuntime): + prompt = f""" +Ticket ID: {ticket_id} +Customer email: {customer_email} +Message: {message} +""" + handle = start(support_agent, prompt, runtime=runtime) + + for event in handle.stream(): + if event.type == "waiting": + print(f"Paused for approval — tool: {event.tool_name}, args: {event.args}") + return handle.execution_id + + # No approval gate hit — already complete + return None + +# ── Approve or reject ───────────────────────────────────────────────────────── + +def reviewer_approve(execution_id: str, runtime: AgentRuntime): + handle = AgentHandle(execution_id=execution_id, runtime=runtime) + handle.approve() + +def reviewer_reject(execution_id: str, runtime: AgentRuntime, reason: str): + handle = AgentHandle(execution_id=execution_id, runtime=runtime) + handle.reject(reason) + +# ── Run ─────────────────────────────────────────────────────────────────────── + +with AgentRuntime() as runtime: + eid = handle_ticket( + "TKT-002", "user@example.com", "I was charged twice. Please refund.", runtime + ) + + if eid: + decision = input("\nApprove? (y/n): ").strip().lower() + handle = AgentHandle(execution_id=eid, runtime=runtime) + if decision == "y": + handle.approve() + else: + reason = input("Rejection reason: ").strip() + handle.reject(reason) + + print("\nWaiting for agent to complete...") + result = handle.stream().get_result() + result.print_result() +``` + +## Run it + +Save the file as `support_triage.py`, and run it: + +```bash +python support_triage.py +``` + +When the terminal prompts for human approval, approve or reject. + +## What this demonstrates + +**Human-in-the-loop (`approval_required=True`)**: When the agent calls a gated tool, execution pauses server-side indefinitely. 
The workflow holds its full state until a reviewer approves or rejects. + +**Reconnect from any process**: `AgentHandle(execution_id=..., runtime=...)` lets you re-attach to a paused workflow from a completely different process (e.g. a webhook handler), without the original caller staying alive. + +**Structured output**: `output_type=Resolution` enforces a typed response. The agent cannot return a free-form string; the SDK validates the output matches the schema. + +**Risk-tiered tools**: `send_reply` executes immediately; `issue_refund`, `apply_credit`, and `suspend_account` always pause for human review before touching money or account state. + +## Example modifications + +### Approve from the CLI + +If the agent is paused and you have the execution ID, you can approve or reject directly from the terminal without writing any code: + +```bash +# Approve (replace <execution-id> with the ID of the paused run) +agentspan agent respond <execution-id> --approve + +# Reject +agentspan agent respond <execution-id> --reject --reason "Amount too large" +``` + +### Auto-approve low-risk actions + +Route only high-value operations to a human reviewer, and approve the rest automatically: + +```python +def reviewer_approve_or_escalate(execution_id: str, runtime: AgentRuntime, args: dict): + if args and args.get("amount_usd", 999) <= 25: + reviewer_approve(execution_id, runtime) + else: + notify_human(execution_id, args) +``` + +### Wire up a webhook approver + +Store the `execution_id` from `handle_ticket` in your database, then approve or reject from a webhook when a reviewer clicks a button in your UI: + +```python +@app.post("/approvals/{execution_id}/approve") +def approve(execution_id: str): + reviewer_approve(execution_id, runtime) + return {"status": "approved"} + +@app.post("/approvals/{execution_id}/reject") +def reject(execution_id: str, reason: str): + reviewer_reject(execution_id, runtime, reason) + return {"status": "rejected"} +``` diff --git a/docs/developer-guides/agentspan/overview.mdx b/docs/developer-guides/agentspan/overview.mdx new file 
mode 100644 index 00000000..af08d286 --- /dev/null +++ b/docs/developer-guides/agentspan/overview.mdx @@ -0,0 +1,50 @@ +--- +slug: "/developer-guides/agentspan" +title: "Agentspan" +description: "Agentspan documentation for building production AI agents." +--- + +# Agentspan +**Agentspan is a durable runtime for AI agents. Your code runs in your process. Execution state lives on the server.** + +Because execution state lives server-side, crashes, restarts, and deployments do not lose work. Write agents natively or wrap an existing LangGraph, OpenAI Agents SDK, or Google ADK agent in one line. + +## Getting Started + +- [Why Agentspan](/developer-guides/agentspan/why-agentspan) - Why agents fail in production, and how Agentspan solves it. +- [Quickstart](/developer-guides/agentspan/quickstart) - Build your first agent in 5 minutes. + +## Concepts + +- [Agents](/developer-guides/agentspan/concepts/agents) - The `Agent` class, parameters, results, and handles. +- [Tools](/developer-guides/agentspan/concepts/tools) - `@tool`, `http_tool()`, `api_tool()`, `mcp_tool()`, credentials, and approval-required tools. +- [Skills](/developer-guides/agentspan/concepts/skills) - Load, register, run, and test agentskills.io skill folders. +- [Multi-Agent Strategies](/developer-guides/agentspan/concepts/multi-agent) - Sequential, parallel, handoff, router, and nested agent coordination. +- [Guardrails](/developer-guides/agentspan/concepts/guardrails) - Input and output safety, retry, block, and fix behavior. +- [Memory](/developer-guides/agentspan/concepts/memory) - Conversation history and semantic search across sessions. +- [Streaming](/developer-guides/agentspan/concepts/streaming) - Runtime events, async execution, and HITL with streams. +- [Testing](/developer-guides/agentspan/concepts/testing) - `mock_run`, `expect`, record/replay, pytest, and evaluation helpers. 
+ +## Deployment + +- [Deployment overview](/developer-guides/agentspan/reference/deployment) - Local development, Docker, Helm, and Orkes Cloud. +- [Self-hosting](/developer-guides/agentspan/reference/self-hosting) - Run Agentspan in your own environment. + +## Examples + +- [Support Ticket Triage](/developer-guides/agentspan/examples/support-triage) - Classify, route, and resolve support tickets. +- [Research Pipeline](/developer-guides/agentspan/examples/research-pipeline) - Run sequential research, writing, and editing agents. +- [Batch Document Processor](/developer-guides/agentspan/examples/document-processor) - Process multiple documents in parallel. +- [Crash and Resume](/developer-guides/agentspan/examples/crash-resume) - Resume durable executions after worker failure. +- [Human in the Loop](/developer-guides/agentspan/examples/human-in-the-loop) - Pause execution for human approval. +- [LangGraph Code Review Bot](/developer-guides/agentspan/examples/langgraph) - Wrap an existing LangGraph app. +- [OpenAI Agents SDK Customer Support](/developer-guides/agentspan/examples/openai-agents-sdk) - Run an OpenAI Agents SDK app through Agentspan. +- [Google ADK Research Assistant](/developer-guides/agentspan/examples/google-adk) - Run a Google ADK agent through Agentspan. + +## Reference + +- [CLI Reference](/developer-guides/agentspan/reference/cli) - Commands with exact syntax. +- [LLM Providers](/developer-guides/agentspan/reference/providers) - Providers, model strings, and API keys. +- [AI Models](/developer-guides/agentspan/reference/ai-models) - Model configuration and supported provider formats. +- [Integrations](/developer-guides/agentspan/reference/integrations) - Framework integrations and compatibility notes. +- [Worker Types](/developer-guides/agentspan/reference/worker-types) - Python and TypeScript worker models. 
diff --git a/docs/developer-guides/agentspan/quickstart.mdx b/docs/developer-guides/agentspan/quickstart.mdx new file mode 100644 index 00000000..2f58ab0d --- /dev/null +++ b/docs/developer-guides/agentspan/quickstart.mdx @@ -0,0 +1,124 @@ +--- +slug: "/developer-guides/agentspan/quickstart" +title: "Quickstart" +description: "Install Agentspan and run your first durable AI agent in under 60 seconds." +--- + +# Quickstart + +Get Agentspan running locally in under 60 seconds. + +## Step 1 — Install + +```bash +pip install agentspan +``` + +This installs the Python SDK and the `agentspan` CLI — everything you need as a Python developer. + +Verify your setup: + +```bash +agentspan doctor +``` + +> **uv:** `uv pip install agentspan` also works. +> +> **CLI only (no Python SDK):** `npm install -g @agentspan-ai/agentspan` — downloads the binary eagerly at install time, no Python required. + +## Step 2 — Set your LLM API key + +```bash +# OpenAI +export OPENAI_API_KEY=sk-... + +# Anthropic +export ANTHROPIC_API_KEY=sk-ant-... +``` + +See [Providers](/developer-guides/agentspan/reference/providers) for all supported models and environment variables. + +## Step 3 — Start the server + +```bash +agentspan server start +``` + +On first run, this downloads the Agentspan server JAR (~50 MB) and starts it on `http://localhost:6767`. Subsequent starts use the cached JAR. Open `http://localhost:6767` in your browser to see the visual execution UI. + +> **Local default:** The server uses SQLite with WAL mode — no external database needed for local development. Data is stored in `agent-runtime.db` in the working directory. 
+ +## Step 4 — Run your first agent + +Save this as `hello.py` and run `python hello.py`: + +```python +from agentspan.agents import Agent, AgentRuntime, tool + +@tool +def get_weather(city: str) -> str: + """Get current weather for a city.""" + return f"72°F and sunny in {city}" + +agent = Agent( + name="weatherbot", + model="openai/gpt-4o", # if you set OPENAI_API_KEY + # model="anthropic/claude-sonnet-4-6", # if you set ANTHROPIC_API_KEY + instructions="You are an outdoor activity assistant. When asked about a city, look up the weather there, then recommend 2-3 specific outdoor activities suited to those conditions. Be direct: good weather for hiking is different from good weather for a beach day.", + tools=[get_weather], +) + +with AgentRuntime() as runtime: + result = runtime.run(agent, "What should I do today in NYC?") + result.print_result() +``` + +You should see the answer printed, and the execution visible in the UI at `http://localhost:6767`. + +## What just happened? + +When you called `runtime.run()`, the SDK: + +1. Compiled your `Agent` into a durable execution on the Agentspan server +2. Started a worker process to handle `@tool` function calls +3. Executed the workflow on the server — not in-process +4. Returned the result when complete + +**The key difference from other SDKs:** if your process crashes mid-execution, the workflow keeps running on the server. You can reconnect to it by execution ID from any machine. + +**Tool locality — what runs where:** +- `@tool` functions run in **your worker process** — full access to your code, libraries, and local state +- `http_tool()`, `api_tool()`, `mcp_tool()` run **server-side** — no code to write, just configure a URL + +See [Tools](/developer-guides/agentspan/concepts/tools) for all tool types. + +## Alternative: Using module-level functions + +If you prefer not to use the context manager, module-level functions are available. 
They use a shared singleton runtime under the hood: + +```python +from agentspan.agents import Agent, tool, run + +@tool +def get_weather(city: str) -> str: + """Get current weather for a city.""" + return f"72°F and sunny in {city}" + +agent = Agent( + name="weatherbot", + model="openai/gpt-4o", # if you set OPENAI_API_KEY + # model="anthropic/claude-sonnet-4-6", # if you set ANTHROPIC_API_KEY + instructions="You are an outdoor activity assistant. When asked about a city, look up the weather there, then recommend 2-3 specific outdoor activities suited to those conditions. Be direct: good weather for hiking is different from good weather for a beach day.", + tools=[get_weather], +) +result = run(agent, "What should I do today in NYC?") +result.print_result() +``` + +## Next steps + +- [Concepts: Agents](/developer-guides/agentspan/concepts/agents) — all `Agent` constructor parameters +- [Concepts: Tools](/developer-guides/agentspan/concepts/tools) — `@tool`, `http_tool`, `mcp_tool`, `api_tool` +- [Providers](/developer-guides/agentspan/reference/providers) — all supported LLM providers and model strings +- [Deployment](/developer-guides/agentspan/reference/deployment) — local, Docker, and Kubernetes setups +- [Examples](/developer-guides/agentspan/examples) — 180+ runnable examples diff --git a/docs/developer-guides/agentspan/reference/ai-models.mdx b/docs/developer-guides/agentspan/reference/ai-models.mdx new file mode 100644 index 00000000..369e5c2d --- /dev/null +++ b/docs/developer-guides/agentspan/reference/ai-models.mdx @@ -0,0 +1,222 @@ +--- +slug: "/developer-guides/agentspan/reference/ai-models" +title: "AI Model Configuration" +--- + +# AI Model Configuration + +Agentspan supports 12+ AI providers out of the box. Configure them by setting environment variables before starting the server. + +## Quick Setup + +Set the API key for the provider(s) you want to use: + +```bash +# OpenAI (most common) +export OPENAI_API_KEY=sk-... 
+ +# Anthropic (Claude) +export ANTHROPIC_API_KEY=sk-ant-... + +# Google Gemini +export GEMINI_API_KEY=AI... +export GOOGLE_CLOUD_PROJECT=your-gcp-project-id + +# Then start the server +agentspan server start +``` + +## All Providers + +### OpenAI + +| Variable | Description | +|---|---| +| `OPENAI_API_KEY` | API key from [platform.openai.com](https://platform.openai.com/api-keys) | +| `OPENAI_ORG_ID` | Organization ID (optional) | + +**Models:** `openai/gpt-4o`, `openai/gpt-4o-mini`, `openai/gpt-4-turbo`, `openai/o1`, `openai/o1-mini`, `openai/o3-mini` + +**Embeddings:** `openai/text-embedding-3-small`, `openai/text-embedding-3-large` + +**Image generation:** `openai/dall-e-3` + +--- + +### Anthropic (Claude) + +| Variable | Description | +|---|---| +| `ANTHROPIC_API_KEY` | API key from [console.anthropic.com](https://console.anthropic.com/) | + +**Models:** `anthropic/claude-opus-4-20250514`, `anthropic/claude-sonnet-4-20250514`, `anthropic/claude-3-5-sonnet-20241022`, `anthropic/claude-3-haiku-20240307` + +--- + +### Google Gemini + +| Variable | Description | +|---|---| +| `GEMINI_API_KEY` | API key from [aistudio.google.com](https://aistudio.google.com/apikey) | +| `GOOGLE_CLOUD_PROJECT` | **Required.** GCP project ID | + +**Models:** `google_gemini/gemini-2.0-flash`, `google_gemini/gemini-1.5-pro`, `google_gemini/gemini-1.5-flash` + +**Embeddings:** `google_gemini/text-embedding-004` + +**Image generation:** `google_gemini/imagen-3.0-generate-002` + +--- + +### Azure OpenAI + +| Variable | Description | +|---|---| +| `AZURE_OPENAI_API_KEY` | API key from Azure portal | +| `AZURE_OPENAI_ENDPOINT` | **Required.** Endpoint URL (e.g. 
`https://your-resource.openai.azure.com`) | +| `AZURE_OPENAI_DEPLOYMENT` | **Required.** Deployment name | + +**Models:** `azure_openai/gpt-4o`, `azure_openai/gpt-4`, `azure_openai/gpt-3.5-turbo` + +--- + +### AWS Bedrock + +| Variable | Description | +|---|---| +| `AWS_ACCESS_KEY_ID` | AWS access key | +| `AWS_SECRET_ACCESS_KEY` | AWS secret key | + +**Server properties (optional):** +- `conductor.ai.bedrock.region` — defaults to `us-east-1` + +**Models:** `aws_bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0`, `aws_bedrock/anthropic.claude-3-haiku-20240307-v1:0`, `aws_bedrock/meta.llama3-70b-instruct-v1:0`, `aws_bedrock/amazon.titan-text-express-v1` + +**Embeddings:** `aws_bedrock/amazon.titan-embed-text-v2:0` + +--- + +### Mistral AI + +| Variable | Description | +|---|---| +| `MISTRAL_API_KEY` | API key from [console.mistral.ai](https://console.mistral.ai/) | + +**Models:** `mistral/mistral-large-latest`, `mistral/mistral-medium-latest`, `mistral/mistral-small-latest`, `mistral/open-mixtral-8x7b` + +**Embeddings:** `mistral/mistral-embed` + +--- + +### Cohere + +| Variable | Description | +|---|---| +| `COHERE_API_KEY` | API key from [dashboard.cohere.com](https://dashboard.cohere.com/) | + +**Models:** `cohere/command-r-plus`, `cohere/command-r`, `cohere/command` + +**Embeddings:** `cohere/embed-english-v3.0`, `cohere/embed-multilingual-v3.0` + +--- + +### Grok (xAI) + +| Variable | Description | +|---|---| +| `XAI_API_KEY` | API key from xAI | + +**Models:** `grok/grok-3`, `grok/grok-3-mini` + +--- + +### Perplexity AI + +| Variable | Description | +|---|---| +| `PERPLEXITY_API_KEY` | API key from [perplexity.ai](https://www.perplexity.ai/) | + +**Models:** `perplexity/sonar-pro`, `perplexity/sonar` + +--- + +### Hugging Face + +| Variable | Description | +|---|---| +| `HUGGINGFACE_API_KEY` | API token from [huggingface.co](https://huggingface.co/settings/tokens) | + +**Models:** `hugging_face/meta-llama/Llama-3-70b-chat-hf`, 
`hugging_face/mistralai/Mistral-7B-Instruct-v0.2` + +--- + +### Stability AI + +| Variable | Description | +|---|---| +| `STABILITY_API_KEY` | API key from [platform.stability.ai](https://platform.stability.ai/) | + +**Image generation:** `stabilityai/sd3.5-large`, `stabilityai/sd3.5-medium`, `stabilityai/stable-image-core` + +--- + +### Ollama (Local / Remote) + +No API key required. Ollama must be running and reachable. + +| Variable | Description | +|---|---| +| `OLLAMA_BASE_URL` | Ollama server URL (default: `http://localhost:11434`) | + +```bash +# Local (default) +# No configuration needed if Ollama is running on localhost + +# Remote or custom port +export OLLAMA_BASE_URL=http://your-gpu-server:11434 +``` + +Install Ollama: [ollama.com/download](https://ollama.com/download) + +**Models:** `ollama/llama3`, `ollama/mistral`, `ollama/phi3`, `ollama/codellama` + +**Embeddings:** `ollama/nomic-embed-text` + +--- + +## Model Format + +When specifying models in your agents, use the format `provider/model-name`: + +```python +agent = Agent(name="my_agent", model="openai/gpt-4o") +agent = Agent(name="my_agent", model="anthropic/claude-sonnet-4-20250514") +agent = Agent(name="my_agent", model="google_gemini/gemini-2.0-flash") +``` + +## Server Properties + +These properties can be set as environment variables. The table lists the variable name for each property; a property with no variable listed (— in the Env Variable column) can be set using the Spring Boot convention (dots become underscores, uppercase), for example `CONDUCTOR_AI_BEDROCK_REGION`: + +| Property | Env Variable | Default | Description | +|---|---|---|---| +| `conductor.integrations.ai.enabled` | `CONDUCTOR_INTEGRATIONS_AI_ENABLED` | `true` | Enable/disable AI integration | +| `conductor.ai.openai.api-key` | `OPENAI_API_KEY` | — | OpenAI API key | +| `conductor.ai.openai.organization-id` | `OPENAI_ORG_ID` | — | OpenAI organization | +| `conductor.ai.anthropic.api-key` | `ANTHROPIC_API_KEY` | — | Anthropic API key | +| `conductor.ai.gemini.api-key` | `GEMINI_API_KEY` | — | Google Gemini API key | +| `conductor.ai.gemini.project-id` | `GOOGLE_CLOUD_PROJECT` | — | GCP project ID | +| 
`conductor.ai.gemini.location` | — | `us-central1` | GCP region | +| `conductor.ai.azureopenai.api-key` | `AZURE_OPENAI_API_KEY` | — | Azure OpenAI API key | +| `conductor.ai.azureopenai.base-url` | `AZURE_OPENAI_ENDPOINT` | — | Azure OpenAI endpoint | +| `conductor.ai.azureopenai.deployment-name` | `AZURE_OPENAI_DEPLOYMENT` | — | Azure deployment name | +| `conductor.ai.bedrock.access-key` | `AWS_ACCESS_KEY_ID` | — | AWS access key | +| `conductor.ai.bedrock.secret-key` | `AWS_SECRET_ACCESS_KEY` | — | AWS secret key | +| `conductor.ai.bedrock.region` | — | `us-east-1` | AWS region | +| `conductor.ai.mistral.api-key` | `MISTRAL_API_KEY` | — | Mistral API key | +| `conductor.ai.cohere.api-key` | `COHERE_API_KEY` | — | Cohere API key | +| `conductor.ai.grok.api-key` | `XAI_API_KEY` | — | Grok/xAI API key | +| `conductor.ai.perplexity.api-key` | `PERPLEXITY_API_KEY` | — | Perplexity API key | +| `conductor.ai.huggingface.api-key` | `HUGGINGFACE_API_KEY` | — | Hugging Face token | +| `conductor.ai.stabilityai.api-key` | `STABILITY_API_KEY` | — | Stability AI API key | +| `conductor.ai.ollama.base-url` | `OLLAMA_BASE_URL` | `http://localhost:11434` | Ollama server URL | diff --git a/docs/developer-guides/agentspan/reference/cli.mdx b/docs/developer-guides/agentspan/reference/cli.mdx new file mode 100644 index 00000000..63dc7458 --- /dev/null +++ b/docs/developer-guides/agentspan/reference/cli.mdx @@ -0,0 +1,168 @@ +--- +slug: "/developer-guides/agentspan/reference/cli" +title: "CLI Reference" +description: "Agentspan CLI commands for server, credentials, agents, skills, status, and execution history" +--- + +# CLI Reference + +**Python developers:** `pip install agentspan` gives you the SDK and the CLI. The pip package registers the `agentspan` command as a console script; on first invocation it downloads the Go binary from S3 and caches it. + +**CLI only (no Python SDK):** `npm install -g @agentspan-ai/agentspan` — downloads the Go binary eagerly at install time. 
Useful if you don't have Python or want the binary pre-fetched. + +```bash +agentspan version # Print the CLI version +agentspan --help # List all commands +``` + +## Server Commands + +```bash +agentspan server start # Download (if needed) and start the server +agentspan server stop # Stop the server +agentspan server logs # View server logs +``` + +`agentspan server start` downloads the Agentspan server JAR on first run (~50 MB) and starts it as a local process. The JAR is cached — subsequent starts are instant. The server runs on port `6767`. The UI and API are both served from the same port — open `http://localhost:6767` in your browser to see the visual execution UI. + +## Diagnostics + +```bash +agentspan doctor # Check system dependencies and AI provider configuration +``` + +`agentspan doctor` verifies: +- CLI is installed and working +- Java runtime is available (required to run the server) +- Python SDK is installed +- API keys are configured +- Server is reachable + +## Credential Management + +Store secrets on the server once. Tools resolve them automatically at runtime — no `.env` files, no hardcoded keys, no secrets in git. + +```bash +agentspan credentials set KEY value # Store a credential (encrypted at rest) +agentspan credentials list # List stored credential keys +agentspan credentials delete KEY # Delete a credential +``` + +Credentials are encrypted with AES-256-GCM. Only the key names are shown in `list` — values are never exposed. + +Example: + +```bash +agentspan credentials set GITHUB_TOKEN ghp_xxxxxxxxxxxx +agentspan credentials set SEARCH_API_KEY xxx-your-key +``` + +Use them in tools with `@tool(credentials=["KEY"])`. See [Tools](/developer-guides/agentspan/concepts/tools) for details. 
+ +## Agent Commands + +### Status + +```bash +agentspan agent status <execution-id> # Get detailed status of a running execution +``` + +### Respond to HITL + +```bash +agentspan agent respond <execution-id> --approve +agentspan agent respond <execution-id> --deny --reason "Amount too large, escalate to finance" +agentspan agent respond <execution-id> --message "Please use a different approach" +``` + +### Execution History + +```bash +agentspan agent execution --since 1h +agentspan agent execution --name my_agent --since 1d +agentspan agent execution --status COMPLETED --since 7d +agentspan agent execution --name my_agent --status FAILED --since 1mo +``` + +Time formats: `30s`, `5m`, `1h`, `6h`, `1d`, `7d`, `1mo`, `1y` + +### Run and Stream + +```bash +agentspan agent run --name my_agent "What is quantum computing?" # Run deployed agent and stream output +agentspan agent run --config agent.yaml "What is quantum computing?" # Run from config file +agentspan agent stream <execution-id> # Stream events from a running execution +``` + +### List and Get + +```bash +agentspan agent list # List all registered agents +agentspan agent get my_agent # Get agent configuration JSON +agentspan agent compile my_agent # Compile and inspect execution plan (dry run) +``` + +### Skills + +```bash +agentspan skill run ./my-skill "Do the task" --model openai/gpt-4o +agentspan skill run my-skill "Do the task" --model openai/gpt-4o --version 2026.05.21 +agentspan skill run code-review "Review current changes" --model openai/gpt-4o --workspace . 
+agentspan skill run code-review "Review docs too" --model openai/gpt-4o --filesystem docs=./docs +agentspan skill load ./my-skill --model openai/gpt-4o +agentspan skill register ./my-skill --model openai/gpt-4o --version 2026.05.21 +agentspan skill list --all-versions +agentspan skill get my-skill --version 2026.05.21 +agentspan skill pull my-skill ./my-skill-copy --version 2026.05.21 +agentspan skill delete my-skill --version 2026.05.21 --yes +agentspan skill serve my-skill --version 2026.05.21 --script-timeout 300 +``` + +When `skill run` or `skill serve` is given a registered skill name instead of +a local directory, the CLI downloads the package into +`~/.agentspan/skills/<name>/<version>/files` and reuses that cached copy until +the server checksum changes. Downloaded packages are checksum-verified before +the cache is installed or any script worker is started. If a registered skill +references another registered skill, the server resolves that reference at +compile time and the CLI downloads the referenced package too, so script tools +and `read_skill_file` work for both the parent and referenced skills. Referenced +skill versions are pinned when the parent is registered; running `parent@v1` +continues to use the child version that was latest at registration time. + +`skill register` excludes generated directories, common secret files such as +`.env` and private keys, and paths matched by `.agentspanignore`. + +`skill run` exposes the current directory as the `workspace` filesystem root by +default. Skills can list, read, search, and inspect git status/diff through +workspace tools served by the CLI. Use `--workspace <path>` to point at a +different checkout, `--filesystem <name>=<path>` to expose additional read-only +roots, or `--no-workspace` to run without local filesystem context. 
+ +## Configuration + +Configure the server URL and auth credentials: + +```bash +agentspan configure --url https://your-server.example.com +agentspan configure --url https://your-server.example.com --auth-key my-key --auth-secret my-secret +``` + +Or set environment variables: + +```bash +export AGENTSPAN_SERVER_URL=https://your-server.example.com +export AGENTSPAN_AUTH_KEY=your-key +export AGENTSPAN_AUTH_SECRET=your-secret +``` + +Or configure in Python code: + +```python +from agentspan.agents import configure + +configure( + server_url="https://your-server.example.com", + auth_key="your-key", + auth_secret="your-secret", +) +``` diff --git a/docs/developer-guides/agentspan/reference/deployment.mdx b/docs/developer-guides/agentspan/reference/deployment.mdx new file mode 100644 index 00000000..a2c4ca7d --- /dev/null +++ b/docs/developer-guides/agentspan/reference/deployment.mdx @@ -0,0 +1,184 @@ +--- +slug: "/developer-guides/agentspan/reference/deployment" +title: "Deployment" +description: "Deploy Agentspan locally with SQLite, with Docker Compose, or on Kubernetes" +--- + +# Deployment + +Agentspan runs on a persistent server that manages durable execution. Your Python workers connect to the server and execute tools as distributed tasks. + +## Local Development (SQLite — zero setup) + +The default setup uses SQLite with WAL mode. No external database needed. + +```bash +agentspan server start +``` + +Data is stored in `agent-runtime.db` in the working directory. The UI and API are both at `http://localhost:6767`. + +## Production (PostgreSQL + Docker Compose) + +For production workloads, use PostgreSQL for durability and concurrent access. + +**1. Clone the repo and start the compose stack:** + +```bash +git clone https://github.com/agentspan-ai/agentspan.git && cd agentspan/deployment/docker-compose +cp .env.example .env +# Set at least one LLM provider key in .env (e.g. 
OPENAI_API_KEY) +docker compose up -d +``` + +The compose stack starts two services: +- `agentspan` — the Agentspan server (port 6767) +- `postgres` — PostgreSQL 16 + +Open `http://localhost:6767` for the UI. Health check: `http://localhost:6767/actuator/health`. + +**2. Point your Python workers at the running server:** + +```bash +export AGENTSPAN_SERVER_URL=http://localhost:6767 +python my_agent.py +``` + +Workers are stateless Python processes that poll the server for tasks. Scale them independently. + +## PostgreSQL (without Docker) + +Start PostgreSQL separately and configure the server: + +```bash +export SPRING_DATASOURCE_URL=jdbc:postgresql://your-host:5432/conductor +export SPRING_DATASOURCE_USERNAME=your_user +export SPRING_DATASOURCE_PASSWORD=your_password +export SPRING_PROFILES_ACTIVE=postgres +agentspan server start +``` + +## Kubernetes + +Kubernetes manifests and a Helm chart are included in the repo under `deployment/k8s/` and `deployment/helm/`. + +For a minimal single-node deployment: + +```yaml +# agentspan-server Deployment +apiVersion: apps/v1 +kind: Deployment +metadata: + name: agentspan-server +spec: + replicas: 1 + selector: + matchLabels: + app: agentspan-server + template: + metadata: + labels: + app: agentspan-server + spec: + containers: + - name: agentspan-server + image: ghcr.io/agentspan-ai/agentspan-server:latest + ports: + - containerPort: 6767 + env: + - name: SPRING_PROFILES_ACTIVE + value: postgres + - name: SPRING_DATASOURCE_URL + valueFrom: + secretKeyRef: + name: agentspan-secrets + key: db-url + - name: SPRING_DATASOURCE_USERNAME + valueFrom: + secretKeyRef: + name: agentspan-secrets + key: db-username + - name: SPRING_DATASOURCE_PASSWORD + valueFrom: + secretKeyRef: + name: agentspan-secrets + key: db-password +--- +apiVersion: v1 +kind: Service +metadata: + name: agentspan-server +spec: + selector: + app: agentspan-server + ports: + - port: 6767 + targetPort: 6767 +``` + +Worker pods connect via `AGENTSPAN_SERVER_URL`: + 
+```python +import os +os.environ["AGENTSPAN_SERVER_URL"] = "http://agentspan-server:6767" + +from agentspan.agents import Agent, run +agent = Agent(name="my_agent", model="openai/gpt-4o") +result = run(agent, "Hello") +``` + +Workers are stateless — run as many replicas as you need. The server queues tasks; workers poll and execute them. + +## Authentication + +For remote servers (non-localhost), set auth credentials: + +```bash +export AGENTSPAN_SERVER_URL=https://my-server.example.com +export AGENTSPAN_AUTH_KEY=my-key +export AGENTSPAN_AUTH_SECRET=my-secret +``` + +Or via code: + +```python +from agentspan.agents import configure + +configure( + server_url="https://my-server.example.com", + auth_key="my-key", + auth_secret="my-secret", +) +``` + +No other code changes needed. Your agents run exactly the same — the only difference is where the server runs. + +## Configuration Reference + +| Variable | Default | Description | +|---|---|---| +| `SERVER_PORT` | `6767` | Server port | +| `SPRING_PROFILES_ACTIVE` | `default` (SQLite) | Set to `postgres` for PostgreSQL | +| `SPRING_DATASOURCE_URL` | `jdbc:sqlite:agent-runtime.db` | Database URL | +| `SPRING_DATASOURCE_USERNAME` | `postgres` | Database user | +| `SPRING_DATASOURCE_PASSWORD` | `postgres` | Database password | +| `AGENTSPAN_SERVER_URL` | `http://localhost:6767` | Server URL (used by SDK/workers) | +| `AGENTSPAN_AUTH_KEY` | — | Auth key (required for non-localhost) | +| `AGENTSPAN_AUTH_SECRET` | — | Auth secret (required for non-localhost) | +| `OPENAI_API_KEY` | — | OpenAI API key | +| `ANTHROPIC_API_KEY` | — | Anthropic API key | +| `GEMINI_API_KEY` | — | Google Gemini API key | +| `MISTRAL_API_KEY` | — | Mistral API key | +| `XAI_API_KEY` | — | Grok (xAI) API key | + +## Production Checklist + +Before going to production: + +- [ ] Switch from SQLite to a managed PostgreSQL (RDS, Cloud SQL, etc.) 
+- [ ] Set `AGENTSPAN_AUTH_KEY` and `AGENTSPAN_AUTH_SECRET` for all workers +- [ ] Configure `max_turns` and timeouts for long-running agents +- [ ] Add guardrails for all user-facing agents +- [ ] Test agents with `mock_run` before deploying +- [ ] Use `AgentHandle.get_status()` to monitor long-running executions +- [ ] Set up alerting via the Agentspan UI or Prometheus metrics diff --git a/docs/developer-guides/agentspan/reference/integrations.mdx b/docs/developer-guides/agentspan/reference/integrations.mdx new file mode 100644 index 00000000..f91e1338 --- /dev/null +++ b/docs/developer-guides/agentspan/reference/integrations.mdx @@ -0,0 +1,144 @@ +--- +slug: "/developer-guides/agentspan/reference/integrations" +title: "Framework Integrations" +description: "Use Agentspan with LangGraph, the OpenAI Agents SDK, Google ADK, or any framework. Pass your existing agent to runtime.run() — one line, no rewrites." +--- + +# Framework Integrations + +Agentspan works with the frameworks you already use. Pass your existing agent directly to `runtime.run()` — definitions, tools, and routing logic stay exactly as written. You get crash recovery, durable human-in-the-loop, and full execution history without changing a single node or handoff. + +```python +from agentspan.agents import AgentRuntime + +with AgentRuntime() as runtime: + result = runtime.run(your_existing_agent, "your prompt") +``` + +`your_existing_agent` can be a LangGraph compiled graph, an OpenAI Agents SDK `Agent`, a Google ADK pipeline, or a native Agentspan `Agent`. The API is the same. 
+ +--- + +## Supported frameworks + +### LangGraph + +Pass a compiled `StateGraph` or any graph produced by `create_react_agent`: + +```python +from langgraph.prebuilt import create_react_agent +from agentspan.agents import AgentRuntime + +graph = create_react_agent(model="openai/gpt-4o", tools=[search, calculator]) + +with AgentRuntime() as runtime: + result = runtime.run(graph, "Research the history of the Eiffel Tower") + print(result.workflow_id) +``` + +**Note:** Do not pass a `checkpointer` when wrapping with AgentRuntime — Agentspan manages execution state server-side and the two checkpointing models conflict. LangSmith observability is fully compatible and unaffected. + +→ [Full LangGraph example — code review bot](/developer-guides/agentspan/examples/langgraph) + +--- + +### OpenAI Agents SDK + +Pass an `Agent` from the `agents` package directly: + +```python +from agents import Agent as OAIAgent, WebSearchTool +from agentspan.agents import AgentRuntime + +oai_agent = OAIAgent( + name="support_agent", + instructions="You are a helpful customer support agent.", + tools=[WebSearchTool()], +) + +with AgentRuntime() as runtime: + result = runtime.run(oai_agent, "How do I reset my password?") + print(result.output["result"]) +``` + +Agent definitions, handoffs, and tool registrations stay exactly as written. + +→ [Full OpenAI Agents SDK example — support agent](/developer-guides/agentspan/examples/openai-agents-sdk) + +--- + +### Google ADK + +Pass any ADK pipeline (`SequentialAgent`, `ParallelAgent`, `LoopAgent`, or a custom `BaseAgent`): + +```python +from google.adk.agents import SequentialAgent, LlmAgent +from agentspan.agents import AgentRuntime + +researcher = LlmAgent(name="researcher", model="gemini-2.0-flash", ...) +writer = LlmAgent(name="writer", model="gemini-2.0-flash", ...) 
+pipeline = SequentialAgent(name="pipeline", sub_agents=[researcher, writer]) + +with AgentRuntime() as runtime: + result = runtime.run(pipeline, "Research and summarize quantum computing trends") + print(result.output["result"]) +``` + +→ [Full Google ADK example — research assistant](/developer-guides/agentspan/examples/google-adk) + +--- + +## What Agentspan adds to any framework + +| Capability | Without Agentspan | With Agentspan | +|---|---|---| +| Process crash mid-run | Entire run lost | Resumes from last completed step | +| Human approval pause | State held in memory | Paused server-side, survives restarts | +| Execution history | None | Every run stored with inputs, outputs, token usage | +| Long-running agents | Risk of timeout or OOM | Runs detached from your process | +| Observability | Framework-specific | Unified across all frameworks | + +--- + +## Tool locality + +Regardless of which framework you use, tools in Agentspan run in one of two places: + +| Tool type | Where it runs | What you provide | +|---|---|---| +| `@tool` (Python function) | **Your worker process** | The function code | +| `http_tool()` | **Agentspan server** | A URL and optional headers | +| `api_tool()` | **Agentspan server** | An OpenAPI/Swagger spec URL | +| `mcp_tool()` | **Agentspan server** | An MCP server URL | + +When you wrap a LangGraph graph or OpenAI SDK agent with `AgentRuntime`, its tool functions become worker-executed tasks. Server-side tools (`http_tool`, `api_tool`, `mcp_tool`) run on the server regardless of framework. + +See [Tools](/developer-guides/agentspan/concepts/tools) for details. 
+ +--- + +## Native Agentspan agents + +If you're not using an existing framework, define agents natively: + +```python +from agentspan.agents import Agent, tool, AgentRuntime + +@tool +def search_web(query: str) -> str: + """Search the web for information.""" + return f"Results for: {query}" + +agent = Agent( + name="researcher", + model="openai/gpt-4o", + tools=[search_web], + instructions="Research topics thoroughly.", +) + +with AgentRuntime() as runtime: + result = runtime.run(agent, "What is quantum entanglement?") + result.print_result() +``` + +→ [Quickstart](/developer-guides/agentspan/quickstart) · [Agents concept](/developer-guides/agentspan/concepts/agents) · [Tools concept](/developer-guides/agentspan/concepts/tools) diff --git a/docs/developer-guides/agentspan/reference/providers.mdx b/docs/developer-guides/agentspan/reference/providers.mdx new file mode 100644 index 00000000..283d1add --- /dev/null +++ b/docs/developer-guides/agentspan/reference/providers.mdx @@ -0,0 +1,207 @@ +--- +slug: "/developer-guides/agentspan/reference/providers" +title: "LLM Providers" +description: "All supported LLM providers, model strings, and API key configuration" +--- + +# LLM Providers + +Agentspan supports 15+ AI providers. Set the environment variables for the providers you want to use before starting the server. The server auto-enables each provider when its key is present — no manual integration setup needed. + +## Quick Setup + +```bash +# OpenAI +export OPENAI_API_KEY=sk-... + +# Anthropic +export ANTHROPIC_API_KEY=sk-ant-... + +# Google Gemini +export GEMINI_API_KEY=AI... 
+export GOOGLE_CLOUD_PROJECT=your-gcp-project-id + +# Then start the server +agentspan server start +``` + +## Model Format + +Specify models in your agents using `provider/model-name`: + +```python +agent = Agent(name="bot", model="openai/gpt-4o") +agent = Agent(name="bot", model="anthropic/claude-sonnet-4-6") +agent = Agent(name="bot", model="google_gemini/gemini-2.0-flash") +``` + +## All Providers + +### OpenAI + +| Variable | Description | +|---|---| +| `OPENAI_API_KEY` | API key from [platform.openai.com](https://platform.openai.com/api-keys) | +| `OPENAI_ORG_ID` | Organization ID (optional) | + +**Models:** `openai/gpt-4o`, `openai/gpt-4o-mini`, `openai/gpt-4-turbo`, `openai/o1`, `openai/o1-mini`, `openai/o3-mini` + +**Embeddings:** `openai/text-embedding-3-small`, `openai/text-embedding-3-large` + +--- + +### Anthropic (Claude) + +| Variable | Description | +|---|---| +| `ANTHROPIC_API_KEY` | API key from [console.anthropic.com](https://console.anthropic.com/) | + +**Models:** `anthropic/claude-opus-4-20250514`, `anthropic/claude-sonnet-4-6`, `anthropic/claude-3-5-sonnet-20241022`, `anthropic/claude-3-haiku-20240307` + +--- + +### Google Gemini + +| Variable | Description | +|---|---| +| `GEMINI_API_KEY` | API key from [aistudio.google.com](https://aistudio.google.com/apikey) | +| `GOOGLE_CLOUD_PROJECT` | **Required.** Your GCP project ID | + +**Models:** `google_gemini/gemini-2.0-flash`, `google_gemini/gemini-1.5-pro`, `google_gemini/gemini-1.5-flash` + +**Embeddings:** `google_gemini/text-embedding-004` + +--- + +### Azure OpenAI + +| Variable | Description | +|---|---| +| `AZURE_OPENAI_API_KEY` | API key from Azure portal | +| `AZURE_OPENAI_ENDPOINT` | **Required.** Endpoint URL, e.g. 
`https://your-resource.openai.azure.com` | +| `AZURE_OPENAI_DEPLOYMENT` | **Required.** Deployment name | + +**Models:** `azure_openai/gpt-4o`, `azure_openai/gpt-4`, `azure_openai/gpt-3.5-turbo` + +--- + +### AWS Bedrock + +| Variable | Description | +|---|---| +| `AWS_ACCESS_KEY_ID` | AWS access key | +| `AWS_SECRET_ACCESS_KEY` | AWS secret key | + +**Models:** `aws_bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0`, `aws_bedrock/anthropic.claude-3-haiku-20240307-v1:0`, `aws_bedrock/meta.llama3-70b-instruct-v1:0`, `aws_bedrock/amazon.titan-text-express-v1` + +**Embeddings:** `aws_bedrock/amazon.titan-embed-text-v2:0` + +--- + +### Mistral AI + +| Variable | Description | +|---|---| +| `MISTRAL_API_KEY` | API key from [console.mistral.ai](https://console.mistral.ai/) | + +**Models:** `mistral/mistral-large-latest`, `mistral/mistral-medium-latest`, `mistral/mistral-small-latest`, `mistral/open-mixtral-8x7b` + +**Embeddings:** `mistral/mistral-embed` + +--- + +### Cohere + +| Variable | Description | +|---|---| +| `COHERE_API_KEY` | API key from [dashboard.cohere.com](https://dashboard.cohere.com/) | + +**Models:** `cohere/command-r-plus`, `cohere/command-r`, `cohere/command` + +**Embeddings:** `cohere/embed-english-v3.0`, `cohere/embed-multilingual-v3.0` + +--- + +### Grok (xAI) + +| Variable | Description | +|---|---| +| `XAI_API_KEY` | API key from xAI | + +**Models:** `grok/grok-3`, `grok/grok-3-mini` + +--- + +### Perplexity AI + +| Variable | Description | +|---|---| +| `PERPLEXITY_API_KEY` | API key from [perplexity.ai](https://www.perplexity.ai/) | + +**Models:** `perplexity/sonar-pro`, `perplexity/sonar` + +--- + +### Hugging Face + +| Variable | Description | +|---|---| +| `HUGGINGFACE_API_KEY` | API token from [huggingface.co](https://huggingface.co/settings/tokens) | + +**Models:** `hugging_face/meta-llama/Llama-3-70b-chat-hf`, `hugging_face/mistralai/Mistral-7B-Instruct-v0.2` + +--- + +### Stability AI + +| Variable | Description | +|---|---| +| 
`STABILITY_API_KEY` | API key from [platform.stability.ai](https://platform.stability.ai/) | + +**Image generation:** `stabilityai/sd3.5-large`, `stabilityai/sd3.5-medium`, `stabilityai/stable-image-core` + +--- + +### DeepSeek + +| Variable | Description | +|---|---| +| `DEEPSEEK_API_KEY` | API key from DeepSeek | + +**Models:** `deepseek/deepseek-chat` + +--- + +### Ollama (local) + +No API key required. Ollama must be running and reachable. + +| Variable | Description | +|---|---| +| `OLLAMA_BASE_URL` | Ollama server URL (default: `http://localhost:11434`) | + +**Models:** `ollama/llama3`, `ollama/mistral`, `ollama/phi3`, `ollama/codellama` + +**Embeddings:** `ollama/nomic-embed-text` + +Install Ollama: [ollama.com/download](https://ollama.com/download) + +--- + +## Summary + +| Provider | Env Var | Model Prefix | +|---|---|---| +| OpenAI | `OPENAI_API_KEY` | `openai/` | +| Anthropic | `ANTHROPIC_API_KEY` | `anthropic/` | +| Google Gemini | `GEMINI_API_KEY` + `GOOGLE_CLOUD_PROJECT` | `google_gemini/` | +| Azure OpenAI | `AZURE_OPENAI_API_KEY` + endpoint + deployment | `azure_openai/` | +| AWS Bedrock | `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | `aws_bedrock/` | +| Mistral | `MISTRAL_API_KEY` | `mistral/` | +| Cohere | `COHERE_API_KEY` | `cohere/` | +| Grok / xAI | `XAI_API_KEY` | `grok/` | +| Perplexity | `PERPLEXITY_API_KEY` | `perplexity/` | +| Hugging Face | `HUGGINGFACE_API_KEY` | `hugging_face/` | +| Stability AI | `STABILITY_API_KEY` | `stabilityai/` | +| DeepSeek | `DEEPSEEK_API_KEY` | `deepseek/` | +| Ollama | `OLLAMA_BASE_URL` | `ollama/` | diff --git a/docs/developer-guides/agentspan/reference/self-hosting.mdx b/docs/developer-guides/agentspan/reference/self-hosting.mdx new file mode 100644 index 00000000..ae4297ba --- /dev/null +++ b/docs/developer-guides/agentspan/reference/self-hosting.mdx @@ -0,0 +1,168 @@ +--- +slug: "/developer-guides/agentspan/reference/self-hosting" +title: "Self-Hosting" +description: "Run the Agentspan server on your own 
infrastructure — single VM, multi-node, or managed cloud" +--- + +# Self-Hosting + +Self-hosting Agentspan means running the Agentspan server on your own infrastructure. The server is a Spring Boot application backed by PostgreSQL. Workers are stateless Python processes that connect to it. + +## Architecture + +```mermaid +sequenceDiagram + participant W1 as Python Worker 1 (your code) + participant W2 as Python Worker 2 (your code) + participant S as Agentspan Server :6767 + participant DB as PostgreSQL + Note over S: UI (React dashboard) + REST API + Conductor engine + W1->>S: poll for tasks + S->>DB: read task queue + DB-->>S: pending tasks + S-->>W1: task assignment + W1->>W1: execute tool + W1->>S: report result + S->>DB: persist result + W2->>S: poll for tasks + S->>DB: read task queue + DB-->>S: pending tasks + S-->>W2: task assignment + W2->>W2: execute tool + W2->>S: report result + S->>DB: persist result +``` + +Workers poll the server for tasks to execute. Add more workers to scale throughput. Workers are completely stateless. + +## Single VM — Docker Compose + +The fastest way to self-host. Uses the compose stack from the repo: + +```bash +git clone https://github.com/agentspan-ai/agentspan.git +cd agentspan/deployment/docker-compose +cp .env.example .env +``` + +Edit `.env` to set your LLM provider API keys (e.g. `OPENAI_API_KEY=sk-...`), then: + +```bash +docker compose up -d +``` + +This starts: +- **agentspan** — the server, accessible at `http://localhost:6767` +- **postgres** — PostgreSQL 16 + +Verify: + +```bash +curl http://localhost:6767/actuator/health +docker compose logs --tail=50 agentspan +``` + +Stop and clean up: + +```bash +docker compose down # stops containers, keeps data +docker compose down -v # stops containers, removes postgres volume +``` + +## Running Workers + +Workers are Python processes that connect to the server. 
Run them on the same host or on separate machines: + +```bash +export AGENTSPAN_SERVER_URL=http://your-server:6767 +export OPENAI_API_KEY=sk-... +python my_agent.py +``` + +Your agent code uses `AgentRuntime` as usual: + +```python +from agentspan.agents import Agent, AgentRuntime, tool + +@tool +def process_data(input: str) -> str: + """Process some data.""" + return f"Processed: {input}" + +agent = Agent(name="processor", model="openai/gpt-4o", tools=[process_data]) + +with AgentRuntime() as runtime: + result = runtime.run(agent, "Process this dataset") + result.print_result() +``` + +Run multiple worker processes in parallel to increase tool execution throughput. + +## Authentication + +For any server that isn't running on localhost, enable auth: + +```bash +# In server .env file: +AGENTSPAN_AUTH_KEY=your-app-key +AGENTSPAN_AUTH_SECRET=your-app-secret + +# In worker environment: +export AGENTSPAN_SERVER_URL=https://your-server.example.com +export AGENTSPAN_AUTH_KEY=your-app-key +export AGENTSPAN_AUTH_SECRET=your-app-secret +``` + +Or configure in code: + +```python +from agentspan.agents import configure + +configure( + server_url="https://your-server.example.com", + auth_key="your-app-key", + auth_secret="your-app-secret", +) +``` + +## Multi-Node / Kubernetes + +For multi-node deployments, use the Kubernetes manifests or Helm chart from `deployment/k8s/` and `deployment/helm/` in the repo. + +Key points: +- Run multiple server replicas for high availability +- Use a managed PostgreSQL (RDS, Cloud SQL, etc.) not a containerized one +- Workers scale independently from the server — add as many as needed +- All server replicas share the same database + +See [Deployment](/developer-guides/agentspan/reference/deployment) for Kubernetes YAML examples. 
+ +## Configuration Reference + +The server is configured via environment variables: + +| Variable | Default | Description | +|---|---|---| +| `SERVER_PORT` | `6767` | Port the server listens on | +| `SPRING_PROFILES_ACTIVE` | `default` (SQLite) | Set to `postgres` for PostgreSQL | +| `SPRING_DATASOURCE_URL` | `jdbc:sqlite:agent-runtime.db` | JDBC database URL | +| `SPRING_DATASOURCE_USERNAME` | `postgres` | Database user | +| `SPRING_DATASOURCE_PASSWORD` | `postgres` | Database password | +| `SPRING_DATASOURCE_HIKARI_MAXIMUM_POOL_SIZE` | `8` | Connection pool size | +| `AGENTSPAN_AUTH_KEY` | — | Application auth key | +| `AGENTSPAN_AUTH_SECRET` | — | Application auth secret | +| `OPENAI_API_KEY` | — | OpenAI API key | +| `ANTHROPIC_API_KEY` | — | Anthropic API key | +| `GEMINI_API_KEY` | — | Google Gemini API key | + +All LLM provider keys follow the same pattern — set the key, the server auto-enables that provider. + +## Backup and Recovery + +Agentspan stores all execution state in PostgreSQL. Back up regularly using standard PostgreSQL tools: + +```bash +pg_dump agentspan > backup.sql +``` + +Execution state is durable — in-progress executions resume after a server restart as long as the database is intact. diff --git a/docs/developer-guides/agentspan/reference/worker-types.mdx b/docs/developer-guides/agentspan/reference/worker-types.mdx new file mode 100644 index 00000000..7d685948 --- /dev/null +++ b/docs/developer-guides/agentspan/reference/worker-types.mdx @@ -0,0 +1,89 @@ +--- +slug: "/developer-guides/agentspan/reference/worker-types" +title: "Agentspan Worker Types" +--- + +# Agentspan Worker Types + +Workers are functions registered as Conductor task pollers. They execute locally (SDK-side) +while execution orchestration happens server-side. Both Python and TypeScript SDKs implement +all worker types with identical behavior. 
+ +## Quick Reference + +| # | Worker Type | Task Name Pattern | Trigger | +|---|-------------|-------------------|---------| +| 1 | Tool | `{tool_name}` | `agent.tools` with `tool_type == "worker"` | +| 2 | CLI Command | `{agent_name}_run_command` | `agent.cli_commands == True` | +| 3 | Code Execution | `{agent_name}_execute_code` | `agent.local_code_execution == True` | +| 4 | Output Guardrail (combined) | `{agent_name}_output_guardrail` | Custom guardrails (local compile path) | +| 5 | Individual Guardrail | `{guardrail.name}` | Custom guardrails (server compile path) | +| 6 | Stop When | `{agent_name}_stop_when` | `agent.stop_when` is callable | +| 7 | Gate | `{agent_name}_gate` | `agent.gate` is callable | +| 8 | Callback | `{agent_name}_{position}` | `agent.callbacks` | +| 9 | Termination | `{agent_name}_termination` | `agent.termination` is set | +| 10 | Check Transfer | `{agent_name}_check_transfer` | Hybrid or swarm agent | +| 11 | Router Function | `{agent_name}_router_fn` | `strategy == "router"` with callable router | +| 12 | Handoff Check | `{agent_name}_handoff_check` | `agent.handoffs` is non-empty | +| 13 | Swarm Transfer | `{source_agent}_transfer_to_{peer}` | `strategy == "swarm"` with sub-agents | +| 14 | Manual Selection | `{agent_name}_process_selection` | `strategy == "manual"` with sub-agents | +| 15 | Framework | `{worker.name}` | Foreign framework with extractable tools | +| 16 | Framework Passthrough | `{worker_name}` | Foreign framework (passthrough) | +| 17 | Claude Code Passthrough | `{agent_name}` | `model="claude-code/..."` | + +## Task Definition Registration + +**Task definitions are registered by the server during agent compilation.** SDKs do NOT register +task definitions — they only poll for tasks. The server returns `requiredWorkers` in the +start/deploy response so SDKs know exactly which workers to register. 
+ +| Setting | Value | Source | +|----------------------|----------------|--------| +| `timeoutSeconds` | 0 (no timeout) | `AgentService.registerTaskDef()` | +| `responseTimeoutSeconds` | 3600 (1 hour) | Conductor requires minimum 1s | +| Retry count | 2 | Server-side | +| Retry logic | LINEAR_BACKOFF | Server-side | +| Retry delay | 2 s | Server-side | + +## Task Name Prefixing Rules + +All auto-generated task names MUST be prefixed with the agent name to prevent collisions +when multiple agents share the same Conductor namespace. + +| Category | Pattern | Example | +|---|---|---| +| System workers | `{agentName}_{type}` | `my_agent_termination` | +| CLI tools | `{agentName}_run_command` | `git_fetch_run_command` | +| Code execution | `{agentName}_execute_code` | `coder_execute_code` | +| Swarm transfers | `{sourceAgent}_transfer_to_{peer}` | `coder_transfer_to_qa_tester` | +| User-defined tools | `{tool_name}` (user-controlled) | `get_weather` | + +## Credentials + +Credentials are always resolved from the server. There is no environment variable fallback. + +- **Execution token present** → `POST /api/workers/secrets` → injected into `process.env`/`os.environ` +- **No execution token** → `FAILED_WITH_TERMINAL_ERROR` (non-retryable) +- **Credentials not found on server** → `FAILED_WITH_TERMINAL_ERROR` (non-retryable) +- **Cleanup** → credentials removed from environment after tool execution + +Store credentials with: `agentspan credentials set --name <NAME>` + +## TypeScript Parity + +Both Python and TypeScript SDKs implement all 17 worker types. 
The TypeScript SDK includes: +- All SWARM workers (transfer_to, check_transfer, handoff_check, process_selection) +- Claude Code agent support (`ClaudeCode` class, `PermissionMode` enum) +- CLI command execution (`CliConfig`, `makeCliTool()`) +- Code execution validators (`CommandValidator`) +- LLM guardrails (`LLMGuardrail` class) +- Credential resolution and injection (`resolveCredentials`, `injectCredentials`) + +## Worker Lifecycle + +1. **Server compilation** — Server compiles agent → registers all task definitions → returns `requiredWorkers` list +2. **SDK registration** — SDK registers workers (poll functions) for task names in `requiredWorkers` +3. **Polling** — Workers poll Conductor for tasks, execute functions, report results +4. **Credentials** — Before tool execution, credentials are resolved from server and injected +5. **Monitoring** — TaskHandler checks process health every 5s and restarts dead workers +6. **Shutdown** — Workers stopped during `AgentRuntime.shutdown()` diff --git a/docs/developer-guides/agentspan/why-agentspan.mdx b/docs/developer-guides/agentspan/why-agentspan.mdx new file mode 100644 index 00000000..3a4f5423 --- /dev/null +++ b/docs/developer-guides/agentspan/why-agentspan.mdx @@ -0,0 +1,89 @@ +--- +slug: "/developer-guides/agentspan/why-agentspan" +title: "Why Agentspan" +description: "Why agents fail in production, and how Agentspan's server-side execution model solves it." +--- + +# Why Agentspan + +**Agentspan is a durable runtime for AI agents. Your code runs in your process. Execution state lives on the server — so crashes, restarts, and deployments don't lose work.** + +--- + +## How most agent frameworks work + +Most agent frameworks — LangGraph, the OpenAI Agents SDK, Google ADK, and others — run the agent loop inside your process. Your code calls the LLM, receives a tool call, executes the tool, and loops. All of that happens in memory, in your process. 
+ +``` +Your process +└── agent loop + ├── call LLM + ├── execute tool + ├── call LLM again + └── ...until done +``` + +This works fine on your laptop. In production, it breaks in predictable ways. + +--- + +## What can go wrong + +**Process crash mid-run.** A long-running agent — one that searches the web, reads files, calls APIs across dozens of steps — can take minutes. If your process dies (OOM kill, deploy, network drop), the entire run is gone. There is no way to resume from where it stopped. + +**Human-in-the-loop doesn't survive restarts.** Pausing an agent to wait for a human approval means holding state in memory. If anything interrupts that wait — a timeout, a restart, a deploy — the approval request is lost and the agent can't resume. + +**No history, no replay.** In-process execution leaves no record. You can't see what an agent did on a past run, replay a run with a different model, or query execution history across agents. + +**Scaling means duplicating state.** Running agents across multiple machines means solving distributed state management yourself — or accepting that each agent instance is isolated with no shared execution context. + +--- + +## How Agentspan works differently + +Agentspan separates where your code runs from where execution state lives. + +``` +Your process Agentspan server +└── worker └── agent execution + ├── registers tools ├── tracks current step + └── executes tool calls ←──────── delegates tool work + ├── retries on failure + ├── holds HITL state + └── stores full history +``` + +Your agent definition compiles into a durable workflow on the Agentspan server. The server orchestrates execution — calling your worker to run tools, tracking state at every step, and resuming from the last completed step if anything goes wrong. + +Your process can crash, restart, or be replaced. The agent keeps running. 
+ +--- + +## What this enables + +**Crash recovery.** If your worker process dies mid-run, the server resumes execution when a new worker connects. No work is re-run from scratch — it picks up at the current step. + +**Durable human-in-the-loop.** Mark any tool with `approval_required=True`. The agent pauses server-side and waits indefinitely — no timeouts, no in-memory state at risk. Approve or deny via CLI, API, or the UI. + +**Full execution history.** Every run is stored with inputs, outputs, token usage, and per-step timing. Query via CLI, browse in the UI at `http://localhost:6767`, or replay any past run. + +**Works with frameworks you already use.** Pass a LangGraph `StateGraph`, an OpenAI Agents SDK `Agent`, or a Google ADK pipeline directly to `runtime.run()`. Your definitions stay unchanged. + +--- + +## Frequently asked questions + +**What makes Agentspan different from LangGraph?** +LangGraph is a graph framework for defining agent routing logic — nodes, edges, conditional branching. Agentspan is an execution runtime. You can pass a compiled LangGraph app directly to `runtime.run()` and it gains crash recovery, HITL, and execution history without changing a single node. They work together. + +**What makes Agentspan different from the OpenAI Agents SDK?** +The OpenAI Agents SDK defines agents, handoffs, and tools. Its execution model is in-process. Agentspan wraps that execution so it runs server-side — your agent definitions, handoffs, and tools stay exactly as written. + +**When should I use Agentspan?** +Whenever agents need to run reliably in production: long-running tasks, human approval steps, jobs that must survive process restarts, or situations where you need a queryable history of what every agent did. + +**Does Agentspan replace my existing framework?** +No. If you use LangGraph, the OpenAI Agents SDK, or Google ADK, pass your existing agent directly to `runtime.run()`. 
If you write agents natively, use the `Agent` class — one Python object with tools, instructions, and strategy. + +**What model providers does Agentspan support?** +Any provider with an OpenAI-compatible API. Set the model with one string: `"openai/gpt-4o"`, `"anthropic/claude-sonnet-4-6"`, `"google_gemini/gemini-2.0-flash"`. See [LLM Providers](/developer-guides/agentspan/reference/providers) for the full list. diff --git a/sidebars.js b/sidebars.js index e26fe4d7..94e420b5 100644 --- a/sidebars.js +++ b/sidebars.js @@ -225,7 +225,81 @@ const sidebars = { items: [ 'developer-guides/using-llms-in-your-orkes-conductor-workflows', 'developer-guides/using-vector-databases-in-your-orkes-conductor-workflows', - 'developer-guides/using-ai-prompts' + 'developer-guides/using-ai-prompts', + { + type: 'category', + label: 'Agentspan', + className: 'leftMenuHeader', + link: { + type: 'doc', + id: 'developer-guides/agentspan/overview', + }, + items: [ + 'developer-guides/agentspan/why-agentspan', + 'developer-guides/agentspan/quickstart', + { + type: 'category', + label: 'Concepts', + link: { + type: 'generated-index', + title: 'Agentspan Concepts', + slug: '/developer-guides/agentspan/concepts', + description: 'Learn the core Agentspan concepts for durable AI agents, including tools, memory, streaming, guardrails, scheduling, and multi-agent coordination.', + }, + items: [ + 'developer-guides/agentspan/concepts/agents', + 'developer-guides/agentspan/concepts/tools', + 'developer-guides/agentspan/concepts/skills', + 'developer-guides/agentspan/concepts/multi-agent', + 'developer-guides/agentspan/concepts/plan-execute', + 'developer-guides/agentspan/concepts/guardrails', + 'developer-guides/agentspan/concepts/memory', + 'developer-guides/agentspan/concepts/streaming', + 'developer-guides/agentspan/concepts/scheduling', + 'developer-guides/agentspan/concepts/testing', + ], + }, + { + type: 'category', + label: 'Examples', + link: { + type: 'generated-index', + title: 'Agentspan 
Examples', + slug: '/developer-guides/agentspan/examples', + description: 'Build durable AI agents with examples for support triage, research pipelines, document processing, crash recovery, human review, LangGraph, OpenAI Agents SDK, and Google ADK.', + }, + items: [ + 'developer-guides/agentspan/examples/support-triage', + 'developer-guides/agentspan/examples/research-pipeline', + 'developer-guides/agentspan/examples/document-processor', + 'developer-guides/agentspan/examples/crash-resume', + 'developer-guides/agentspan/examples/human-in-the-loop', + 'developer-guides/agentspan/examples/langgraph', + 'developer-guides/agentspan/examples/openai-agents-sdk', + 'developer-guides/agentspan/examples/google-adk', + ], + }, + { + type: 'category', + label: 'Reference', + link: { + type: 'generated-index', + title: 'Agentspan Reference', + slug: '/developer-guides/agentspan/reference', + description: 'Reference pages for Agentspan providers, model configuration, CLI commands, deployment, self-hosting, integrations, and worker types.', + }, + items: [ + 'developer-guides/agentspan/reference/providers', + 'developer-guides/agentspan/reference/ai-models', + 'developer-guides/agentspan/reference/cli', + 'developer-guides/agentspan/reference/deployment', + 'developer-guides/agentspan/reference/self-hosting', + 'developer-guides/agentspan/reference/integrations', + 'developer-guides/agentspan/reference/worker-types', + ], + }, + ], + }, ], }, { From d6b242c7b6840a2ee050bc81547152e7618ede7c Mon Sep 17 00:00:00 2001 From: Nick Lotz Date: Tue, 30 Jun 2026 22:39:44 -0500 Subject: [PATCH 2/2] Refresh Agentspan docs migration with latest content and new pages --- .../agentspan/assets/four-pillars.svg | 69 +++ .../assets/plan-execute-boundary.svg | 95 ++++ .../agentspan/concepts/adaptive-loops.mdx | 186 +++++++ .../agentspan/concepts/agents.mdx | 15 +- .../agentspan/concepts/guardrails.mdx | 10 +- .../agentspan/concepts/memory.mdx | 14 +- .../agentspan/concepts/multi-agent.mdx | 20 +- 
.../agentspan/concepts/plan-execute.mdx | 47 +- .../agentspan/concepts/scheduling.mdx | 33 +- .../agentspan/concepts/skills.mdx | 22 +- .../agentspan/concepts/streaming.mdx | 12 +- .../agentspan/concepts/testing.mdx | 24 +- .../agentspan/concepts/tools.mdx | 16 +- .../agentspan/examples/crash-resume.mdx | 25 +- .../agentspan/examples/document-processor.mdx | 6 +- .../agentspan/examples/google-adk.mdx | 13 +- .../agentspan/examples/human-in-the-loop.mdx | 13 +- .../agentspan/examples/langgraph.mdx | 12 +- .../agentspan/examples/openai-agents-sdk.mdx | 15 +- .../agentspan/examples/research-pipeline.mdx | 7 +- .../agentspan/examples/support-triage.mdx | 5 +- docs/developer-guides/agentspan/overview.mdx | 385 +++++++++++-- .../developer-guides/agentspan/quickstart.mdx | 8 +- .../agentspan/reference/ai-models.mdx | 2 +- .../agentspan/reference/cli.mdx | 4 +- .../agentspan/reference/deployment.mdx | 4 +- .../agentspan/reference/integrations.mdx | 12 +- .../agentspan/reference/providers.mdx | 2 +- .../agentspan/reference/sdk.mdx | 509 ++++++++++++++++++ .../agentspan/reference/self-hosting.mdx | 4 +- .../agentspan/why-agentspan.mdx | 43 +- sidebars.js | 2 + 32 files changed, 1443 insertions(+), 191 deletions(-) create mode 100644 docs/developer-guides/agentspan/assets/four-pillars.svg create mode 100644 docs/developer-guides/agentspan/assets/plan-execute-boundary.svg create mode 100644 docs/developer-guides/agentspan/concepts/adaptive-loops.mdx create mode 100644 docs/developer-guides/agentspan/reference/sdk.mdx diff --git a/docs/developer-guides/agentspan/assets/four-pillars.svg b/docs/developer-guides/agentspan/assets/four-pillars.svg new file mode 100644 index 00000000..e15ef397 --- /dev/null +++ b/docs/developer-guides/agentspan/assets/four-pillars.svg @@ -0,0 +1,69 @@ + + + + + + + + + + + + + + + Long-running + Survive crashes, + restarts, and deploys. + recovery · HITL · history + + + + + + + + + + + + Plan-Execute + LLM plans once; + Conductor executes. 
+ adaptive · replay-safe + + + + + + Event-driven + Triggered by time + or external events. + cron · Kafka · webhooks + + + + + + + Adaptive loops + Iterate until correct. + Each iteration durable. + replan · converge · DO_WHILE + + + + + + + + + + + + + Agentspan Runtime + diff --git a/docs/developer-guides/agentspan/assets/plan-execute-boundary.svg b/docs/developer-guides/agentspan/assets/plan-execute-boundary.svg new file mode 100644 index 00000000..613d4637 --- /dev/null +++ b/docs/developer-guides/agentspan/assets/plan-execute-boundary.svg @@ -0,0 +1,95 @@ + + + + + + + + LLM ZONE · non-deterministic + + + + User + Prompt + + + + + + + + Planner + one LLM call + + + + + + + + JSON + Plan + + + ✓ One planner call only + ✓ Non-deterministic reasoning + ✓ Produces a plan as a value + Static plan? Planner is skipped entirely → + + + + + BOUNDARY + + + + + + + + + + + CONDUCTOR ZONE · deterministic + + + + PAC + compile once + + + + + + + + + + SET + + + + + + FORK + + + + + + JOIN + + + + + + VALI + DATE + + + ✓ No LLM in the execution path + ✓ Retries, FORK_JOIN, SWITCH — pure Conductor + ✓ Crash-safe: resumes at last step + ✓ Same plan → identical execution, always + + diff --git a/docs/developer-guides/agentspan/concepts/adaptive-loops.mdx b/docs/developer-guides/agentspan/concepts/adaptive-loops.mdx new file mode 100644 index 00000000..ec846600 --- /dev/null +++ b/docs/developer-guides/agentspan/concepts/adaptive-loops.mdx @@ -0,0 +1,186 @@ +--- +slug: "/developer-guides/agentspan/concepts/adaptive-loops" +title: "Adaptive Loops" +description: "Durable iterative agents — each iteration is a Conductor workflow. Iterate until correct, observe every step, and survive crashes mid-loop." +--- + +# Adaptive Loops + +**Any framework can loop. Only Agentspan makes each iteration a durable, observable workflow.** + +A Python `while` loop dies with your process. 
An Agentspan adaptive loop is a sequence of Conductor workflow executions — each iteration crash-safe, fully logged, and visible in the execution UI. The loop continues from the current iteration on reconnect, not from scratch. + +:::tip The core insight +Combine **Plan-Execute** for deterministic per-iteration execution with an **adaptive outer loop** that steers based on verified results. The LLM adapts *what* to try next; Conductor handles *how* each attempt runs — with parallelism, retry, validation, and crash recovery built in. + +::: + +--- + +## Why durable loops matter + +| | Plain while loop | Agentspan adaptive loop | +|---|---|---| +| Process crash mid-loop | Entire loop lost | Resume at current iteration | +| Observability | No record | Every iteration logged in UI | +| Per-iteration execution | LLM-driven, non-deterministic | Plan-Execute: deterministic | +| Parallelism within iteration | Manual threading | FORK_JOIN — free, crash-safe | +| Loop termination | Hope the LLM stops | Server-enforced DO_WHILE condition | +| Replay a specific iteration | Impossible | Full replay — plan is a value | + +--- + +## Pattern 1 — User-code replan loop + +The simplest shape: wrap `runtime.run()` in your own loop, inspect the output, build the next plan. + +```python +from conductor.ai.agents import plan_execute, Plan, Step, Op, Validation + +harness = plan_execute( + name="solver", + tools=[propose_solution, run_tests, check_constraints], + planner_instructions="Propose a solution. 
You will be told exactly what failed.", +) + +plan = build_initial_plan(prompt) + +for iteration in range(max_iterations): + result = runtime.run(harness, prompt, plan=plan) + + verdict = evaluate(result) # deterministic verifier — no LLM + if verdict.passed: + break + + # Thread failures into next iteration's generate instructions + plan = build_next_plan(prompt, verdict.failures) +``` + +**What makes this different from a plain while loop:** + +- Each `runtime.run()` is a full Conductor workflow — crash mid-iteration → resume at the current step. +- The inner execution is deterministic (Plan-Execute). Only the outer replanning call touches the LLM. +- Every iteration has its own execution record in the UI. + +### Adaptive goal-seeking (example 110) + +The pattern generalises to any LLM-generator + deterministic-verifier pair: + +```python +for iteration in range(max_iterations): + # K parallel proposers — deterministic FORK_JOIN, not LLM fan-out + plan = Plan(steps=[ + Step("propose", parallel=True, operations=[ + Op("propose_solution", generate=Generate( + instructions=f"Candidate {i}. Previous failures: {failures}", + output_schema='{"solution": "..."}', + )) + for i in range(K) + ]), + Step("verify", depends_on=["propose"], operations=[ + Op("run_tests", args={"candidates": Ref("propose")}), + ]), + ]) + + result = runtime.run(harness, prompt, plan=plan) + verdict = parse_verdict(result) + + if any(c.passed for c in verdict.candidates): + break + + # Each candidate's exact failure modes feed into the next round + failures = [c.failure_detail for c in verdict.candidates] +``` + +This converges by *fixing what the previous attempt got wrong*, not retrying the same prompt with a different seed. + +--- + +## Pattern 2 — DO_WHILE inside a single workflow + +For loops that should be **one execution** (one workflow ID, all iterations visible as a unit), build the loop inside the Conductor workflow using a `DO_WHILE` task. 
The entire loop — every iteration — appears under one execution ID in the UI. + +``` +Workflow (single ID) +└── DO_WHILE + ├── planner_llm__1 ← LLM_CHAT_COMPLETE, iteration 1 + ├── plan_and_compile__1 + ├── sub_workflow__1 ← the compiled plan executes here + ├── reviewer_llm__1 + ├── planner_llm__2 ← iteration 2 (same workflow) + ├── plan_and_compile__2 + ├── sub_workflow__2 + └── ... +``` + +Iterations share `workflow.variables` — state accumulates across iterations without leaving the workflow. The DO_WHILE condition is a JavaScript expression evaluated by Conductor's engine: same input → same branch, every time. + +### AML/SAR investigation (example 113) + +A compliance investigation loop: the planner picks the next-best evidence source per iteration; the loop terminates when the case is dispositioned. + +```python +# High-level structure (see examples/113_aml_sar_investigation_loop.py for full code) +aml_workflow = build_do_while_workflow( + name="sar_investigation", + body=[ + planner_task, # LLM picks next evidence source + pac_compile_task, # PAC compiles the evidence-gathering plan + sub_workflow_task, # deterministic execution + reviewer_task, # LLM decides: need_more_evidence | disposition + update_state_task, # SET_VARIABLE — accumulates findings + ], + condition="$.reviewer_output.decision != 'need_more_evidence'", +) +``` + +### Portfolio rebalancing (example 114) + +Multi-constraint convergence with wash-sale / concentration / drift checks. Each iteration refines the trade list; the loop exits when all constraints pass. 
+ +--- + +## When to use each pattern + +| | User-code loop (Pattern 1) | DO_WHILE workflow (Pattern 2) | +|---|---|---| +| Simplicity | Simpler to write | Requires workflow construction | +| Observability | Separate execution per iteration | Single execution ID, all iterations in one view | +| State between iterations | Python variables | `workflow.variables` in Conductor | +| Crash recovery | Resume at current iteration | Resume at current task within iteration | +| Loop condition | Python `if` | JS expression in Conductor | +| Best for | Exploration, prototyping | Production pipelines, compliance, finance | + +--- + +## Upcoming: `Strategy.PLAN_EXECUTE_REPLAN` + +The replan pattern will become a first-class SDK strategy — `Strategy.PLAN_EXECUTE_REPLAN` — eliminating the need to write the outer loop manually. Declaration will look like: + +```python +# Coming soon +harness = Agent( + name="solver", + strategy=Strategy.PLAN_EXECUTE_REPLAN, + planner=planner, + fallback=fallback, + tools=[...], + max_iterations=10, + stop_condition="$.verdict.passed == true", +) +``` + +The server will manage the DO_WHILE loop, making the entire multi-iteration run a single observable workflow with one execution ID. 
+ +--- + +## Examples + +- `examples/118_adaptive_loop_showcase.py` — **start here**: single-execution travel planner that iterates until budget constraints pass; shows the agent-tool-loop pattern in ~150 lines (`python 118_adaptive_loop_showcase.py "Tokyo"`) +- `examples/119_research_report_pae_replan.py` — **PAE-replan**: research report using DO_WHILE + PAC; planner writes only failing sections each iteration; one execution ID, FORK_JOIN parallel writes (`python 119_research_report_pae_replan.py "AI agents"`) +- `examples/109_plan_execute_replan.py` — basic replan with rule-based decider +- `examples/110_plan_execute_replan_solve.py` — K parallel proposers + deterministic verifier, converges by fixing failures +- `examples/111_plan_execute_replan_binsearch.py` — binary search loop (~log₂ N iterations to converge) +- `examples/112_dowhile_loop_inside_workflow.py` — DO_WHILE inside a single Conductor workflow +- `examples/113_aml_sar_investigation_loop.py` — AML/SAR investigation with PAC sub-workflows per iteration +- `examples/114_portfolio_rebalance_loop.py` — portfolio rebalancing with multi-constraint convergence diff --git a/docs/developer-guides/agentspan/concepts/agents.mdx b/docs/developer-guides/agentspan/concepts/agents.mdx index 6b83e0a6..fe039511 100644 --- a/docs/developer-guides/agentspan/concepts/agents.mdx +++ b/docs/developer-guides/agentspan/concepts/agents.mdx @@ -12,6 +12,7 @@ description: "The Agent class — constructor, parameters, results, handles, and The following diagrams show how the Agentspan server orchestrates different runtime behaviors — guardrail validation with retry and escalation, and human-in-the-loop approval. + **1. Retry** — the guardrail fails and the server re-invokes the same tool automatically. 
```mermaid sequenceDiagram @@ -122,7 +123,7 @@ sequenceDiagram ## Import ```python -from agentspan.agents import Agent, AgentRuntime, run, start, stream +from conductor.ai.agents import Agent, AgentRuntime, run, start, stream ``` ## Constructor @@ -182,7 +183,7 @@ Agent(name="bot", model="openai/gpt-4o", ```python from pydantic import BaseModel -from agentspan.agents import Agent, AgentRuntime +from conductor.ai.agents import Agent, AgentRuntime class Report(BaseModel): title: str @@ -215,7 +216,7 @@ agent = Agent( ### `AgentRuntime` context manager (recommended) ```python -from agentspan.agents import Agent, AgentRuntime +from conductor.ai.agents import Agent, AgentRuntime agent = Agent(name="assistant", model="openai/gpt-4o") @@ -235,7 +236,7 @@ with AgentRuntime() as runtime: ### Module-level functions ```python -from agentspan.agents import run, start, stream +from conductor.ai.agents import run, start, stream result = run(agent, "Hello") # Uses a shared singleton runtime handle = start(agent, "Hello") @@ -245,7 +246,7 @@ for event in stream(agent, "Hi"): ... 
### Async variants ```python -from agentspan.agents import run_async, start_async, stream_async +from conductor.ai.agents import run_async, start_async, stream_async result = await run_async(agent, "Hello") handle = await start_async(agent, "Hello") @@ -312,7 +313,7 @@ elif status.is_complete: ### Reconnect to an existing execution ```python -from agentspan.agents import AgentHandle, AgentRuntime +from conductor.ai.agents import AgentHandle, AgentRuntime runtime = AgentRuntime() runtime.serve(agent, blocking=False) # Start workers for @tool functions @@ -347,7 +348,7 @@ with AgentRuntime() as runtime: Compile the agent without executing it: ```python -from agentspan.agents import plan +from conductor.ai.agents import plan workflow = plan(agent) print(workflow) # Compiled workflow definition (server-side execution graph) diff --git a/docs/developer-guides/agentspan/concepts/guardrails.mdx b/docs/developer-guides/agentspan/concepts/guardrails.mdx index 35d1618f..a59f76d5 100644 --- a/docs/developer-guides/agentspan/concepts/guardrails.mdx +++ b/docs/developer-guides/agentspan/concepts/guardrails.mdx @@ -11,7 +11,7 @@ Guardrails validate agent input or output. 
On failure, you choose how to respond ## Import ```python -from agentspan.agents import ( +from conductor.ai.agents import ( Agent, AgentRuntime, Guardrail, GuardrailResult, guardrail, OnFail, Position, RegexGuardrail, LLMGuardrail, ) @@ -20,7 +20,7 @@ from agentspan.agents import ( ## Basic Usage ```python -from agentspan.agents import Agent, AgentRuntime, Guardrail, GuardrailResult, guardrail +from conductor.ai.agents import Agent, AgentRuntime, Guardrail, GuardrailResult, guardrail @guardrail def word_limit(content: str) -> GuardrailResult: @@ -104,7 +104,7 @@ agent = Agent( Block or allow responses based on regex patterns: ```python -from agentspan.agents import RegexGuardrail +from conductor.ai.agents import RegexGuardrail # Block responses containing profanity agent = Agent( @@ -130,10 +130,10 @@ agent = Agent( Use a second LLM as a judge: ```python -from agentspan.agents import LLMGuardrail +from conductor.ai.agents import LLMGuardrail factual_check = LLMGuardrail( - model="openai/gpt-4o-mini", + model="anthropic/claude-sonnet-4-6", policy="Is this response factually accurate and helpful? Reply YES or NO with a brief explanation.", on_fail=OnFail.RETRY, max_retries=2, diff --git a/docs/developer-guides/agentspan/concepts/memory.mdx b/docs/developer-guides/agentspan/concepts/memory.mdx index adb4c6a5..a49a3bc5 100644 --- a/docs/developer-guides/agentspan/concepts/memory.mdx +++ b/docs/developer-guides/agentspan/concepts/memory.mdx @@ -11,8 +11,8 @@ Agentspan provides two memory systems: **ConversationMemory** for managing chat ## Import ```python -from agentspan.agents import Agent, ConversationMemory -from agentspan.agents.semantic_memory import SemanticMemory +from conductor.ai.agents import Agent, ConversationMemory +from conductor.ai.agents.semantic_memory import SemanticMemory ``` ## ConversationMemory @@ -20,7 +20,7 @@ from agentspan.agents.semantic_memory import SemanticMemory Manages chat history as a list of messages. 
Messages are prepended to the LLM's message list at compile time, giving the LLM context from previous interactions. ```python -from agentspan.agents import Agent, AgentRuntime, ConversationMemory +from conductor.ai.agents import Agent, AgentRuntime, ConversationMemory memory = ConversationMemory(max_messages=100) @@ -70,7 +70,7 @@ When `max_messages` is set and the message count exceeds it: Long-term memory with similarity-based retrieval. Stores facts, preferences, and knowledge recalled by relevance to the current query. ```python -from agentspan.agents.semantic_memory import SemanticMemory +from conductor.ai.agents.semantic_memory import SemanticMemory memory = SemanticMemory(max_results=3) @@ -107,8 +107,8 @@ context = memory.get_context("What plan am I on?") The agent decides when to search and what to query: ```python -from agentspan.agents import Agent, AgentRuntime, tool -from agentspan.agents.semantic_memory import SemanticMemory +from conductor.ai.agents import Agent, AgentRuntime, tool +from conductor.ai.agents.semantic_memory import SemanticMemory memory = SemanticMemory(max_results=3) memory.add("User prefers Python over JavaScript") @@ -151,7 +151,7 @@ The default `InMemoryStore` uses Jaccard similarity. 
Non-persistent — suitable For production, implement `MemoryStore`: ```python -from agentspan.agents.semantic_memory import MemoryStore, MemoryEntry, SemanticMemory +from conductor.ai.agents.semantic_memory import MemoryStore, MemoryEntry, SemanticMemory class PineconeStore(MemoryStore): def __init__(self, index_name: str, api_key: str): diff --git a/docs/developer-guides/agentspan/concepts/multi-agent.mdx b/docs/developer-guides/agentspan/concepts/multi-agent.mdx index 9314f744..c3943089 100644 --- a/docs/developer-guides/agentspan/concepts/multi-agent.mdx +++ b/docs/developer-guides/agentspan/concepts/multi-agent.mdx @@ -51,7 +51,7 @@ sequenceDiagram ``` ```python -from agentspan.agents import Agent, AgentRuntime +from conductor.ai.agents import Agent, AgentRuntime researcher = Agent(name="researcher", model="openai/gpt-4o", instructions="Research the topic and provide key facts.") @@ -111,7 +111,7 @@ sequenceDiagram ``` ```python -from agentspan.agents import Agent, AgentRuntime +from conductor.ai.agents import Agent, AgentRuntime market = Agent(name="market", model="openai/gpt-4o", instructions="Analyze market size, growth, and key players.") @@ -161,7 +161,7 @@ sequenceDiagram ``` ```python -from agentspan.agents import Agent, AgentRuntime, tool +from conductor.ai.agents import Agent, AgentRuntime, tool @tool def check_balance(account_id: str) -> dict: @@ -213,11 +213,11 @@ sequenceDiagram ``` ```python -from agentspan.agents import Agent, AgentRuntime +from conductor.ai.agents import Agent, AgentRuntime classifier = Agent( name="classifier", - model="openai/gpt-4o-mini", + model="anthropic/claude-sonnet-4-6", instructions="Classify the request as 'billing', 'technical', or 'general'. 
Reply with just the category.", ) @@ -288,8 +288,8 @@ sequenceDiagram ``` ```python -from agentspan.agents import Agent, AgentRuntime, Strategy -from agentspan.agents import TextMentionTermination +from conductor.ai.agents import Agent, AgentRuntime, Strategy +from conductor.ai.agents import TextMentionTermination triage = Agent(name="triage", model="openai/gpt-4o", instructions="Triage support requests. Say 'BILLING' for billing, 'TECH' for technical.") @@ -428,7 +428,7 @@ sequenceDiagram ``` ```python -from agentspan.agents import Agent, AgentRuntime, start +from conductor.ai.agents import Agent, AgentRuntime, start workflow = Agent( name="manual_workflow", @@ -450,7 +450,7 @@ with AgentRuntime() as runtime: Control when multi-agent loops stop: ```python -from agentspan.agents import ( +from conductor.ai.agents import ( MaxMessageTermination, TextMentionTermination, StopMessageTermination, @@ -473,7 +473,7 @@ TokenUsageTermination(max_total_tokens=10000) Combine multiple conditions: ```python -from agentspan.agents import Agent +from conductor.ai.agents import Agent agent = Agent( name="team", diff --git a/docs/developer-guides/agentspan/concepts/plan-execute.mdx b/docs/developer-guides/agentspan/concepts/plan-execute.mdx index 37ddd98d..f6b6aa2d 100644 --- a/docs/developer-guides/agentspan/concepts/plan-execute.mdx +++ b/docs/developer-guides/agentspan/concepts/plan-execute.mdx @@ -4,14 +4,43 @@ title: "Plan-Execute Strategy" description: "PLAN_EXECUTE compiles LLM-generated (or static) plans into deterministic Conductor sub-workflows — the planner reasons, the executor runs." --- -# Plan-Execute Strategy +# Plan-Execute -`Strategy.PLAN_EXECUTE` (also called PAE; the server-side compiler is PAC, "PLAN_AND_COMPILE") splits a task into two phases: +**The LLM decides *what* to do. Conductor handles *how* — no tokens on orchestration, retries, or branching.** -1. **Plan** — a planner agent emits a JSON DAG of operations. -2. 
**Execute** — the server compiles that JSON into a Conductor sub-workflow and runs it deterministically. +Most agent frameworks route every decision through the LLM — retry logic, branch selection, parallelism. Tokens for every control flow operation. Nondeterminism throughout. -The LLM is only invoked where it adds value (planning, per-op content generation). Orchestration, retries, parallelism, and validation are pure Conductor primitives — no token cost, no nondeterminism. +Plan-Execute draws a hard line. + +![Plan-Execute deterministic boundary](../assets/plan-execute-boundary.svg) + +One planner call produces a JSON task graph. The Agentspan server compiles it into a deterministic Conductor sub-workflow. **After that single LLM call, the execution is pure Conductor** — crash-safe, replay-safe, and free of LLM randomness. + +:::tip The core insight +Two identical plans produce two identical workflow definitions and two identical executions. Retries, parallelism (FORK_JOIN), branching (SWITCH), and validation are Conductor primitives — not LLM turns. The LLM is only invoked where it genuinely adds value: planning the shape of work, and generating per-step content. + +::: + +The two phases in detail: + +1. **Plan** — the planner agent emits a JSON DAG of operations once. +2. **Execute** — the server compiles that JSON into an immutable Conductor sub-workflow and runs it deterministically. + +(The strategy is called `PLAN_EXECUTE` in the SDK. The server-side compiler is PAC, "PLAN_AND_COMPILE".) 
+ +--- + +## Why this wins against a plain LLM loop + +| | LLM-loop agent | Plan-Execute | +|---|---|---| +| Retries | LLM re-reasons | Conductor retry, no token cost | +| Parallelism | Ask LLM to fan out | FORK_JOIN — deterministic, exact | +| Branching | LLM decides each turn | SWITCH on JS expression | +| Crash recovery | Restart from scratch | Resume at last completed step | +| Replay two runs | Different each time | Identical — plan is a value | +| Validation | LLM self-checks | Deterministic validator + SWITCH gate | +| Testing | Non-deterministic | Pass a static plan — no LLM at all | ## The deterministic boundary @@ -80,7 +109,7 @@ If you need fully agentic exploration with no fixed shape, use `Strategy.HANDOFF ## The shape ```python -from agentspan.agents import Strategy, Agent, plan_execute +from conductor.ai.agents import Strategy, Agent, plan_execute # One-call construction (recommended): harness = plan_execute( @@ -150,7 +179,7 @@ The schema PAC consumes: For static plans (or plans you build programmatically), import the typed builders: ```python -from agentspan.agents import Plan, Step, Op, Generate, Validation, Action +from conductor.ai.agents import Plan, Step, Op, Generate, Validation, Action plan = Plan( steps=[ @@ -180,7 +209,7 @@ IDE autocomplete, Pylance type-checks, no escaping nightmares. Wire the **whole output** of one step into the args of a later step with `Ref("step_id")`. No JSON path, no field selection, no Conductor task-ref naming to memorise. ```python -from agentspan.agents import Op, Plan, Ref, Step +from conductor.ai.agents import Op, Plan, Ref, Step plan = Plan(steps=[ Step("fetch", operations=[Op("fetch_data", args={"url": URL})]), @@ -377,7 +406,7 @@ The planner's `instructions` are fine for "how to emit a plan." They're a poor f `planner_context` injects those rules into the planner's user prompt at runtime, as a `## Reference Context` block. 
Two entry shapes: ```python -from agentspan.agents import Agent, Context, Strategy +from conductor.ai.agents import Agent, Context, Strategy harness = Agent( name="onboarding_harness", diff --git a/docs/developer-guides/agentspan/concepts/scheduling.mdx b/docs/developer-guides/agentspan/concepts/scheduling.mdx index 2919188b..f5fba70c 100644 --- a/docs/developer-guides/agentspan/concepts/scheduling.mdx +++ b/docs/developer-guides/agentspan/concepts/scheduling.mdx @@ -10,8 +10,7 @@ more crons to a deployed agent in a single declarative call; the runtime's scheduler fires the agent on cadence and you watch the executions roll in. This page covers the user-facing API. For the design rationale see -[`docs/design/scheduling.md`](https://github.com/agentspan-ai/agentspan/blob/c873e60bc7eff73e61f568ec68e28cb1f121fe39/docs/design/scheduling.md). For the implementation -plan see [`docs/design/plans/2026-05-27-agent-scheduling.md`](https://github.com/agentspan-ai/agentspan/blob/c873e60bc7eff73e61f568ec68e28cb1f121fe39/docs/design/plans/2026-05-27-agent-scheduling.md). +[`design/sentinel-agents.md`](https://github.com/agentspan-ai/agentspan/blob/main/design/sentinel-agents.md). 
## What you get @@ -36,10 +35,10 @@ plan see [`docs/design/plans/2026-05-27-agent-scheduling.md`](https://github.com ### Python ```python -from agentspan.agents import Agent, deploy, schedules -from agentspan.agents.schedule import Schedule +from conductor.ai.agents import Agent, deploy, schedules +from conductor.ai.agents.schedule import Schedule -agent = Agent(name="daily_digest", model="openai/gpt-4o-mini", +agent = Agent(name="daily_digest", model="anthropic/claude-sonnet-4-6", instructions="Summarize today's eng activity.") deploy( @@ -75,11 +74,11 @@ A full working example is in `sdk/python/examples/hello_world_agent_schedule.py` ```ts import { Agent, AgentRuntime, Schedule, schedules, -} from "@agentspan-ai/sdk"; +} from "@conductor-oss/conductor-agent-sdk"; const agent = new Agent({ name: "dailyDigest", - model: "openai/gpt-4o-mini", + model: "anthropic/claude-sonnet-4-6", instructions: "Summarize today's eng activity.", }); @@ -111,13 +110,13 @@ const nextTimes = await schedules.previewNext("0 0 9 * * MON-FRI", { n: 5 }); ### Java ```java -import ai.agentspan.Agent; -import ai.agentspan.AgentRuntime; -import ai.agentspan.schedule.Schedule; +import org.conductoross.conductor.ai.Agent; +import org.conductoross.conductor.ai.AgentRuntime; +import org.conductoross.conductor.ai.schedule.Schedule; Agent agent = Agent.builder() .name("daily_digest") - .model("openai/gpt-4o-mini") + .model("anthropic/claude-sonnet-4-6") .instructions("Summarize today's eng activity.") .build(); @@ -147,13 +146,13 @@ try (AgentRuntime runtime = new AgentRuntime()) { ### C# ```csharp -using Agentspan; -using Agentspan.Scheduling; +using Conductor.AI; +using Conductor.AI.Scheduling; var agent = new Agent { Name = "daily_digest", - Model = "openai/gpt-4o-mini", + Model = "anthropic/claude-sonnet-4-6", Instructions = "Summarize today's eng activity.", }; @@ -281,10 +280,10 @@ These are future-phase items, not blockers: ### Run a hello-world agent every 5 seconds ```python -from 
agentspan.agents import Agent, AgentRuntime -from agentspan.agents.schedule import Schedule +from conductor.ai.agents import Agent, AgentRuntime +from conductor.ai.agents.schedule import Schedule -agent = Agent(name="hello", model="openai/gpt-4o-mini", +agent = Agent(name="hello", model="anthropic/claude-sonnet-4-6", instructions="Say 'Hello, world!' and the current UTC time.") with AgentRuntime() as rt: diff --git a/docs/developer-guides/agentspan/concepts/skills.mdx b/docs/developer-guides/agentspan/concepts/skills.mdx index 1b769fc1..f1c65876 100644 --- a/docs/developer-guides/agentspan/concepts/skills.mdx +++ b/docs/developer-guides/agentspan/concepts/skills.mdx @@ -110,7 +110,7 @@ git status/diff. The CLI enforces configured root boundaries. ### Python ```python -from agentspan.agents import Agent, AgentRuntime, agent_tool, skill +from conductor.ai.agents import Agent, AgentRuntime, agent_tool, skill reviewer = skill("~/.claude/skills/dg", model="openai/gpt-4o") @@ -129,7 +129,7 @@ lead = Agent( ### TypeScript ```typescript -import { Agent, AgentRuntime, agentTool, skill } from "@agentspan-ai/sdk"; +import { Agent, AgentRuntime, agentTool, skill } from "@conductor-oss/conductor-agent-sdk"; const reviewer = skill("~/.claude/skills/dg", { model: "openai/gpt-4o", @@ -150,11 +150,11 @@ const lead = new Agent({ ### Java ```java -import ai.agentspan.Agent; -import ai.agentspan.AgentTool; -import ai.agentspan.Agentspan; -import ai.agentspan.model.AgentResult; -import ai.agentspan.skill.Skill; +import org.conductoross.conductor.ai.Agent; +import org.conductoross.conductor.ai.tools.AgentTool; +import org.conductoross.conductor.ai.AgentRuntime; +import org.conductoross.conductor.ai.model.AgentResult; +import org.conductoross.conductor.ai.skill.Skill; import java.nio.file.Paths; import java.util.List; @@ -162,8 +162,10 @@ import java.util.List; Agent reviewer = Skill.skill(Paths.get(System.getProperty("user.home"), ".claude", "skills", "dg"), "openai/gpt-4o"); 
-AgentResult direct = Agentspan.run(reviewer, "Review auth.py"); -direct.printResult(); +try (AgentRuntime runtime = new AgentRuntime()) { + AgentResult direct = runtime.run(reviewer, "Review auth.py"); + direct.printResult(); +} Agent lead = Agent.builder() .name("tech_lead") @@ -176,7 +178,7 @@ Agent lead = Agent.builder() ### .NET ```csharp -using Agentspan; +using Conductor.AI; var reviewer = Skill.Load("~/.claude/skills/dg", "openai/gpt-4o"); diff --git a/docs/developer-guides/agentspan/concepts/streaming.mdx b/docs/developer-guides/agentspan/concepts/streaming.mdx index d3c1a388..d007e058 100644 --- a/docs/developer-guides/agentspan/concepts/streaming.mdx +++ b/docs/developer-guides/agentspan/concepts/streaming.mdx @@ -11,7 +11,7 @@ Stream events from an agent execution as they happen — tool calls, thinking st ## Basic Streaming ```python -from agentspan.agents import Agent, AgentRuntime +from conductor.ai.agents import Agent, AgentRuntime agent = Agent(name="writer", model="openai/gpt-4o") @@ -33,7 +33,7 @@ with AgentRuntime() as runtime: ## Module-level stream() ```python -from agentspan.agents import Agent, stream +from conductor.ai.agents import Agent, stream agent = Agent(name="writer", model="openai/gpt-4o") for event in stream(agent, "Write a poem"): @@ -44,7 +44,7 @@ for event in stream(agent, "Write a poem"): ## Async Streaming ```python -from agentspan.agents import Agent, AgentRuntime +from conductor.ai.agents import Agent, AgentRuntime agent = Agent(name="writer", model="openai/gpt-4o") @@ -87,7 +87,7 @@ async def main(): ## Streaming with Tools ```python -from agentspan.agents import Agent, AgentRuntime, tool +from conductor.ai.agents import Agent, AgentRuntime, tool @tool def search_web(query: str) -> str: @@ -116,7 +116,7 @@ with AgentRuntime() as runtime: When an agent is waiting for approval, the `waiting` event fires. 
Handle it from a separate process using the execution ID: ```python -from agentspan.agents import Agent, AgentRuntime, AgentHandle, tool +from conductor.ai.agents import Agent, AgentRuntime, AgentHandle, tool @tool(approval_required=True) def send_email(to: str, subject: str, body: str) -> dict: @@ -148,7 +148,7 @@ if workflow_id: To get the final `AgentResult` after streaming: ```python -from agentspan.agents import Agent, AgentRuntime, start +from conductor.ai.agents import Agent, AgentRuntime, start agent = Agent(name="writer", model="openai/gpt-4o") diff --git a/docs/developer-guides/agentspan/concepts/testing.mdx b/docs/developer-guides/agentspan/concepts/testing.mdx index 0ac133cc..8faf602f 100644 --- a/docs/developer-guides/agentspan/concepts/testing.mdx +++ b/docs/developer-guides/agentspan/concepts/testing.mdx @@ -11,7 +11,7 @@ Agentspan has a first-class testing module that lets you test agent behavior wit ## Import ```python -from agentspan.agents.testing import mock_run, MockEvent, expect, record, replay +from conductor.ai.agents.testing import mock_run, MockEvent, expect, record, replay ``` ## mock_run @@ -19,8 +19,8 @@ from agentspan.agents.testing import mock_run, MockEvent, expect, record, replay `mock_run` runs an agent with a scripted sequence of events instead of calling an LLM: ```python -from agentspan.agents import Agent, tool -from agentspan.agents.testing import mock_run, MockEvent, expect +from conductor.ai.agents import Agent, tool +from conductor.ai.agents.testing import mock_run, MockEvent, expect @tool def search_web(query: str) -> str: @@ -123,8 +123,8 @@ assert data["city"] == "San Francisco" Test that tools are called with correct arguments and that state is managed properly: ```python -from agentspan.agents import Agent, tool -from agentspan.agents.testing import mock_run, MockEvent, expect +from conductor.ai.agents import Agent, tool +from conductor.ai.agents.testing import mock_run, MockEvent, expect @tool def send_email(to: str, 
subject: str, body: str) -> dict: @@ -162,8 +162,8 @@ expect(result).completed().used_tool("send_email", args={ Test human-in-the-loop interactions: ```python -from agentspan.agents import Agent, tool -from agentspan.agents.testing import mock_run, MockEvent, expect +from conductor.ai.agents import Agent, tool +from conductor.ai.agents.testing import mock_run, MockEvent, expect @tool(approval_required=True) def delete_file(path: str) -> dict: @@ -231,7 +231,7 @@ expect(result).completed().used_tool("search_web") Record a real execution (with an actual LLM) and replay it deterministically in tests: ```python -from agentspan.agents.testing import record, replay +from conductor.ai.agents.testing import record, replay # Record a real run (calls LLM) recording = record(agent, "What's the capital of France?") @@ -251,7 +251,7 @@ This is useful for: ```python import pytest -from agentspan.agents.testing import mock_run, MockEvent, expect +from conductor.ai.agents.testing import mock_run, MockEvent, expect # Mark as unit test — no LLM, no server, fast class TestWeatherAgent: @@ -283,7 +283,7 @@ class TestWeatherAgent: @pytest.mark.integration class TestWeatherAgentIntegration: def test_real_weather_query(self, weather_agent): - from agentspan.agents import run + from conductor.ai.agents import run result = run(weather_agent, "Weather in San Francisco?") assert result.status == "COMPLETED" assert len(result.output) > 10 @@ -328,8 +328,8 @@ real worker execution. 
For evaluating LLM output quality (not just structure), use `CorrectnessEval`: ```python -from agentspan.agents import AgentRuntime -from agentspan.agents.testing import CorrectnessEval, EvalCase +from conductor.ai.agents import AgentRuntime +from conductor.ai.agents.testing import CorrectnessEval, EvalCase eval_runner = CorrectnessEval(runtime=AgentRuntime()) diff --git a/docs/developer-guides/agentspan/concepts/tools.mdx b/docs/developer-guides/agentspan/concepts/tools.mdx index ab5715f3..d36151df 100644 --- a/docs/developer-guides/agentspan/concepts/tools.mdx +++ b/docs/developer-guides/agentspan/concepts/tools.mdx @@ -24,7 +24,7 @@ For `http_tool`, `api_tool`, and `mcp_tool`, you provide a URL and optionally cr Decorate any Python function to make it a tool: ```python -from agentspan.agents import Agent, AgentRuntime, tool +from conductor.ai.agents import Agent, AgentRuntime, tool @tool def get_weather(city: str) -> dict: @@ -60,7 +60,7 @@ def dangerous_action(target: str) -> dict: Add a `context: ToolContext` parameter to access execution context and shared state: ```python -from agentspan.agents import tool, ToolContext +from conductor.ai.agents import tool, ToolContext @tool def query_database(query: str, context: ToolContext) -> dict: @@ -107,7 +107,7 @@ agent = Agent( Define a single HTTP endpoint as a tool. Executes entirely server-side — no worker process needed: ```python -from agentspan.agents import http_tool +from conductor.ai.agents import http_tool weather_api = http_tool( name="get_weather", @@ -130,7 +130,7 @@ agent = Agent(name="assistant", model="openai/gpt-4o", tools=[weather_api]) Point to any OpenAPI, Swagger, or Postman spec. All endpoints are auto-discovered and exposed as tools. 
The LLM filters to the most relevant ones at runtime: ```python -from agentspan.agents import api_tool +from conductor.ai.agents import api_tool stripe = api_tool( url="https://api.stripe.com/openapi.json", @@ -162,7 +162,7 @@ with AgentRuntime() as runtime: Connect to an MCP server. Tools are auto-discovered at runtime. Executes server-side: ```python -from agentspan.agents import mcp_tool +from conductor.ai.agents import mcp_tool github = mcp_tool( server_url="http://localhost:6767/mcp", @@ -189,7 +189,7 @@ Credentials are encrypted at rest (AES-256-GCM). **Step 2: Declare which credentials a tool needs** ```python -from agentspan.agents import tool, get_credential +from conductor.ai.agents import tool, get_credential # Option A: isolated subprocess (credentials available as env vars) @tool(credentials=["GITHUB_TOKEN"]) @@ -241,8 +241,8 @@ agent = Agent( Agents can execute code in a sandboxed environment: ```python -from agentspan.agents import Agent, AgentRuntime -from agentspan.agents.code_executor import DockerCodeExecutor +from conductor.ai.agents import Agent, AgentRuntime +from conductor.ai.agents.code_executor import DockerCodeExecutor executor = DockerCodeExecutor(image="python:3.12-slim", timeout=30) diff --git a/docs/developer-guides/agentspan/examples/crash-resume.mdx b/docs/developer-guides/agentspan/examples/crash-resume.mdx index 35598b37..810d61c3 100644 --- a/docs/developer-guides/agentspan/examples/crash-resume.mdx +++ b/docs/developer-guides/agentspan/examples/crash-resume.mdx @@ -45,13 +45,14 @@ export OPENAI_API_KEY=sk-... # or ANTHROPIC_API_KEY if using Anthropic ``` ::: + ### Step 1 — Start the agent (`start.py`) Run this script. It starts the agent, prints the execution ID, checks status, then exits. The process ending is intentional — this simulates the crash. 
```python # start.py -from agentspan.agents import Agent, tool, start +from conductor.ai.agents import Agent, tool, start @tool def analyze_chunk(chunk_id: int, data: str) -> dict: @@ -65,7 +66,7 @@ def aggregate_results(results: list) -> dict: agent = Agent( name="data_analysis_agent", - model="openai/gpt-4o-mini", + model="anthropic/claude-sonnet-4-6", tools=[analyze_chunk, aggregate_results], instructions="""Analyze data in chunks using analyze_chunk, then aggregate with aggregate_results. Process each chunk sequentially. Report progress as you go.""", @@ -86,7 +87,7 @@ Paste the execution ID from Step 1 and run this script. It re-registers the tool ```python # reconnect.py -from agentspan.agents import Agent, tool, AgentRuntime, AgentHandle +from conductor.ai.agents import Agent, tool, AgentRuntime, AgentHandle # Same agent and tools as start.py — workers need to be registered to handle tool calls @tool @@ -101,7 +102,7 @@ def aggregate_results(results: list) -> dict: agent = Agent( name="data_analysis_agent", - model="openai/gpt-4o-mini", + model="anthropic/claude-sonnet-4-6", tools=[analyze_chunk, aggregate_results], instructions="...", ) @@ -138,7 +139,7 @@ The execution ID is all you need to reconnect from any process, any machine. 
**If your agent has no `@tool` functions** (LLM-only agent), reconnecting is straightforward: ```python -from agentspan.agents import AgentRuntime, AgentHandle +from conductor.ai.agents import AgentRuntime, AgentHandle with AgentRuntime() as runtime: handle = AgentHandle(execution_id="", runtime=runtime) @@ -164,7 +165,7 @@ In production, keep the worker process (which handles tool calls) separate from ```python # worker.py — runs continuously, handles tool execution -from agentspan.agents import Agent, tool, AgentRuntime +from conductor.ai.agents import Agent, tool, AgentRuntime @tool def analyze_chunk(chunk_id: int, data: str) -> dict: @@ -173,7 +174,7 @@ def analyze_chunk(chunk_id: int, data: str) -> dict: agent = Agent( name="data_analysis_agent", - model="openai/gpt-4o-mini", + model="anthropic/claude-sonnet-4-6", tools=[analyze_chunk], instructions="...", ) @@ -184,7 +185,7 @@ with AgentRuntime() as runtime: ```python # invoker.py — runs once per job (REST endpoint, cron, CLI, etc.) 
-from agentspan.agents import Agent, tool, start +from conductor.ai.agents import Agent, tool, start @tool def analyze_chunk(chunk_id: int, data: str) -> dict: @@ -193,7 +194,7 @@ def analyze_chunk(chunk_id: int, data: str) -> dict: agent = Agent( name="data_analysis_agent", - model="openai/gpt-4o-mini", + model="anthropic/claude-sonnet-4-6", tools=[analyze_chunk], instructions="...", ) @@ -210,7 +211,7 @@ print(f"Job ID: {handle.execution_id}") Use `get_status()` to skip work that's already done before starting a new run: ```python -from agentspan.agents import Agent, start, AgentRuntime, AgentHandle +from conductor.ai.agents import Agent, start, AgentRuntime, AgentHandle def ensure_analysis_running(execution_id: str | None, agent, prompt: str): """Start a new run or reconnect to an existing one.""" @@ -235,7 +236,7 @@ def ensure_analysis_running(execution_id: str | None, agent, prompt: str): Stream events from a run — whether it's new or already in progress: ```python -from agentspan.agents import Agent, tool, AgentRuntime, AgentHandle +from conductor.ai.agents import Agent, tool, AgentRuntime, AgentHandle # Re-define (or import) agent and tools so workers can be registered @tool @@ -245,7 +246,7 @@ def analyze_chunk(chunk_id: int, data: str) -> dict: agent = Agent( name="data_analysis_agent", - model="openai/gpt-4o-mini", + model="anthropic/claude-sonnet-4-6", tools=[analyze_chunk], instructions="...", ) diff --git a/docs/developer-guides/agentspan/examples/document-processor.mdx b/docs/developer-guides/agentspan/examples/document-processor.mdx index 10b60ad5..36ab3522 100644 --- a/docs/developer-guides/agentspan/examples/document-processor.mdx +++ b/docs/developer-guides/agentspan/examples/document-processor.mdx @@ -29,14 +29,16 @@ export ANTHROPIC_API_KEY= ``` ::: + ## Full code :::note The `CONTRACTS` dict uses hardcoded text for demonstration. Replace it with file reads or database queries for production use. 
::: + ```python -from agentspan.agents import Agent, tool, start +from conductor.ai.agents import Agent, tool, start from pydantic import BaseModel, Field from pathlib import Path from enum import Enum @@ -249,7 +251,7 @@ Or open `http://localhost:6767` to browse executions visually. Re-run any failed Use `stream()` instead of `start()` to log each tool call as it happens: ```python -from agentspan.agents import stream +from conductor.ai.agents import stream for event in stream(contract_reviewer, filename): if event.type == "tool_call": diff --git a/docs/developer-guides/agentspan/examples/google-adk.mdx b/docs/developer-guides/agentspan/examples/google-adk.mdx index 7ff4c763..31d17df3 100644 --- a/docs/developer-guides/agentspan/examples/google-adk.mdx +++ b/docs/developer-guides/agentspan/examples/google-adk.mdx @@ -29,6 +29,7 @@ export GEMINI_API_KEY=... ``` ::: + --- ## Before: plain Google ADK @@ -148,7 +149,7 @@ Three things change from the plain ADK version: `LlmAgent` → `Agent`, model st ```python from google.adk.agents import Agent, SequentialAgent -from agentspan.agents import AgentRuntime +from conductor.ai.agents import AgentRuntime # ── Tools (unchanged — no FunctionTool wrapper needed) ─────────────────────── @@ -261,7 +262,7 @@ topic → [research_pipeline] → [researcher] → [analyst] → [writer] → fi ```python import asyncio -from agentspan.agents import run_async +from conductor.ai.agents import run_async async def run_research(topic: str) -> str: result = await run_async(pipeline, topic) @@ -275,7 +276,7 @@ asyncio.run(run_research("The current state of durable execution for AI agents") Use `start` to submit a job and return immediately. Useful when research runs are slow and you don't want to block. 
```python -from agentspan.agents import start +from conductor.ai.agents import start # Launch and return immediately — pipeline runs in the background on the server handle = start(pipeline, topic) @@ -295,7 +296,7 @@ print(result.output) `start` works in a loop — each call submits immediately without waiting for the previous one to finish. ```python -from agentspan.agents import start +from conductor.ai.agents import start topics = [ "Durable execution frameworks for AI agents", @@ -314,7 +315,7 @@ for r in results: ### Stream sub-agent progress ```python -from agentspan.agents import stream +from conductor.ai.agents import stream for event in stream(pipeline, topic): if event.type == "handoff": @@ -332,7 +333,7 @@ for event in stream(pipeline, topic): Use `mock_run` to test the pipeline without a live server or real API calls. Supply the expected sequence of sub-agent handoffs and tool calls; `mock_run` drives the pipeline through them and returns an `AgentResult` you can assert against. ```python -from agentspan.agents.testing import mock_run, MockEvent, expect +from conductor.ai.agents.testing import mock_run, MockEvent, expect result = mock_run( pipeline, diff --git a/docs/developer-guides/agentspan/examples/human-in-the-loop.mdx b/docs/developer-guides/agentspan/examples/human-in-the-loop.mdx index 78b5fdba..9b021406 100644 --- a/docs/developer-guides/agentspan/examples/human-in-the-loop.mdx +++ b/docs/developer-guides/agentspan/examples/human-in-the-loop.mdx @@ -15,7 +15,7 @@ Agents are great at finding the right action. Humans are better at authorizing r Add `approval_required=True` to any `@tool` decorator. That's it. ```python -from agentspan.agents import tool +from conductor.ai.agents import tool @tool(approval_required=True) def process_refund(order_id: str, amount: float) -> dict: @@ -42,9 +42,10 @@ export OPENAI_API_KEY=sk-... 
# or ANTHROPIC_API_KEY if using Anthropic ``` ::: + ```python import time -from agentspan.agents import Agent, tool, start +from conductor.ai.agents import Agent, tool, start # Tools that run automatically @tool @@ -65,7 +66,7 @@ def process_refund(order_id: str, amount: float) -> dict: agent = Agent( name="refund_agent", - model="openai/gpt-4o-mini", + model="anthropic/claude-sonnet-4-6", tools=[get_order, get_customer, process_refund], instructions="""You handle refund requests. 1. Look up the order @@ -136,7 +137,7 @@ notify_approver(handle.execution_id) ```python # Later — your approval endpoint (FastAPI, Flask, Lambda, etc.) -from agentspan.agents import AgentRuntime, AgentHandle +from conductor.ai.agents import AgentRuntime, AgentHandle # In a web app, the agent (with its tools) must already be served. # Call runtime.serve(agent, blocking=False) at app startup, then reconnect here. @@ -179,7 +180,7 @@ Each time the agent calls one of these tools, it pauses and waits for a fresh `h ## Stream events including approval pauses ```python -from agentspan.agents import stream +from conductor.ai.agents import stream agent_stream = stream(agent, customer_message) for event in agent_stream: @@ -201,7 +202,7 @@ for event in agent_stream: `MockEvent.waiting()` simulates the approval pause, then `MockEvent.done()` simulates the post-approval response: ```python -from agentspan.agents.testing import mock_run, MockEvent, expect +from conductor.ai.agents.testing import mock_run, MockEvent, expect result = mock_run( agent, diff --git a/docs/developer-guides/agentspan/examples/langgraph.mdx b/docs/developer-guides/agentspan/examples/langgraph.mdx index dd965ba3..99aacc60 100644 --- a/docs/developer-guides/agentspan/examples/langgraph.mdx +++ b/docs/developer-guides/agentspan/examples/langgraph.mdx @@ -32,6 +32,7 @@ export GITHUB_TOKEN=ghp_... 
To generate a GitHub token, go to **Settings → Developer settings → Personal access tokens → Tokens (classic)** and check the **`repo`** scope. This gives the bot read access to diffs and write access to post review comments. ::: + --- ## Before: plain LangGraph @@ -141,7 +142,7 @@ print(result["messages"][-1].content) Replace `app.invoke({...})` with `runtime.run(app, {...})`. That's the only change. Agentspan auto-detects LangGraph apps — no extra imports or graph modifications needed. ```python -from agentspan.agents import AgentRuntime +from conductor.ai.agents import AgentRuntime with AgentRuntime() as runtime: result = runtime.run(app, { @@ -178,6 +179,7 @@ python code_review_bot.py `"Review PR #142 in acme-corp/backend"` is a placeholder. Replace it with a real PR number and repository you have access to, otherwise the GitHub API will return a 404. ::: + --- ## Example modifications @@ -188,7 +190,7 @@ Use `run_async` in async contexts, such as FastAPI route handlers or async worke ```python import asyncio -from agentspan.agents import run_async +from conductor.ai.agents import run_async async def review_pr(pr_number: int, repo: str): result = await run_async(app, { @@ -204,7 +206,7 @@ asyncio.run(review_pr(142, "acme-corp/backend")) Use `start` to submit a review and return immediately. Useful when reviews are slow (large diffs, many tool calls) and you don't want to block. ```python -from agentspan.agents import start +from conductor.ai.agents import start # Returns immediately — graph runs in the background on the server handle = start(app, { @@ -223,7 +225,7 @@ print(result.output["messages"][-1].content) `start` works in a loop — each call submits immediately without waiting for the previous one to finish. ```python -from agentspan.agents import start +from conductor.ai.agents import start prs = [(142, "acme-corp/backend"), (87, "acme-corp/frontend"), (23, "acme-corp/infra")] @@ -261,7 +263,7 @@ Agentspan handles crash recovery at the run level. 
If your worker dies, the grap Use `mock_run` to test the graph without a live server or real API calls. You supply the expected sequence of tool calls and results; `mock_run` drives the graph through them and returns an `AgentResult` you can assert against. ```python -from agentspan.agents.testing import mock_run, MockEvent, expect +from conductor.ai.agents.testing import mock_run, MockEvent, expect from langchain_core.messages import HumanMessage result = mock_run( diff --git a/docs/developer-guides/agentspan/examples/openai-agents-sdk.mdx b/docs/developer-guides/agentspan/examples/openai-agents-sdk.mdx index 485f1332..1f9e58b7 100644 --- a/docs/developer-guides/agentspan/examples/openai-agents-sdk.mdx +++ b/docs/developer-guides/agentspan/examples/openai-agents-sdk.mdx @@ -29,6 +29,7 @@ export OPENAI_API_KEY=sk-... ``` ::: + --- ## Before: plain OpenAI Agents SDK @@ -124,7 +125,7 @@ account_agent = Agent( triage_agent = Agent( name="support_triage", - model="gpt-4o-mini", # fast, cheap — just routes + model="gpt-4o-mini", # fast, cheap — just routes instructions="""You are a support triage agent. Understand the customer's issue and hand off to the right specialist immediately. @@ -150,7 +151,7 @@ print(result.final_output) Replace `Runner.run_sync(triage_agent, message)` with `runtime.run(triage_agent, message)`. That's the only change. Agentspan auto-detects OpenAI Agents SDK agents — no extra imports or agent modifications needed. ```python -from agentspan.agents import AgentRuntime +from conductor.ai.agents import AgentRuntime message = "Hi, I was charged $99 twice last month (invoice INV-8821). Can I get a refund?" 
@@ -198,7 +199,7 @@ ticket → [support_triage] → handoff → [billing_specialist] → tools → f ```python import asyncio -from agentspan.agents import run_async +from conductor.ai.agents import run_async async def handle_ticket(message: str): result = await run_async(triage_agent, message) @@ -212,7 +213,7 @@ asyncio.run(handle_ticket("I was charged twice last month")) Use `start` to submit a ticket and return immediately without blocking. ```python -from agentspan.agents import start +from conductor.ai.agents import start handle = start(triage_agent, customer_message) print(f"Ticket queued: {handle.execution_id}") @@ -227,7 +228,7 @@ print(result.output) Use `stream` to process handoffs and tool calls in real time as the agents work through the ticket. ```python -from agentspan.agents import stream +from conductor.ai.agents import stream for event in stream(triage_agent, customer_message): if event.type == "handoff": @@ -245,7 +246,7 @@ for event in stream(triage_agent, customer_message): Wrap any sensitive tool with Agentspan's `@tool` decorator and set `approval_required=True`. Execution pauses at that tool call until a human approves or rejects it in the UI. ```python -from agentspan.agents import tool +from conductor.ai.agents import tool @tool(approval_required=True) def process_refund(invoice_id: str, reason: str) -> dict: @@ -267,7 +268,7 @@ billing_agent = Agent( Use `mock_run` to test the multi-agent flow without a live server or real API calls. Supply the expected sequence of handoffs and tool calls; `mock_run` drives the agents through them and returns an `AgentResult` you can assert against. 
```python -from agentspan.agents.testing import mock_run, MockEvent, expect +from conductor.ai.agents.testing import mock_run, MockEvent, expect result = mock_run( triage_agent, diff --git a/docs/developer-guides/agentspan/examples/research-pipeline.mdx b/docs/developer-guides/agentspan/examples/research-pipeline.mdx index 051ee0df..7d146124 100644 --- a/docs/developer-guides/agentspan/examples/research-pipeline.mdx +++ b/docs/developer-guides/agentspan/examples/research-pipeline.mdx @@ -29,11 +29,12 @@ export AGENTSPAN_LLM_MODEL=openai/gpt-4o-mini ``` ::: + ## Full code ```python import os -from agentspan.agents import Agent, AgentRuntime +from conductor.ai.agents import Agent, AgentRuntime researcher = Agent( name="researcher", @@ -106,7 +107,7 @@ editor = Agent(name="editor", model="openai/gpt-4o", ...) Use `start` instead of `run` to kick off multiple pipelines without waiting for each to finish. ```python -from agentspan.agents import start +from conductor.ai.agents import start topics = [ "Multi-agent frameworks reshaping software development", @@ -124,7 +125,7 @@ results = [h.stream().get_result() for h in handles] ```python import schedule, time -from agentspan.agents import start +from conductor.ai.agents import start def run_daily(): for topic in WATCH_LIST: diff --git a/docs/developer-guides/agentspan/examples/support-triage.mdx b/docs/developer-guides/agentspan/examples/support-triage.mdx index eb085a3c..59d997d2 100644 --- a/docs/developer-guides/agentspan/examples/support-triage.mdx +++ b/docs/developer-guides/agentspan/examples/support-triage.mdx @@ -29,10 +29,11 @@ export AGENTSPAN_LLM_MODEL=openai/gpt-4o-mini ``` ::: + ## Full code ```python -from agentspan.agents import Agent, AgentHandle, AgentRuntime, tool, start +from conductor.ai.agents import Agent, AgentHandle, AgentRuntime, tool, start from pydantic import BaseModel from enum import Enum @@ -86,7 +87,7 @@ def apply_credit(customer_id: str, amount_usd: float, note: str) -> dict: 
support_agent = Agent( name="support_agent", - model="openai/gpt-4o-mini", + model="anthropic/claude-sonnet-4-6", output_type=Resolution, tools=[ lookup_customer, diff --git a/docs/developer-guides/agentspan/overview.mdx b/docs/developer-guides/agentspan/overview.mdx index af08d286..a03d4541 100644 --- a/docs/developer-guides/agentspan/overview.mdx +++ b/docs/developer-guides/agentspan/overview.mdx @@ -5,46 +5,367 @@ description: "Agentspan documentation for building production AI agents." --- # Agentspan -**Agentspan is a durable runtime for AI agents. Your code runs in your process. Execution state lives on the server.** -Agentspan is a durable runtime for AI agents. Execution state lives server-side, so crashes, restarts, and deployments do not lose work. Write agents natively or wrap an existing LangGraph, OpenAI Agents SDK, or Google ADK agent in one line. +**AI agents that don't die when your process does.** -## Getting Started +Most agent frameworks run the loop in your process. A crash, deploy, or OOM kill loses everything. Agentspan separates your code from execution state — the server holds state, your workers execute tools. Agents survive restarts, pause for human approval indefinitely, and resume at the last completed step. -- [Why Agentspan](/developer-guides/agentspan/why-agentspan) - Why agents fail in production, and how Agentspan solves it. -- [Quickstart](/developer-guides/agentspan/quickstart) - Build your first agent in 5 minutes. +![Agentspan four pillars](assets/four-pillars.svg) -## Concepts +--- + +## Why production agents break + +When an agent loop runs inside your process, six things can go wrong — and they will: + +- **Process crash mid-run.** A long agent run takes minutes. If your process dies, the run is gone. No resume. +- **Human approval loses state.** Pausing for human input means holding state in memory. A restart kills the pending approval. +- **No history.** In-process execution leaves no record. 
You can't query what any agent did, replay a run, or compare models. +- **Scaling duplicates state.** Multiple machines mean distributed state management — or isolated, uncoordinated agents. +- **Scheduling requires external infra.** A cron means a separate scheduler, missed-fire handling, and overlap detection — all failure-prone. +- **Background jobs vanish.** An async agent fired via threading or asyncio dies when your process does. + +Agentspan eliminates all six by keeping execution state on the server. + +``` +Your process Agentspan server +└── worker └── agent execution + ├── registers tools ├── tracks current step + └── executes tool calls ←────── delegates tool work + ├── retries on failure + ├── holds HITL state + └── stores full history +``` + +Your process can crash, restart, or be replaced. **The agent keeps running.** + +--- + +## Get started in 30 seconds + +### 1. Install + +=== "Python" + + ```bash + pip install conductor-agent-sdk + ``` + +=== "TypeScript" + + ```bash + npm install @conductor-oss/conductor-agent-sdk + ``` + +=== "Java" + + ```xml + <!-- Maven --> + <dependency> + <groupId>org.conductoross.conductor</groupId> + <artifactId>conductor-agent-sdk</artifactId> + <version>0.1.0</version> + </dependency> + ``` + + ```gradle + // Gradle + implementation 'org.conductoross.conductor:conductor-agent-sdk:0.1.0' + ``` + +=== "C#" + + ```bash + dotnet add package conductor-agent-sdk + ``` + +=== "Rust *(coming soon)*" + + ```toml + # Cargo.toml — not yet published + # [dependencies] + # conductor-agent-sdk = "0.1" + ``` + + Star the [repo](https://github.com/agentspan-ai/agentspan) to be notified when Rust support ships. + +=== "Ruby *(coming soon)*" + + ```ruby + # Gemfile — not yet published + # gem 'conductor-agent-sdk' + ``` + + Star the [repo](https://github.com/agentspan-ai/agentspan) to be notified when Ruby support ships. + +### 2. Set your LLM key + +```bash +export OPENAI_API_KEY=sk-... # OpenAI +# or +export ANTHROPIC_API_KEY=sk-ant-... # Anthropic +``` + +### 3.
Start the server + +```bash +agentspan server start +``` + +Downloads and starts the Agentspan runtime on `http://localhost:6767`. First run fetches the JAR (~50 MB); subsequent starts use the cache. + +### 4. Run your first agent + +=== "Python" + + ```python + from conductor.ai.agents import Agent, AgentRuntime, tool + + @tool + def get_weather(city: str) -> str: + """Get current weather for a city.""" + return f"72°F and sunny in {city}" + + agent = Agent( + name="weatherbot", + model="openai/gpt-4o", + instructions="You are an outdoor activity assistant. Look up the weather, then recommend activities.", + tools=[get_weather], + ) + + with AgentRuntime() as runtime: + result = runtime.run(agent, "What should I do today in NYC?") + result.print_result() + ``` + +=== "TypeScript" + + ```typescript + import { Agent, AgentRuntime, tool } from '@conductor-oss/conductor-agent-sdk'; + + const getWeather = tool( + async ({ city }: { city: string }) => `72°F and sunny in ${city}`, + { name: 'get_weather', description: 'Get current weather for a city', + inputSchema: { type: 'object', properties: { city: { type: 'string' } }, required: ['city'] } } + ); + + const agent = new Agent({ + name: 'weatherbot', + model: 'openai/gpt-4o', + instructions: 'You are an outdoor activity assistant. 
Look up the weather, then recommend activities.', + tools: [getWeather], + }); + + const runtime = new AgentRuntime(); + const result = await runtime.run(agent, 'What should I do today in NYC?'); + result.printResult(); + await runtime.shutdown(); + ``` + +=== "Java" + + ```java + import org.conductoross.conductor.ai.Agent; + import org.conductoross.conductor.ai.AgentRuntime; + import org.conductoross.conductor.ai.tool.Tool; + + public class Weatherbot { + + @Tool(description = "Get current weather for a city") + public String getWeather(String city) { + return "72°F and sunny in " + city; + } + + public static void main(String[] args) throws Exception { + Weatherbot wb = new Weatherbot(); + + Agent agent = Agent.builder() + .name("weatherbot") + .model("openai/gpt-4o") + .instructions("You are an outdoor activity assistant. Look up the weather, then recommend activities.") + .tools(wb) + .build(); + + try (AgentRuntime runtime = new AgentRuntime()) { + var result = runtime.run(agent, "What should I do today in NYC?"); + System.out.println(result.getOutput()); + } + } + } + ``` + +=== "C#" + + ```csharp + using Conductor.AI; + + var agent = new Agent(new AgentConfig { + Name = "weatherbot", + Model = "openai/gpt-4o", + Instructions = "You are an outdoor activity assistant. Look up the weather, then recommend activities.", + Tools = [ + Tool.From((string city) => $"72°F and sunny in {city}", + name: "get_weather", + description: "Get current weather for a city") + ] + }); + + await using var runtime = new AgentRuntime(); + var result = await runtime.RunAsync(agent, "What should I do today in NYC?"); + Console.WriteLine(result.Output); + ``` + +Open **http://localhost:6767** to see the execution in the visual UI. + +--- + +## The four production patterns + +### Long-running agents + +Execution state lives on the server, not in your process. Workers connect, run tool calls, and disconnect — the server tracks every step. 
A crash mid-run resumes at the last completed step when a worker reconnects. Human approval pauses indefinitely with no in-memory state at risk. + +```python +agent = Agent( + name="researcher", + model="anthropic/claude-sonnet-4-6", + instructions="Research the topic thoroughly. Use all available tools.", + tools=[web_search, read_page, write_report], +) + +# Even if this process crashes, the agent resumes on reconnect +with AgentRuntime() as runtime: + result = runtime.run(agent, "Write a report on LLM inference optimization") +``` + +### Dynamic plan-execute + +The LLM plans once; Conductor executes deterministically. A planner agent emits a JSON task graph. The server compiles it into an immutable sub-workflow — parallelism is `FORK_JOIN`, branching is `SWITCH`, retries cost zero tokens. See [Plan-Execute](concepts/plan-execute.md). + +```python +from conductor.ai.agents import plan_execute + +harness = plan_execute( + name="report_generator", + tools=[create_dir, write_section, assemble, check_length], + planner_instructions="Plan a 3-section report on the topic.", + fallback_instructions="Fix what the deterministic plan couldn't.", +) + +result = runtime.run(harness, "AI agents in 2025") +``` + +### Event-driven agents + +Attach cron schedules at deploy time. Connect to Kafka, SQS, AMQP, webhooks, or any Conductor event source — every execution fully recorded with inputs, outputs, and per-step timing. See [Scheduling](concepts/scheduling.md). + +```python +from conductor.ai.agents import Schedule + +agent = Agent( + name="daily_digest", + instructions="Summarize the top news for today.", + tools=[fetch_news, send_email], + schedules=[Schedule(name="morning", cron="0 9 * * *")], +) + +runtime.deploy(agent) # registers the schedule; server fires it every day at 9 AM +``` + +### Adaptive loops + +Any framework can loop. Only Agentspan makes each iteration a **durable, observable workflow** — crash mid-loop and resume at the current iteration, not from scratch.
Combine Plan-Execute for deterministic inner execution with an adaptive outer loop that converges based on verified results. See [Adaptive Loops](concepts/adaptive-loops.md). + +```python +from conductor.ai.agents import Agent, AgentRuntime + +agent = Agent(name="travel_planner", instructions="Output itineraries as JSON.", ...) + +failures = [] +with AgentRuntime() as runtime: + for iteration in range(max_iterations): + prompt = build_prompt(destination, daily_budget, failures) + result = runtime.run(agent, prompt) # durable workflow — survives crashes + + itinerary = extract_json(result) + failures = verify_constraints(itinerary) # deterministic — no LLM + if not failures: + print(f"✓ All constraints passed in {iteration + 1} iteration(s)") + break + # Exact failure messages feed into the next prompt +``` + +Each `runtime.run()` is a durable Conductor workflow — every iteration observable in the UI, resumable on crash, and fully logged. + +See `examples/118_adaptive_loop_showcase.py` — a runnable travel planner that loops until budget and structural constraints pass (`python 118_adaptive_loop_showcase.py "Tokyo"`). + +--- + +## Works with your existing framework + +Pass your existing agent directly to `runtime.run()`. Your code is unchanged. + +```python +# LangGraph +from langgraph.prebuilt import create_react_agent +graph = create_react_agent(model, tools) +result = runtime.run(graph, prompt) + +# OpenAI Agents SDK +from agents import Agent as OAIAgent +agent = OAIAgent(name="helper", instructions="...", tools=[...]) +result = runtime.run(agent, prompt) + +# Google ADK +from google.adk.agents import LlmAgent +agent = LlmAgent(name="helper", instruction="...", tools=[...]) +result = runtime.run(agent, prompt) +``` + +--- + +## Go deeper + +
+ +- :material-help-circle-outline: **Why Agentspan** + + --- + + Why conventional frameworks fail in production, and how Agentspan's server-side execution model solves it. + + [Read more →](why-agentspan.md) + +- :material-graph-outline: **Plan-Execute** + + --- + + The LLM plans once; Conductor executes deterministically. No tokens on retries, parallelism, or branching. + + [Read more →](concepts/plan-execute.md) + +- :material-book-open-outline: **SDK Reference** + + --- + + Python, TypeScript, Java, and C# — full API reference, examples, and framework integration guides. + + [Read more →](reference/sdk.md) + +- :material-rocket-launch-outline: **Examples** + + --- -- [Agents](/developer-guides/agentspan/concepts/agents) - The `Agent` class, parameters, results, and handles. -- [Tools](/developer-guides/agentspan/concepts/tools) - `@tool`, `http_tool()`, `api_tool()`, `mcp_tool()`, credentials, and approval-required tools. -- [Skills](/developer-guides/agentspan/concepts/skills) - Load, register, run, and test agentskills.io skill folders. -- [Multi-Agent Strategies](/developer-guides/agentspan/concepts/multi-agent) - Sequential, parallel, handoff, router, and nested agent coordination. -- [Guardrails](/developer-guides/agentspan/concepts/guardrails) - Input and output safety, retry, block, and fix behavior. -- [Memory](/developer-guides/agentspan/concepts/memory) - Conversation history and semantic search across sessions. -- [Streaming](/developer-guides/agentspan/concepts/streaming) - Runtime events, async execution, and HITL with streams. -- [Testing](/developer-guides/agentspan/concepts/testing) - `mock_run`, `expect`, record/replay, pytest, and evaluation helpers. + Production-shape examples: support triage, research pipelines, HITL workflows, LangGraph bots. -## Deployment + [Read more →](examples/support-triage.md) -- [Deployment overview](/developer-guides/agentspan/reference/deployment) - Local development, Docker, Helm, and Orkes Cloud.
-- [Self-hosting](/developer-guides/agentspan/reference/self-hosting) - Run Agentspan in your own environment. +- :material-refresh: **Adaptive loops** -## Examples + --- -- [Support Ticket Triage](/developer-guides/agentspan/examples/support-triage) - Classify, route, and resolve support tickets. -- [Research Pipeline](/developer-guides/agentspan/examples/research-pipeline) - Run sequential research, writing, and editing agents. -- [Batch Document Processor](/developer-guides/agentspan/examples/document-processor) - Process multiple documents in parallel. -- [Crash and Resume](/developer-guides/agentspan/examples/crash-resume) - Resume durable executions after worker failure. -- [Human in the Loop](/developer-guides/agentspan/examples/human-in-the-loop) - Pause execution for human approval. -- [LangGraph Code Review Bot](/developer-guides/agentspan/examples/langgraph) - Wrap an existing LangGraph app. -- [OpenAI Agents SDK Customer Support](/developer-guides/agentspan/examples/openai-agents-sdk) - Run an OpenAI Agents SDK app through Agentspan. -- [Google ADK Research Assistant](/developer-guides/agentspan/examples/google-adk) - Run a Google ADK agent through Agentspan. + Durable iterative agents — each iteration a Conductor workflow. Replan based on verified results, converge on goals, and observe every iteration in the UI. -## Reference + [Read more →](concepts/adaptive-loops.md) -- [CLI Reference](/developer-guides/agentspan/reference/cli) - Commands with exact syntax. -- [LLM Providers](/developer-guides/agentspan/reference/providers) - Providers, model strings, and API keys. -- [AI Models](/developer-guides/agentspan/reference/ai-models) - Model configuration and supported provider formats. -- [Integrations](/developer-guides/agentspan/reference/integrations) - Framework integrations and compatibility notes. -- [Worker Types](/developer-guides/agentspan/reference/worker-types) - Python and TypeScript worker models. +
diff --git a/docs/developer-guides/agentspan/quickstart.mdx b/docs/developer-guides/agentspan/quickstart.mdx index 2f58ab0d..9b39d9e0 100644 --- a/docs/developer-guides/agentspan/quickstart.mdx +++ b/docs/developer-guides/agentspan/quickstart.mdx @@ -11,7 +11,7 @@ Get Agentspan running locally in under 60 seconds. ## Step 1 — Install ```bash -pip install agentspan +pip install conductor-agent-sdk ``` This installs the Python SDK and the `agentspan` CLI — everything you need as a Python developer. @@ -22,7 +22,7 @@ Verify your setup: agentspan doctor ``` -> **uv:** `uv pip install agentspan` also works. +> **uv:** `uv pip install conductor-agent-sdk` also works. > > **CLI only (no Python SDK):** `npm install -g @agentspan-ai/agentspan` — downloads the binary eagerly at install time, no Python required. @@ -53,7 +53,7 @@ On first run, this downloads the Agentspan server JAR (~50 MB) and starts it on Save this as `hello.py` and run `python hello.py`: ```python -from agentspan.agents import Agent, AgentRuntime, tool +from conductor.ai.agents import Agent, AgentRuntime, tool @tool def get_weather(city: str) -> str: @@ -97,7 +97,7 @@ See [Tools](/developer-guides/agentspan/concepts/tools) for all tool types. If you prefer not to use the context manager, module-level functions are available. 
They use a shared singleton runtime under the hood: ```python -from agentspan.agents import Agent, tool, run +from conductor.ai.agents import Agent, tool, run @tool def get_weather(city: str) -> str: diff --git a/docs/developer-guides/agentspan/reference/ai-models.mdx b/docs/developer-guides/agentspan/reference/ai-models.mdx index 369e5c2d..36e32e11 100644 --- a/docs/developer-guides/agentspan/reference/ai-models.mdx +++ b/docs/developer-guides/agentspan/reference/ai-models.mdx @@ -35,7 +35,7 @@ agentspan server start | `OPENAI_API_KEY` | API key from [platform.openai.com](https://platform.openai.com/api-keys) | | `OPENAI_ORG_ID` | Organization ID (optional) | -**Models:** `openai/gpt-4o`, `openai/gpt-4o-mini`, `openai/gpt-4-turbo`, `openai/o1`, `openai/o1-mini`, `openai/o3-mini` +**Models:** `openai/gpt-4o`, `openai/gpt-4o-mini`, `openai/gpt-4-turbo`, `openai/o1`, `openai/o1-mini`, `openai/o3-mini` **Embeddings:** `openai/text-embedding-3-small`, `openai/text-embedding-3-large` diff --git a/docs/developer-guides/agentspan/reference/cli.mdx b/docs/developer-guides/agentspan/reference/cli.mdx index 63dc7458..25989d96 100644 --- a/docs/developer-guides/agentspan/reference/cli.mdx +++ b/docs/developer-guides/agentspan/reference/cli.mdx @@ -6,7 +6,7 @@ description: "Agentspan CLI commands for server, credentials, agents, skills, st # CLI Reference -**Python developers:** `pip install agentspan` gives you the SDK and the CLI. The pip package registers the `agentspan` command as a console script; on first invocation it downloads the Go binary from S3 and caches it. +**Python developers:** `pip install conductor-agent-sdk` gives you the SDK and the CLI. The pip package registers the `agentspan` command as a console script; on first invocation it downloads the Go binary from S3 and caches it. **CLI only (no Python SDK):** `npm install -g @agentspan-ai/agentspan` — downloads the Go binary eagerly at install time.
Useful if you don't have Python or want the binary pre-fetched. @@ -158,7 +158,7 @@ export AGENTSPAN_AUTH_SECRET=your-secret Or configure in Python code: ```python -from agentspan.agents import configure +from conductor.ai.agents import configure configure( server_url="https://your-server.example.com", diff --git a/docs/developer-guides/agentspan/reference/deployment.mdx b/docs/developer-guides/agentspan/reference/deployment.mdx index a2c4ca7d..d48b6eae 100644 --- a/docs/developer-guides/agentspan/reference/deployment.mdx +++ b/docs/developer-guides/agentspan/reference/deployment.mdx @@ -122,7 +122,7 @@ Worker pods connect via `AGENTSPAN_SERVER_URL`: import os os.environ["AGENTSPAN_SERVER_URL"] = "http://agentspan-server:6767" -from agentspan.agents import Agent, run +from conductor.ai.agents import Agent, run agent = Agent(name="my_agent", model="openai/gpt-4o") result = run(agent, "Hello") ``` @@ -142,7 +142,7 @@ export AGENTSPAN_AUTH_SECRET=my-secret Or via code: ```python -from agentspan.agents import configure +from conductor.ai.agents import configure configure( server_url="https://my-server.example.com", diff --git a/docs/developer-guides/agentspan/reference/integrations.mdx b/docs/developer-guides/agentspan/reference/integrations.mdx index f91e1338..6b87629b 100644 --- a/docs/developer-guides/agentspan/reference/integrations.mdx +++ b/docs/developer-guides/agentspan/reference/integrations.mdx @@ -9,7 +9,7 @@ description: "Use Agentspan with LangGraph, the OpenAI Agents SDK, Google ADK, o Agentspan works with the frameworks you already use. Pass your existing agent directly to `runtime.run()` — definitions, tools, and routing logic stay exactly as written. You get crash recovery, durable human-in-the-loop, and full execution history without changing a single node or handoff. 
```python -from agentspan.agents import AgentRuntime +from conductor.ai.agents import AgentRuntime with AgentRuntime() as runtime: result = runtime.run(your_existing_agent, "your prompt") @@ -27,7 +27,7 @@ Pass a compiled `StateGraph` or any graph produced by `create_react_agent`: ```python from langgraph.prebuilt import create_react_agent -from agentspan.agents import AgentRuntime +from conductor.ai.agents import AgentRuntime graph = create_react_agent(model="openai/gpt-4o", tools=[search, calculator]) @@ -48,7 +48,7 @@ Pass an `Agent` from the `agents` package directly: ```python from agents import Agent as OAIAgent, WebSearchTool -from agentspan.agents import AgentRuntime +from conductor.ai.agents import AgentRuntime oai_agent = OAIAgent( name="support_agent", @@ -73,7 +73,7 @@ Pass any ADK pipeline (`SequentialAgent`, `ParallelAgent`, `LoopAgent`, or a cus ```python from google.adk.agents import SequentialAgent, LlmAgent -from agentspan.agents import AgentRuntime +from conductor.ai.agents import AgentRuntime researcher = LlmAgent(name="researcher", model="gemini-2.0-flash", ...) writer = LlmAgent(name="writer", model="gemini-2.0-flash", ...) @@ -117,12 +117,12 @@ See [Tools](/developer-guides/agentspan/concepts/tools) for details. 
--- -## Native Agentspan agents +## Native Agentspan If you're not using an existing framework, define agents natively: ```python -from agentspan.agents import Agent, tool, AgentRuntime +from conductor.ai.agents import Agent, tool, AgentRuntime @tool def search_web(query: str) -> str: diff --git a/docs/developer-guides/agentspan/reference/providers.mdx b/docs/developer-guides/agentspan/reference/providers.mdx index 283d1add..20fa993d 100644 --- a/docs/developer-guides/agentspan/reference/providers.mdx +++ b/docs/developer-guides/agentspan/reference/providers.mdx @@ -44,7 +44,7 @@ agent = Agent(name="bot", model="google_gemini/gemini-2.0-flash") | `OPENAI_API_KEY` | API key from [platform.openai.com](https://platform.openai.com/api-keys) | | `OPENAI_ORG_ID` | Organization ID (optional) | -**Models:** `openai/gpt-4o`, `openai/gpt-4o-mini`, `openai/gpt-4-turbo`, `openai/o1`, `openai/o1-mini`, `openai/o3-mini` +**Models:** `openai/gpt-4o`, `openai/gpt-4o-mini`, `openai/gpt-4-turbo`, `openai/o1`, `openai/o1-mini`, `openai/o3-mini` **Embeddings:** `openai/text-embedding-3-small`, `openai/text-embedding-3-large` diff --git a/docs/developer-guides/agentspan/reference/sdk.mdx b/docs/developer-guides/agentspan/reference/sdk.mdx new file mode 100644 index 00000000..77edb7dc --- /dev/null +++ b/docs/developer-guides/agentspan/reference/sdk.mdx @@ -0,0 +1,509 @@ +--- +slug: "/developer-guides/agentspan/reference/sdk" +title: "SDK Overview" +description: "Conductor Agent SDK — coordinates, Agent 101, and framework integrations for Python, TypeScript, Java, and C#." +--- + +# SDK Overview + +The Conductor Agent SDK lets you build and run Agentspan agents in four languages. Install the package, point it at an Agentspan server, and you're running agents in under 30 seconds.
+ +## Quick reference + +| Language | Install | Import | Full docs | +|---|---|---|---| +| **Python** | `pip install conductor-agent-sdk` | `from conductor.ai.agents import Agent, AgentRuntime` | [Python SDK](python-sdk/README.md) | +| **TypeScript** | `npm install @conductor-oss/conductor-agent-sdk` | `import { Agent, AgentRuntime } from '@conductor-oss/conductor-agent-sdk'` | [TypeScript SDK](typescript-sdk/README.md) | +| **Java** | `org.conductoross.conductor:conductor-agent-sdk:0.1.0` | `import org.conductoross.conductor.ai.*` | [Java SDK](java-sdk/index.md) | +| **C#** | `dotnet add package conductor-agent-sdk` | `using Conductor.AI;` | [C# SDK](csharp-sdk/README.md) | + +## Environment setup + +All SDKs read the same environment variables: + +```bash +export AGENTSPAN_SERVER_URL=http://localhost:6767/api +export OPENAI_API_KEY= +export AGENTSPAN_LLM_MODEL=openai/gpt-4o-mini +``` + +Start a local server: + +```bash +agentspan server start # downloads the runtime jar on first run, starts on :6767 +``` + +--- + +## Agent 101 + +The minimal loop: define an `Agent`, open a runtime, call `run`. + +--- +#### Python + + ```python + from conductor.ai.agents import Agent, AgentRuntime, tool + + @tool + def get_weather(city: str) -> str: + """Return current weather for a city.""" + return f"72°F and sunny in {city}" + + agent = Agent( + name="weather_agent", + model="anthropic/claude-sonnet-4-6", + instructions="You are a helpful assistant. 
Use available tools.", + tools=[get_weather], + ) + + with AgentRuntime() as runtime: + result = runtime.run(agent, "What's the weather in San Francisco?") + result.print_result() + ``` +--- +#### TypeScript + + ```ts + import { Agent, AgentRuntime, tool } from '@conductor-oss/conductor-agent-sdk'; + import { z } from 'zod'; + + const getWeather = tool({ + name: 'get_weather', + description: 'Return current weather for a city.', + parameters: z.object({ city: z.string() }), + execute: async ({ city }) => `72°F and sunny in ${city}`, + }); + + const agent = new Agent({ + name: 'weather_agent', + model: 'anthropic/claude-sonnet-4-6', + instructions: 'You are a helpful assistant. Use available tools.', + tools: [getWeather], + }); + + const runtime = new AgentRuntime(); + try { + const result = await runtime.run(agent, "What's the weather in San Francisco?"); + result.printResult(); + } finally { + await runtime.shutdown(); + } + ``` + +--- +#### Java + + ```java + import org.conductoross.conductor.ai.Agent; + import org.conductoross.conductor.ai.AgentRuntime; + import org.conductoross.conductor.ai.tool.Tool; + + public class WeatherExample { + @Tool(description = "Return current weather for a city.") + public static String getWeather(String city) { + return "72°F and sunny in " + city; + } + + public static void main(String[] args) throws Exception { + Agent agent = Agent.builder() + .name("weather_agent") + .model("anthropic/claude-sonnet-4-6") + .instructions("You are a helpful assistant. 
Use available tools.") + .tools(WeatherExample.class) + .build(); + + try (AgentRuntime runtime = new AgentRuntime()) { + var result = runtime.run(agent, "What's the weather in San Francisco?"); + result.printResult(); + } + } + } + ``` + +--- +#### C# + + ```csharp + using Conductor.AI; + using Conductor.AI.Tools; + + [AgentTools] + public static class WeatherTools + { + [AgentTool(Description = "Return current weather for a city.")] + public static string GetWeather(string city) => + $"72°F and sunny in {city}"; + } + + var agent = new Agent("weather_agent") + { + Model = "anthropic/claude-sonnet-4-6", + Instructions = "You are a helpful assistant. Use available tools.", + Tools = ToolRegistry.FromType<WeatherTools>(), + }; + + await using var runtime = new AgentRuntime(); + var result = await runtime.RunAsync(agent, "What's the weather in San Francisco?"); + result.PrintResult(); + ``` + +--- + +## What `run()` returns + +All four SDKs return an `AgentResult` with the same fields: + +| Field | Description | +|---|---| +| `output` / `Output` | Final text response from the agent | +| `execution_id` / `ExecutionId` | Server-side workflow execution ID — use to poll, stream, or inspect | +| `status` | `COMPLETED`, `FAILED`, `TIMED_OUT` | +| `usage` | Token counts (input, output, total) | +| `print_result()` / `PrintResult()` | Pretty-print to stdout | + +--- + +## Framework agents + +You don't have to rewrite agents authored in another framework. Pass the framework object directly to `runtime.run()` — the runtime auto-detects the framework and runs it on Agentspan.
+ +### LangGraph + +--- +#### Python + + ```python + import math + from langchain_core.tools import tool + from langchain_openai import ChatOpenAI + from langgraph.prebuilt import create_react_agent + from conductor.ai.agents import AgentRuntime + + @tool + def calculate(expression: str) -> str: + """Evaluate a safe math expression.""" + return str(eval(expression, {"__builtins__": {}}, {"sqrt": math.sqrt, "pi": math.pi})) + + llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) + graph = create_react_agent(llm, tools=[calculate], name="math_agent") + + with AgentRuntime() as runtime: + result = runtime.run(graph, "What is sqrt(256) + 2**10?") + result.print_result() + ``` + +--- +#### TypeScript + + ```ts + import { createReactAgent } from '@langchain/langgraph/prebuilt'; + import { ChatOpenAI } from '@langchain/openai'; + import { tool } from '@langchain/core/tools'; + import { z } from 'zod'; + import { AgentRuntime } from '@conductor-oss/conductor-agent-sdk'; + + const calculate = tool( + async ({ expression }) => String(eval(expression)), + { name: 'calculate', description: 'Evaluate a math expression.', + schema: z.object({ expression: z.string() }) } + ); + + const llm = new ChatOpenAI({ model: 'gpt-4o-mini', temperature: 0 }); + const graph = createReactAgent({ llm, tools: [calculate], name: 'math_agent' }); + + const runtime = new AgentRuntime(); + try { + const result = await runtime.run(graph, 'What is sqrt(256) + 2**10?'); + result.printResult(); + } finally { + await runtime.shutdown(); + } + ``` + +--- +#### Java (LangGraph4j) + + ```java + import dev.langchain4j.model.openai.OpenAiChatModel; + import org.bsc.langgraph4j.agentexecutor.AgentExecutor; + import org.conductoross.conductor.ai.AgentRuntime; + + var model = OpenAiChatModel.builder() + .apiKey("agentspan-server-handles-credentials") + .modelName("anthropic/claude-sonnet-4-6") + .build(); + + AgentExecutor.Builder agent = AgentExecutor.builder().chatModel(model); + + try (AgentRuntime runtime 
= new AgentRuntime()) { + var result = runtime.run(agent, "What is sqrt(256) + 2**10?"); + result.printResult(); + } + ``` + +--- + +### LangChain + +--- +#### Python + + ```python + from conductor.ai.agents import AgentRuntime + from langchain.agents import create_tool_calling_agent, AgentExecutor + from langchain_core.prompts import ChatPromptTemplate + from langchain_core.tools import tool + from langchain_openai import ChatOpenAI + + @tool + def lookup_order(order_id: str) -> str: + """Look up an order by ID.""" + return f"Order {order_id}: shipped, arriving tomorrow." + + llm = ChatOpenAI(model="gpt-4o-mini") + prompt = ChatPromptTemplate.from_messages([ + ("system", "You are an order support assistant."), + ("human", "{input}"), + ("placeholder", "{agent_scratchpad}"), + ]) + lc_agent = create_tool_calling_agent(llm, [lookup_order], prompt) + executor = AgentExecutor(agent=lc_agent, tools=[lookup_order]) + + with AgentRuntime() as runtime: + result = runtime.run(executor, "Where is order #12345?") + result.print_result() + ``` + +--- +#### TypeScript + + ```ts + import { createAgentExecutor } from '@conductor-oss/conductor-agent-sdk/langchain'; + import { AgentRuntime } from '@conductor-oss/conductor-agent-sdk'; + import { ChatOpenAI } from '@langchain/openai'; + import { tool } from '@langchain/core/tools'; + import { z } from 'zod'; + + const lookupOrder = tool( + async ({ orderId }) => `Order ${orderId}: shipped, arriving tomorrow.`, + { name: 'lookup_order', description: 'Look up an order.', + schema: z.object({ orderId: z.string() }) } + ); + + const llm = new ChatOpenAI({ model: 'gpt-4o-mini' }); + const executor = createAgentExecutor({ llm, tools: [lookupOrder] }); + + const runtime = new AgentRuntime(); + try { + const result = await runtime.run(executor, 'Where is order #12345?'); + result.printResult(); + } finally { + await runtime.shutdown(); + } + ``` + +--- + +### OpenAI Agents SDK + +--- +#### Python + + ```python + from conductor.ai import
Runner # drop-in: replaces `from agents import Runner` + from agents import Agent, function_tool + + @function_tool + def get_weather(city: str) -> str: + return f"72°F and sunny in {city}" + + agent = Agent( + name="weather_assistant", + model="gpt-4o", + tools=[get_weather], + instructions="You are a helpful assistant.", + ) + + result = Runner.run_sync(agent, "What's the weather in NYC?") + print(result.final_output) + ``` + + One import change — everything else stays as written. + +--- +#### TypeScript + + ```ts + import { Agent, setTracingDisabled } from '@openai/agents'; + import { AgentRuntime } from '@conductor-oss/conductor-agent-sdk'; + + setTracingDisabled(true); + + const agent = new Agent({ + name: 'support_agent', + instructions: 'You are a helpful support assistant.', + model: 'gpt-4o', + }); + + const runtime = new AgentRuntime(); + try { + const result = await runtime.run(agent, 'How do I reset my password?'); + result.printResult(); + } finally { + await runtime.shutdown(); + } + ``` + +--- +#### Java + + ```java + import org.conductoross.conductor.ai.Agent; + import org.conductoross.conductor.ai.AgentRuntime; + import org.conductoross.conductor.ai.Strategy; + + // OpenAI Agents SDK agents are expressed as native Agentspan agents + // with Strategy.HANDOFF for multi-agent handoffs + Agent agent = Agent.builder() + .name("support_agent") + .model("gpt-4o") + .instructions("You are a helpful support assistant.") + .strategy(Strategy.HANDOFF) + .build(); + + try (AgentRuntime runtime = new AgentRuntime()) { + var result = runtime.run(agent, "How do I reset my password?"); + result.printResult(); + } + ``` + +--- + +### Google ADK + +--- +#### Python + + ```python + from google.adk.agents import LlmAgent + from google.adk.tools import FunctionTool + from conductor.ai.agents import AgentRuntime + + def get_weather(city: str) -> dict: + """Get current weather for a city.""" + return {"city": city, "condition": "Sunny", "temp_c": 22} + + adk_agent = 
LlmAgent( + name="weather_agent", + model="gemini-2.0-flash", + instruction="Answer weather questions using tools.", + tools=[FunctionTool(func=get_weather)], + ) + + with AgentRuntime() as runtime: + result = runtime.run(adk_agent, "What's the weather in Tokyo?") + result.print_result() + ``` + +--- +#### TypeScript + + ```ts + import { LlmAgent } from '@google/adk'; + import { AgentRuntime } from '@conductor-oss/conductor-agent-sdk'; + + const agent = new LlmAgent({ + name: 'weather_agent', + model: 'gemini-2.0-flash', + instruction: 'Answer weather questions using available tools.', + }); + + const runtime = new AgentRuntime(); + try { + const result = await runtime.run(agent, "What's the weather in Tokyo?"); + result.printResult(); + } finally { + await runtime.shutdown(); + } + ``` + +--- +#### Java + + ```java + import com.google.adk.agents.LlmAgent; + import org.conductoross.conductor.ai.AgentRuntime; + import org.conductoross.conductor.ai.frameworks.AdkBridge; + + LlmAgent adkAgent = LlmAgent.builder() + .name("weather_agent") + .model("gemini-2.0-flash") + .instruction("Answer weather questions.") + .build(); + + try (AgentRuntime runtime = new AgentRuntime()) { + var result = runtime.run(AdkBridge.toAgentspan(adkAgent), "What's the weather in Tokyo?"); + result.printResult(); + } + ``` + +--- + +### Vercel AI SDK + +--- +#### TypeScript + + **Option 1 — Use AI SDK tools on a native Agent (recommended):** + + The Conductor Agent SDK auto-detects Vercel AI SDK `tool()` objects — no wrapper needed. 
+ + ```ts + import { tool as aiTool } from 'ai'; + import { z } from 'zod'; + import { Agent, AgentRuntime } from '@conductor-oss/conductor-agent-sdk'; + + const getWeather = aiTool({ + description: 'Get current weather for a city.', + parameters: z.object({ city: z.string() }), + execute: async ({ city }) => ({ city, tempF: 72, condition: 'Sunny' }), + }); + + const agent = new Agent({ + name: 'weather_agent', + model: 'anthropic/claude-sonnet-4-6', + instructions: 'Use tools to answer weather questions.', + tools: [getWeather], + }); + + const runtime = new AgentRuntime(); + try { + const result = await runtime.run(agent, "What's the weather in SF?"); + result.printResult(); + } finally { + await runtime.shutdown(); + } + ``` + + **Option 2 — Drop-in `generateText`:** + + ```ts + import { generateText } from '@conductor-oss/conductor-agent-sdk/vercel-ai'; + + const { text } = await generateText({ + model: 'anthropic/claude-sonnet-4-6', + prompt: 'Write a haiku about durable execution.', + }); + console.log(text); + ``` + +--- + +## Next steps + +- [Why Agentspan](why-agentspan.md) — the case for server-side execution +- [Quickstart](quickstart.md) — full local setup in 60 seconds +- [Plan-Execute](concepts/plan-execute.md) — dynamic agents with deterministic execution +- [Scheduling](concepts/scheduling.md) — cron-triggered agents +- Per-SDK deep dives: [Python](python-sdk/README.md) · [TypeScript](typescript-sdk/README.md) · [Java](java-sdk/index.md) · [C#](csharp-sdk/README.md) diff --git a/docs/developer-guides/agentspan/reference/self-hosting.mdx b/docs/developer-guides/agentspan/reference/self-hosting.mdx index ae4297ba..b3c9f3c4 100644 --- a/docs/developer-guides/agentspan/reference/self-hosting.mdx +++ b/docs/developer-guides/agentspan/reference/self-hosting.mdx @@ -82,7 +82,7 @@ python my_agent.py Your agent code uses `AgentRuntime` as usual: ```python -from agentspan.agents import Agent, AgentRuntime, tool +from conductor.ai.agents import Agent, AgentRuntime,
tool @tool def process_data(input: str) -> str: @@ -116,7 +116,7 @@ export AGENTSPAN_AUTH_SECRET=your-app-secret Or configure in code: ```python -from agentspan.agents import configure +from conductor.ai.agents import configure configure( server_url="https://your-server.example.com", diff --git a/docs/developer-guides/agentspan/why-agentspan.mdx b/docs/developer-guides/agentspan/why-agentspan.mdx index 3a4f5423..91537b66 100644 --- a/docs/developer-guides/agentspan/why-agentspan.mdx +++ b/docs/developer-guides/agentspan/why-agentspan.mdx @@ -6,11 +6,13 @@ description: "Why agents fail in production, and how Agentspan's server-side exe # Why Agentspan -**Agentspan is a durable runtime for AI agents. Your code runs in your process. Execution state lives on the server — so crashes, restarts, and deployments don't lose work.** +**Agentspan is a durable runtime for AI agents, built for Conductor. Your code runs in your process. Execution state lives on the server — so crashes, restarts, and deployments don't lose work.** + +![Agentspan four pillars: long-running agents, dynamic plan-execute, event-driven agents, adaptive loops](assets/four-pillars.svg) --- -## How most agent frameworks work +## How most agent frameworks work (_and what could go wrong?_) Most agent frameworks — LangGraph, the OpenAI Agents SDK, Google ADK, and others — run the agent loop inside your process. Your code calls the LLM, receives a tool call, executes the tool, and loops. All of that happens in memory, in your process. @@ -22,12 +24,9 @@ Your process ├── call LLM again └── ...until done ``` - This works fine on your laptop. In production, it breaks in predictable ways. ---- - -## What can go wrong +### What can go wrong **Process crash mid-run.** A long-running agent — one that searches the web, reads files, calls APIs across dozens of steps — can take minutes. If your process dies (OOM kill, deploy, network drop), the entire run is gone. There is no way to resume from where it stopped. 
@@ -37,9 +36,13 @@ This works fine on your laptop. In production, it breaks in predictable ways. **Scaling means duplicating state.** Running agents across multiple machines means solving distributed state management yourself — or accepting that each agent instance is isolated with no shared execution context. +**No scheduling without external infrastructure.** Running an agent on a cron means maintaining a separate scheduler, handling missed fires, and managing overlap. Any of those can fail silently — and there's no execution history tied to your agent when it does. + +**Background jobs block or disappear.** Firing an agent asynchronously in-process — via threading or asyncio — means the job dies when your process does. There's no durable handle, no execution record, and no way to push new events into it from another process. + --- -## How Agentspan works differently +## How Agentspan separates orchestration from execution Agentspan separates where your code runs from where execution state lives. @@ -61,12 +64,38 @@ Your process can crash, restart, or be replaced. The agent keeps running. ## What this enables +### Long-running agents + **Crash recovery.** If your worker process dies mid-run, the server resumes execution when a new worker connects. No work is re-run from scratch — it picks up at the current step. **Durable human-in-the-loop.** Mark any tool with `approval_required=True`. The agent pauses server-side and waits indefinitely — no timeouts, no in-memory state at risk. Approve or deny via CLI, API, or the UI. **Full execution history.** Every run is stored with inputs, outputs, token usage, and per-step timing. Query via CLI, browse in the UI at `http://localhost:6767`, or replay any past run. +### Dynamic agents (Plan-Execute) + +**LLM plans, Conductor executes.** Define a planner agent that emits a JSON DAG of operations at runtime — adapting the plan to the specific task and inputs. 
The server compiles it into an immutable Conductor sub-workflow: no LLM involved in orchestration, retries, parallelism, or validation. The plan is fixed once compiled — replay-safe and branch-stable. See [Plan-Execute](concepts/plan-execute.md). + +**Call existing Conductor workflows.** Plan steps can invoke any deployed Conductor workflow as a sub-workflow. This bridges dynamic AI planning with your existing deterministic business automation — the LLM decides when to call it; Conductor handles the execution. + +### Event-driven agents + +**Scheduled agents.** Attach one or more crons to any agent at deploy time. The server fires the agent on schedule, tracks every execution, and lets you pause, resume, or trigger ad-hoc — without touching application code. See [Scheduling](concepts/scheduling.md). + +**Conductor event handlers.** Agentspan runs on Conductor, which has native integrations for Kafka, SQS, AMQP, webhooks, and database events. Any event source that can trigger a Conductor workflow can trigger an agent — with a full durable execution record for every event. + +### Adaptive loops + +**Durable iterations.** Any framework can write a `while` loop. In Agentspan, each iteration is a Conductor workflow execution — crash mid-loop and the current iteration resumes when a worker reconnects. The loop itself survives process failures. + +**Single execution ID across all iterations.** Using DO_WHILE inside a Conductor workflow, every iteration appears as a suffixed task (`planner_llm__1`, `planner_llm__2`, ...) under one workflow ID. The entire loop is observable, queryable, and replayable as a unit in the UI. + +**Deterministic inner × adaptive outer.** Combine Plan-Execute (deterministic per-iteration execution) with an outer loop that adapts based on verified results. The LLM decides *what* to try next; Conductor handles *how* each attempt runs — with full parallelism, retry, and validation built in. + +See [Adaptive Loops](concepts/adaptive-loops.md).
+ +### Framework compatibility + **Works with frameworks you already use.** Pass a LangGraph `StateGraph`, an OpenAI Agents SDK `Agent`, or a Google ADK pipeline directly to `runtime.run()`. Your definitions stay unchanged. --- diff --git a/sidebars.js b/sidebars.js index 94e420b5..02edb875 100644 --- a/sidebars.js +++ b/sidebars.js @@ -252,6 +252,7 @@ const sidebars = { 'developer-guides/agentspan/concepts/skills', 'developer-guides/agentspan/concepts/multi-agent', 'developer-guides/agentspan/concepts/plan-execute', + 'developer-guides/agentspan/concepts/adaptive-loops', 'developer-guides/agentspan/concepts/guardrails', 'developer-guides/agentspan/concepts/memory', 'developer-guides/agentspan/concepts/streaming', @@ -289,6 +290,7 @@ const sidebars = { description: 'Reference pages for Agentspan providers, model configuration, CLI commands, deployment, self-hosting, integrations, and worker types.', }, items: [ + 'developer-guides/agentspan/reference/sdk', 'developer-guides/agentspan/reference/providers', 'developer-guides/agentspan/reference/ai-models', 'developer-guides/agentspan/reference/cli',