diff --git a/Makefile b/Makefile index f1b1aa2..e723041 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ help: @echo " test Run fast tests (excludes real CLI calls)" @echo " test-unit Run unit tests only" @echo " test-integration Run integration tests (real CLI calls - slow)" - @echo " test-external Run external tests (requires CLIs: claude, codex, gemini, opencode)" + @echo " test-external Run external tests (requires CLIs: claude, cursor(agent), codex, gemini, opencode)" @echo " test-all Run all tests including integration" @echo " test-coverage Generate coverage report" @echo " test-race Run tests with race detector" @@ -80,7 +80,7 @@ test-unit: test-integration: go test -v -tags=integration ./internal/infrastructure/agents/... ./tests/integration/... -# External tests (requires external CLIs: claude, codex, gemini, opencode) +# External tests (requires external CLIs: claude, cursor(agent), codex, gemini, opencode) test-external: go test -v -tags=external ./... diff --git a/README.md b/README.md index c126ade..cb6ec8f 100644 --- a/README.md +++ b/README.md @@ -9,11 +9,11 @@ A Go CLI tool for orchestrating AI agents (Claude, Gemini, Codex, OpenAI-Compati - **State Machine Execution** - Define workflows as state machines with conditional transitions based on exit codes, command output, or custom expressions - **Inline Error Handling** - Specify error messages and exit codes directly on steps without creating separate terminal states -- **Agent Steps** - Invoke AI agents via CLI tools (Claude, Codex, Gemini) or direct HTTP (OpenAI, Ollama, vLLM, Groq) with prompt templates, response parsing, and accurate token tracking +- **Agent Steps** - Invoke AI agents via CLI tools (Claude, Cursor, Codex, Gemini) or direct HTTP (OpenAI, Ollama, vLLM, Groq) with prompt templates, response parsing, and accurate token tracking - **Output Formatting for Agent Steps** - Automatically strip markdown code fences and validate JSON output; human-readable streaming display 
controlled by `output_format` field (text vs raw NDJSON) - **External Prompt Files** - Load agent prompts from `.md` files with full template interpolation, helper functions, and local override support - **External Script Files** - Load commands from external script files with shebang-based interpreter dispatch, template interpolation, path resolution, and local override support -- **Conversation Mode** - Multi-turn conversations with native session resume for CLI providers (`claude`, `codex`, `gemini`, `opencode`), automatic context window management for HTTP providers, mid-conversation context injection via `inject_context` field, and token tracking across all turns +- **Conversation Mode** - Multi-turn conversations with native session resume for CLI providers (`claude`, `cursor`, `codex`, `gemini`, `opencode`), automatic context window management for HTTP providers, mid-conversation context injection via `inject_context` field, and token tracking across all turns - **OpenAI-Compatible Provider** - Use any Chat Completions API (OpenAI, Ollama, vLLM, Groq) with native HTTP integration, accurate token reporting, and no CLI tool required - **Parallel Execution** - Run multiple steps concurrently with configurable strategies - **Loop Constructs** - For-each and while loops with full context access diff --git a/docs/README.md b/docs/README.md index c35bee7..4ccf5c4 100644 --- a/docs/README.md +++ b/docs/README.md @@ -28,7 +28,7 @@ Learn how to use AWF effectively: - [Commands](user-guide/commands.md) - All CLI commands and flags - [Interactive Input Collection](user-guide/interactive-inputs.md) - Automatic prompting for missing workflow inputs -- [Agent Steps](user-guide/agent-steps.md) - Invoke AI agents via CLI (Claude, Codex, Gemini) or HTTP APIs (OpenAI, Ollama, vLLM, Groq) +- [Agent Steps](user-guide/agent-steps.md) - Invoke AI agents via CLI (Claude, Cursor, Codex, Gemini) or HTTP APIs (OpenAI, Ollama, vLLM, Groq) - [Output 
Formatting](user-guide/agent-steps.md#output-formatting) - Automatic code fence stripping and JSON validation (`output_format: json|text`) - [Streaming Output Display](user-guide/agent-steps.md#streaming-output-display) - Human-readable filtered output for `--output streaming` and `--output buffered` modes - [External Prompt Files](user-guide/agent-steps.md#external-prompt-files) - Load prompts from Markdown files with template interpolation diff --git a/docs/development/testing.md b/docs/development/testing.md index 5790735..1f17792 100644 --- a/docs/development/testing.md +++ b/docs/development/testing.md @@ -16,7 +16,7 @@ make test-unit # Integration tests (requires full system setup, tagged with //go:build integration) make test-integration -# External tests (requires external CLIs: claude, codex, gemini, opencode) +# External tests (requires external CLIs: claude, cursor(agent), codex, gemini, opencode) make test-external # All tests including integration @@ -41,7 +41,7 @@ AWF uses Go build tags to control which tests run in different environments. 
Thi | Tag | Purpose | Usage | Example | |-----|---------|-------|---------| | `integration` | Full system tests requiring setup, state persistence, CLI execution | `make test-integration` or `go test -tags=integration ./...` | End-to-end workflow execution | -| `external` | Tests requiring external CLI tools (claude, codex, gemini, opencode) | `make test-external` or `go test -tags=external ./...` | AI provider validation | +| `external` | Tests requiring external CLI tools (claude, cursor(agent), codex, gemini, opencode) | `make test-external` or `go test -tags=external ./...` | AI provider validation | | `slow` | Resource-intensive tests (high memory, concurrency, long-running) | `go test -tags=slow ./...` | Memory leak detection, stress tests | | `!short` | Standard Go short mode exclusion for tests that take >100ms | `go test -short ./...` (excludes these) | Database operations, file I/O | diff --git a/docs/user-guide/agent-steps.md b/docs/user-guide/agent-steps.md index 6daa24f..279ce8e 100644 --- a/docs/user-guide/agent-steps.md +++ b/docs/user-guide/agent-steps.md @@ -2,7 +2,7 @@ title: "Agent Steps Guide" --- -Invoke AI agents (Claude, Codex, Gemini, OpenCode, OpenAI-Compatible) in your workflows with structured prompts and response parsing. +Invoke AI agents (Claude, Cursor, Codex, Gemini, OpenCode, OpenAI-Compatible) in your workflows with structured prompts and response parsing. ## Overview @@ -86,6 +86,28 @@ analyze: - `allowed_tools`: Comma-separated list of tools to allow (e.g., `"bash,read"` → `--allowedTools bash,read`) - `dangerously_skip_permissions`: Skip permission prompts (boolean, maps to `--dangerously-skip-permissions`). **Security warning**: bypasses all safety prompts — use only in trusted, automated environments. Emits a security audit log. +### Cursor CLI + +Requires the Cursor CLI `agent` binary installed (from [cursor.com/cli](https://cursor.com/cli)). 
+ +```yaml +analyze: + type: agent + provider: cursor + prompt: "Review this diff: {{.inputs.diff}}" + options: + model: composer-2 + mode: ask + timeout: 120 + on_success: next +``` + +**Provider-Specific Options:** +- `model`: Cursor model identifier (no AWF-side whitelist; validated by Cursor CLI at runtime) +- `mode`: `ask` or `plan` (maps to `--mode`) +- `sandbox`: `enabled` or `disabled` (maps to `--sandbox`) +- `dangerously_skip_permissions`: Skip approval prompts (boolean, maps to `--force`). **Security warning**: bypasses safety confirmations — use only in trusted automation environments. + ### Codex (OpenAI) Requires the `codex` CLI tool installed. @@ -242,7 +264,24 @@ step validation error: model must start with "gpt-", "codex-", or match o-series ### OpenCode & OpenAI-Compatible -No model validation for `opencode` or `openai_compatible` providers — these use arbitrary backend models. +No model validation for `cursor`, `opencode`, or `openai_compatible` providers — these use arbitrary/backend-specific model names. + +## Cursor vs Claude Parity + +AWF targets the highest possible parity between `cursor` and `claude` providers. 
Current mapping: + +| Capability | Claude | Cursor | Notes | +|------------|--------|--------|-------| +| Single-turn execution | Yes | Yes | Both run in print/headless mode | +| Conversation mode | Yes | Yes | Cursor uses `--resume <chat_id>` | +| Session extraction | `session_id` from `result` | `chat_id`/`chatId` (system init) | Cursor extraction is tolerant to field variants | +| `output_format: text` | Yes | Yes | AWF extracts readable assistant text | +| `output_format: json` | Yes | Yes | AWF exposes final result event payload | +| `system_prompt` in conversation | Native `--system-prompt` | Inlined on first turn | Cursor CLI has no dedicated system prompt flag | +| `allowed_tools` | Supported | Not mapped | No Cursor CLI equivalent currently | +| `dangerously_skip_permissions` | `--dangerously-skip-permissions` | `--force` | Closest available equivalent | + +When no exact Cursor equivalent exists, AWF prefers explicit behavior over silent mismatches and documents the fallback. ### When Validation Occurs @@ -854,7 +893,7 @@ See [Workflow Syntax — Inline Error Shorthand](workflow-syntax.md#inline-error |-------|-------|----------| | Provider not found | CLI tool not installed | Install required CLI (e.g., `claude install`) | | Timeout | Agent response took too long | Increase timeout or reduce prompt complexity | -| Invalid provider | Unsupported provider | Use `claude`, `codex`, `gemini`, `opencode`, or `openai_compatible` | +| Invalid provider | Unsupported provider | Use `claude`, `cursor`, `codex`, `gemini`, `opencode`, or `openai_compatible` | | Command failed | Provider CLI returned error | Check provider configuration and logs | ### Debugging diff --git a/docs/user-guide/examples.md b/docs/user-guide/examples.md index 1ab32b3..e1581af 100644 --- a/docs/user-guide/examples.md +++ b/docs/user-guide/examples.md @@ -408,7 +408,7 @@ outer=B inner=1 ## AI Agent Integration -Invoke AI agents (Claude, Codex, Gemini) directly in workflows: +Invoke AI agents (Claude, 
Cursor, Codex, Gemini) directly in workflows: ```yaml name: code-review-with-agent diff --git a/docs/user-guide/workflow-syntax.md b/docs/user-guide/workflow-syntax.md index e2a2ba8..c4ed0d6 100644 --- a/docs/user-guide/workflow-syntax.md +++ b/docs/user-guide/workflow-syntax.md @@ -58,7 +58,7 @@ states: | Type | Description | |------|-------------| | `step` | Execute a command | -| `agent` | Invoke an AI agent (Claude, Codex, Gemini, etc.) | +| `agent` | Invoke an AI agent (Claude, Cursor, Codex, Gemini, etc.) | | `terminal` | End state with success/failure status | | `parallel` | Execute multiple steps concurrently | | `for_each` | Iterate over a list of items | @@ -369,7 +369,7 @@ Error messages include the resolved file path for easy debugging. ## Agent State -Invoke an AI agent (Claude, Codex, Gemini, OpenCode) with a prompt template. +Invoke an AI agent (Claude, Cursor, Codex, Gemini, OpenCode) with a prompt template. ### Basic Agent Step @@ -412,7 +412,7 @@ For **automated cross-step session resume** (no stdin loop), use `mode: single` | Option | Type | Required | Description | |--------|------|----------|-------------| -| `provider` | string | Yes | Agent provider: `claude`, `codex`, `gemini`, `opencode`, `openai_compatible` | +| `provider` | string | Yes | Agent provider: `claude`, `cursor`, `codex`, `gemini`, `opencode`, `openai_compatible` | | `mode` | string | No | `single` (default) or `conversation` (interactive user-driven loop) | | `prompt` | string | Yes* | Prompt template (supports `{{.inputs.*}}` and `{{.states.*}}` interpolation); in `mode: conversation` this serves as the first user message | | `prompt_file` | string | No* | Path to external prompt template file (mutually exclusive with `prompt`; not supported in `mode: conversation`) | @@ -472,7 +472,7 @@ See [Conversation Mode & Session Tracking](conversation-steps.md) for the full r | `opencode` | `opencode` CLI | Multi-turn (session resume via `-s`) | OpenCode CLI | | `openai_compatible` | 
HTTP API | Full multi-turn (messages array) | Chat Completions API (OpenAI, Ollama, vLLM, Groq) | -> **Conversation mode and providers:** All providers support multi-turn conversations. CLI-based providers (`claude`, `codex`, `gemini`, `opencode`) use native session resume flags to maintain context across turns — session IDs are extracted from CLI output after the first turn and passed on subsequent turns. If session ID extraction fails, the provider falls back to stateless mode gracefully. `openai_compatible` maintains full conversation history via the Chat Completions API messages array. +> **Conversation mode and providers:** All providers support multi-turn conversations. CLI-based providers (`claude`, `cursor`, `codex`, `gemini`, `opencode`) use native session resume flags to maintain context across turns — session IDs are extracted from CLI output after the first turn and passed on subsequent turns. If session ID extraction fails, the provider falls back to stateless mode gracefully. `openai_compatible` maintains full conversation history via the Chat Completions API messages array. ### Agent Output diff --git a/internal/domain/ports/agent_provider.go b/internal/domain/ports/agent_provider.go index e0ed8a9..d390fc1 100644 --- a/internal/domain/ports/agent_provider.go +++ b/internal/domain/ports/agent_provider.go @@ -8,7 +8,7 @@ import ( ) // AgentProvider defines the contract for executing AI agent CLI commands. -// Implementations adapt specific agent CLIs (Claude, Codex, Gemini, etc.) +// Implementations adapt specific agent CLIs (Claude, Cursor, Codex, Gemini, etc.) // to this unified interface. type AgentProvider interface { // Execute invokes the agent with the given prompt and options. 
diff --git a/internal/domain/workflow/agent_config.go b/internal/domain/workflow/agent_config.go index 9c228dd..8f897fa 100644 --- a/internal/domain/workflow/agent_config.go +++ b/internal/domain/workflow/agent_config.go @@ -26,7 +26,7 @@ var validOutputFormats = map[OutputFormat]bool{ // AgentConfig holds configuration for invoking an AI agent. type AgentConfig struct { - Provider string `yaml:"provider"` // agent provider: claude, codex, gemini, opencode, openai_compatible + Provider string `yaml:"provider"` // agent provider: claude, cursor, codex, gemini, opencode, openai_compatible Prompt string `yaml:"prompt"` // prompt template with {{inputs.*}} and {{states.*}} (single mode) or first user message (conversation mode) PromptFile string `yaml:"prompt_file"` // path to external prompt template file (mutually exclusive with Prompt) Options map[string]any `yaml:"options"` // provider-specific options (model, temperature, max_tokens, etc.) diff --git a/internal/domain/workflow/doc.go b/internal/domain/workflow/doc.go index e4e9b97..80f411d 100644 --- a/internal/domain/workflow/doc.go +++ b/internal/domain/workflow/doc.go @@ -108,7 +108,7 @@ // // AI agent invocation: // - AgentConfig: Provider, prompt, options, timeout, mode -// - Provider values: claude, codex, gemini, opencode, custom +// - Provider values: claude, cursor, codex, gemini, opencode, openai_compatible // - Mode: single (one-shot) or conversation (multi-turn) // // ## Conversation Mode (F033, conversation.go) diff --git a/internal/infrastructure/agents/cursor_provider.go b/internal/infrastructure/agents/cursor_provider.go new file mode 100644 index 0000000..3378d42 --- /dev/null +++ b/internal/infrastructure/agents/cursor_provider.go @@ -0,0 +1,266 @@ +package agents + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "os/exec" + "strings" + + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/workflow" + 
"github.com/awf-project/cli/internal/infrastructure/logger" +) + +// CursorProvider implements AgentProvider for Cursor CLI. +// Invokes: agent -p --output-format stream-json "prompt" +type CursorProvider struct { + base *baseCLIProvider + logger ports.Logger + executor ports.CLIExecutor +} + +func NewCursorProvider() *CursorProvider { + p := &CursorProvider{ + logger: logger.NopLogger{}, + executor: NewExecCLIExecutor(), + } + p.base = p.newBase() + return p +} + +func NewCursorProviderWithOptions(opts ...CursorProviderOption) *CursorProvider { + p := &CursorProvider{ + logger: logger.NopLogger{}, + executor: NewExecCLIExecutor(), + } + for _, opt := range opts { + opt(p) + } + p.base = p.newBase() + return p +} + +func (p *CursorProvider) newBase() *baseCLIProvider { + return newBaseCLIProvider("cursor", "agent", p.executor, p.logger, cliProviderHooks{ + buildExecuteArgs: p.buildExecuteArgs, + buildConversationArgs: p.buildConversationArgs, + extractSessionID: p.extractSessionID, + extractTextContent: p.extractTextFromJSON, + validateOptions: validateCursorOptions, + parseStreamLine: p.parseCursorStreamLine, + }) +} + +func (p *CursorProvider) Execute(ctx context.Context, prompt string, options map[string]any, stdout, stderr io.Writer) (*workflow.AgentResult, error) { + result, rawOutput, err := p.base.execute(ctx, prompt, options, stdout, stderr) + if err != nil { + return nil, err + } + + userFormat, _ := getStringOption(options, "output_format") + if userFormat == "json" || userFormat == "stream-json" { + if jsonResp := p.extractResultEvent(rawOutput); jsonResp != nil { + result.Response = jsonResp + } + } else { + if extracted := p.extractTextFromJSON(rawOutput); extracted != "" { + result.Output = extracted + result.Tokens = estimateTokens(extracted) + } + } + + return result, nil +} + +func (p *CursorProvider) ExecuteConversation(ctx context.Context, state *workflow.ConversationState, prompt string, options map[string]any, stdout, stderr io.Writer) 
(*workflow.ConversationResult, error) { + result, rawOutput, err := p.base.executeConversation(ctx, state, prompt, options, stdout, stderr) + if err != nil { + return nil, err + } + + // Keep behavior aligned with Claude provider: only expose raw final result + // wrapper when explicitly requested. + userFormat, userFormatSet := getStringOption(options, "output_format") + if userFormatSet && userFormat == "json" { + if jsonResp := p.extractResultEvent(rawOutput); jsonResp != nil { + result.Response = jsonResp + } + } + + return result, nil +} + +func (p *CursorProvider) Name() string { + return "cursor" +} + +func (p *CursorProvider) Validate() error { + _, err := exec.LookPath("agent") + if err != nil { + return fmt.Errorf("cursor CLI not found in PATH (expected binary 'agent'): %w", err) + } + return nil +} + +func (p *CursorProvider) buildExecuteArgs(prompt string, options map[string]any) ([]string, error) { + args := []string{"-p", prompt, "--output-format", "stream-json"} + return appendCursorOptions(args, options), nil +} + +func (p *CursorProvider) buildConversationArgs(state *workflow.ConversationState, prompt string, options map[string]any) ([]string, error) { + effectivePrompt := prompt + if state != nil && state.SessionID == "" { + // Cursor CLI has no dedicated system prompt flag. Inline it only for first turn. 
+ if sysPrompt, ok := getStringOption(options, "system_prompt"); ok && sysPrompt != "" { + effectivePrompt = sysPrompt + "\n\n" + prompt + } + } + + args := []string{"-p", effectivePrompt, "--output-format", "stream-json"} + if state != nil && state.SessionID != "" { + args = append(args, "--resume", state.SessionID) + } + return appendCursorOptions(args, options), nil +} + +func appendCursorOptions(args []string, options map[string]any) []string { + if model, ok := getStringOption(options, "model"); ok && model != "" { + args = append(args, "--model", model) + } + if mode, ok := getStringOption(options, "mode"); ok && mode != "" { + args = append(args, "--mode", mode) + } + if skip, ok := getBoolOption(options, "dangerously_skip_permissions"); ok && skip { + args = append(args, "--force") + } + if sandbox, ok := getStringOption(options, "sandbox"); ok && sandbox != "" { + args = append(args, "--sandbox", sandbox) + } + return args +} + +func validateCursorOptions(options map[string]any) error { + if options == nil { + return nil + } + + if mode, ok := getStringOption(options, "mode"); ok { + if mode != "plan" && mode != "ask" { + return fmt.Errorf("invalid mode: %s (must be 'plan' or 'ask')", mode) + } + } + + if sandbox, ok := getStringOption(options, "sandbox"); ok { + if sandbox != "enabled" && sandbox != "disabled" { + return fmt.Errorf("invalid sandbox: %s (must be 'enabled' or 'disabled')", sandbox) + } + } + + return nil +} + +func (p *CursorProvider) extractResultEvent(output string) map[string]any { + return findFirstNDJSONEvent(output, "result") +} + +func (p *CursorProvider) extractInitEvent(output string) map[string]any { + for _, line := range strings.Split(output, "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + + var evt map[string]any + if err := json.Unmarshal([]byte(line), &evt); err != nil { + continue + } + + eventType, _ := evt["type"].(string) + subtype, _ := evt["subtype"].(string) + if eventType == "system" && 
subtype == "init" { + return evt + } + } + return nil +} + +func extractStringValue(values map[string]any, keys ...string) string { + for _, key := range keys { + if value, ok := values[key].(string); ok && value != "" { + return value + } + } + return "" +} + +func (p *CursorProvider) extractSessionID(output string) (string, error) { + if output == "" { + return "", errors.New("empty output") + } + evt := p.extractInitEvent(output) + if evt == nil { + return "", errors.New("system init event not found") + } + + sessionID := extractStringValue(evt, + "chat_id", "chatId", + "session_id", "sessionId", + "conversation_id", "conversationId", + "thread_id", "threadId", + "id", + ) + if sessionID == "" { + return "", errors.New("session identifier missing") + } + return sessionID, nil +} + +func (p *CursorProvider) extractTextFromJSON(output string) string { + evt := p.extractResultEvent(output) + if evt == nil { + return "" + } + + if result, ok := evt["result"].(string); ok && result != "" { + return result + } + if message, ok := evt["message"].(string); ok && message != "" { + return message + } + + return extractDisplayText(output, p.parseCursorStreamLine) +} + +// parseCursorStreamLine extracts displayable text from Cursor CLI stream-json +// lines. It surfaces assistant message text blocks and ignores tool/system events. 
+func (p *CursorProvider) parseCursorStreamLine(line []byte) string { + var evt struct { + Type string `json:"type"` + Message *struct { + Content []struct { + Type string `json:"type"` + Text string `json:"text"` + } `json:"content"` + } `json:"message"` + } + if err := json.Unmarshal(line, &evt); err != nil { + return "" + } + if evt.Type != "assistant" || evt.Message == nil { + return "" + } + + var out strings.Builder + for _, block := range evt.Message.Content { + if block.Type == "text" && block.Text != "" { + if out.Len() > 0 { + out.WriteByte('\n') + } + out.WriteString(block.Text) + } + } + return out.String() +} diff --git a/internal/infrastructure/agents/cursor_provider_unit_test.go b/internal/infrastructure/agents/cursor_provider_unit_test.go new file mode 100644 index 0000000..cae4700 --- /dev/null +++ b/internal/infrastructure/agents/cursor_provider_unit_test.go @@ -0,0 +1,157 @@ +package agents + +import ( + "context" + "testing" + + "github.com/awf-project/cli/internal/domain/workflow" + "github.com/awf-project/cli/internal/testutil/mocks" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCursorProvider_Execute_WithOptions(t *testing.T) { + mockExec := mocks.NewMockCLIExecutor() + mockExec.SetOutput([]byte(`{"type":"result","result":"Done"}`), nil) + provider := NewCursorProviderWithOptions(WithCursorExecutor(mockExec)) + + _, err := provider.Execute(context.Background(), "analyze this", map[string]any{ + "model": "composer-2", + "mode": "plan", + "sandbox": "enabled", + "dangerously_skip_permissions": true, + }, nil, nil) + + require.NoError(t, err) + calls := mockExec.GetCalls() + require.Len(t, calls, 1) + assert.Equal(t, "agent", calls[0].Name) + assert.Equal(t, []string{ + "-p", "analyze this", "--output-format", "stream-json", + "--model", "composer-2", + "--mode", "plan", + "--force", + "--sandbox", "enabled", + }, calls[0].Args) +} + +func TestCursorProvider_Execute_JsonFormatSetsResponse(t 
*testing.T) { + mockExec := mocks.NewMockCLIExecutor() + mockExec.SetOutput([]byte(`{"type":"result","result":"Final","duration_ms":42}`), nil) + provider := NewCursorProviderWithOptions(WithCursorExecutor(mockExec)) + + result, err := provider.Execute(context.Background(), "hello", map[string]any{ + "output_format": "json", + }, nil, nil) + + require.NoError(t, err) + require.NotNil(t, result) + assert.Equal(t, "cursor", result.Provider) + assert.Equal(t, "result", result.Response["type"]) + assert.Equal(t, "Final", result.Response["result"]) +} + +func TestCursorProvider_Execute_TextExtractsResultField(t *testing.T) { + mockExec := mocks.NewMockCLIExecutor() + mockExec.SetOutput([]byte(`{"type":"result","result":"Readable text"}`), nil) + provider := NewCursorProviderWithOptions(WithCursorExecutor(mockExec)) + + result, err := provider.Execute(context.Background(), "hello", nil, nil, nil) + + require.NoError(t, err) + require.NotNil(t, result) + assert.Equal(t, "Readable text", result.Output) + assert.Equal(t, len("Readable text")/4, result.Tokens) +} + +func TestCursorProvider_ExecuteConversation_UsesResumeWhenSessionExists(t *testing.T) { + mockExec := mocks.NewMockCLIExecutor() + mockExec.SetOutput([]byte(`{"type":"system","subtype":"init","chat_id":"chat-42"}`), nil) + provider := NewCursorProviderWithOptions(WithCursorExecutor(mockExec)) + + state := workflow.NewConversationState("") + state.SessionID = "chat-previous" + + _, err := provider.ExecuteConversation(context.Background(), state, "continue", nil, nil, nil) + require.NoError(t, err) + + calls := mockExec.GetCalls() + require.Len(t, calls, 1) + assert.Contains(t, calls[0].Args, "--resume") + assert.Contains(t, calls[0].Args, "chat-previous") +} + +func TestCursorProvider_ExecuteConversation_InlinesSystemPromptOnFirstTurn(t *testing.T) { + mockExec := mocks.NewMockCLIExecutor() + mockExec.SetOutput([]byte(`{"type":"system","subtype":"init","chat_id":"chat-42"}`), nil) + provider := 
NewCursorProviderWithOptions(WithCursorExecutor(mockExec)) + + state := workflow.NewConversationState("") + _, err := provider.ExecuteConversation(context.Background(), state, "User ask", map[string]any{ + "system_prompt": "You are strict", + }, nil, nil) + require.NoError(t, err) + + calls := mockExec.GetCalls() + require.Len(t, calls, 1) + assert.Equal(t, "-p", calls[0].Args[0]) + assert.Equal(t, "You are strict\n\nUser ask", calls[0].Args[1]) +} + +func TestCursorProvider_ExtractSessionID(t *testing.T) { + provider := NewCursorProvider() + + t.Run("chat_id", func(t *testing.T) { + id, err := provider.extractSessionID(`{"type":"system","subtype":"init","chat_id":"chat-1"}`) + require.NoError(t, err) + assert.Equal(t, "chat-1", id) + }) + + t.Run("chatId", func(t *testing.T) { + id, err := provider.extractSessionID(`{"type":"system","subtype":"init","chatId":"chat-2"}`) + require.NoError(t, err) + assert.Equal(t, "chat-2", id) + }) + + t.Run("missing", func(t *testing.T) { + _, err := provider.extractSessionID(`{"type":"system","subtype":"init"}`) + assert.Error(t, err) + }) +} + +func TestCursorProvider_ParseCursorStreamLine(t *testing.T) { + provider := NewCursorProvider() + + got := provider.parseCursorStreamLine([]byte(`{"type":"assistant","message":{"content":[{"type":"text","text":"Hello"},{"type":"text","text":"World"}]}}`)) + assert.Equal(t, "Hello\nWorld", got) + + got = provider.parseCursorStreamLine([]byte(`{"type":"tool_call","subtype":"started"}`)) + assert.Equal(t, "", got) +} + +func TestValidateCursorOptions(t *testing.T) { + tests := []struct { + name string + options map[string]any + wantErr bool + }{ + {name: "nil options", options: nil, wantErr: false}, + {name: "valid mode ask", options: map[string]any{"mode": "ask"}, wantErr: false}, + {name: "valid mode plan", options: map[string]any{"mode": "plan"}, wantErr: false}, + {name: "valid sandbox enabled", options: map[string]any{"sandbox": "enabled"}, wantErr: false}, + {name: "valid sandbox 
disabled", options: map[string]any{"sandbox": "disabled"}, wantErr: false}, + {name: "invalid mode", options: map[string]any{"mode": "agent"}, wantErr: true}, + {name: "invalid sandbox", options: map[string]any{"sandbox": "auto"}, wantErr: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validateCursorOptions(tt.options) + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} diff --git a/internal/infrastructure/agents/doc.go b/internal/infrastructure/agents/doc.go index ff2d35e..51bbddb 100644 --- a/internal/infrastructure/agents/doc.go +++ b/internal/infrastructure/agents/doc.go @@ -2,7 +2,7 @@ // // The agents package provides concrete implementations of the AgentProvider and AgentRegistry // ports defined in the domain layer, enabling workflow steps to invoke AI agents (Claude, Gemini, -// Codex, OpenCode, and OpenAI-compatible endpoints) for code generation, analysis, and decision-making tasks. +// Cursor, Codex, OpenCode, and OpenAI-compatible endpoints) for code generation, analysis, and decision-making tasks. // Each provider wraps a CLI executor and handles model-specific invocation patterns, streaming // output, and error mapping. 
// @@ -46,6 +46,14 @@ // - ExecuteConversation: Not supported (returns error) // - Name: Returns "codex" // - Validate: Checks API key and model configuration +// +// ## CursorProvider (cursor_provider.go) +// +// Cursor CLI provider: +// - Execute: Single-shot prompt execution via Cursor CLI (`agent -p`) +// - ExecuteConversation: Multi-turn conversation using `--resume <chat_id>` +// - Name: Returns "cursor" +// - Validate: Checks Cursor CLI binary availability (`agent`) // // ## OpenCodeProvider (opencode_provider.go) // @@ -96,6 +104,7 @@ // - WithClaudeExecutor: Inject custom executor for Claude provider // - WithGeminiExecutor: Inject custom executor for Gemini provider // - WithCodexExecutor: Inject custom executor for Codex provider +// - WithCursorExecutor: Inject custom executor for Cursor provider // - WithOpenCodeExecutor: Inject custom executor for OpenCode provider // - WithHTTPClient: Inject custom HTTP client for OpenAICompatible provider // diff --git a/internal/infrastructure/agents/options.go b/internal/infrastructure/agents/options.go index abe989b..050f341 100644 --- a/internal/infrastructure/agents/options.go +++ b/internal/infrastructure/agents/options.go @@ -35,6 +35,20 @@ func WithCodexLogger(l ports.Logger) CodexProviderOption { } } +type CursorProviderOption func(*CursorProvider) + +func WithCursorExecutor(executor ports.CLIExecutor) CursorProviderOption { + return func(p *CursorProvider) { + p.executor = executor + } +} + +func WithCursorLogger(l ports.Logger) CursorProviderOption { + return func(p *CursorProvider) { + p.logger = l + } +} + type OpenCodeProviderOption func(*OpenCodeProvider) func WithOpenCodeExecutor(executor ports.CLIExecutor) OpenCodeProviderOption { diff --git a/internal/infrastructure/agents/provider_options_test.go b/internal/infrastructure/agents/provider_options_test.go index 48b583f..bbf29d7 100644 --- a/internal/infrastructure/agents/provider_options_test.go +++ b/internal/infrastructure/agents/provider_options_test.go @@ 
-245,6 +245,57 @@ func TestOpenCodeProvider_NewWithOptions_HappyPath(t *testing.T) { } } +func TestCursorProvider_NewWithOptions_HappyPath(t *testing.T) { + tests := []struct { + name string + setupMock func(*mocks.MockCLIExecutor) + options []CursorProviderOption + }{ + { + name: "no options uses default executor", + setupMock: func(m *mocks.MockCLIExecutor) { + m.SetOutput([]byte(`{"type":"result","result":"cursor output"}`), []byte("")) + }, + options: nil, + }, + { + name: "with custom executor option", + setupMock: func(m *mocks.MockCLIExecutor) { + m.SetOutput([]byte(`{"type":"result","result":"custom cursor output"}`), []byte("")) + }, + options: []CursorProviderOption{ + WithCursorExecutor(mocks.NewMockCLIExecutor()), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var mockExec *mocks.MockCLIExecutor + var opts []CursorProviderOption + if tt.setupMock != nil { + mockExec = mocks.NewMockCLIExecutor() + tt.setupMock(mockExec) + opts = []CursorProviderOption{WithCursorExecutor(mockExec)} + } else if tt.options != nil { + opts = tt.options + } + + provider := NewCursorProviderWithOptions(opts...) 
+ + require.NotNil(t, provider) + assert.NotNil(t, provider.executor) + + if mockExec != nil { + ctx := context.Background() + result, err := provider.Execute(ctx, "test prompt", nil, nil, nil) + assert.NoError(t, err) + assert.NotNil(t, result) + } + }) + } +} + func TestProviderOptions_EdgeCases(t *testing.T) { t.Run("nil executor option panics are prevented", func(t *testing.T) { // Note: Passing nil executor should work but will cause runtime issues later @@ -266,6 +317,10 @@ func TestProviderOptions_EdgeCases(t *testing.T) { // OpenCode opencodeProvider := NewOpenCodeProviderWithOptions(WithOpenCodeExecutor(nil)) assert.NotNil(t, opencodeProvider) + + // Cursor + cursorProvider := NewCursorProviderWithOptions(WithCursorExecutor(nil)) + assert.NotNil(t, cursorProvider) }) t.Run("empty options slice behaves like no options", func(t *testing.T) { @@ -316,6 +371,10 @@ func TestProviderOptions_EdgeCases(t *testing.T) { opencodeProvider := NewOpenCodeProvider() assert.NotNil(t, opencodeProvider) assert.NotNil(t, opencodeProvider.executor) + + cursorProvider := NewCursorProvider() + assert.NotNil(t, cursorProvider) + assert.NotNil(t, cursorProvider.executor) }) } @@ -375,6 +434,20 @@ func TestProviderOptions_ErrorHandling(t *testing.T) { assert.Nil(t, result) assert.Contains(t, err.Error(), "opencode execution failed") }) + + t.Run("cursor provider executor error propagates", func(t *testing.T) { + mockExec := mocks.NewMockCLIExecutor() + mockExec.SetError(errors.New("cursor CLI failed")) + + provider := NewCursorProviderWithOptions(WithCursorExecutor(mockExec)) + ctx := context.Background() + + result, err := provider.Execute(ctx, "test prompt", nil, nil, nil) + + assert.Error(t, err) + assert.Nil(t, result) + assert.Contains(t, err.Error(), "cursor execution failed") + }) } func TestProviderOptions_Integration(t *testing.T) { @@ -464,6 +537,25 @@ func TestProviderOptions_Integration(t *testing.T) { assert.Equal(t, "opencode", calls[0].Name) }) + t.Run("cursor 
provider with mock executor executes successfully", func(t *testing.T) { + mockExec := mocks.NewMockCLIExecutor() + mockExec.SetOutput([]byte(`{"type":"result","result":"Cursor response"}`), []byte("")) + + provider := NewCursorProviderWithOptions(WithCursorExecutor(mockExec)) + ctx := context.Background() + + result, err := provider.Execute(ctx, "Generate code", nil, nil, nil) + + require.NoError(t, err) + require.NotNil(t, result) + assert.Equal(t, "cursor", result.Provider) + assert.Contains(t, result.Output, "Cursor response") + + calls := mockExec.GetCalls() + require.Len(t, calls, 1) + assert.Equal(t, "agent", calls[0].Name) + }) + t.Run("multiple providers can use different executors", func(t *testing.T) { // Create separate mock executors for each provider claudeMock := mocks.NewMockCLIExecutor() diff --git a/internal/infrastructure/agents/registry.go b/internal/infrastructure/agents/registry.go index 09978c4..42d29f1 100644 --- a/internal/infrastructure/agents/registry.go +++ b/internal/infrastructure/agents/registry.go @@ -73,6 +73,7 @@ func (r *AgentRegistry) Has(name string) bool { func (r *AgentRegistry) RegisterDefaults() error { defaults := []ports.AgentProvider{ NewClaudeProvider(), + NewCursorProvider(), NewCodexProvider(), NewGeminiProvider(), NewOpenAICompatibleProvider(), diff --git a/internal/infrastructure/agents/registry_test.go b/internal/infrastructure/agents/registry_test.go index 9d0b581..8eb6293 100644 --- a/internal/infrastructure/agents/registry_test.go +++ b/internal/infrastructure/agents/registry_test.go @@ -269,8 +269,9 @@ func TestAgentRegistry_RegisterDefaults(t *testing.T) { // Verify default providers are registered list := registry.List() - assert.Len(t, list, 5) + assert.Len(t, list, 6) assert.Contains(t, list, "claude") + assert.Contains(t, list, "cursor") assert.Contains(t, list, "codex") assert.Contains(t, list, "gemini") assert.Contains(t, list, "openai_compatible") @@ -281,7 +282,7 @@ func 
TestAgentRegistry_RegisterDefaults_EachProviderRetrievable(t *testing.T) { registry := NewAgentRegistry() _ = registry.RegisterDefaults() - tests := []string{"claude", "codex", "gemini", "openai_compatible", "opencode"} + tests := []string{"claude", "cursor", "codex", "gemini", "openai_compatible", "opencode"} for _, name := range tests { t.Run(name, func(t *testing.T) { @@ -446,15 +447,16 @@ func TestAgentRegistry_RegisterDefaults_PartialFailure(t *testing.T) { // Verify RegisterDefaults continues on error - other providers should be registered list := registry.List() - assert.Len(t, list, 5, "All 5 default providers should be registered (1 pre-existing + 4 new)") + assert.Len(t, list, 6, "All 6 default providers should be registered (1 pre-existing + 5 new)") assert.Contains(t, list, "claude") + assert.Contains(t, list, "cursor") assert.Contains(t, list, "codex") assert.Contains(t, list, "gemini") assert.Contains(t, list, "openai_compatible") assert.Contains(t, list, "opencode") // Verify each provider is retrievable - for _, name := range []string{"claude", "codex", "gemini", "openai_compatible", "opencode"} { + for _, name := range []string{"claude", "cursor", "codex", "gemini", "openai_compatible", "opencode"} { provider, getErr := registry.Get(name) assert.NoError(t, getErr, "Provider %s should be retrievable", name) assert.NotNil(t, provider) @@ -473,17 +475,18 @@ func TestAgentRegistry_RegisterDefaults_EmptyRegistry(t *testing.T) { // Should succeed without errors require.NoError(t, err) - // Verify all 5 default providers are registered + // Verify all 6 default providers are registered list := registry.List() - assert.Len(t, list, 5) + assert.Len(t, list, 6) assert.Contains(t, list, "claude") + assert.Contains(t, list, "cursor") assert.Contains(t, list, "codex") assert.Contains(t, list, "gemini") assert.Contains(t, list, "openai_compatible") assert.Contains(t, list, "opencode") // Verify each provider is retrievable and functional - for _, name := range 
[]string{"claude", "codex", "gemini", "openai_compatible", "opencode"} { + for _, name := range []string{"claude", "cursor", "codex", "gemini", "openai_compatible", "opencode"} { provider, getErr := registry.Get(name) require.NoError(t, getErr, "Provider %s should be retrievable", name) require.NotNil(t, provider) @@ -509,10 +512,11 @@ func TestAgentRegistry_RegisterDefaults_MultiplePreRegistered(t *testing.T) { assert.Contains(t, err.Error(), "claude") assert.Contains(t, err.Error(), "gemini") - // All 5 providers should still be registered + // All 6 providers should still be registered list := registry.List() - assert.Len(t, list, 5) + assert.Len(t, list, 6) assert.Contains(t, list, "claude") + assert.Contains(t, list, "cursor") assert.Contains(t, list, "codex") assert.Contains(t, list, "gemini") assert.Contains(t, list, "openai_compatible") @@ -530,15 +534,16 @@ func TestAgentRegistry_RegisterDefaults_AllPreRegistered(t *testing.T) { // Try to register defaults again err2 := registry.RegisterDefaults() - // Should fail with aggregated error for all 5 providers + // Should fail with aggregated error for all 6 providers assert.Error(t, err2) assert.Contains(t, err2.Error(), "claude") + assert.Contains(t, err2.Error(), "cursor") assert.Contains(t, err2.Error(), "codex") assert.Contains(t, err2.Error(), "gemini") assert.Contains(t, err2.Error(), "openai_compatible") assert.Contains(t, err2.Error(), "opencode") - // Should still have exactly 5 providers (no duplicates) + // Should still have exactly 6 providers (no duplicates) list := registry.List() - assert.Len(t, list, 5) + assert.Len(t, list, 6) } diff --git a/internal/infrastructure/agents/session_resume_test.go b/internal/infrastructure/agents/session_resume_test.go index 0e49f0f..3026db3 100644 --- a/internal/infrastructure/agents/session_resume_test.go +++ b/internal/infrastructure/agents/session_resume_test.go @@ -251,6 +251,68 @@ func TestSessionResume_OpenCodeExtractsSessionID(t *testing.T) { } } +func 
TestSessionResume_CursorExtractsChatID(t *testing.T) { + tests := []struct { + name string + mockOutput []byte + wantSessionID string + wantResumeFlag bool + }{ + { + name: "turn 1: extract chat_id from system init event", + mockOutput: []byte(`{"type":"system","subtype":"init","chat_id":"chat-abc123","model":"composer-2"}` + "\n" + + `{"type":"result","result":"Done"}`), + wantSessionID: "chat-abc123", + wantResumeFlag: false, + }, + { + name: "turn 2: use --resume with extracted chat_id", + mockOutput: []byte(`{"type":"system","subtype":"init","chat_id":"chat-abc123","model":"composer-2"}` + "\n" + + `{"type":"result","result":"Continued"}`), + wantSessionID: "chat-abc123", + wantResumeFlag: true, + }, + { + name: "missing chat identifier: graceful fallback", + mockOutput: []byte(`{"type":"result","result":"No session info"}`), + wantSessionID: "", + wantResumeFlag: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mockExec := mocks.NewMockCLIExecutor() + mockExec.SetOutput(tt.mockOutput, nil) + provider := NewCursorProviderWithOptions(WithCursorExecutor(mockExec)) + + state := workflow.NewConversationState("") + if tt.wantResumeFlag { + state.SessionID = "chat-abc123" + } + + result, err := provider.ExecuteConversation(context.Background(), state, "test prompt", nil, nil, nil) + require.NoError(t, err) + require.NotNil(t, result) + assert.Equal(t, tt.wantSessionID, result.State.SessionID) + + calls := mockExec.GetCalls() + require.Len(t, calls, 1) + + hasResumeFlag := false + for i, arg := range calls[0].Args { + if arg == "--resume" && i+1 < len(calls[0].Args) { + hasResumeFlag = true + if tt.wantResumeFlag { + assert.Equal(t, "chat-abc123", calls[0].Args[i+1]) + } + } + } + assert.Equal(t, tt.wantResumeFlag, hasResumeFlag) + }) + } +} + func TestSessionResume_ContinueFromCrossStep(t *testing.T) { t.Run("Gemini: continue_from links turn 2 to turn 1 session ID", func(t *testing.T) { mockExec1 := mocks.NewMockCLIExecutor() @@ 
-333,4 +395,31 @@ func TestSessionResume_ContinueFromCrossStep(t *testing.T) { assert.Contains(t, calls2[0].Args, "-s") assert.Contains(t, calls2[0].Args, "ses_abc123") }) + + t.Run("Cursor: continue_from links turn 2 to turn 1 chat ID", func(t *testing.T) { + mockExec1 := mocks.NewMockCLIExecutor() + mockExec1.SetOutput([]byte(`{"type":"system","subtype":"init","chat_id":"chat-abc123"}`+"\n"+ + `{"type":"result","result":"Step 1"}`), nil) + provider := NewCursorProviderWithOptions(WithCursorExecutor(mockExec1)) + + state := workflow.NewConversationState("") + result1, err := provider.ExecuteConversation(context.Background(), state, "step 1 prompt", nil, nil, nil) + require.NoError(t, err) + require.Equal(t, "chat-abc123", result1.State.SessionID) + + mockExec2 := mocks.NewMockCLIExecutor() + mockExec2.SetOutput([]byte(`{"type":"system","subtype":"init","chat_id":"chat-abc123"}`+"\n"+ + `{"type":"result","result":"Step 2"}`), nil) + provider2 := NewCursorProviderWithOptions(WithCursorExecutor(mockExec2)) + + state2 := workflow.NewConversationState("") + state2.SessionID = result1.State.SessionID + _, err = provider2.ExecuteConversation(context.Background(), state2, "step 2 prompt", nil, nil, nil) + require.NoError(t, err) + + calls2 := mockExec2.GetCalls() + require.Len(t, calls2, 1) + assert.Contains(t, calls2[0].Args, "--resume") + assert.Contains(t, calls2[0].Args, "chat-abc123") + }) } diff --git a/internal/infrastructure/repository/yaml_types.go b/internal/infrastructure/repository/yaml_types.go index f571263..c4928b1 100644 --- a/internal/infrastructure/repository/yaml_types.go +++ b/internal/infrastructure/repository/yaml_types.go @@ -67,7 +67,7 @@ type yamlStep struct { // Agent configuration (for AI agent steps - F039) // Flat structure: provider, prompt, options directly on step - Provider string `yaml:"provider"` // agent provider: claude, codex, gemini, opencode, custom + Provider string `yaml:"provider"` // agent provider: claude, cursor, codex, 
gemini, opencode, openai_compatible Prompt string `yaml:"prompt"` // prompt template with {{inputs.*}} and {{states.*}} PromptFile string `yaml:"prompt_file"` // path to external prompt template file Options map[string]any `yaml:"options"` // provider-specific options (model, temperature, max_tokens, etc.)