From 242bea99f8b9d410482332fb35b7d0bc0e51f0a2 Mon Sep 17 00:00:00 2001 From: Alexandre Balmes Date: Mon, 13 Apr 2026 22:54:23 +0200 Subject: [PATCH] feat(agents): add streaming display output for CLI providers - `internal/infrastructure/agents/stream_filter.go`: Add `streamFilterWriter` io.Writer decorator and `LineExtractor` func type for NDJSON-to-text filtering - `internal/infrastructure/agents/base_cli_provider.go`: Wire `parseStreamLine` hook and `DisplayOutput` extraction into shared execution flow - `internal/infrastructure/agents/claude_provider.go`: Implement `parseClaudeStreamLine` extracting `content_block_delta` text events - `internal/infrastructure/agents/gemini_provider.go`: Add stub `parseStreamLine` hook for future NDJSON extraction - `internal/infrastructure/agents/codex_provider.go`: Add stub `parseStreamLine` hook for future NDJSON extraction - `internal/infrastructure/agents/opencode_provider.go`: Add stub `parseStreamLine` hook for future NDJSON extraction - `internal/domain/workflow/agent_config.go`: Add `DisplayOutput` field (json:"-") to `AgentResult` - `internal/domain/workflow/context.go`: Add `DisplayOutput` field (json:"-") to `StepState` - `internal/domain/workflow/conversation.go`: Add `DisplayOutput` field (json:"-") to `ConversationResult` - `internal/application/execution_service.go`: Use `DisplayOutput` in `showStepOutputs`; clone options before injecting `output_format` - `internal/application/conversation_manager.go`: Propagate `DisplayOutput` from conversation result - `internal/interfaces/cli/run.go`: Pass output format context to execution service - `internal/infrastructure/agents/base_cli_provider_display_output_unit_test.go`: Add 629-line unit test suite for stream filter and base provider display output - `internal/infrastructure/agents/claude_provider_display_output_unit_test.go`: Add unit tests for Claude NDJSON parser - `internal/infrastructure/agents/gemini_provider_display_output_unit_test.go`: Add unit tests for Gemini display output stub - `internal/infrastructure/agents/codex_provider_display_output_unit_test.go`: Add unit tests for Codex display output stub - `internal/infrastructure/agents/opencode_provider_display_output_unit_test.go`: Add unit tests for OpenCode display output stub - `internal/infrastructure/agents/stream_filter_unit_test.go`: Add unit tests for `streamFilterWriter` - `internal/application/execution_service_display_output_test.go`: Add 424-line tests for display output cross-layer wiring - `internal/domain/workflow/context_json_test.go`: Verify `DisplayOutput` is excluded from JSON serialization - `pkg/interpolation/resolver_data_test.go`: Verify `DisplayOutput` not resolvable in template context - `tests/integration/agents/f082_display_matrix_test.go`: Add integration test matrix across all providers - `docs/user-guide/agent-steps.md`: Document `output_format` field and streaming display behavior - `docs/user-guide/commands.md`: Update run command docs with output mode behavior - `CHANGELOG.md`: Add F082 entry - `README.md`: Update feature list - `docs/README.md`: Update docs index Closes #311 --- CHANGELOG.md | 1 + CLAUDE.md | 12 +- README.md | 2 +- docs/README.md | 1 + docs/user-guide/agent-steps.md | 60 ++ docs/user-guide/commands.md | 6 +- internal/application/conversation_manager.go | 8 +- internal/application/execution_service.go | 25 +- .../execution_service_display_output_test.go | 424 ++++++++++++ internal/domain/workflow/agent_config.go | 1 + internal/domain/workflow/context.go | 3 + internal/domain/workflow/context_json_test.go | 299 +++++++++ internal/domain/workflow/conversation.go | 1 + .../agents/base_cli_provider.go | 42 +- ...e_cli_provider_display_output_unit_test.go | 629 ++++++++++++++++++ .../infrastructure/agents/claude_provider.go | 81 ++- .../agents/claude_provider_delegation_test.go | 5 +- ...laude_provider_display_output_unit_test.go | 119 ++++ .../claude_provider_stream_json_test.go | 11 +- .../infrastructure/agents/codex_provider.go | 45 +- ...codex_provider_display_output_unit_test.go | 93 +++ .../infrastructure/agents/gemini_provider.go | 49 +- ...emini_provider_display_output_unit_test.go | 88 +++ .../agents/gemini_provider_migration_test.go | 9 +- .../agents/gemini_provider_unit_test.go | 5 +- .../agents/opencode_provider.go | 77 ++- .../opencode_provider_delegation_test.go | 2 +- ...ncode_provider_display_output_unit_test.go | 89 +++ .../agents/opencode_provider_unit_test.go | 3 +- .../infrastructure/agents/stream_filter.go | 125 ++++ .../agents/stream_filter_unit_test.go | 223 +++++++ internal/interfaces/cli/run.go | 12 +- internal/interfaces/cli/run_internal_test.go | 118 +++- pkg/interpolation/resolver_data_test.go | 38 ++ .../agents/f082_display_matrix_test.go | 282 ++++++++ 35 files changed, 2900 insertions(+), 88 deletions(-) create mode 100644 internal/application/execution_service_display_output_test.go create mode 100644 internal/infrastructure/agents/base_cli_provider_display_output_unit_test.go create mode 100644 internal/infrastructure/agents/claude_provider_display_output_unit_test.go create mode 100644 internal/infrastructure/agents/codex_provider_display_output_unit_test.go create mode 100644 internal/infrastructure/agents/gemini_provider_display_output_unit_test.go create mode 100644 internal/infrastructure/agents/opencode_provider_display_output_unit_test.go create mode 100644 internal/infrastructure/agents/stream_filter.go create mode 100644 internal/infrastructure/agents/stream_filter_unit_test.go create mode 100644 tests/integration/agents/f082_display_matrix_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 1051aef..c097afe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- **F082**: Human-readable streaming output for agent steps — when running with `awf run --output streaming`, agent responses now display as clean text instead of raw NDJSON; `output_format` field controls filtering (text/none formats filter NDJSON, `json` format passes through raw); buffered mode (`--output buffered`) displays filtered text in post-execution summary; raw NDJSON always preserved in `state.Output` for template interpolation; `--output silent` remains silent regardless of `output_format`; per-provider extractors implemented for Claude (parses `content_block_delta` events) with stubs for Gemini/Codex/OpenCode - **F081**: Model validation by prefix/pattern for Gemini and Codex providers — Gemini validates that `model` starts with `gemini-` (enables use of any Gemini model without CLI updates); Codex validates `model` against prefixes `gpt-`, `codex-`, or o-series pattern (`o` followed by digit, e.g., `o1`, `o3-mini`); validation errors include format guidance to guide correction - **F078**: OpenCode `--model` flag support — `model` option in workflow YAML now passed as `--model ` to OpenCode CLI in both `Execute` and `ExecuteConversation`; OpenCode always passes `--format json` for structured output - **F077**: `dangerously_skip_permissions` support for Gemini (`--approval-mode=yolo`) and Codex (`--yolo`) providers — unified permission bypass key works across all three agent providers (Claude, Gemini, Codex) diff --git a/CLAUDE.md b/CLAUDE.md index adfc096..5df8430 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -217,8 +217,6 @@ func TestWorkflowValidation(t *testing.T) { ## Architecture Rules -- Document discovered runtime bugs in .specify/implementation/ISSUE/bug/ directory before implementation; prevents scope creep and enables separate tracking from test fixes -- Own timeout responsibility in application layer via context.WithTimeout; infrastructure adapters must respect context cancellation without enforcing additional timeouts - Evaluate step transitions before fallback behaviors; transitions take priority over OnSuccess, OnFailure, and ContinueOnError (ADR-001) - Use pointer types (*T) for optional config fields in infrastructure types; apply defaults during mapping to distinguish omitted from explicit zero values - Implement private per-provider extraction methods (no shared interface) when output formats diverge fundamentally; avoids premature abstraction and enables independent testing @@ -240,6 +238,10 @@ func TestWorkflowValidation(t *testing.T) { - Synchronize provider CLI flag changes across both implementation files and central options configuration (options.go); verify declarations and validation rules align - When extracting shared infrastructure behavior across multiple provider implementations, apply the delegation pattern uniformly; partial refactoring creates inconsistent ownership +- When wiring optional transformations across multiple execution paths (ExecuteConversation, runWorkflow, etc.), apply consistently to all paths; missing stubs in any path indicates incomplete cross-layer wiring + +- When adding hook fields to shared infrastructure types, implement (with stubs acceptable for future providers) across all concrete providers in the same layer; missing implementations in any provider blocks deployment + ## Common Pitfalls - Never block on I/O without context support; use goroutine+channel+select with buffered channel (cap 1) to enable graceful cancellation @@ -286,8 +288,6 @@ func TestWorkflowValidation(t *testing.T) { ## Test Conventions -- Write unit tests for prompt file validation, interpolation, and YAML mapping before integration tests; use table-driven tests for path resolution scenarios -- Never use switch statements to populate table-driven test variables; declare all fields in struct literals to prevent silent zero-value failures from missed case names - Write table-driven tests for inline error object parsing (message + status validation) before integration tests; use yamlStep.OnFailure field as 'any' type in test fixtures to validate both string and object forms - Use distinct file naming for unit vs integration tests: *_unit_test.go vs *_test.go; prevents error analysis tools from reporting incorrect file scopes - Never hardcode OS-specific values in test assertions (usernames, paths, shell names); use `os/user.Current()` or mock dependencies for reproducible tests across environments @@ -307,5 +307,9 @@ func TestWorkflowValidation(t *testing.T) { - When flipping integration test assertions for newly-enabled features, transition from 'not configured' errors to provider-level implementation errors; verify assertions change state, not disappear - Create separate test files for delegation patterns (*_delegation_test.go) to validate shared behavior independently from provider-specific unit tests +- When adding fields to internal state types (DisplayOutput, cache fields, etc.), write explicit tests verifying the field is NOT resolvable in template interpolation context; prevents accidental exposure of implementation details + +- Add BenchmarkXX functions for new I/O processing components; measure throughput, memory allocation, and verify capacity constraints (1MB buffer, etc.) are respected + ## Review Standards diff --git a/README.md b/README.md index 5660415..c126ade 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ A Go CLI tool for orchestrating AI agents (Claude, Gemini, Codex, OpenAI-Compati - **State Machine Execution** - Define workflows as state machines with conditional transitions based on exit codes, command output, or custom expressions - **Inline Error Handling** - Specify error messages and exit codes directly on steps without creating separate terminal states - **Agent Steps** - Invoke AI agents via CLI tools (Claude, Codex, Gemini) or direct HTTP (OpenAI, Ollama, vLLM, Groq) with prompt templates, response parsing, and accurate token tracking -- **Output Formatting for Agent Steps** - Automatically strip markdown code fences and validate JSON output +- **Output Formatting for Agent Steps** - Automatically strip markdown code fences and validate JSON output; human-readable streaming display controlled by `output_format` field (text vs raw NDJSON) - **External Prompt Files** - Load agent prompts from `.md` files with full template interpolation, helper functions, and local override support - **External Script Files** - Load commands from external script files with shebang-based interpreter dispatch, template interpolation, path resolution, and local override support - **Conversation Mode** - Multi-turn conversations with native session resume for CLI providers (`claude`, `codex`, `gemini`, `opencode`), automatic context window management for HTTP providers, mid-conversation context injection via `inject_context` field, and token tracking across all turns diff --git a/docs/README.md b/docs/README.md index 2b014f1..c35bee7 100644 --- a/docs/README.md +++ b/docs/README.md @@ -30,6 +30,7 @@ Learn how to use AWF effectively: - [Interactive Input Collection](user-guide/interactive-inputs.md) - Automatic prompting for missing workflow inputs - [Agent Steps](user-guide/agent-steps.md) - Invoke AI agents via CLI (Claude, Codex, Gemini) or HTTP APIs (OpenAI, Ollama, vLLM, Groq) - [Output Formatting](user-guide/agent-steps.md#output-formatting) - Automatic code fence stripping and JSON validation (`output_format: json|text`) + - [Streaming Output Display](user-guide/agent-steps.md#streaming-output-display) - Human-readable filtered output for `--output streaming` and `--output buffered` modes - [External Prompt Files](user-guide/agent-steps.md#external-prompt-files) - Load prompts from Markdown files with template interpolation - [Model Validation](user-guide/agent-steps.md#model-validation) - Provider-specific model name validation (Claude, Gemini, Codex) - [Conversation Mode](user-guide/conversation-steps.md) - Multi-turn conversations with native session resume for CLI providers and context window management diff --git a/docs/user-guide/agent-steps.md b/docs/user-guide/agent-steps.md index ad453a7..960ba28 100644 --- a/docs/user-guide/agent-steps.md +++ b/docs/user-guide/agent-steps.md @@ -540,6 +540,11 @@ process_response: ## Output Formatting +The `output_format` field serves two purposes: + +1. **Post-processing**: Strips markdown code fences and optionally validates JSON (F065) +2. **Display filtering**: Controls how agent responses appear on terminal during streaming and buffered execution (F082) + When an agent wraps its output in markdown code fences (common with many LLMs), use `output_format` to automatically strip the fences and optionally validate the content: ```yaml @@ -645,6 +650,61 @@ analyze: on_success: next ``` +### Streaming Output Display + +The `output_format` field also controls how agent responses appear on the terminal when running with `awf run --output streaming` or `--output buffered`: + +| `output_format` | Streaming Display | Buffered Display | Raw Storage | +|---|---|---|---| +| `text` (or omitted) | Human-readable filtered text | Filtered text in summary | Raw NDJSON | +| `json` | Raw NDJSON (unfiltered) | Raw NDJSON (unfiltered) | Raw NDJSON | + +#### Streaming Mode (`--output streaming`) + +When running with streaming output, agent responses display incrementally as they're generated: + +```bash +# Raw NDJSON appears on terminal (hard to read) +awf run code-review --output streaming +# Output: {"type":"content_block_delta",...}{"type":"content_block_delta",...} + +# Human-readable text with default output_format +awf run code-review --output streaming # output_format: text (or omitted) +# Output: The code has several issues... +``` + +**Filtering behavior:** +- `output_format: text` or omitted — Extracted text content displayed (filtered NDJSON) +- `output_format: json` — Raw NDJSON passed through unchanged + +#### Buffered Mode (`--output buffered`) + +When running with buffered output, the post-execution summary displays filtered text: + +```bash +awf run code-review --output buffered + +# With output_format: text (or omitted): +# Output of "analyze" step: +# The code has several issues... + +# With output_format: json: +# Output of "analyze" step: +# {"type":"content_block_delta",...} +``` + +#### Silent Mode (`--output silent`) + +Silent mode suppresses all display regardless of `output_format`: + +```bash +awf run code-review --output silent +# No output displayed (silent mode is absolute) +# state.Output still contains raw NDJSON for template interpolation +``` + +**Note:** `state.Output` always contains the raw NDJSON regardless of display filtering. Filtering only affects terminal display, not data storage. + ### Error Handling When `output_format: json` is specified but the output is invalid JSON: diff --git a/docs/user-guide/commands.md b/docs/user-guide/commands.md index c05a1eb..335b040 100644 --- a/docs/user-guide/commands.md +++ b/docs/user-guide/commands.md @@ -163,8 +163,10 @@ awf run [flags] | Mode | Description | |------|-------------| | `silent` | No command output displayed (default) | -| `streaming` | Real-time output with [OUT]/[ERR] prefixes | -| `buffered` | Show output after each step completes | +| `streaming` | Real-time output with [OUT]/[ERR] prefixes; for agent steps, displays human-readable text (or raw NDJSON if `output_format: json`) | +| `buffered` | Show output after each step completes; for agent steps, displays filtered text in post-execution summary (or raw NDJSON if `output_format: json`) | + +**Note:** For agent steps, the `output_format` field controls display filtering: `text` or omitted (default) shows human-readable output; `json` shows raw NDJSON. See [Output Formatting](agent-steps.md#streaming-output-display) for details. ### Examples diff --git a/internal/application/conversation_manager.go b/internal/application/conversation_manager.go index aca67ae..11263f9 100644 --- a/internal/application/conversation_manager.go +++ b/internal/application/conversation_manager.go @@ -221,10 +221,10 @@ func (m *ConversationManager) ExecuteConversation( return nil, err } - options := step.Agent.Options - if options == nil { - options = make(map[string]any) - } + // Clone options to preserve FR-009 immutability of step.Agent.Options, + // and inject output_format so baseCLIProvider can route display filtering + // identically between executeAgentStep and conversation mode (F082). + options := cloneAndInjectOutputFormat(step.Agent.Options, step.Agent.OutputFormat) if step.Agent.SystemPrompt != "" { options["system_prompt"] = step.Agent.SystemPrompt } diff --git a/internal/application/execution_service.go b/internal/application/execution_service.go index 907fb71..6f8aef2 100644 --- a/internal/application/execution_service.go +++ b/internal/application/execution_service.go @@ -2034,7 +2034,8 @@ func (s *ExecutionService) executeAgentStep( // Execute the agent s.logger.Debug("executing agent step", "step", step.Name, "provider", resolvedProvider) - result, execErr := provider.Execute(stepCtx, resolvedPrompt, step.Agent.Options, s.stdoutWriter, s.stderrWriter) + opts := cloneAndInjectOutputFormat(step.Agent.Options, step.Agent.OutputFormat) + result, execErr := provider.Execute(stepCtx, resolvedPrompt, opts, s.stdoutWriter, s.stderrWriter) // Record step state state := workflow.StepState{ @@ -2047,6 +2048,7 @@ func (s *ExecutionService) executeAgentStep( // Populate state from result if result != nil { state.Output = result.Output + state.DisplayOutput = result.DisplayOutput // AC5: JSON auto-parsed to states.step_name.Response state.Response = result.Response // AC6: Token usage in states.step_name.tokens_used @@ -2194,6 +2196,7 @@ func (s *ExecutionService) executeConversationStep( if result != nil { state.Output = result.Output + state.DisplayOutput = result.DisplayOutput state.Response = result.Response state.TokensUsed = result.TokensTotal state.Conversation = result.State @@ -2226,6 +2229,26 @@ func (s *ExecutionService) executeConversationStep( return s.resolveNextStep(step, intCtx, true) } +// cloneAndInjectOutputFormat shallow-clones opts and injects output_format as string. +// The original map is never mutated (FR-009). Precedence: an explicit +// options["output_format"] set by the user wins (display-only intent); otherwise +// the top-level step.Agent.OutputFormat is injected; otherwise defaults to text. +// This keeps F065 post-processing (top-level) decoupled from F082 display intent (options). +func cloneAndInjectOutputFormat(opts map[string]any, format workflow.OutputFormat) map[string]any { + cloned := make(map[string]any, len(opts)+2) + for k, v := range opts { + cloned[k] = v + } + if _, userSet := cloned["output_format"]; userSet { + return cloned + } + if format == workflow.OutputFormatNone { + format = workflow.OutputFormatText + } + cloned["output_format"] = string(format) + return cloned +} + // resolveOperationInputs resolves all string values in operation inputs via interpolation. func (s *ExecutionService) resolveOperationInputs( inputs map[string]any, diff --git a/internal/application/execution_service_display_output_test.go b/internal/application/execution_service_display_output_test.go new file mode 100644 index 0000000..d271dc3 --- /dev/null +++ b/internal/application/execution_service_display_output_test.go @@ -0,0 +1,424 @@ +package application_test + +import ( + "context" + "io" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/workflow" + testmocks "github.com/awf-project/cli/internal/testutil/mocks" +) + +// TestCloneAndInjectOutputFormat_OriginalMapNotMutated verifies that cloneAndInjectOutputFormat +// does not mutate the original options map when injecting output_format. +// This validates FR-009: the original map stays clean, preventing shared state corruption. +// Tested indirectly: when executeAgentStep calls the provider with injected options, +// the original step.Agent.Options remains unmodified. +func TestCloneAndInjectOutputFormat_OriginalMapNotMutated(t *testing.T) { + // Arrange: capture the options passed to the provider + var capturedOptions map[string]any + mockProvider := testmocks.NewMockAgentProvider("test-provider") + mockProvider.SetExecuteFunc(func( + ctx context.Context, + prompt string, + options map[string]any, + stdout, stderr io.Writer, + ) (*workflow.AgentResult, error) { + // Capture the options received by provider + capturedOptions = make(map[string]any) + for k, v := range options { + capturedOptions[k] = v + } + return &workflow.AgentResult{ + Output: "raw output", + DisplayOutput: "display output", + Tokens: 10, + }, nil + }) + + // Setup with a workflow that has options configured + wf := &workflow.Workflow{ + Name: "test", + Initial: "step1", + Steps: map[string]*workflow.Step{ + "step1": { + Name: "step1", + Type: workflow.StepTypeAgent, + Agent: &workflow.AgentConfig{ + Provider: "test-provider", + Prompt: "test prompt", + OutputFormat: workflow.OutputFormatText, + Options: map[string]any{ + "model": "test-model", + "temperature": 0.7, + }, + }, + OnSuccess: "done", + }, + "done": { + Name: "done", + Type: workflow.StepTypeTerminal, + Status: workflow.TerminalSuccess, + }, + }, + } + + svc, _ := NewTestHarness(t).WithWorkflow("test", wf).Build() + registry := testmocks.NewMockAgentRegistry() + registry.Register(mockProvider) + svc.SetAgentRegistry(registry) + + // Act: execute the workflow + ctx := context.Background() + _, err := svc.Run(ctx, "test", nil) + + // Assert: execution succeeds + require.NoError(t, err) + + // Assert: provider received output_format in its options (injected copy) + require.NotNil(t, capturedOptions) + assert.Equal(t, "text", capturedOptions["output_format"]) + assert.Equal(t, "test-model", capturedOptions["model"]) + assert.Equal(t, 0.7, capturedOptions["temperature"]) + + // Assert: CRITICAL - original step.Agent.Options still has NO output_format + // This proves cloneAndInjectOutputFormat created a clone before mutation + originalOptions := wf.Steps["step1"].Agent.Options + assert.NotContains(t, originalOptions, "output_format", + "original options map must not be mutated - cloneAndInjectOutputFormat should clone before injecting") + + // Assert: original options still have their original values + assert.Equal(t, "test-model", originalOptions["model"]) + assert.Equal(t, 0.7, originalOptions["temperature"]) +} + +// TestExecuteAgentStep_CopiesDisplayOutputToState verifies DisplayOutput from the provider +// result is correctly copied into the step state during agent step execution. +// This validates FR-004: the application layer propagates DisplayOutput from AgentResult to StepState. +func TestExecuteAgentStep_CopiesDisplayOutputToState(t *testing.T) { + tests := []struct { + name string + displayOutput string + rawOutput string + }{ + { + name: "copies_non_empty_display_output", + displayOutput: "Extracted text from agent provider", + rawOutput: `{"type":"content_block_delta","delta":{"type":"text_delta"}}`, + }, + { + name: "copies_empty_display_output", + displayOutput: "", + rawOutput: `{"type":"content_block_delta"}`, + }, + { + name: "copies_multiline_display_output", + displayOutput: "Line 1\nLine 2\nLine 3", + rawOutput: "raw line 1\nraw line 2\nraw line 3", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Arrange: setup execution service with agent step + svc, _ := NewTestHarness(t). + WithWorkflow("test", buildTestWorkflow()). + Build() + + // Mock provider that returns both raw and display output + mockProvider := testmocks.NewMockAgentProvider("test-provider") + mockProvider.SetExecuteFunc(func( + ctx context.Context, + prompt string, + options map[string]any, + stdout, stderr io.Writer, + ) (*workflow.AgentResult, error) { + return &workflow.AgentResult{ + Output: tt.rawOutput, + DisplayOutput: tt.displayOutput, + Response: map[string]any{"text": "response"}, + Tokens: 100, + }, nil + }) + + // Setup registry + registry := testmocks.NewMockAgentRegistry() + registry.Register(mockProvider) + svc.SetAgentRegistry(registry) + + // Act: execute the workflow + ctx := context.Background() + execCtx, err := svc.Run(ctx, "test", nil) + + // Assert: execution succeeds + require.NoError(t, err) + require.NotNil(t, execCtx) + + // Assert: step state contains DisplayOutput copied from result + stepState, exists := execCtx.States["agent-step"] + require.True(t, exists, "agent-step state must exist") + assert.Equal(t, tt.displayOutput, stepState.DisplayOutput, + "DisplayOutput should be copied from provider result to step state") + assert.Equal(t, tt.rawOutput, stepState.Output, + "raw Output should remain unchanged") + }) + } +} + +// TestExecuteAgentStep_OutputFormatInjectedIntoOptions verifies that output_format +// from step.Agent.OutputFormat is injected into the cloned options map passed to the provider. +// This validates FR-009 (cloning) and the bridging of OutputFormat to options["output_format"]. +func TestExecuteAgentStep_OutputFormatInjectedIntoOptions(t *testing.T) { + tests := []struct { + name string + outputFormat workflow.OutputFormat + expectedValue string + }{ + { + name: "injects_text_format", + outputFormat: workflow.OutputFormatText, + expectedValue: "text", + }, + { + name: "injects_json_format", + outputFormat: workflow.OutputFormatJSON, + expectedValue: "json", + }, + { + // Unspecified OutputFormatNone defaults to "text" so downstream CLI + // providers and the F082 display-matrix pipeline always see an + // explicit format value. + name: "defaults_none_to_text", + outputFormat: workflow.OutputFormatNone, + expectedValue: "text", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var capturedOptions map[string]any + mockProvider := testmocks.NewMockAgentProvider("test-provider") + mockProvider.SetExecuteFunc(func( + ctx context.Context, + prompt string, + options map[string]any, + stdout, stderr io.Writer, + ) (*workflow.AgentResult, error) { + capturedOptions = make(map[string]any) + for k, v := range options { + capturedOptions[k] = v + } + // Return valid JSON for json format test, otherwise plain text + output := `{"type":"response","text":"raw output"}` + if options["output_format"] != "json" { + output = "raw output" + } + return &workflow.AgentResult{ + Output: output, + DisplayOutput: "display output", + Tokens: 10, + }, nil + }) + + wf := &workflow.Workflow{ + Name: "test", + Initial: "step1", + Steps: map[string]*workflow.Step{ + "step1": { + Name: "step1", + Type: workflow.StepTypeAgent, + Agent: &workflow.AgentConfig{ + Provider: "test-provider", + Prompt: "test prompt", + OutputFormat: tt.outputFormat, + Options: map[string]any{ + "model": "test-model", + }, + }, + OnSuccess: "done", + }, + "done": { + Name: "done", + Type: workflow.StepTypeTerminal, + Status: workflow.TerminalSuccess, + }, + }, + } + + svc, _ := NewTestHarness(t).WithWorkflow("test", wf).Build() + registry := testmocks.NewMockAgentRegistry() + registry.Register(mockProvider) + svc.SetAgentRegistry(registry) + + ctx := context.Background() + _, err := svc.Run(ctx, "test", nil) + + require.NoError(t, err) + require.NotNil(t, capturedOptions) + assert.Equal(t, tt.expectedValue, capturedOptions["output_format"], + "output_format should be injected as string into options") + }) + } +} + +// TestExecuteAgentStep_WithNilOptions verifies that cloneAndInjectOutputFormat +// handles nil options map gracefully, creating a new map with just output_format. +func TestExecuteAgentStep_WithNilOptions(t *testing.T) { + var capturedOptions map[string]any + mockProvider := testmocks.NewMockAgentProvider("test-provider") + mockProvider.SetExecuteFunc(func( + ctx context.Context, + prompt string, + options map[string]any, + stdout, stderr io.Writer, + ) (*workflow.AgentResult, error) { + capturedOptions = options + return &workflow.AgentResult{ + Output: "raw output", + DisplayOutput: "display output", + Tokens: 10, + }, nil + }) + + wf := &workflow.Workflow{ + Name: "test", + Initial: "step1", + Steps: map[string]*workflow.Step{ + "step1": { + Name: "step1", + Type: workflow.StepTypeAgent, + Agent: &workflow.AgentConfig{ + Provider: "test-provider", + Prompt: "test prompt", + OutputFormat: workflow.OutputFormatText, + Options: nil, + }, + OnSuccess: "done", + }, + "done": { + Name: "done", + Type: workflow.StepTypeTerminal, + Status: workflow.TerminalSuccess, + }, + }, + } + + svc, _ := NewTestHarness(t).WithWorkflow("test", wf).Build() + registry := testmocks.NewMockAgentRegistry() + registry.Register(mockProvider) + svc.SetAgentRegistry(registry) + + ctx := context.Background() + _, err := svc.Run(ctx, "test", nil) + + require.NoError(t, err) + require.NotNil(t, capturedOptions) + assert.Equal(t, "text", capturedOptions["output_format"]) + assert.Len(t, capturedOptions, 1, "options should contain only output_format when original is nil") +} + +// TestExecuteAgentStep_PreservesMultipleOptionsWithFormat verifies that when +// cloneAndInjectOutputFormat is called, all original options are preserved in the clone +// along with the injected output_format, and the original map is untouched. +func TestExecuteAgentStep_PreservesMultipleOptionsWithFormat(t *testing.T) { + var capturedOptions map[string]any + mockProvider := testmocks.NewMockAgentProvider("test-provider") + mockProvider.SetExecuteFunc(func( + ctx context.Context, + prompt string, + options map[string]any, + stdout, stderr io.Writer, + ) (*workflow.AgentResult, error) { + capturedOptions = make(map[string]any) + for k, v := range options { + capturedOptions[k] = v + } + return &workflow.AgentResult{ + Output: "raw output", + DisplayOutput: "display output", + Tokens: 10, + }, nil + }) + + wf := &workflow.Workflow{ + Name: "test", + Initial: "step1", + Steps: map[string]*workflow.Step{ + "step1": { + Name: "step1", + Type: workflow.StepTypeAgent, + Agent: &workflow.AgentConfig{ + Provider: "test-provider", + Prompt: "test prompt", + OutputFormat: workflow.OutputFormatText, + Options: map[string]any{ + "model": "claude-3-sonnet", + "temperature": 0.9, + "max_tokens": 4096, + }, + }, + OnSuccess: "done", + }, + "done": { + Name: "done", + Type: workflow.StepTypeTerminal, + Status: workflow.TerminalSuccess, + }, + }, + } + + svc, _ := NewTestHarness(t).WithWorkflow("test", wf).Build() + registry := testmocks.NewMockAgentRegistry() + registry.Register(mockProvider) + svc.SetAgentRegistry(registry) + + originalOptions := wf.Steps["step1"].Agent.Options + originalOptionCount := len(originalOptions) + + ctx := context.Background() + _, err := svc.Run(ctx, "test", nil) + + require.NoError(t, err) + + // Verify provider received all options plus injected output_format + require.NotNil(t, capturedOptions) + assert.Len(t, capturedOptions, originalOptionCount+1, + "cloned options should have all original options plus output_format") + assert.Equal(t, "claude-3-sonnet", capturedOptions["model"]) + assert.Equal(t, 0.9, capturedOptions["temperature"]) + assert.Equal(t, 4096, capturedOptions["max_tokens"]) + assert.Equal(t, "text", capturedOptions["output_format"]) + + // Verify original options map is unchanged + assert.Len(t, originalOptions, originalOptionCount, + "original options map must not be mutated") + assert.NotContains(t, originalOptions, "output_format", + "original options should not have output_format") +} + +// buildTestWorkflow creates a simple workflow with one agent step. +func buildTestWorkflow() *workflow.Workflow { + return &workflow.Workflow{ + Name: "test", + Initial: "agent-step", + Steps: map[string]*workflow.Step{ + "agent-step": { + Name: "agent-step", + Type: workflow.StepTypeAgent, + Agent: &workflow.AgentConfig{ + Provider: "test-provider", + Prompt: "test prompt", + }, + OnSuccess: "done", + }, + "done": { + Name: "done", + Type: workflow.StepTypeTerminal, + Status: workflow.TerminalSuccess, + }, + }, + } +} diff --git a/internal/domain/workflow/agent_config.go b/internal/domain/workflow/agent_config.go index cdb7f43..b5471e6 100644 --- a/internal/domain/workflow/agent_config.go +++ b/internal/domain/workflow/agent_config.go @@ -136,6 +136,7 @@ func (c *AgentConfig) GetEffectivePrompt() string { type AgentResult struct { Provider string // provider name used Output string // raw output from agent CLI + DisplayOutput string // filtered human-readable output for display (empty when output_format=json or no parser) Response map[string]any // parsed JSON response (if applicable) Tokens int // token usage (if reported by provider) TokensEstimated bool // true if Tokens is an estimation, false if actual count diff --git a/internal/domain/workflow/context.go b/internal/domain/workflow/context.go index 11bf478..e038d1b 100644 --- a/internal/domain/workflow/context.go +++ b/internal/domain/workflow/context.go @@ -45,6 +45,9 @@ type StepState struct { // C069: Structured output from custom step types, accessible via {{states.step_name.Data.key}} Data map[string]any + + // F082: Filtered human-readable output for display; not persisted (derivable from Output). + DisplayOutput string `json:"-"` } // LoopContext holds the current loop iteration state. diff --git a/internal/domain/workflow/context_json_test.go b/internal/domain/workflow/context_json_test.go index 63740e7..ff8432a 100644 --- a/internal/domain/workflow/context_json_test.go +++ b/internal/domain/workflow/context_json_test.go @@ -326,6 +326,32 @@ func TestStepState_JSONField_NumberTypes(t *testing.T) { assert.Equal(t, float64(1e10), jsonObj["large"]) } +// Component: T001 +// Feature: F082 +func TestStepState_DisplayOutput_AbsentFromMarshaledJSON(t *testing.T) { + state := workflow.StepState{ + Name: "agent-step", + Status: workflow.StatusCompleted, + Output: `{"type":"result","text":"hello"}`, + DisplayOutput: "hello", + } + + data, err := json.Marshal(state) + require.NoError(t, err) + + var raw map[string]any + err = json.Unmarshal(data, &raw) + require.NoError(t, err) + + assert.NotContains(t, raw, "DisplayOutput", "DisplayOutput must not appear in marshaled JSON") + + var decoded workflow.StepState + err = json.Unmarshal(data, &decoded) + require.NoError(t, err) + + assert.Empty(t, decoded.DisplayOutput, "DisplayOutput must be empty after round-trip JSON deserialization") +} + func TestStepState_JSONField_ArrayTypes(t *testing.T) { state := workflow.StepState{ Name: "arrays", @@ -353,3 +379,276 @@ func TestStepState_JSONField_ArrayTypes(t *testing.T) { assert.Equal(t, true, mixedArr[2]) assert.Nil(t, mixedArr[3]) } + +// Component: T001 +// Feature: F082 +// AgentResult DisplayOutput tests + +func TestAgentResult_DisplayOutput_HappyPath(t *testing.T) { + result := workflow.NewAgentResult("claude") + result.Output = `{"type":"content_block_delta","delta":{"type":"text_delta","text":"hello"}}` + result.DisplayOutput = "hello" + + assert.Equal(t, "hello", result.DisplayOutput) + assert.NotEmpty(t, result.Output) + assert.NotEqual(t, result.Output, result.DisplayOutput) +} + +func TestAgentResult_DisplayOutput_EmptyWhenNoParser(t *testing.T) { + result := workflow.NewAgentResult("claude") + result.Output = `raw ndjson` + result.DisplayOutput = "" + + assert.Empty(t, result.DisplayOutput) + assert.NotEmpty(t, result.Output) +} + +func TestAgentResult_DisplayOutput_JSONPassthrough(t *testing.T) { + result := workflow.NewAgentResult("claude") + result.Output = `{"data": "raw"}` + result.DisplayOutput = "" + + assert.Empty(t, result.DisplayOutput, "DisplayOutput should be empty for output_format: json") + assert.Equal(t, `{"data": "raw"}`, result.Output) +} + +func TestAgentResult_DisplayOutput_MultilineText(t *testing.T) { + result := workflow.NewAgentResult("claude") + result.Output = `line1\nline2\nline3` + result.DisplayOutput = "line1\nline2\nline3" + + assert.Contains(t, result.DisplayOutput, "line1") + assert.Contains(t, result.DisplayOutput, "line3") +} + +func TestAgentResult_DisplayOutput_Preserved(t *testing.T) { + tests := []struct { + name string + displayOutput string + }{ + {name: "short text", displayOutput: "test"}, + {name: "long text", displayOutput: "a very long response from the agent with multiple lines and content"}, + {name: "empty", displayOutput: ""}, + {name: "special chars", displayOutput: "Hello 世界 🚀"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := workflow.NewAgentResult("claude") + result.DisplayOutput = tt.displayOutput + + assert.Equal(t, tt.displayOutput, result.DisplayOutput) + }) + } +} + +// Component: T001 +// Feature: F082 +// ConversationResult DisplayOutput tests + +func TestConversationResult_DisplayOutput_HappyPath(t *testing.T) { + result := workflow.NewConversationResult("claude") + result.Output = `{"type":"content_block_delta"}` + result.DisplayOutput = "extracted text from conversation" + + assert.Equal(t, "extracted text from conversation", result.DisplayOutput) + assert.NotEmpty(t, result.Output) +} + +func TestConversationResult_DisplayOutput_EmptyByDefault(t *testing.T) { + result := workflow.NewConversationResult("claude") + result.Output = "raw response" + + assert.Empty(t, result.DisplayOutput, "DisplayOutput should be empty on creation") + assert.NotEmpty(t, result.Output) +} + +func TestConversationResult_DisplayOutput_CanBeSetToAnyValue(t *testing.T) { + result := workflow.NewConversationResult("gemini") + testText := "Multi-line\nconversation\nresponse" + result.DisplayOutput = testText + + assert.Equal(t, testText, result.DisplayOutput) +} + +func TestConversationResult_DisplayOutput_IndependentFromOutput(t *testing.T) { + result := workflow.NewConversationResult("claude") + result.Output = `{"ndjson": "format"}` + result.DisplayOutput = "filtered text" + + assert.NotEqual(t, result.Output, result.DisplayOutput) + assert.Contains(t, result.Output, "ndjson") + assert.NotContains(t, result.DisplayOutput, "ndjson") +} + +// Component: T001 +// Feature: F082 +// StepState DisplayOutput serialization tests + +func TestStepState_DisplayOutput_EmptyAfterCreation(t *testing.T) { + state := workflow.StepState{ + Name: "agent-step", + Status: workflow.StatusCompleted, + } + + assert.Empty(t, state.DisplayOutput) +} + +func TestStepState_DisplayOutput_CanBePopulated(t *testing.T) { + state := workflow.StepState{ + Name: "agent-step", + Status: workflow.StatusCompleted, + Output: `{"text":"raw"}`, + DisplayOutput: "filtered text", + } + + assert.Equal(t, "filtered text", state.DisplayOutput) + assert.NotEqual(t, state.Output, state.DisplayOutput) +} + +func TestStepState_DisplayOutput_PreservedInMemory(t *testing.T) { + state := workflow.StepState{ + Name: "test", + Status: workflow.StatusRunning, + DisplayOutput: "test display content", + } + + assert.Equal(t, "test display content", state.DisplayOutput) +} + +func TestStepState_DisplayOutput_NotPersisted_RoundTrip(t *testing.T) { + original := workflow.StepState{ + Name: "test-step", + Status: workflow.StatusCompleted, + Output: "raw output", + DisplayOutput: "display output that should not persist", + } + + data, err := json.Marshal(original) + require.NoError(t, err) + + var decoded workflow.StepState + err = json.Unmarshal(data, &decoded) + require.NoError(t, err) + + assert.Empty(t, decoded.DisplayOutput, "DisplayOutput must be empty after round-trip (json:\"-\")") + assert.Equal(t, "raw output", decoded.Output, "Output must be preserved") +} + +func TestStepState_DisplayOutput_ExcludedFromJSONKeys(t *testing.T) { + state := workflow.StepState{ + Name: "step", + Status: workflow.StatusCompleted, + Output: "raw", + DisplayOutput: "display", + Response: map[string]any{"key": "value"}, + } + + data, err := json.Marshal(state) + require.NoError(t, err) + + var raw map[string]any + err = json.Unmarshal(data, &raw) + require.NoError(t, err) + + assert.NotContains(t, raw, "DisplayOutput", "DisplayOutput must be excluded from JSON") + assert.Contains(t, raw, "Output", "Output must be included in JSON") + assert.Contains(t, raw, "Response", "Response must be included in JSON") +} + +func TestStepState_DisplayOutput_WithOtherFields(t *testing.T) { + state := workflow.StepState{ + Name: "complex-step", + Status: workflow.StatusCompleted, + Output: "raw output content", + DisplayOutput: "display content", + Stderr: "error logs", + ExitCode: 0, + Response: map[string]any{ + "result": "success", + }, + } + + data, err := json.Marshal(state) + require.NoError(t, err) + + var decoded workflow.StepState + err = json.Unmarshal(data, &decoded) + require.NoError(t, err) + + assert.Empty(t, decoded.DisplayOutput, "DisplayOutput must not persist") + assert.Equal(t, "raw output content", decoded.Output, "Output must persist") + assert.Equal(t, "error logs", decoded.Stderr, "Stderr must persist") + assert.Equal(t, 0, decoded.ExitCode, "ExitCode must persist") + assert.NotNil(t, decoded.Response) +} + +func TestStepState_DisplayOutput_InExecutionContext(t *testing.T) { + ctx := workflow.NewExecutionContext("test-id", "test-workflow") + + state := workflow.StepState{ + Name: "agent-step", + Status: workflow.StatusCompleted, + Output: `raw ndjson`, + DisplayOutput: "human readable text", + } + + ctx.SetStepState("agent-step", state) + + retrieved, ok := ctx.GetStepState("agent-step") + require.True(t, ok) + + assert.Equal(t, "human readable text", retrieved.DisplayOutput) + assert.Equal(t, `raw ndjson`, retrieved.Output) +} + +func TestStepState_DisplayOutput_LargeContent(t *testing.T) { + largeDisplay := "" + for i := 0; i < 1000; i++ { + largeDisplay += "line\n" + } + + state := workflow.StepState{ + Name: "large-step", + Status: workflow.StatusCompleted, + DisplayOutput: largeDisplay, + } + + assert.NotEmpty(t, state.DisplayOutput) + assert.True(t, len(state.DisplayOutput) > 1000) + + data, err := json.Marshal(state) + require.NoError(t, err) + + var decoded workflow.StepState + err = json.Unmarshal(data, &decoded) + require.NoError(t, err) + + assert.Empty(t, decoded.DisplayOutput) +} + +func TestStepState_DisplayOutput_WithConversationState(t *testing.T) { + state := workflow.StepState{ + Name: "conversation-step", + Status: workflow.StatusCompleted, + Output: `conversation raw`, + DisplayOutput: "conversation display", + Conversation: &workflow.ConversationState{ + TotalTurns: 2, + TotalTokens: 100, + }, + } + + assert.Equal(t, "conversation display", state.DisplayOutput) + assert.NotNil(t, state.Conversation) + + data, err := json.Marshal(state) + require.NoError(t, err) + + var decoded workflow.StepState + err = json.Unmarshal(data, &decoded) + require.NoError(t, err) + + assert.Empty(t, decoded.DisplayOutput, "DisplayOutput excluded from JSON") + assert.NotNil(t, decoded.Conversation, "Conversation state must persist") +} diff --git a/internal/domain/workflow/conversation.go b/internal/domain/workflow/conversation.go index 7287a31..3094053 100644 --- a/internal/domain/workflow/conversation.go +++ b/internal/domain/workflow/conversation.go @@ -221,6 +221,7 @@ type ConversationResult struct { Provider string // provider name used State *ConversationState // final conversation state Output string // final assistant response (last turn) + DisplayOutput string // filtered human-readable output for display (empty when output_format=json or no parser) Response map[string]any // parsed JSON response from last turn (if applicable) TokensInput int // total input tokens across all turns TokensOutput int // total output tokens across all turns diff --git a/internal/infrastructure/agents/base_cli_provider.go b/internal/infrastructure/agents/base_cli_provider.go index a579aed..0808e2b 100644 --- a/internal/infrastructure/agents/base_cli_provider.go +++ b/internal/infrastructure/agents/base_cli_provider.go @@ -14,13 +14,14 @@ import ( ) // cliProviderHooks captures provider-specific behavior as function values. -// Optional hooks (extractTextContent, validateOptions) may be nil. +// Optional hooks (extractTextContent, validateOptions, parseStreamLine) may be nil. type cliProviderHooks struct { buildExecuteArgs func(prompt string, options map[string]any) ([]string, error) buildConversationArgs func(state *workflow.ConversationState, prompt string, options map[string]any) ([]string, error) extractSessionID func(output string) (string, error) extractTextContent func(output string) string validateOptions func(options map[string]any) error + parseStreamLine LineExtractor } // baseCLIProvider encapsulates the shared Execute and ExecuteConversation @@ -54,6 +55,19 @@ func combineOutput(stdoutBytes, stderrBytes []byte) string { return string(output) } +func wantsRawDisplay(options map[string]any) bool { + v, ok := getStringOption(options, "output_format") + return ok && v == "json" +} + +func (b *baseCLIProvider) applyStreamFilter(stdout io.Writer, rawDisplay bool) (io.Writer, *StreamFilterWriter) { + if b.hooks.parseStreamLine != nil && !rawDisplay && stdout != nil { + f := NewStreamFilterWriter(stdout, b.hooks.parseStreamLine) + return f, f + } + return stdout, nil +} + // execute runs the provider-specific CLI command and returns the AgentResult, // the raw output string (for Response field population by callers), and any error. func (b *baseCLIProvider) execute(ctx context.Context, prompt string, options map[string]any, stdout, stderr io.Writer) (*workflow.AgentResult, string, error) { @@ -78,8 +92,13 @@ func (b *baseCLIProvider) execute(ctx context.Context, prompt string, options ma return nil, "", err } - stdoutBytes, stderrBytes, err := b.executor.Run(ctx, b.binary, stdout, stderr, args...) + rawDisplay := wantsRawDisplay(options) + wrappedStdout, filter := b.applyStreamFilter(stdout, rawDisplay) + stdoutBytes, stderrBytes, err := b.executor.Run(ctx, b.binary, wrappedStdout, stderr, args...) completedAt := time.Now() + if filter != nil { + _ = filter.Flush() + } if err != nil { return nil, "", fmt.Errorf("%s execution failed: %w", b.name, err) @@ -92,9 +111,15 @@ func (b *baseCLIProvider) execute(ctx context.Context, prompt string, options ma outputStr = " " } + var displayOutput string + if !rawDisplay { + displayOutput = extractDisplayText(rawOutput, b.hooks.parseStreamLine) + } + result := &workflow.AgentResult{ Provider: b.name, Output: outputStr, + DisplayOutput: displayOutput, StartedAt: startedAt, CompletedAt: completedAt, Tokens: estimateTokens(outputStr), @@ -141,8 +166,13 @@ func (b *baseCLIProvider) executeConversation(ctx context.Context, state *workfl return nil, "", fmt.Errorf("failed to add user turn: %w", addErr) } - stdoutBytes, stderrBytes, err := b.executor.Run(ctx, b.binary, stdout, stderr, args...) + rawDisplay := wantsRawDisplay(options) + wrappedStdout, filter := b.applyStreamFilter(stdout, rawDisplay) + stdoutBytes, stderrBytes, err := b.executor.Run(ctx, b.binary, wrappedStdout, stderr, args...) completedAt := time.Now() + if filter != nil { + _ = filter.Flush() + } if err != nil { return nil, "", fmt.Errorf("%s execution failed: %w", b.name, err) @@ -176,10 +206,16 @@ func (b *baseCLIProvider) executeConversation(ctx context.Context, state *workfl inputTokens := estimateInputTokens(workingState.Turns, 1) + var displayOutput string + if !rawDisplay { + displayOutput = extractDisplayText(rawOutput, b.hooks.parseStreamLine) + } + result := &workflow.ConversationResult{ Provider: b.name, State: workingState, Output: outputStr, + DisplayOutput: displayOutput, TokensInput: inputTokens, TokensOutput: assistantTurn.Tokens, TokensTotal: inputTokens + assistantTurn.Tokens, diff --git a/internal/infrastructure/agents/base_cli_provider_display_output_unit_test.go b/internal/infrastructure/agents/base_cli_provider_display_output_unit_test.go new file mode 100644 index 0000000..2c5a5d8 --- /dev/null +++ b/internal/infrastructure/agents/base_cli_provider_display_output_unit_test.go @@ -0,0 +1,629 @@ +package agents + +import ( + "bytes" + "context" + "strings" + "testing" + "time" + + "github.com/awf-project/cli/internal/domain/workflow" + "github.com/awf-project/cli/internal/infrastructure/logger" + "github.com/awf-project/cli/internal/testutil/mocks" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBaseCLIProvider_Execute_WithParseStreamLineHook(t *testing.T) { + tests := []struct { + name string + parseStreamLine LineExtractor + rawOutput string + expectDisplayOut string + expectResultErr bool + }{ + { + name: "with parseStreamLine hook - extracts text", + parseStreamLine: func(line []byte) string { + // Simple parser: extract anything after "TEXT:" + return "extracted text" + }, + rawOutput: "raw output line", + expectDisplayOut: "extracted text", + expectResultErr: false, + }, + { + name: "with nil parseStreamLine - empty DisplayOutput", + parseStreamLine: nil, + rawOutput: "raw output", + expectDisplayOut: "", + expectResultErr: false, + }, + { + name: "parseStreamLine returning empty string - empty DisplayOutput", + parseStreamLine: func(line []byte) string { + return "" + }, + rawOutput: "raw output", + expectDisplayOut: "", + expectResultErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte(tt.rawOutput), []byte("")) + + hooks := cliProviderHooks{ + buildExecuteArgs: func(prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + parseStreamLine: tt.parseStreamLine, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + + result, rawOut, err := provider.execute(context.Background(), "test prompt", map[string]any{}, nil, nil) + + if tt.expectResultErr { + assert.Error(t, err) + return + } + + require.NoError(t, err) + require.NotNil(t, result) + + assert.Equal(t, tt.rawOutput, rawOut) + assert.Equal(t, tt.expectDisplayOut, result.DisplayOutput) + }) + } +} + +func TestBaseCLIProvider_Execute_RawOutputPreserved(t *testing.T) { + parseFunc := func(line []byte) string { + return "filtered" + } + + rawOutput := `{"type":"content_block_delta","delta":{"type":"text_delta","text":"hello"}}` + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte(rawOutput), []byte("")) + + hooks := cliProviderHooks{ + buildExecuteArgs: func(prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + parseStreamLine: parseFunc, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + result, _, err := provider.execute(context.Background(), "test prompt", map[string]any{}, nil, nil) + + require.NoError(t, err) + require.NotNil(t, result) + + assert.Equal(t, rawOutput, result.Output) +} + +func TestBaseCLIProvider_ExecuteConversation_WithParseStreamLineHook(t *testing.T) { + tests := []struct { + name string + parseStreamLine LineExtractor + rawOutput string + expectDisplayOut string + expectResultErr bool + }{ + { + name: "conversation with parseStreamLine hook", + parseStreamLine: func(line []byte) string { + return "conversation response" + }, + rawOutput: "raw conversation output", + expectDisplayOut: "conversation response", + expectResultErr: false, + }, + { + name: "conversation with nil parseStreamLine", + parseStreamLine: nil, + rawOutput: "raw conversation output", + expectDisplayOut: "", + expectResultErr: false, + }, + { + name: "conversation with parseStreamLine returning empty", + parseStreamLine: func(line []byte) string { + return "" + }, + rawOutput: "raw output", + expectDisplayOut: "", + expectResultErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte(tt.rawOutput), []byte("")) + + hooks := cliProviderHooks{ + buildConversationArgs: func(state *workflow.ConversationState, prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + extractSessionID: func(output string) (string, error) { + return "", nil + }, + parseStreamLine: tt.parseStreamLine, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + + state := workflow.NewConversationState("") + result, rawOut, err := provider.executeConversation(context.Background(), state, "test prompt", map[string]any{}, nil, nil) + + if tt.expectResultErr { + assert.Error(t, err) + return + } + + require.NoError(t, err) + require.NotNil(t, result) + + assert.Equal(t, tt.rawOutput, rawOut) + assert.Equal(t, tt.expectDisplayOut, result.DisplayOutput) + }) + } +} + +func TestBaseCLIProvider_ExecuteConversation_RawOutputPreserved(t *testing.T) { + parseFunc := func(line []byte) string { + return "filtered conversation" + } + + rawOutput := `{"session_id":"sess123"}` + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte(rawOutput), []byte("")) + + hooks := cliProviderHooks{ + buildConversationArgs: func(state *workflow.ConversationState, prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + extractSessionID: func(output string) (string, error) { + return "sess123", nil + }, + parseStreamLine: parseFunc, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + state := workflow.NewConversationState("") + result, _, err := provider.executeConversation(context.Background(), state, "test prompt", map[string]any{}, nil, nil) + + require.NoError(t, err) + require.NotNil(t, result) + + assert.Equal(t, rawOutput, result.Output) +} + +func TestBaseCLIProvider_Execute_WithWriter_AndParseStreamLine(t *testing.T) { + parseFunc := func(line []byte) string { + return "filtered" + } + + rawOutput := "raw line" + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte(rawOutput), []byte("")) + + hooks := cliProviderHooks{ + buildExecuteArgs: func(prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + parseStreamLine: parseFunc, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + + var stdoutBuf bytes.Buffer + result, _, err := provider.execute(context.Background(), "test prompt", map[string]any{}, &stdoutBuf, nil) + + require.NoError(t, err) + require.NotNil(t, result) + + assert.Equal(t, parseFunc([]byte(rawOutput)), result.DisplayOutput) +} + +func TestBaseCLIProvider_Execute_MultilineOutput_WithParseStreamLine(t *testing.T) { + parseFunc := func(line []byte) string { + // Extract only lines containing "KEEP" + if bytes.Contains(line, []byte("KEEP")) { + return string(line) + } + return "" + } + + rawOutput := `KEEP this line +SKIP this line +KEEP this too` + + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte(rawOutput), []byte("")) + + hooks := cliProviderHooks{ + buildExecuteArgs: func(prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + parseStreamLine: parseFunc, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + result, _, err := provider.execute(context.Background(), "test prompt", map[string]any{}, nil, nil) + + require.NoError(t, err) + require.NotNil(t, result) + + assert.Equal(t, rawOutput, result.Output) +} + +func TestBaseCLIProvider_Execute_TimestampsSet(t *testing.T) { + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte("test output"), []byte("")) + + hooks := cliProviderHooks{ + buildExecuteArgs: func(prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + parseStreamLine: func(line []byte) string { + return "parsed" + }, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + + beforeExec := time.Now() + result, _, err := provider.execute(context.Background(), "test prompt", map[string]any{}, nil, nil) + afterExec := time.Now() + + require.NoError(t, err) + require.NotNil(t, result) + + assert.True(t, result.StartedAt.After(beforeExec.Add(-time.Second))) + assert.True(t, result.CompletedAt.Before(afterExec.Add(time.Second))) + assert.True(t, result.CompletedAt.After(result.StartedAt)) +} + +func TestBaseCLIProvider_ExecuteConversation_TimestampsSet(t *testing.T) { + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte("test conversation output"), []byte("")) + + hooks := cliProviderHooks{ + buildConversationArgs: func(state *workflow.ConversationState, prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + extractSessionID: func(output string) (string, error) { + return "sess123", nil + }, + parseStreamLine: func(line []byte) string { + return "parsed" + }, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + state := workflow.NewConversationState("") + + beforeExec := time.Now() + result, _, err := provider.executeConversation(context.Background(), state, "test prompt", map[string]any{}, nil, nil) + afterExec := time.Now() + + require.NoError(t, err) + require.NotNil(t, result) + + assert.True(t, result.StartedAt.After(beforeExec.Add(-time.Second))) + assert.True(t, result.CompletedAt.Before(afterExec.Add(time.Second))) + assert.True(t, result.CompletedAt.After(result.StartedAt)) +} + +func TestCLIProviderHooks_ParseStreamLineField(t *testing.T) { + tests := []struct { + name string + parseFunc LineExtractor + expectNil bool + callableFunc bool + }{ + { + name: "parseStreamLine can be nil", + parseFunc: nil, + expectNil: true, + callableFunc: false, + }, + { + name: "parseStreamLine can be set to a function", + parseFunc: func(line []byte) string { + return "test" + }, + expectNil: false, + callableFunc: true, + }, + { + name: "parseStreamLine with empty function", + parseFunc: func(line []byte) string { return "" }, + expectNil: false, + callableFunc: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + hooks := cliProviderHooks{ + parseStreamLine: tt.parseFunc, + } + + if tt.expectNil { + assert.Nil(t, hooks.parseStreamLine) + } else { + assert.NotNil(t, hooks.parseStreamLine) + } + + if tt.callableFunc && tt.parseFunc != nil { + result := tt.parseFunc([]byte("test input")) + assert.NotEmpty(t, result) + } + }) + } +} + +// T008: Scenario 1 - execute() + parseStreamLine + output_format=text +// Should wrap stdout with filter, populate DisplayOutput, preserve raw Output +func TestBaseCLIProvider_Execute_Scenario1_TextFormatWithFilter(t *testing.T) { + parseFunc := func(line []byte) string { + // Extract only lines containing "RESPONSE" + if bytes.Contains(line, []byte("RESPONSE")) { + return "extracted response" + } + return "" + } + + rawOutput := `{"type":"content_block_delta"} +{"text":"RESPONSE: hello"} +{"type":"content_block_end"}` + + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte(rawOutput), []byte("")) + + hooks := cliProviderHooks{ + buildExecuteArgs: func(prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + parseStreamLine: parseFunc, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + + var stdoutBuf bytes.Buffer + options := map[string]any{"output_format": "text"} + result, _, err := provider.execute(context.Background(), "test prompt", options, &stdoutBuf, nil) + + require.NoError(t, err) + require.NotNil(t, result) + + assert.Equal(t, rawOutput, result.Output, "Output should contain raw NDJSON") + assert.NotEmpty(t, result.DisplayOutput, "DisplayOutput should be populated with filtered text") + assert.True(t, strings.Contains(result.DisplayOutput, "extracted response"), "DisplayOutput should contain extracted text") +} + +// T008: Scenario 2 - execute() + parseStreamLine + output_format=json +// Should NOT wrap stdout, keep DisplayOutput empty, preserve raw Output +func TestBaseCLIProvider_Execute_Scenario2_JSONFormatNoFilter(t *testing.T) { + parseFunc := func(line []byte) string { + return "should not be called" + } + + rawOutput := `{"type":"content_block_delta","delta":{"type":"text_delta","text":"hello"}}` + + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte(rawOutput), []byte("")) + + hooks := cliProviderHooks{ + buildExecuteArgs: func(prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + parseStreamLine: parseFunc, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + + var stdoutBuf bytes.Buffer + options := map[string]any{"output_format": "json"} + result, _, err := provider.execute(context.Background(), "test prompt", options, &stdoutBuf, nil) + + require.NoError(t, err) + require.NotNil(t, result) + + assert.Equal(t, rawOutput, result.Output, "Output should contain raw NDJSON") + assert.Empty(t, result.DisplayOutput, "DisplayOutput should be empty when output_format is json") +} + +// T008: Scenario 3 - execute() + nil parseStreamLine +// Should NOT wrap stdout, keep DisplayOutput empty, preserve raw Output +func TestBaseCLIProvider_Execute_Scenario3_NilParserNoFilter(t *testing.T) { + rawOutput := `{"type":"content_block_delta","delta":{"type":"text_delta","text":"hello"}}` + + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte(rawOutput), []byte("")) + + hooks := cliProviderHooks{ + buildExecuteArgs: func(prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + parseStreamLine: nil, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + + var stdoutBuf bytes.Buffer + options := map[string]any{} + result, _, err := provider.execute(context.Background(), "test prompt", options, &stdoutBuf, nil) + + require.NoError(t, err) + require.NotNil(t, result) + + assert.Equal(t, rawOutput, result.Output, "Output should contain raw NDJSON") + assert.Empty(t, result.DisplayOutput, "DisplayOutput should be empty when parseStreamLine is nil") +} + +// T008: Scenario 4 - executeConversation() + parseStreamLine + output_format=text +// Should wrap stdout with filter, populate DisplayOutput, preserve raw Output +func TestBaseCLIProvider_ExecuteConversation_Scenario1_TextFormatWithFilter(t *testing.T) { + parseFunc := func(line []byte) string { + if bytes.Contains(line, []byte("init")) { + return "session initialized" + } + return "" + } + + rawOutput := `{"type":"init","session_id":"sess123"} +{"type":"message","text":"response"}` + + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte(rawOutput), []byte("")) + + hooks := cliProviderHooks{ + buildConversationArgs: func(state *workflow.ConversationState, prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + extractSessionID: func(output string) (string, error) { + return "sess123", nil + }, + parseStreamLine: parseFunc, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + + var stdoutBuf bytes.Buffer + state := workflow.NewConversationState("") + options := map[string]any{"output_format": "text"} + result, _, err := provider.executeConversation(context.Background(), state, "test prompt", options, &stdoutBuf, nil) + + require.NoError(t, err) + require.NotNil(t, result) + + assert.Equal(t, rawOutput, result.Output, "Output should contain raw NDJSON") + assert.NotEmpty(t, result.DisplayOutput, "DisplayOutput should be populated with filtered text") +} + +// T008: Scenario 5 - executeConversation() + parseStreamLine + output_format=json +// Should NOT wrap stdout, keep DisplayOutput empty, preserve raw Output +func TestBaseCLIProvider_ExecuteConversation_Scenario2_JSONFormatNoFilter(t *testing.T) { + parseFunc := func(line []byte) string { + return "should not be called" + } + + rawOutput := `{"type":"init","session_id":"sess123"}` + + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte(rawOutput), []byte("")) + + hooks := cliProviderHooks{ + buildConversationArgs: func(state *workflow.ConversationState, prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + extractSessionID: func(output string) (string, error) { + return "sess123", nil + }, + parseStreamLine: parseFunc, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + + var stdoutBuf bytes.Buffer + state := workflow.NewConversationState("") + options := map[string]any{"output_format": "json"} + result, _, err := provider.executeConversation(context.Background(), state, "test prompt", options, &stdoutBuf, nil) + + require.NoError(t, err) + require.NotNil(t, result) + + assert.Equal(t, rawOutput, result.Output, "Output should contain raw NDJSON") + assert.Empty(t, result.DisplayOutput, "DisplayOutput should be empty when output_format is json") +} + +// T008: Scenario 6 - executeConversation() + nil parseStreamLine +// Should NOT wrap stdout, keep DisplayOutput empty, preserve raw Output +func TestBaseCLIProvider_ExecuteConversation_Scenario3_NilParserNoFilter(t *testing.T) { + rawOutput := `{"type":"init","session_id":"sess123"}` + + mockExecutor := mocks.NewMockCLIExecutor() + mockExecutor.SetOutput([]byte(rawOutput), []byte("")) + + hooks := cliProviderHooks{ + buildConversationArgs: func(state *workflow.ConversationState, prompt string, options map[string]any) ([]string, error) { + return []string{"--model", "test"}, nil + }, + extractSessionID: func(output string) (string, error) { + return "sess123", nil + }, + parseStreamLine: nil, + } + + provider := newBaseCLIProvider("test", "test-bin", mockExecutor, logger.NopLogger{}, hooks) + + var stdoutBuf bytes.Buffer + state := workflow.NewConversationState("") + options := map[string]any{} + result, _, err := provider.executeConversation(context.Background(), state, "test prompt", options, &stdoutBuf, nil) + + require.NoError(t, err) + require.NotNil(t, result) + + assert.Equal(t, rawOutput, result.Output, "Output should contain raw NDJSON") + assert.Empty(t, result.DisplayOutput, "DisplayOutput should be empty when parseStreamLine is nil") +} + +// Test wantsRawDisplay helper function behavior +func TestWantsRawDisplay_Helper(t *testing.T) { + tests := []struct { + name string + options map[string]any + expected bool + }{ + { + name: "nil options returns false", + options: nil, + expected: false, + }, + { + name: "empty options returns false", + options: map[string]any{}, + expected: false, + }, + { + name: "output_format=json returns true", + options: map[string]any{"output_format": "json"}, + expected: true, + }, + { + name: "output_format=text returns false", + options: map[string]any{"output_format": "text"}, + expected: false, + }, + { + name: "output_format=none returns false", + options: map[string]any{"output_format": "none"}, + expected: false, + }, + { + name: "output_format not a string returns false", + options: map[string]any{"output_format": 123}, + expected: false, + }, + { + name: "missing output_format returns false", + options: map[string]any{"other_key": "value"}, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := wantsRawDisplay(tt.options) + assert.Equal(t, tt.expected, result) + }) + } +} diff --git a/internal/infrastructure/agents/claude_provider.go b/internal/infrastructure/agents/claude_provider.go index 2422162..ec338e4 100644 --- a/internal/infrastructure/agents/claude_provider.go +++ b/internal/infrastructure/agents/claude_provider.go @@ -1,6 +1,7 @@ package agents import ( + "bytes" "context" "encoding/json" "errors" @@ -57,6 +58,7 @@ func (p *ClaudeProvider) newBase() *baseCLIProvider { extractSessionID: p.extractSessionID, extractTextContent: p.extractTextFromJSON, validateOptions: validateClaudeOptions, + parseStreamLine: p.parseClaudeStreamLine, }) } @@ -66,24 +68,24 @@ func (p *ClaudeProvider) Execute(ctx context.Context, prompt string, options map return nil, err } - userFormat, userFormatSet := getStringOption(options, "output_format") + userFormat, _ := getStringOption(options, "output_format") - // When stream-json was forced internally (user didn't set output_format), - // extract clean text from the NDJSON result event for downstream steps. - if !userFormatSet { + // Claude CLI is always invoked with --output-format stream-json (NDJSON). + // For text intent (default or explicit), extract the clean assistant text + // from the result event so {{states.step.Output}} is human-readable. + // For json intent, keep rawOutput in state.Output and populate Response + // with the parsed result event. + if userFormat == "json" || userFormat == "stream-json" { + if jsonResp := p.extractResultEvent(rawOutput); jsonResp != nil { + result.Response = jsonResp + } + } else { if extracted := p.extractTextFromJSON(rawOutput); extracted != "" { result.Output = extracted result.Tokens = estimateTokens(extracted) } } - // Populate Response only when user explicitly requested structured output. - if userFormatSet && (userFormat == "json" || userFormat == "stream-json") { - if jsonResp := p.extractResultEvent(rawOutput); jsonResp != nil { - result.Response = jsonResp - } - } - return result, nil } @@ -126,15 +128,12 @@ func (p *ClaudeProvider) buildExecuteArgs(prompt string, options map[string]any) args = append(args, "--model", model) } - // Force stream-json (NDJSON events) unless the user explicitly set output_format: text. + // Always force stream-json NDJSON at the CLI level so the F082 display filter + // and text extraction have a consistent wire format. The user-facing + // output_format (text vs json) is resolved in the application layer and the + // display filter — not by toggling the Claude CLI's --output-format flag. // stream-json requires --verbose in -p mode for live streaming. - userFormat, userFormatSet := getStringOption(options, "output_format") - switch { - case !userFormatSet, userFormat == "json", userFormat == "stream-json": - args = append(args, "--output-format", "stream-json", "--verbose") - default: - args = append(args, "--output-format", userFormat) - } + args = append(args, "--output-format", "stream-json", "--verbose") if tools, ok := getStringOption(options, "allowed_tools"); ok && tools != "" { args = append(args, "--allowedTools", tools) @@ -262,3 +261,47 @@ func (p *ClaudeProvider) extractTextFromJSON(output string) string { } return "" } + +// parseClaudeStreamLine extracts displayable text from Claude CLI's NDJSON stream-json +// output. Claude CLI (claude -p --output-format stream-json --verbose) emits one JSON +// object per line with these top-level event types: +// - "system" — session/hook metadata (ignored) +// - "assistant" — assistant turn, with message.content[] blocks ({type,text}) +// - "rate_limit_event" — throttling notice (ignored) +// - "result" — final aggregated result with .result string (ignored here; +// consumed by extractResultEvent for AgentResult.Output) +// +// We surface only "assistant" text blocks so the user sees the live reply. Tool-use +// blocks, thinking blocks, and everything else are skipped to keep the stream readable. +func (p *ClaudeProvider) parseClaudeStreamLine(line []byte) string { + // Escape literal null bytes before unmarshaling: Go's json package rejects + // bare 0x00 in string values even though they round-trip as \u0000. + line = bytes.ReplaceAll(line, []byte{0x00}, []byte(`\u0000`)) + + var evt struct { + Type string `json:"type"` + Message *struct { + Content []struct { + Type string `json:"type"` + Text string `json:"text"` + } `json:"content"` + } `json:"message"` + } + if err := json.Unmarshal(line, &evt); err != nil { + return "" + } + if evt.Type != "assistant" || evt.Message == nil { + return "" + } + + var out strings.Builder + for _, block := range evt.Message.Content { + if block.Type == "text" && block.Text != "" { + if out.Len() > 0 { + out.WriteByte('\n') + } + out.WriteString(block.Text) + } + } + return out.String() +} diff --git a/internal/infrastructure/agents/claude_provider_delegation_test.go b/internal/infrastructure/agents/claude_provider_delegation_test.go index 6965c6b..d2c0b1a 100644 --- a/internal/infrastructure/agents/claude_provider_delegation_test.go +++ b/internal/infrastructure/agents/claude_provider_delegation_test.go @@ -321,9 +321,12 @@ func TestClaudeProvider_OutputFormatMapping(t *testing.T) { inputFmt string expected string }{ + // Claude CLI is always invoked with stream-json regardless of the + // user-facing output_format; the F082 display filter and application + // layer decide text vs json at a higher level. {"json maps to stream-json", "json", "stream-json"}, {"stream-json stays stream-json", "stream-json", "stream-json"}, - {"text stays text", "text", "text"}, + {"text also maps to stream-json (F082)", "text", "stream-json"}, } for _, tt := range tests { diff --git a/internal/infrastructure/agents/claude_provider_display_output_unit_test.go b/internal/infrastructure/agents/claude_provider_display_output_unit_test.go new file mode 100644 index 0000000..8b9b783 --- /dev/null +++ b/internal/infrastructure/agents/claude_provider_display_output_unit_test.go @@ -0,0 +1,119 @@ +package agents + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// T004: parseClaudeStreamLine extracts displayable text from Claude CLI's stream-json +// output. Claude CLI (claude -p --output-format stream-json --verbose) emits one JSON +// object per line; the "assistant" event carries message.content[] blocks whose {type,text} +// entries contain the text to surface. Other event types (system, result, rate_limit_event) +// are ignored — result.result is consumed separately by extractResultEvent. + +func TestClaudeProvider_parseClaudeStreamLine_HappyPath(t *testing.T) { + provider := NewClaudeProvider() + + tests := []struct { + name string + line []byte + wantText string + }{ + { + name: "assistant message with single text block", + line: []byte(`{"type":"assistant","message":{"content":[{"type":"text","text":"Hello, world!"}]}}`), + wantText: "Hello, world!", + }, + { + name: "assistant message with special characters", + line: []byte(`{"type":"assistant","message":{"content":[{"type":"text","text":"Response {braces} [brackets]"}]}}`), + wantText: "Response {braces} [brackets]", + }, + { + name: "assistant message with multiple text blocks joined by newline", + line: []byte(`{"type":"assistant","message":{"content":[{"type":"text","text":"first"},{"type":"text","text":"second"}]}}`), + wantText: "first\nsecond", + }, + { + name: "tool_use blocks are skipped, text blocks preserved", + line: []byte(`{"type":"assistant","message":{"content":[{"type":"tool_use","id":"t1"},{"type":"text","text":"after tool"}]}}`), + wantText: "after tool", + }, + { + name: "empty text block returns empty", + line: []byte(`{"type":"assistant","message":{"content":[{"type":"text","text":""}]}}`), + wantText: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := provider.parseClaudeStreamLine(tt.line) + assert.Equal(t, tt.wantText, got) + }) + } +} + +func TestClaudeProvider_parseClaudeStreamLine_ErrorPaths(t *testing.T) { + provider := NewClaudeProvider() + + tests := []struct { + name string + line []byte + wantText string + }{ + { + name: "malformed JSON", + line: []byte(`{"type":"assistant","message":{"content":[{"type":"text","text":"incomplete`), + wantText: "", + }, + { + name: "system event (ignored)", + line: []byte(`{"type":"system","subtype":"hook_started","session_id":"abc"}`), + wantText: "", + }, + { + name: "result event (ignored, consumed separately)", + line: []byte(`{"type":"result","result":"final answer"}`), + wantText: "", + }, + { + name: "rate_limit_event (ignored)", + line: []byte(`{"type":"rate_limit_event","reset_at":"2026-04-13T22:00:00Z"}`), + wantText: "", + }, + { + name: "assistant without message field", + line: []byte(`{"type":"assistant"}`), + wantText: "", + }, + { + name: "assistant with empty content array", + line: []byte(`{"type":"assistant","message":{"content":[]}}`), + wantText: "", + }, + { + name: "assistant with only tool_use blocks", + line: []byte(`{"type":"assistant","message":{"content":[{"type":"tool_use","id":"t1"}]}}`), + wantText: "", + }, + { + name: "empty line", + line: []byte(``), + wantText: "", + }, + { + name: "plain text (not JSON)", + line: []byte(`this is not JSON`), + wantText: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := provider.parseClaudeStreamLine(tt.line) + assert.Equal(t, tt.wantText, got) + }) + } +} diff --git a/internal/infrastructure/agents/claude_provider_stream_json_test.go b/internal/infrastructure/agents/claude_provider_stream_json_test.go index 6dc019b..a303460 100644 --- a/internal/infrastructure/agents/claude_provider_stream_json_test.go +++ b/internal/infrastructure/agents/claude_provider_stream_json_test.go @@ -47,11 +47,14 @@ func TestClaudeProvider_Execute_OutputFormatMapping(t *testing.T) { wantVerbose: true, }, { - name: "explicit text format is respected (no stream-json override)", + // F082: Claude CLI is always invoked with stream-json; the text vs + // json distinction is handled by the application-layer display filter, + // not by toggling Claude's --output-format flag. + name: "explicit text format also maps to stream-json (F082)", options: map[string]any{"output_format": "text"}, - mockOutput: []byte("test output"), - wantFormatFlag: "text", - wantVerbose: false, + mockOutput: ndjson, + wantFormatFlag: "stream-json", + wantVerbose: true, }, } diff --git a/internal/infrastructure/agents/codex_provider.go b/internal/infrastructure/agents/codex_provider.go index d7622f5..e88041c 100644 --- a/internal/infrastructure/agents/codex_provider.go +++ b/internal/infrastructure/agents/codex_provider.go @@ -2,6 +2,7 @@ package agents import ( "context" + "encoding/json" "errors" "fmt" "io" @@ -48,14 +49,27 @@ func (p *CodexProvider) newBase() *baseCLIProvider { buildConversationArgs: p.buildConversationArgs, extractSessionID: p.extractSessionID, validateOptions: validateCodexOptions, + parseStreamLine: p.parseCodexStreamLine, }) } func (p *CodexProvider) Execute(ctx context.Context, prompt string, options map[string]any, stdout, stderr io.Writer) (*workflow.AgentResult, error) { - result, _, err := p.base.execute(ctx, prompt, options, stdout, stderr) + result, rawOutput, err := p.base.execute(ctx, prompt, options, stdout, stderr) if err != nil { return nil, err } + + // Codex CLI is always invoked with `exec --json` (NDJSON). For text intent, + // aggregate assistant message content for state.Output so downstream + // interpolation ({{states.step.Output}}) is human-readable (F082). + userFormat, _ := getStringOption(options, "output_format") + if userFormat != "json" && userFormat != "stream-json" { + if extracted := extractDisplayText(rawOutput, p.parseCodexStreamLine); extracted != "" { + result.Output = extracted + result.Tokens = estimateTokens(extracted) + } + } + return result, nil } @@ -163,3 +177,32 @@ func isValidCodexModel(model string) bool { // o-series: "o" followed by a digit (e.g., o1, o3, o4-mini); rejects "ollama", "oracle" return len(model) >= 2 && model[0] == 'o' && model[1] >= '0' && model[1] <= '9' } + +// parseCodexStreamLine extracts displayable assistant text from Codex CLI's JSON +// output (`codex exec --json`). Codex emits one JSON object per line with top-level +// types including: +// - "thread.started" — {thread_id} (ignored; session consumed separately) +// - "turn.started" — (ignored) +// - "item.completed" — {item:{item_type,text}} (surface assistant_message.text) +// - "turn.completed" — (ignored) +// - "error" — {message} (ignored; stderr path already shows it) +// +// Only `item.completed` with `item.item_type=="assistant_message"` is surfaced so +// the live display shows the final reply text. Reasoning items, tool calls, and +// status events are skipped. +func (p *CodexProvider) parseCodexStreamLine(line []byte) string { + var evt struct { + Type string `json:"type"` + Item struct { + ItemType string `json:"item_type"` + Text string `json:"text"` + } `json:"item"` + } + if err := json.Unmarshal(line, &evt); err != nil { + return "" + } + if evt.Type != "item.completed" || evt.Item.ItemType != "assistant_message" { + return "" + } + return evt.Item.Text +} diff --git a/internal/infrastructure/agents/codex_provider_display_output_unit_test.go b/internal/infrastructure/agents/codex_provider_display_output_unit_test.go new file mode 100644 index 0000000..4dad1d9 --- /dev/null +++ b/internal/infrastructure/agents/codex_provider_display_output_unit_test.go @@ -0,0 +1,93 @@ +package agents + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// parseCodexStreamLine extracts assistant text from Codex CLI's `exec --json` +// output. Only `item.completed` events with `item.item_type:"assistant_message"` +// are surfaced. + +func TestCodexProvider_parseCodexStreamLine_HappyPath(t *testing.T) { + provider := NewCodexProvider() + + tests := []struct { + name string + line []byte + wantText string + }{ + { + name: "assistant_message item.completed surfaces text", + line: []byte(`{"type":"item.completed","item":{"item_type":"assistant_message","text":"hello"}}`), + wantText: "hello", + }, + { + name: "assistant_message with multi-line text preserved", + line: []byte(`{"type":"item.completed","item":{"item_type":"assistant_message","text":"line1\nline2"}}`), + wantText: "line1\nline2", + }, + { + name: "assistant_message with special characters", + line: []byte(`{"type":"item.completed","item":{"item_type":"assistant_message","text":"Response {braces} [brackets]"}}`), + wantText: "Response {braces} [brackets]", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := provider.parseCodexStreamLine(tt.line) + assert.Equal(t, tt.wantText, got) + }) + } +} + +func TestCodexProvider_parseCodexStreamLine_ErrorPaths(t *testing.T) { + provider := NewCodexProvider() + + tests := []struct { + name string + line []byte + }{ + { + name: "thread.started is skipped", + line: []byte(`{"type":"thread.started","thread_id":"019d8872"}`), + }, + { + name: "turn.started is skipped", + line: []byte(`{"type":"turn.started"}`), + }, + { + name: "turn.completed is skipped", + line: []byte(`{"type":"turn.completed"}`), + }, + { + name: "error event is skipped", + line: []byte(`{"type":"error","message":"Reconnecting..."}`), + }, + { + name: "reasoning item is skipped", + line: []byte(`{"type":"item.completed","item":{"item_type":"reasoning","text":"thinking"}}`), + }, + { + name: "tool_call item is skipped", + line: []byte(`{"type":"item.completed","item":{"item_type":"tool_call","name":"read_file"}}`), + }, + { + name: "malformed JSON returns empty", + line: []byte(`{"type":"item.completed","item":{"item_type":`), + }, + { + name: "empty line returns empty", + line: []byte(``), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := provider.parseCodexStreamLine(tt.line) + assert.Equal(t, "", got) + }) + } +} diff --git a/internal/infrastructure/agents/gemini_provider.go b/internal/infrastructure/agents/gemini_provider.go index 921aee1..f2a3dbf 100644 --- a/internal/infrastructure/agents/gemini_provider.go +++ b/internal/infrastructure/agents/gemini_provider.go @@ -2,6 +2,7 @@ package agents import ( "context" + "encoding/json" "errors" "fmt" "io" @@ -44,6 +45,7 @@ func (p *GeminiProvider) newBase() *baseCLIProvider { buildConversationArgs: p.buildConversationArgs, extractSessionID: p.extractSessionID, validateOptions: validateGeminiOptions, + parseStreamLine: p.parseGeminiStreamLine, }) } @@ -71,8 +73,19 @@ func (p *GeminiProvider) Execute(ctx context.Context, prompt string, options map return nil, err } - if jsonResp := tryParseJSONResponse(rawOutput); jsonResp != nil { - result.Response = jsonResp + // Gemini CLI is always invoked with --output-format stream-json. + // For text intent (default), aggregate assistant content for state.Output; + // for json intent, keep raw NDJSON and expose parsed result in Response. + userFormat, _ := getStringOption(options, "output_format") + if userFormat == "json" || userFormat == "stream-json" { + if jsonResp := tryParseJSONResponse(rawOutput); jsonResp != nil { + result.Response = jsonResp + } + } else { + if extracted := extractDisplayText(rawOutput, p.parseGeminiStreamLine); extracted != "" { + result.Output = extracted + result.Tokens = estimateTokens(extracted) + } } return result, nil @@ -104,12 +117,9 @@ func (p *GeminiProvider) buildExecuteArgs(prompt string, options map[string]any) if model, ok := getStringOption(options, "model"); ok { args = append([]string{"--model", model}, args...) } - if outputFormat, ok := getStringOption(options, "output_format"); ok { - if outputFormat == "json" { - outputFormat = "stream-json" - } - args = append([]string{"--output-format", outputFormat}, args...) - } + // Always force stream-json NDJSON at the CLI level so the F082 display filter + // and text extraction have a consistent wire format (F082, aligned with Claude). + args = append([]string{"--output-format", "stream-json"}, args...) if skipPerms, ok := getBoolOption(options, "dangerously_skip_permissions"); ok && skipPerms { args = append([]string{"--approval-mode=yolo"}, args...) } @@ -168,3 +178,26 @@ func (p *GeminiProvider) extractSessionID(output string) (string, error) { } return str, nil } + +// parseGeminiStreamLine extracts displayable assistant text from Gemini CLI's +// stream-json output. Gemini CLI (`gemini --output-format stream-json -p`) emits +// one JSON object per line with these top-level types: +// - "init" — {session_id, model} (ignored) +// - "message" — {role, content, delta?} (surface role=="assistant") +// - "result" — {status, stats} (ignored) +// +// Only assistant messages are surfaced. User echoes and metadata are skipped. +func (p *GeminiProvider) parseGeminiStreamLine(line []byte) string { + var evt struct { + Type string `json:"type"` + Role string `json:"role"` + Content string `json:"content"` + } + if err := json.Unmarshal(line, &evt); err != nil { + return "" + } + if evt.Type != "message" || evt.Role != "assistant" { + return "" + } + return evt.Content +} diff --git a/internal/infrastructure/agents/gemini_provider_display_output_unit_test.go b/internal/infrastructure/agents/gemini_provider_display_output_unit_test.go new file mode 100644 index 0000000..9929107 --- /dev/null +++ b/internal/infrastructure/agents/gemini_provider_display_output_unit_test.go @@ -0,0 +1,88 @@ +package agents + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// parseGeminiStreamLine extracts assistant text from Gemini CLI's stream-json +// output. Only `message` events with `role:"assistant"` are surfaced. + +func TestGeminiProvider_parseGeminiStreamLine_HappyPath(t *testing.T) { + provider := NewGeminiProvider() + + tests := []struct { + name string + line []byte + wantText string + }{ + { + name: "assistant message surfaces content", + line: []byte(`{"type":"message","timestamp":"2026-04-13T20:04:45.219Z","role":"assistant","content":"hello","delta":true}`), + wantText: "hello", + }, + { + name: "assistant content with special chars preserved", + line: []byte(`{"type":"message","role":"assistant","content":"Response {braces} [brackets]"}`), + wantText: "Response {braces} [brackets]", + }, + { + name: "assistant delta chunk surfaces content", + line: []byte(`{"type":"message","role":"assistant","content":"partial ","delta":true}`), + wantText: "partial ", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := provider.parseGeminiStreamLine(tt.line) + assert.Equal(t, tt.wantText, got) + }) + } +} + +func TestGeminiProvider_parseGeminiStreamLine_ErrorPaths(t *testing.T) { + provider := NewGeminiProvider() + + tests := []struct { + name string + line []byte + }{ + { + name: "user message (echo) is skipped", + line: []byte(`{"type":"message","role":"user","content":"reply with: ok"}`), + }, + { + name: "init event is skipped", + line: []byte(`{"type":"init","session_id":"abc","model":"auto-gemini-3"}`), + }, + { + name: "result event is skipped", + line: []byte(`{"type":"result","status":"success","stats":{"total_tokens":100}}`), + }, + { + name: "assistant missing role is skipped", + line: []byte(`{"type":"message","content":"orphan"}`), + }, + { + name: "malformed JSON returns empty", + line: []byte(`{"type":"message","role":"assistant","content":`), + }, + { + name: "empty line returns empty", + line: []byte(``), + }, + { + name: "plain text returns empty", + line: []byte(`plain text`), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := provider.parseGeminiStreamLine(tt.line) + assert.Equal(t, "", got) + }) + } +} diff --git a/internal/infrastructure/agents/gemini_provider_migration_test.go b/internal/infrastructure/agents/gemini_provider_migration_test.go index d966537..346a2af 100644 --- a/internal/infrastructure/agents/gemini_provider_migration_test.go +++ b/internal/infrastructure/agents/gemini_provider_migration_test.go @@ -272,14 +272,14 @@ func TestGeminiProvider_Migration_ExecuteConversation_CLIError(t *testing.T) { } // TestGeminiProvider_Migration_BuildExecuteArgs validates hook method exists -// and constructs the -p argument pair. +// and forces stream-json NDJSON for consistent F082 display/text extraction. func TestGeminiProvider_Migration_BuildExecuteArgs(t *testing.T) { provider := NewGeminiProvider() args, err := provider.buildExecuteArgs("test prompt", nil) assert.NoError(t, err) - assert.Equal(t, []string{"-p", "test prompt"}, args) + assert.Equal(t, []string{"--output-format", "stream-json", "-p", "test prompt"}, args) } // TestGeminiProvider_Migration_BuildConversationArgs validates conversation args hook: @@ -414,14 +414,15 @@ func TestGeminiProvider_Migration_Execute_OutputWithStderr(t *testing.T) { assert.Equal(t, "stdout contentstderr content", result.Output) } -// TestGeminiProvider_Migration_Execute_JSONResponse validates JSON response parsing. +// TestGeminiProvider_Migration_Execute_JSONResponse validates JSON response parsing +// when the caller explicitly requests output_format: json (F082 intent routing). func TestGeminiProvider_Migration_Execute_JSONResponse(t *testing.T) { mockExec := mocks.NewMockCLIExecutor() jsonOutput := []byte(`{"result":"success","data":{"value":42}}`) mockExec.SetOutput(jsonOutput, nil) provider := NewGeminiProviderWithOptions(WithGeminiExecutor(mockExec)) - result, err := provider.Execute(context.Background(), "test", nil, nil, nil) + result, err := provider.Execute(context.Background(), "test", map[string]any{"output_format": "json"}, nil, nil) require.NoError(t, err) require.NotNil(t, result) diff --git a/internal/infrastructure/agents/gemini_provider_unit_test.go b/internal/infrastructure/agents/gemini_provider_unit_test.go index 652f428..3e96bd9 100644 --- a/internal/infrastructure/agents/gemini_provider_unit_test.go +++ b/internal/infrastructure/agents/gemini_provider_unit_test.go @@ -113,7 +113,10 @@ func TestGeminiProvider_Execute_JSONParsing(t *testing.T) { mockExec.SetOutput(tt.mockStdout, nil) provider := NewGeminiProviderWithOptions(WithGeminiExecutor(mockExec)) - result, err := provider.Execute(context.Background(), "test", nil, nil, nil) + // F082: Response is only populated when caller explicitly asks for + // output_format: json. Auto-detection from raw output no longer happens + // in text intent (default). + result, err := provider.Execute(context.Background(), "test", map[string]any{"output_format": "json"}, nil, nil) require.NoError(t, err) require.NotNil(t, result) diff --git a/internal/infrastructure/agents/opencode_provider.go b/internal/infrastructure/agents/opencode_provider.go index 0d526c8..5561877 100644 --- a/internal/infrastructure/agents/opencode_provider.go +++ b/internal/infrastructure/agents/opencode_provider.go @@ -2,6 +2,7 @@ package agents import ( "context" + "encoding/json" "errors" "fmt" "io" @@ -50,6 +51,7 @@ func (p *OpenCodeProvider) newBase() *baseCLIProvider { buildConversationArgs: p.buildConversationArgs, extractSessionID: p.extractSessionID, validateOptions: validateOpenCodeOptions, + parseStreamLine: p.parseOpencodeStreamLine, }) } @@ -60,8 +62,19 @@ func (p *OpenCodeProvider) Execute(ctx context.Context, prompt string, options m return nil, err } - if jsonResp := tryParseJSONResponse(rawOutput); jsonResp != nil { - result.Response = jsonResp + // OpenCode CLI is always invoked with --format json (NDJSON). For text intent, + // aggregate assistant text parts for state.Output; for json intent, expose the + // parsed result via Response (F082). + userFormat, _ := getStringOption(options, "output_format") + if userFormat == "json" || userFormat == "stream-json" { + if jsonResp := tryParseJSONResponse(rawOutput); jsonResp != nil { + result.Response = jsonResp + } + } else { + if extracted := extractDisplayText(rawOutput, p.parseOpencodeStreamLine); extracted != "" { + result.Output = extracted + result.Tokens = estimateTokens(extracted) + } } return result, nil @@ -80,7 +93,11 @@ func (p *OpenCodeProvider) ExecuteConversation(ctx context.Context, state *workf // opencode CLI syntax: opencode run "prompt" --format [--model X] [--framework F] [--verbose] [--output DIR] func (p *OpenCodeProvider) buildExecuteArgs(prompt string, options map[string]any) ([]string, error) { args := []string{"run", prompt} - args = append(args, "--format", resolveOpenCodeFormat(options)) + // Always request NDJSON: session ID extraction, display filter, and raw + // display (output_format: json) all consume stream-json events. For + // output_format: text, the F082 display filter extracts assistant text + // from the "text" events (consistent with Claude's stream-json approach). + args = append(args, "--format", "json") if model, ok := getStringOption(options, "model"); ok { args = append(args, "--model", model) @@ -107,7 +124,11 @@ func (p *OpenCodeProvider) buildConversationArgs(state *workflow.ConversationSta } args := []string{"run", effectivePrompt} - args = append(args, "--format", resolveOpenCodeFormat(options)) + // Always request NDJSON: session ID extraction, display filter, and raw + // display (output_format: json) all consume stream-json events. For + // output_format: text, the F082 display filter extracts assistant text + // from the "text" events (consistent with Claude's stream-json approach). + args = append(args, "--format", "json") if model, ok := getStringOption(options, "model"); ok { args = append(args, "--model", model) @@ -161,29 +182,6 @@ func (p *OpenCodeProvider) Validate() error { return nil } -// resolveOpenCodeFormat maps the user-provided output_format option to the -// matching opencode CLI --format value. opencode supports: -// - "default": formatted human-readable output -// - "json": NDJSON events (required by session ID extraction and -// compatible with --output=streaming for live events) -// -// Mapping: -// - output_format: "text"|"default" → "default" -// - output_format: "json" → "json" -// - absent or any other value → "json" (default, preserves prior behavior) -func resolveOpenCodeFormat(options map[string]any) string { - format, ok := getStringOption(options, "output_format") - if !ok { - return "json" - } - switch format { - case "text", "default": - return "default" - default: - return "json" - } -} - // validateOpenCodeOptions validates provider-specific options. func validateOpenCodeOptions(options map[string]any) error { if options == nil { @@ -226,3 +224,28 @@ func (p *OpenCodeProvider) extractSessionID(output string) (string, error) { return sessionID, nil } + +// parseOpencodeStreamLine extracts displayable assistant text from OpenCode CLI's +// stream-json output. OpenCode CLI (`opencode run --format json`) emits one JSON +// object per line with these top-level types: +// - "step_start" — {sessionID, part} (ignored; session ID consumed separately) +// - "text" — {part:{text}} (surface the text field) +// - "step_finish" — {sessionID, part:{tokens, cost}} (ignored) +// +// Only "text" events are surfaced to the live display. All other events (step +// metadata, tool use, reasoning blocks) are skipped. +func (p *OpenCodeProvider) parseOpencodeStreamLine(line []byte) string { + var evt struct { + Type string `json:"type"` + Part struct { + Text string `json:"text"` + } `json:"part"` + } + if err := json.Unmarshal(line, &evt); err != nil { + return "" + } + if evt.Type != "text" { + return "" + } + return evt.Part.Text +} diff --git a/internal/infrastructure/agents/opencode_provider_delegation_test.go b/internal/infrastructure/agents/opencode_provider_delegation_test.go index 2fe8e9d..6a25d19 100644 --- a/internal/infrastructure/agents/opencode_provider_delegation_test.go +++ b/internal/infrastructure/agents/opencode_provider_delegation_test.go @@ -93,7 +93,7 @@ func TestOpenCodeProvider_buildExecuteArgs_HappyPath(t *testing.T) { options: map[string]any{"output_format": "text"}, wantArgs: []string{ "run", "test prompt", - "--format", "default", + "--format", "json", }, wantErr: false, }, diff --git a/internal/infrastructure/agents/opencode_provider_display_output_unit_test.go b/internal/infrastructure/agents/opencode_provider_display_output_unit_test.go new file mode 100644 index 0000000..d4a587c --- /dev/null +++ b/internal/infrastructure/agents/opencode_provider_display_output_unit_test.go @@ -0,0 +1,89 @@ +package agents + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// parseOpencodeStreamLine extracts assistant text from OpenCode CLI's +// `run --format json` output. Only `text` events (with part.text) are surfaced; +// step_start / step_finish / metadata events are skipped. + +func TestOpenCodeProvider_parseOpencodeStreamLine_HappyPath(t *testing.T) { + provider := NewOpenCodeProvider() + + tests := []struct { + name string + line []byte + wantText string + }{ + { + name: "text event surfaces part.text", + line: []byte(`{"type":"text","timestamp":1776110705680,"sessionID":"ses_abc","part":{"id":"prt_1","type":"text","text":"ok"}}`), + wantText: "ok", + }, + { + name: "text event with special chars preserved", + line: []byte(`{"type":"text","part":{"text":"Response {braces} [brackets]"}}`), + wantText: "Response {braces} [brackets]", + }, + { + name: "text event with multi-line content", + line: []byte(`{"type":"text","part":{"text":"line1\nline2"}}`), + wantText: "line1\nline2", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := provider.parseOpencodeStreamLine(tt.line) + assert.Equal(t, tt.wantText, got) + }) + } +} + +func TestOpenCodeProvider_parseOpencodeStreamLine_ErrorPaths(t *testing.T) { + provider := NewOpenCodeProvider() + + tests := []struct { + name string + line []byte + }{ + { + name: "step_start is skipped", + line: []byte(`{"type":"step_start","sessionID":"ses_abc","part":{"type":"step-start"}}`), + }, + { + name: "step_finish is skipped", + line: []byte(`{"type":"step_finish","sessionID":"ses_abc","part":{"type":"step-finish","tokens":{"total":100}}}`), + }, + { + name: "text event with empty text returns empty", + line: []byte(`{"type":"text","part":{"text":""}}`), + }, + { + name: "text event without part is empty", + line: []byte(`{"type":"text"}`), + }, + { + name: "malformed JSON returns empty", + line: []byte(`{"type":"text","part":{"text":`), + }, + { + name: "empty line returns empty", + line: []byte(``), + }, + { + name: "plain text returns empty", + line: []byte(`not json`), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := provider.parseOpencodeStreamLine(tt.line) + assert.Equal(t, "", got) + }) + } +} diff --git a/internal/infrastructure/agents/opencode_provider_unit_test.go b/internal/infrastructure/agents/opencode_provider_unit_test.go index 9d6aec8..9ab2915 100644 --- a/internal/infrastructure/agents/opencode_provider_unit_test.go +++ b/internal/infrastructure/agents/opencode_provider_unit_test.go @@ -453,7 +453,8 @@ func TestOpenCodeProvider_Execute_JSONDetection(t *testing.T) { mockExec.SetOutput(tt.mockOutput, nil) provider := NewOpenCodeProviderWithOptions(WithOpenCodeExecutor(mockExec)) - result, err := provider.Execute(context.Background(), "test prompt", nil, nil, nil) + // F082: Response auto-detection only runs under explicit output_format: json. + result, err := provider.Execute(context.Background(), "test prompt", map[string]any{"output_format": "json"}, nil, nil) require.NoError(t, err) require.NotNil(t, result) diff --git a/internal/infrastructure/agents/stream_filter.go b/internal/infrastructure/agents/stream_filter.go new file mode 100644 index 0000000..e072608 --- /dev/null +++ b/internal/infrastructure/agents/stream_filter.go @@ -0,0 +1,125 @@ +package agents + +import ( + "bytes" + "fmt" + "io" + "strings" +) + +const maxLineSize = 1024 * 1024 + +var newlineBytes = []byte{'\n'} + +// LineExtractor parses a single NDJSON line and returns extracted text. +// Returning "" indicates the line should be skipped. +type LineExtractor func(line []byte) string + +// StreamFilterWriter is an io.Writer decorator that buffers NDJSON lines, +// extracts text via LineExtractor, and writes filtered content to an inner writer. +// It enforces a 1 MB cap per line to prevent unbounded memory growth. +type StreamFilterWriter struct { + inner io.Writer + extract LineExtractor + buf []byte +} + +// NewStreamFilterWriter creates a new StreamFilterWriter that decorates the given writer. +// If extract is nil, lines are passed through unfiltered. +func NewStreamFilterWriter(inner io.Writer, extract LineExtractor) *StreamFilterWriter { + if inner == nil { + inner = io.Discard + } + return &StreamFilterWriter{ + inner: inner, + extract: extract, + buf: make([]byte, 0, 4096), + } +} + +// Write implements io.Writer. It buffers incoming data until a newline is encountered, +// then parses and filters the complete line. +func (w *StreamFilterWriter) Write(p []byte) (int, error) { + if w.extract == nil { + n, err := w.inner.Write(p) + if err != nil { + return n, fmt.Errorf("write to inner: %w", err) + } + return n, nil + } + + n := len(p) + w.buf = append(w.buf, p...) + + for { + idx := bytes.IndexByte(w.buf, '\n') + if idx < 0 { + if len(w.buf) > maxLineSize { + _, err := w.inner.Write(w.buf) + w.buf = w.buf[:0] + if err != nil { + return n, fmt.Errorf("write oversized buffer: %w", err) + } + } + break + } + + line := w.buf[:idx] + if extracted := w.extract(line); extracted != "" { + if _, err := io.WriteString(w.inner, extracted); err != nil { + return n, fmt.Errorf("write extracted text: %w", err) + } + if _, err := w.inner.Write(newlineBytes); err != nil { + return n, fmt.Errorf("write newline: %w", err) + } + } + + w.buf = w.buf[idx+1:] + } + + return n, nil +} + +// Flush emits any buffered partial line. +func (w *StreamFilterWriter) Flush() error { + if len(w.buf) == 0 { + return nil + } + + if extracted := w.extract(w.buf); extracted != "" { + if _, err := io.WriteString(w.inner, extracted); err != nil { + return fmt.Errorf("flush write extracted: %w", err) + } + if _, err := w.inner.Write(newlineBytes); err != nil { + return fmt.Errorf("flush write newline: %w", err) + } + } + + w.buf = w.buf[:0] + return nil +} + +// extractDisplayText applies the provided LineExtractor to each line of raw output +// and returns the concatenated filtered text. Returns empty string if extract is nil. +func extractDisplayText(raw string, extract LineExtractor) string { + if extract == nil { + return "" + } + + var result strings.Builder + lines := strings.Split(raw, "\n") + + for _, line := range lines { + if line == "" { + continue + } + if extracted := extract([]byte(line)); extracted != "" { + if result.Len() > 0 { + result.WriteRune('\n') + } + result.WriteString(extracted) + } + } + + return result.String() +} diff --git a/internal/infrastructure/agents/stream_filter_unit_test.go b/internal/infrastructure/agents/stream_filter_unit_test.go new file mode 100644 index 0000000..f5926ff --- /dev/null +++ b/internal/infrastructure/agents/stream_filter_unit_test.go @@ -0,0 +1,223 @@ +package agents + +import ( + "bytes" + "io" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestStreamFilterWriter_Write_HappyPath tests successful line filtering. +func TestStreamFilterWriter_Write_HappyPath(t *testing.T) { + var result bytes.Buffer + extractor := func(line []byte) string { + // Simple mock: extract text after "text:" + if bytes.Contains(line, []byte("text:")) { + parts := bytes.Split(line, []byte("text:")) + if len(parts) > 1 { + return string(bytes.TrimSpace(parts[1])) + } + } + return "" + } + + writer := NewStreamFilterWriter(&result, extractor) + _, err := writer.Write([]byte("text: hello\n")) + require.NoError(t, err) + + err = writer.Flush() + require.NoError(t, err) + + assert.Equal(t, "hello\n", result.String()) +} + +// TestStreamFilterWriter_Write_PartialLine tests buffering incomplete lines. +func TestStreamFilterWriter_Write_PartialLine(t *testing.T) { + var result bytes.Buffer + extractor := func(line []byte) string { + return string(line) + } + + writer := NewStreamFilterWriter(&result, extractor) + _, err := writer.Write([]byte("partial")) + require.NoError(t, err) + + assert.Equal(t, "", result.String(), "partial line should not be written yet") + + _, err = writer.Write([]byte(" line\n")) + require.NoError(t, err) + + assert.Contains(t, result.String(), "partial line") +} + +// TestStreamFilterWriter_Flush_EmitsResidual tests Flush emits buffered data. +func TestStreamFilterWriter_Flush_EmitsResidual(t *testing.T) { + var result bytes.Buffer + extractor := func(line []byte) string { + return string(line) + } + + writer := NewStreamFilterWriter(&result, extractor) + _, err := writer.Write([]byte("no newline")) + require.NoError(t, err) + + assert.Equal(t, "", result.String(), "partial line not yet flushed") + + err = writer.Flush() + require.NoError(t, err) + + assert.Equal(t, "no newline\n", result.String()) +} + +// TestStreamFilterWriter_OversizeBuffer tests 1 MB cap enforcement. +func TestStreamFilterWriter_OversizeBuffer(t *testing.T) { + var result bytes.Buffer + extractor := func(line []byte) string { + return string(line) + } + + writer := NewStreamFilterWriter(&result, extractor) + + bigLine := make([]byte, maxLineSize+1) + for i := range bigLine { + bigLine[i] = 'x' + } + + _, err := writer.Write(bigLine) + require.NoError(t, err) + + assert.True(t, result.Len() > 0, "oversized buffer should be flushed raw") +} + +// TestStreamFilterWriter_ExtractorReturnsEmpty tests silent skipping. +func TestStreamFilterWriter_ExtractorReturnsEmpty(t *testing.T) { + var result bytes.Buffer + extractor := func(line []byte) string { + return "" + } + + writer := NewStreamFilterWriter(&result, extractor) + _, err := writer.Write([]byte("ignored line\n")) + require.NoError(t, err) + + err = writer.Flush() + require.NoError(t, err) + + assert.Equal(t, "", result.String(), "empty extraction should skip line") +} + +// TestStreamFilterWriter_NilExtractor tests passthrough mode. +func TestStreamFilterWriter_NilExtractor(t *testing.T) { + var result bytes.Buffer + writer := NewStreamFilterWriter(&result, nil) + + _, err := writer.Write([]byte("raw line\n")) + require.NoError(t, err) + + assert.Equal(t, "raw line\n", result.String()) +} + +// TestStreamFilterWriter_InnerWriterError tests error propagation. +func TestStreamFilterWriter_InnerWriterError(t *testing.T) { + failWriter := &FailingWriter{} + extractor := func(line []byte) string { + return string(line) + } + + writer := NewStreamFilterWriter(failWriter, extractor) + _, err := writer.Write([]byte("test\n")) + + assert.Error(t, err) +} + +// TestExtractDisplayText_WithExtractor tests text extraction from multi-line output. +func TestExtractDisplayText_WithExtractor(t *testing.T) { + tests := []struct { + name string + raw string + extractor LineExtractor + want string + }{ + { + name: "single line", + raw: `{"type":"content_block_delta","delta":{"type":"text_delta","text":"hello"}}`, + extractor: func(line []byte) string { + if bytes.Contains(line, []byte("text_delta")) { + return "extracted" + } + return "" + }, + want: "extracted", + }, + { + name: "multi-line with mixed results", + raw: `line1 +line2 +line3`, + extractor: func(line []byte) string { + if bytes.Contains(line, []byte("2")) { + return "found line2" + } + return "" + }, + want: "found line2", + }, + { + name: "all skipped", + raw: "line1\nline2\nline3", + extractor: func(line []byte) string { return "" }, + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := extractDisplayText(tt.raw, tt.extractor) + assert.Equal(t, tt.want, got) + }) + } +} + +// TestExtractDisplayText_NilExtractor returns empty string. +func TestExtractDisplayText_NilExtractor(t *testing.T) { + result := extractDisplayText("any text", nil) + assert.Equal(t, "", result) +} + +// BenchmarkStreamFilterWriter measures the Write() overhead per NDJSON line under 4 KB +// (NFR-001, F082 T013). Observed threshold on dev machine: ~2.5 µs/op (well under the +// NFR-001 budget of 10 ms/op for agent-step execution). This benchmark is informational; +// there is no hard gate — re-run locally with `go test -bench=BenchmarkStreamFilterWriter +// ./internal/infrastructure/agents/...` after changes to stream_filter.go to catch regressions. +func BenchmarkStreamFilterWriter(b *testing.B) { + // Construct a realistic ~4 KB Claude stream-json line. + text := strings.Repeat("lorem ipsum dolor sit amet ", 140) // ~3780 bytes + line := []byte(`{"type":"content_block_delta","delta":{"type":"text_delta","text":"` + text + `"}}` + "\n") + + provider := NewClaudeProvider() + extractor := LineExtractor(provider.parseClaudeStreamLine) + + b.SetBytes(int64(len(line))) + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + writer := NewStreamFilterWriter(io.Discard, extractor) + if _, err := writer.Write(line); err != nil { + b.Fatal(err) + } + if err := writer.Flush(); err != nil { + b.Fatal(err) + } + } +} + +// FailingWriter is a test helper that always returns an error. +type FailingWriter struct{} + +func (f *FailingWriter) Write(p []byte) (int, error) { + return 0, io.ErrClosedPipe +} diff --git a/internal/interfaces/cli/run.go b/internal/interfaces/cli/run.go index 0656b35..bf06b31 100644 --- a/internal/interfaces/cli/run.go +++ b/internal/interfaces/cli/run.go @@ -721,12 +721,20 @@ func showExecutionDetails(formatter *ui.Formatter, execCtx *workflow.ExecutionCo } } +func displayValueOf(state *workflow.StepState) string { + if state.DisplayOutput != "" { + return state.DisplayOutput + } + return state.Output +} + func showStepOutputs(formatter *ui.Formatter, execCtx *workflow.ExecutionContext) { allStates := execCtx.GetAllStepStates() for name, state := range allStates { - if state.Output != "" { + displayOutput := displayValueOf(&state) + if displayOutput != "" { formatter.Printf("\n--- [%s] stdout ---\n", name) - formatter.Printf("%s", state.Output) + formatter.Printf("%s", displayOutput) } if state.Stderr != "" { formatter.Printf("\n--- [%s] stderr ---\n", name) diff --git a/internal/interfaces/cli/run_internal_test.go b/internal/interfaces/cli/run_internal_test.go index 172084a..dd6fdb6 100644 --- a/internal/interfaces/cli/run_internal_test.go +++ b/internal/interfaces/cli/run_internal_test.go @@ -97,9 +97,10 @@ func TestShowExecutionDetails(t *testing.T) { func TestShowStepOutputs(t *testing.T) { tests := []struct { - name string - execCtx *workflow.ExecutionContext - wantOut []string + name string + execCtx *workflow.ExecutionContext + wantOut []string + notWantOut []string }{ { name: "step with stdout only", @@ -141,6 +142,65 @@ func TestShowStepOutputs(t *testing.T) { }(), wantOut: []string{"clean"}, }, + { + name: "prefers DisplayOutput over Output when non-empty", + execCtx: func() *workflow.ExecutionContext { + ctx := workflow.NewExecutionContext("test-4", "wf") + ctx.States["agent-step"] = workflow.StepState{ + Name: "agent-step", + Status: workflow.StatusCompleted, + DisplayOutput: "Filtered text content", + Output: `{"raw":"ndjson","line":1}`, + } + return ctx + }(), + wantOut: []string{"agent-step", "stdout", "Filtered text content"}, + notWantOut: []string{`{"raw":"ndjson"}`}, + }, + { + name: "falls back to Output when DisplayOutput empty", + execCtx: func() *workflow.ExecutionContext { + ctx := workflow.NewExecutionContext("test-5", "wf") + ctx.States["plain-step"] = workflow.StepState{ + Name: "plain-step", + Status: workflow.StatusCompleted, + DisplayOutput: "", + Output: "Raw text output", + } + return ctx + }(), + wantOut: []string{"plain-step", "stdout", "Raw text output"}, + }, + { + name: "success feedback when both DisplayOutput and Output empty", + execCtx: func() *workflow.ExecutionContext { + ctx := workflow.NewExecutionContext("test-6", "wf") + ctx.States["silent-agent"] = workflow.StepState{ + Name: "silent-agent", + Status: workflow.StatusCompleted, + DisplayOutput: "", + Output: "", + Stderr: "", + } + return ctx + }(), + wantOut: []string{"silent-agent"}, + }, + { + name: "uses Output for success feedback detection, not DisplayOutput", + execCtx: func() *workflow.ExecutionContext { + ctx := workflow.NewExecutionContext("test-7", "wf") + ctx.States["output-step"] = workflow.StepState{ + Name: "output-step", + Status: workflow.StatusCompleted, + DisplayOutput: "", + Output: "some raw output", + Stderr: "", + } + return ctx + }(), + wantOut: []string{"output-step", "stdout", "some raw output"}, + }, } for _, tt := range tests { @@ -152,8 +212,58 @@ func TestShowStepOutputs(t *testing.T) { output := buf.String() for _, want := range tt.wantOut { - assert.Contains(t, output, want) + assert.Contains(t, output, want, "output should contain %q", want) } + for _, notWant := range tt.notWantOut { + assert.NotContains(t, output, notWant, "output should not contain %q", notWant) + } + }) + } +} + +func TestDisplayValueOf(t *testing.T) { + tests := []struct { + name string + displayOutput string + output string + expectedResult string + }{ + { + name: "prefers DisplayOutput when non-empty", + displayOutput: "Filtered text content", + output: `{"raw":"json"}`, + expectedResult: "Filtered text content", + }, + { + name: "falls back to Output when DisplayOutput empty", + displayOutput: "", + output: "Raw output content", + expectedResult: "Raw output content", + }, + { + name: "both empty returns empty string", + displayOutput: "", + output: "", + expectedResult: "", + }, + { + name: "DisplayOutput takes precedence over Output", + displayOutput: "Display text", + output: "Raw text should be ignored", + expectedResult: "Display text", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + state := &workflow.StepState{ + DisplayOutput: tt.displayOutput, + Output: tt.output, + } + + result := displayValueOf(state) + + assert.Equal(t, tt.expectedResult, result) }) } } diff --git a/pkg/interpolation/resolver_data_test.go b/pkg/interpolation/resolver_data_test.go index 44f6b75..26227ab 100644 --- a/pkg/interpolation/resolver_data_test.go +++ b/pkg/interpolation/resolver_data_test.go @@ -1,6 +1,7 @@ package interpolation_test import ( + "reflect" "testing" "github.com/awf-project/cli/pkg/interpolation" @@ -208,6 +209,43 @@ func TestTemplateResolver_StepStateDataData(t *testing.T) { } } +// TestStepStateData_DisplayOutputNotResolvable guards F082 FR-008: the UI-only +// DisplayOutput field on domain workflow.StepState MUST NOT be exposed to template +// interpolation. If a future change adds a DisplayOutput field to StepStateData or +// copies it into Data/Response, this test breaks — by design. +// +// Two assertions: +// 1. Structural: interpolation.StepStateData has no DisplayOutput field (reflection). +// 2. Behavioral: `{{.states.step.DisplayOutput}}` resolves to ``, not the +// filtered text that may live on the domain StepState. +func TestStepStateData_DisplayOutputNotResolvable(t *testing.T) { + // Structural guard. + typ := reflect.TypeOf(interpolation.StepStateData{}) + _, found := typ.FieldByName("DisplayOutput") + assert.False(t, found, + "interpolation.StepStateData must NOT expose DisplayOutput — it is a UI-only field (F082 FR-008)") + + // Behavioral guard: AWF's TemplateResolver rejects unknown reference fields hard, + // so any template referencing DisplayOutput must fail to resolve. + resolver := interpolation.NewTemplateResolver() + ctx := interpolation.NewContext() + ctx.States = map[string]interpolation.StepStateData{ + "agent_step": { + Output: `{"type":"content_block_delta","delta":{"type":"text_delta","text":"hello"}}`, + }, + } + + _, err := resolver.Resolve("{{.states.agent_step.DisplayOutput}}", ctx) + require.Error(t, err, "DisplayOutput must not resolve through the interpolation layer") + assert.Contains(t, err.Error(), "DisplayOutput", + "error must reference the rejected field name") + + // Sanity: raw Output still resolves. + got, err := resolver.Resolve("{{.states.agent_step.Output}}", ctx) + require.NoError(t, err) + assert.Contains(t, got, "text_delta") +} + func TestStepStateData_DataFieldType(t *testing.T) { state := interpolation.StepStateData{ Output: "test", diff --git a/tests/integration/agents/f082_display_matrix_test.go b/tests/integration/agents/f082_display_matrix_test.go new file mode 100644 index 0000000..aed8e47 --- /dev/null +++ b/tests/integration/agents/f082_display_matrix_test.go @@ -0,0 +1,282 @@ +//go:build integration + +package agents_test + +import ( + "bytes" + "context" + "io" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/application" + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/domain/workflow" + "github.com/awf-project/cli/internal/infrastructure/agents" + "github.com/awf-project/cli/internal/infrastructure/executor" + infraExpr "github.com/awf-project/cli/internal/infrastructure/expression" + "github.com/awf-project/cli/internal/infrastructure/repository" + "github.com/awf-project/cli/internal/infrastructure/store" + "github.com/awf-project/cli/internal/testutil/mocks" + "github.com/awf-project/cli/pkg/interpolation" +) + +// Feature: F082 +// Functional tests validating the display matrix: output_format × --output interaction +// through real provider implementations (Claude, OpenCode) with mock CLI executors. + +func TestDisplayMatrix_ClaudeTextFormat_FiltersNDJSON(t *testing.T) { + ndjsonOutput := strings.Join([]string{ + `{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`, + `{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}`, + `{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":", world!"}}`, + `{"type":"content_block_stop","index":0}`, + `{"type":"message_stop"}`, + }, "\n") + + mockExec := mocks.NewMockCLIExecutor() + mockExec.SetOutput([]byte(ndjsonOutput), nil) + + provider := agents.NewClaudeProviderWithOptions( + agents.WithClaudeExecutor(mockExec), + ) + + registry := mocks.NewMockAgentRegistry() + registry.Register(provider) + + svc, _ := setupDisplayMatrixService(t) + svc.SetAgentRegistry(registry) + + wf := buildDisplayMatrixWorkflow("claude", workflow.OutputFormatText) + execCtx, err := svc.RunWithWorkflow(context.Background(), wf, nil) + + require.NoError(t, err) + state := execCtx.States["agent-step"] + + assert.Contains(t, state.Output, "content_block_delta", "raw Output must contain NDJSON") + assert.Equal(t, "Hello\n, world!", state.DisplayOutput) + assert.NotContains(t, state.DisplayOutput, "content_block_delta", + "DisplayOutput must not contain raw NDJSON event types") +} + +func TestDisplayMatrix_JSONFormat_PassthroughRaw(t *testing.T) { + // F065 post-processing requires valid JSON when output_format=json + jsonOutput := `{"result":"Hello","status":"ok"}` + + mockExec := mocks.NewMockCLIExecutor() + mockExec.SetOutput([]byte(jsonOutput), nil) + + provider := agents.NewClaudeProviderWithOptions( + agents.WithClaudeExecutor(mockExec), + ) + + registry := mocks.NewMockAgentRegistry() + registry.Register(provider) + + svc, _ := setupDisplayMatrixService(t) + svc.SetAgentRegistry(registry) + + wf := buildDisplayMatrixWorkflow("claude", workflow.OutputFormatJSON) + execCtx, err := svc.RunWithWorkflow(context.Background(), wf, nil) + + require.NoError(t, err) + state := execCtx.States["agent-step"] + + assert.Contains(t, state.Output, "result", "raw Output preserved") + assert.Empty(t, state.DisplayOutput, "json format must produce empty DisplayOutput") +} + +func TestDisplayMatrix_DefaultFormat_BehavesAsText(t *testing.T) { + ndjsonOutput := `{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Default filtered"}}` + "\n" + + mockExec := mocks.NewMockCLIExecutor() + mockExec.SetOutput([]byte(ndjsonOutput), nil) + + provider := agents.NewClaudeProviderWithOptions( + agents.WithClaudeExecutor(mockExec), + ) + + registry := mocks.NewMockAgentRegistry() + registry.Register(provider) + + svc, _ := setupDisplayMatrixService(t) + svc.SetAgentRegistry(registry) + + wf := buildDisplayMatrixWorkflow("claude", workflow.OutputFormatNone) + execCtx, err := svc.RunWithWorkflow(context.Background(), wf, nil) + + require.NoError(t, err) + state := execCtx.States["agent-step"] + + assert.Equal(t, "Default filtered", state.DisplayOutput, + "empty output_format must default to text filtering") + assert.Contains(t, state.Output, "content_block_delta", "raw Output preserved") +} + +func TestDisplayMatrix_StubProvider_FallsBackToRawOutput(t *testing.T) { + rawOutput := `{"type":"some_event","data":"test value"}` + "\n" + + mockExec := mocks.NewMockCLIExecutor() + mockExec.SetOutput([]byte(rawOutput), nil) + + // OpenCode has a real parser (not a stub), so use a provider with stub parser behavior + // by checking that when parser extracts nothing, DisplayOutput is empty + provider := agents.NewGeminiProviderWithOptions( + agents.WithGeminiExecutor(mockExec), + ) + + registry := mocks.NewMockAgentRegistry() + registry.Register(provider) + + svc, _ := setupDisplayMatrixService(t) + svc.SetAgentRegistry(registry) + + wf := buildDisplayMatrixWorkflow("gemini", workflow.OutputFormatText) + execCtx, err := svc.RunWithWorkflow(context.Background(), wf, nil) + + require.NoError(t, err) + state := execCtx.States["agent-step"] + + assert.Empty(t, state.DisplayOutput, "stub parser returns empty: DisplayOutput stays empty") + assert.Contains(t, state.Output, "some_event", "raw Output always preserved") +} + +func TestDisplayMatrix_StreamFilterWriter_WritesToLiveOutput(t *testing.T) { + ndjsonLines := []string{ + `{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"streamed"}}`, + `{"type":"message_stop"}`, + } + + var liveOutput bytes.Buffer + mockExec := &streamingMockExecutor{ + lines: ndjsonLines, + stdout: []byte(strings.Join(ndjsonLines, "\n")), + } + + provider := agents.NewClaudeProviderWithOptions( + agents.WithClaudeExecutor(mockExec), + ) + + result, err := provider.Execute( + context.Background(), + "test prompt", + map[string]any{"model": "claude-sonnet-4-20250514", "output_format": "text"}, + &liveOutput, + io.Discard, + ) + + require.NoError(t, err) + assert.Equal(t, "streamed", result.DisplayOutput) + assert.Contains(t, result.Output, "content_block_delta", "raw output preserved in result") +} + +func TestDisplayMatrix_OptionsMapCloning_OriginalUnmutated(t *testing.T) { + mockExec := mocks.NewMockCLIExecutor() + mockExec.SetOutput([]byte("output"), nil) + + provider := agents.NewClaudeProviderWithOptions( + agents.WithClaudeExecutor(mockExec), + ) + + registry := mocks.NewMockAgentRegistry() + registry.Register(provider) + + svc, _ := setupDisplayMatrixService(t) + svc.SetAgentRegistry(registry) + + originalOpts := map[string]any{"model": "claude-sonnet-4-20250514"} + wf := &workflow.Workflow{ + Name: "test", + Initial: "agent-step", + Steps: map[string]*workflow.Step{ + "agent-step": { + Name: "agent-step", + Type: workflow.StepTypeAgent, + Agent: &workflow.AgentConfig{ + Provider: "claude", + Prompt: "test prompt", + OutputFormat: workflow.OutputFormatText, + Options: originalOpts, + }, + OnSuccess: "done", + }, + "done": { + Name: "done", + Type: workflow.StepTypeTerminal, + Status: workflow.TerminalSuccess, + }, + }, + } + + _, err := svc.RunWithWorkflow(context.Background(), wf, nil) + require.NoError(t, err) + + assert.NotContains(t, originalOpts, "output_format", + "cloneAndInjectOutputFormat must not mutate original options") + assert.Len(t, originalOpts, 1, "original map must keep only its original keys") +} + +// streamingMockExecutor writes NDJSON lines to the stdout writer to exercise +// the streamFilterWriter live filtering path. +type streamingMockExecutor struct { + lines []string + stdout []byte +} + +func (m *streamingMockExecutor) Run(ctx context.Context, name string, stdoutW, stderrW io.Writer, args ...string) ([]byte, []byte, error) { + if stdoutW != nil { + for _, line := range m.lines { + _, _ = stdoutW.Write([]byte(line + "\n")) + } + } + return m.stdout, nil, nil +} + +func setupDisplayMatrixService(t *testing.T) (*application.ExecutionService, string) { + t.Helper() + tempDir := t.TempDir() + repo := repository.NewYAMLRepository(tempDir) + stateStore := store.NewJSONStore(tempDir) + exec := executor.NewShellExecutor() + wfSvc := application.NewWorkflowService(repo, stateStore, exec, &nopLogger{}, infraExpr.NewExprValidator()) + parallelExec := application.NewParallelExecutor(&nopLogger{}) + execSvc := application.NewExecutionServiceWithEvaluator( + wfSvc, exec, parallelExec, stateStore, &nopLogger{}, interpolation.NewTemplateResolver(), nil, infraExpr.NewExprEvaluator(), + ) + return execSvc, tempDir +} + +func buildDisplayMatrixWorkflow(provider string, format workflow.OutputFormat) *workflow.Workflow { + return &workflow.Workflow{ + Name: "test", + Initial: "agent-step", + Steps: map[string]*workflow.Step{ + "agent-step": { + Name: "agent-step", + Type: workflow.StepTypeAgent, + Agent: &workflow.AgentConfig{ + Provider: provider, + Prompt: "test prompt", + OutputFormat: format, + }, + OnSuccess: "done", + }, + "done": { + Name: "done", + Type: workflow.StepTypeTerminal, + Status: workflow.TerminalSuccess, + }, + }, + } +} + +type nopLogger struct{} + +func (m *nopLogger) Debug(msg string, fields ...any) {} +func (m *nopLogger) Info(msg string, fields ...any) {} +func (m *nopLogger) Warn(msg string, fields ...any) {} +func (m *nopLogger) Error(msg string, fields ...any) {} +func (m *nopLogger) WithContext(ctx map[string]any) ports.Logger { return m }