diff --git a/Cargo.lock b/Cargo.lock index e26c0246..9cacaad6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -231,6 +231,19 @@ dependencies = [ "whoami", ] +[[package]] +name = "chorus-stub-agent" +version = "0.1.0" +dependencies = [ + "anyhow", + "regex", + "rmcp", + "serde", + "serde_json", + "tokio", + "uuid", +] + [[package]] name = "chrono" version = "0.4.44" @@ -1501,6 +1514,7 @@ dependencies = [ "serde_json", "thiserror", "tokio", + "tokio-stream", "tokio-util", "tracing", ] @@ -1971,6 +1985,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-tungstenite" version = "0.28.0" diff --git a/Cargo.toml b/Cargo.toml index 60f8f5e9..37d4d193 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,8 @@ +[workspace] +members = [".", "crates/stub-agent"] +# Default `cargo build` only built the root package; stub QA needs `chorus-stub-agent` beside `chorus`. +default-members = [".", "crates/stub-agent"] + [package] name = "chorus" version = "0.1.0" diff --git a/crates/stub-agent/Cargo.toml b/crates/stub-agent/Cargo.toml new file mode 100644 index 00000000..bdcddf65 --- /dev/null +++ b/crates/stub-agent/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "chorus-stub-agent" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "chorus-stub-agent" +path = "src/main.rs" + +[dependencies] +rmcp = { version = "0.16", features = ["client", "transport-async-rw"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +tokio = { version = "1", features = ["full"] } +anyhow = "1" +regex = "1" +uuid = { version = "1", features = ["v4"] } diff --git a/crates/stub-agent/src/main.rs b/crates/stub-agent/src/main.rs new file mode 100644 index 00000000..f6802ce9 --- /dev/null +++ b/crates/stub-agent/src/main.rs @@ -0,0 +1,314 @@ +use std::sync::atomic::{AtomicU64, Ordering}; + +use anyhow::{Context, Result}; +use regex::Regex; +use rmcp::model::CallToolRequestParams; +use rmcp::{ClientHandler, ServiceExt}; +use serde::Deserialize; +use tokio::process::Command; + +static SEQ: AtomicU64 = AtomicU64::new(0); + +// --------------------------------------------------------------------------- +// CLI args (minimal hand-parse to avoid adding clap) +// --------------------------------------------------------------------------- + +struct Args { + mcp_config: String, + #[allow(dead_code)] + prompt: String, +} + +fn parse_args() -> Result { + let args: Vec = std::env::args().collect(); + let mut mcp_config = None; + let mut prompt = None; + let mut i = 1; + while i < args.len() { + match args[i].as_str() { + "--mcp-config" => { + i += 1; + mcp_config = Some(args.get(i).context("missing --mcp-config value")?.clone()); + } + "--prompt" => { + i += 1; + prompt = Some(args.get(i).context("missing --prompt value")?.clone()); + } + _ => {} + } + i += 1; + } + Ok(Args { + mcp_config: mcp_config.context("--mcp-config is required")?, + prompt: prompt.context("--prompt is required")?, + }) +} + +// --------------------------------------------------------------------------- +// MCP config parsing +// --------------------------------------------------------------------------- + +#[derive(Deserialize)] +struct McpConfig { + #[serde(rename = "mcpServers")] + mcp_servers: std::collections::HashMap, +} + +#[derive(Deserialize)] +struct McpServerEntry { + command: String, + #[serde(default)] + args: Vec, +} + +fn load_mcp_config(path: &str) -> Result<(String, Vec)> { + let data = std::fs::read_to_string(path) + .with_context(|| format!("Failed to read MCP config at {path}"))?; + let config: McpConfig = + serde_json::from_str(&data).context("Failed to parse MCP config JSON")?; + let entry = config + .mcp_servers + .get("chat") + .context("No MCP server entry named 'chat' in config")?; + Ok((entry.command.clone(), entry.args.clone())) +} + +// --------------------------------------------------------------------------- +// JSON stdout protocol +// --------------------------------------------------------------------------- + +fn emit(value: serde_json::Value) { + // Print to our own stdout — the manager reads these lines. + println!("{}", serde_json::to_string(&value).unwrap()); +} + +fn emit_session_init(session_id: &str) { + emit(serde_json::json!({"type": "session_init", "session_id": session_id})); +} + +fn emit_text(text: &str) { + emit(serde_json::json!({"type": "text", "text": text})); +} + +fn emit_tool_call(name: &str, input: &serde_json::Value) { + emit(serde_json::json!({"type": "tool_call", "name": name, "input": input})); +} + +fn emit_turn_end() { + emit(serde_json::json!({"type": "turn_end"})); +} + +fn emit_error(message: &str) { + emit(serde_json::json!({"type": "error", "message": message})); +} + +// --------------------------------------------------------------------------- +// MCP client handler (no-op — we only call tools, never receive requests) +// --------------------------------------------------------------------------- + +struct StubClientHandler; +impl ClientHandler for StubClientHandler {} + +// --------------------------------------------------------------------------- +// Tool helpers +// --------------------------------------------------------------------------- + +async fn call_tool( + peer: &rmcp::service::Peer, + name: &str, + args: serde_json::Value, +) -> Result { + let params = CallToolRequestParams { + name: std::borrow::Cow::Owned(name.to_string()), + arguments: Some(args.as_object().cloned().unwrap_or_default()), + meta: None, + task: None, + }; + let result = peer.call_tool(params).await?; + let text: String = result + .content + .iter() + .filter_map(|c| c.raw.as_text().map(|t| t.text.as_str())) + .collect::>() + .join("\n"); + Ok(text) +} + +async fn wait_for_message(peer: &rmcp::service::Peer) -> Result { + let args = serde_json::json!({}); + emit_tool_call("wait_for_message", &args); + call_tool(peer, "wait_for_message", args).await +} + +async fn send_message( + peer: &rmcp::service::Peer, + target: &str, + content: &str, +) -> Result { + let args = serde_json::json!({"target": target, "content": content}); + emit_tool_call("send_message", &args); + call_tool(peer, "send_message", args).await +} + +// --------------------------------------------------------------------------- +// Token extraction from message content +// --------------------------------------------------------------------------- + +fn extract_token(content: &str) -> Option { + // Patterns: reply with "TOKEN", reply with TOKEN, token: TOKEN, echo "TOKEN", say "TOKEN" + let patterns = [ + r#"(?i)reply\s+with\s+"([^"]+)""#, + r#"(?i)reply\s+with\s+(\S+)"#, + r#"(?i)token:\s*(\S+)"#, + r#"(?i)echo\s+"([^"]+)""#, + r#"(?i)say\s+"([^"]+)""#, + ]; + for pat in &patterns { + if let Ok(re) = Regex::new(pat) { + if let Some(caps) = re.captures(content) { + if let Some(m) = caps.get(1) { + return Some(m.as_str().to_string()); + } + } + } + } + None +} + +fn next_fallback_token() -> String { + let seq = SEQ.fetch_add(1, Ordering::Relaxed); + format!("stub-reply-{seq}") +} + +// --------------------------------------------------------------------------- +// Parse target from bridge message format +// --------------------------------------------------------------------------- + +fn parse_target(line: &str) -> Option { + // Format: [target=#channel msg=... time=... type=...] @sender: content + let re = Regex::new(r"\[target=(\S+)\s").ok()?; + re.captures(line) + .and_then(|c| c.get(1).map(|m| m.as_str().to_string())) +} + +fn parse_content(line: &str) -> Option { + // After "] @sender: " comes the content. Sender may contain spaces (OS usernames); + // do not use `\S+` here — that breaks token extraction and yields empty content. + let re = Regex::new(r"\]\s+@([^:]+):\s*(.+)$").ok()?; + re.captures(line) + .and_then(|c| c.get(2).map(|m| m.as_str().to_string())) +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- + +#[tokio::main] +async fn main() { + if let Err(e) = run().await { + emit_error(&format!("{e:#}")); + std::process::exit(1); + } +} + +async fn run() -> Result<()> { + let Args { + mcp_config, + prompt: _, + } = parse_args()?; + let (command, cmd_args) = load_mcp_config(&mcp_config)?; + + // Drain stdin in background to prevent buffer fill-up. + // The manager writes stdin notifications but the bridge handles delivery via wait_for_message. + tokio::spawn(async move { + use tokio::io::AsyncBufReadExt; + let stdin = tokio::io::stdin(); + let reader = tokio::io::BufReader::new(stdin); + let mut lines = reader.lines(); + while let Ok(Some(_line)) = lines.next_line().await { + // consumed — bridge handles delivery + } + }); + + // Spawn bridge as child process + let mut child = Command::new(&command) + .args(&cmd_args) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::null()) + .spawn() + .with_context(|| format!("Failed to spawn bridge: {command}"))?; + + let child_stdout = child.stdout.take().context("No stdout from bridge child")?; + let child_stdin = child.stdin.take().context("No stdin from bridge child")?; + + // Connect as MCP client + let service = StubClientHandler + .serve((child_stdout, child_stdin)) + .await + .map_err(|e| anyhow::anyhow!("MCP handshake failed: {e}"))?; + let peer = service.peer().clone(); + + // Emit session init + let session_id = uuid::Uuid::new_v4().to_string(); + emit_session_init(&session_id); + + let delay_ms: u64 = std::env::var("STUB_DELAY_MS") + .unwrap_or_else(|_| "200".to_string()) + .parse() + .unwrap_or(200); + + // Short status only — full `--prompt` can be large and may contain sensitive context. + emit_text("Processing prompt"); + + // Main loop: wait for messages, extract token or use fallback, send reply + loop { + let response = match wait_for_message(&peer).await { + Ok(r) => r, + Err(e) => { + emit_error(&format!("wait_for_message failed: {e:#}")); + break; + } + }; + + if response.contains("No new messages.") { + // No messages — loop back and wait again + continue; + } + + // Process each line (multiple messages may arrive). Bridge output can include + // footers such as "Reply instructions:" — only handle real message header lines. + for line in response.lines() { + let line = line.trim(); + if line.is_empty() || line.contains("No new messages.") { + continue; + } + if !line.starts_with("[target=") { + continue; + } + + let Some(target) = parse_target(line) else { + emit_error(&format!("Could not parse target from line: {line}")); + continue; + }; + let content = parse_content(line).unwrap_or_default(); + let token = extract_token(&content).unwrap_or_else(next_fallback_token); + + tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await; + emit_text(&format!("Replying with: {token}")); + + if let Err(e) = send_message(&peer, &target, &token).await { + emit_error(&format!("send_message failed: {e:#}")); + break; + } + } + + emit_turn_end(); + } + + // Clean up + drop(peer); + drop(service); + let _ = child.kill().await; + Ok(()) +} diff --git a/docs/superpowers/plans/2026-04-03-stub-agent-driver.md b/docs/superpowers/plans/2026-04-03-stub-agent-driver.md new file mode 100644 index 00000000..90f130bf --- /dev/null +++ b/docs/superpowers/plans/2026-04-03-stub-agent-driver.md @@ -0,0 +1,1066 @@ +# Stub Agent Driver Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a lightweight stub agent binary and driver that echoes messages back through the MCP bridge, enabling ~30 QA cases to run without real LLM backends. + +**Architecture:** A new `crates/stub-agent/` Rust binary acts as an MCP client that spawns the bridge as a child process, calls `wait_for_message` and `send_message` in a loop, and prints JSON status lines to stdout. A `StubDriver` in the main crate implements the `Driver` trait to spawn the stub binary and parse its output. + +**Tech Stack:** Rust, rmcp (client feature), serde_json, tokio, Playwright (test harness updates) + +**Spec:** [`docs/superpowers/specs/2026-04-03-stub-agent-driver-design.md`](../specs/2026-04-03-stub-agent-driver-design.md) + +--- + +## File Map + +| Action | Path | Responsibility | +|--------|------|----------------| +| Create | `crates/stub-agent/Cargo.toml` | Crate manifest with rmcp client + transport-async-rw deps | +| Create | `crates/stub-agent/src/main.rs` | MCP client binary: spawn bridge, loop wait/send, emit JSON | +| Modify | `Cargo.toml` | Convert to workspace with members `.` and `crates/stub-agent` | +| Modify | `src/store/agents.rs:84-109` | Add `AgentRuntime::Stub` variant, `parse()`, `as_str()` | +| Create | `src/agent/drivers/stub.rs` | `StubDriver` implementing `Driver` trait | +| Modify | `src/agent/drivers/mod.rs:1,84-91` | Add `pub mod stub;`, include `StubDriver` in `all_runtime_drivers()` | +| Modify | `src/agent/manager.rs:38-48,88-97` | Add `Stub` arm to `get_driver()` and `resumable_session_id` match | +| Modify | `src/agent/runtime_status.rs:38-42` | Filter `stub` from `list_statuses()` response | +| Modify | `qa/cases/playwright/helpers/api.ts` | Add `ensureStubTrio()` helper | +| Modify | `qa/QA_PRESETS.md` | Add `stub-trio` preset | + +--- + +### Task 1: Convert to Cargo Workspace + +**Files:** +- Modify: `Cargo.toml` +- Create: `crates/stub-agent/Cargo.toml` +- Create: `crates/stub-agent/src/main.rs` + +- [ ] **Step 1: Convert root Cargo.toml to a workspace** + +Add workspace section at the top of `Cargo.toml`. The existing `[package]` and all other sections stay unchanged. + +```toml +[workspace] +members = [".", "crates/stub-agent"] + +[package] +name = "chorus" +# ... rest unchanged +``` + +- [ ] **Step 2: Create stub-agent crate scaffold** + +```bash +mkdir -p crates/stub-agent/src +``` + +`crates/stub-agent/Cargo.toml`: + +```toml +[package] +name = "chorus-stub-agent" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "chorus-stub-agent" +path = "src/main.rs" + +[dependencies] +rmcp = { version = "0.16", features = ["client", "transport-async-rw"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +tokio = { version = "1", features = ["full"] } +anyhow = "1" +regex = "1" +``` + +`crates/stub-agent/src/main.rs` (minimal placeholder): + +```rust +fn main() { + println!("stub-agent placeholder"); +} +``` + +- [ ] **Step 3: Verify workspace builds** + +Run: `cargo build` +Expected: Both `chorus` and `chorus-stub-agent` binaries compile. `target/debug/chorus-stub-agent` exists. + +- [ ] **Step 4: Commit** + +```bash +git add Cargo.toml crates/ +git commit -m "build: convert to Cargo workspace, add stub-agent crate scaffold" +``` + +--- + +### Task 2: Add `AgentRuntime::Stub` Enum Variant + +**Files:** +- Modify: `src/store/agents.rs:82-109` + +- [ ] **Step 1: Add Stub variant to AgentRuntime** + +In `src/store/agents.rs`, add `Stub` to the enum and both match blocks: + +```rust +pub enum AgentRuntime { + Claude, + Codex, + Kimi, + Opencode, + Stub, +} + +impl AgentRuntime { + pub const fn as_str(self) -> &'static str { + match self { + Self::Claude => "claude", + Self::Codex => "codex", + Self::Kimi => "kimi", + Self::Opencode => "opencode", + Self::Stub => "stub", + } + } + + pub fn parse(value: &str) -> Option { + match value { + "claude" => Some(Self::Claude), + "codex" => Some(Self::Codex), + "kimi" => Some(Self::Kimi), + "opencode" => Some(Self::Opencode), + "stub" => Some(Self::Stub), + _ => None, + } + } +} +``` + +- [ ] **Step 2: Build to find exhaustive match errors** + +Run: `cargo build 2>&1 | head -40` +Expected: Compiler errors in `manager.rs` for non-exhaustive match on `AgentRuntime`. This confirms the wiring points. + +- [ ] **Step 3: Fix exhaustive match in manager.rs:88-97** + +In `src/agent/manager.rs`, add `Stub` arm to the `resumable_session_id` match: + +```rust +let resumable_session_id = match driver.runtime() { + AgentRuntime::Codex | AgentRuntime::Opencode => agent.session_id.clone(), + AgentRuntime::Kimi => Some( + agent + .session_id + .clone() + .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()), + ), + AgentRuntime::Claude | AgentRuntime::Stub => None, +}; +``` + +- [ ] **Step 4: Verify build passes** + +Run: `cargo build` +Expected: Compiles with no errors. (The `get_driver()` and `all_runtime_drivers()` changes come in Task 3.) + +- [ ] **Step 5: Commit** + +```bash +git add src/store/agents.rs src/agent/manager.rs +git commit -m "feat(agent): add AgentRuntime::Stub enum variant" +``` + +--- + +### Task 3: Implement StubDriver + +**Files:** +- Create: `src/agent/drivers/stub.rs` +- Modify: `src/agent/drivers/mod.rs:1,84-91` +- Modify: `src/agent/manager.rs:38-48` + +- [ ] **Step 1: Create stub.rs with StubDriver** + +Create `src/agent/drivers/stub.rs`: + +```rust +use std::process::{Child, Command, Stdio}; + +use super::{Driver, ParsedEvent, SpawnContext}; +use crate::agent::config::AgentConfig; +use crate::agent::drivers::prompt::{build_base_system_prompt, PromptOptions}; +use crate::agent::runtime_status::{RuntimeAuthStatus, RuntimeStatus}; +use crate::store::agents::AgentRuntime; + +pub struct StubDriver; + +impl Driver for StubDriver { + fn runtime(&self) -> AgentRuntime { + AgentRuntime::Stub + } + + fn supports_stdin_notification(&self) -> bool { + true + } + + fn mcp_tool_prefix(&self) -> &str { + "" + } + + fn spawn(&self, ctx: &SpawnContext) -> anyhow::Result { + let mcp_config = serde_json::json!({ + "mcpServers": { + "chat": { + "command": ctx.bridge_binary, + "args": ["bridge", "--agent-id", &ctx.agent_id, "--server-url", &ctx.server_url] + } + } + }); + let mcp_config_path = + std::path::Path::new(&ctx.working_directory).join(".chorus-mcp.json"); + std::fs::write(&mcp_config_path, serde_json::to_string(&mcp_config)?)?; + + let stub_binary = std::env::current_exe()? + .parent() + .ok_or_else(|| anyhow::anyhow!("cannot find binary directory"))? + .join("chorus-stub-agent"); + + let delay_ms = std::env::var("STUB_DELAY_MS").unwrap_or_else(|_| "200".to_string()); + + let mut env_vars: std::collections::HashMap = std::env::vars().collect(); + env_vars.insert("STUB_DELAY_MS".to_string(), delay_ms); + for extra in &ctx.config.env_vars { + env_vars.insert(extra.key.clone(), extra.value.clone()); + } + + let child = Command::new(&stub_binary) + .args([ + "--mcp-config", + &mcp_config_path.to_string_lossy(), + "--prompt", + &ctx.prompt, + ]) + .current_dir(&ctx.working_directory) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .envs(&env_vars) + .spawn()?; + + Ok(child) + } + + fn parse_line(&self, line: &str) -> Vec { + let event: serde_json::Value = match serde_json::from_str(line) { + Ok(v) => v, + Err(_) => return vec![], + }; + + let mut events = Vec::new(); + + match event.get("type").and_then(|v| v.as_str()) { + Some("session_init") => { + if let Some(sid) = event.get("session_id").and_then(|v| v.as_str()) { + events.push(ParsedEvent::SessionInit { + session_id: sid.to_string(), + }); + } + } + Some("text") => { + if let Some(text) = event.get("text").and_then(|v| v.as_str()) { + events.push(ParsedEvent::Text { + text: text.to_string(), + }); + } + } + Some("tool_call") => { + let name = event + .get("name") + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); + let input = event + .get("input") + .cloned() + .unwrap_or(serde_json::Value::Null); + events.push(ParsedEvent::ToolCall { name, input }); + } + Some("turn_end") => { + events.push(ParsedEvent::TurnEnd { session_id: None }); + } + Some("error") => { + let message = event + .get("message") + .and_then(|v| v.as_str()) + .unwrap_or("unknown error") + .to_string(); + events.push(ParsedEvent::Error { message }); + } + _ => {} + } + + events + } + + fn encode_stdin_message(&self, text: &str, _session_id: &str) -> Option { + let msg = serde_json::json!({ + "type": "notification", + "content": text, + }); + Some(serde_json::to_string(&msg).unwrap_or_default()) + } + + fn build_system_prompt(&self, config: &AgentConfig, _agent_id: &str) -> String { + build_base_system_prompt( + config, + &PromptOptions { + tool_prefix: String::new(), + extra_critical_rules: vec![], + post_startup_notes: vec![], + include_stdin_notification_section: true, + teams: config.teams.clone(), + }, + ) + } + + fn tool_display_name(&self, name: &str) -> String { + match name { + "send_message" => "Sending message\u{2026}".to_string(), + "check_messages" => "Checking messages\u{2026}".to_string(), + "wait_for_message" => "Waiting for messages\u{2026}".to_string(), + "receive_message" => "Receiving messages\u{2026}".to_string(), + other => format!("Using {other}\u{2026}"), + } + } + + fn summarize_tool_input(&self, name: &str, input: &serde_json::Value) -> String { + let str_field = |field: &str| -> String { + input + .get(field) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string() + }; + match name { + "send_message" => { + let target = str_field("target"); + let content = str_field("content"); + let preview: String = content.chars().take(80).collect(); + if target.is_empty() { + preview + } else { + format!("{target}: {preview}") + } + } + _ => String::new(), + } + } + + fn detect_runtime_status(&self) -> anyhow::Result { + let binary_exists = std::env::current_exe() + .ok() + .and_then(|p| p.parent().map(|d| d.join("chorus-stub-agent"))) + .map(|p| p.exists()) + .unwrap_or(false); + + Ok(RuntimeStatus { + runtime: self.id().to_string(), + installed: binary_exists, + auth_status: Some(RuntimeAuthStatus::Authed), + }) + } + + fn list_models(&self) -> anyhow::Result> { + Ok(vec!["echo".to_string()]) + } +} +``` + +- [ ] **Step 2: Register StubDriver in mod.rs** + +In `src/agent/drivers/mod.rs`, add the module declaration at line 1 area: + +```rust +pub mod claude; +pub mod codex; +pub mod kimi; +pub mod opencode; +pub mod prompt; +pub mod stub; +``` + +Add `StubDriver` to `all_runtime_drivers()`: + +```rust +pub fn all_runtime_drivers() -> Vec> { + vec![ + Arc::new(claude::ClaudeDriver), + Arc::new(codex::CodexDriver), + Arc::new(kimi::KimiDriver), + Arc::new(opencode::OpencodeDriver), + Arc::new(stub::StubDriver), + ] +} +``` + +- [ ] **Step 3: Wire get_driver() in manager.rs** + +In `src/agent/manager.rs`, add the Stub arm to `get_driver()`: + +```rust +fn get_driver(runtime: &str) -> anyhow::Result> { + match AgentRuntime::parse(runtime) { + Some(AgentRuntime::Claude) => Ok(Arc::new(crate::agent::drivers::claude::ClaudeDriver)), + Some(AgentRuntime::Codex) => Ok(Arc::new(crate::agent::drivers::codex::CodexDriver)), + Some(AgentRuntime::Kimi) => Ok(Arc::new(crate::agent::drivers::kimi::KimiDriver)), + Some(AgentRuntime::Opencode) => { + Ok(Arc::new(crate::agent::drivers::opencode::OpencodeDriver)) + } + Some(AgentRuntime::Stub) => Ok(Arc::new(crate::agent::drivers::stub::StubDriver)), + None => anyhow::bail!("Unknown runtime: {runtime}"), + } +} +``` + +- [ ] **Step 4: Verify build passes** + +Run: `cargo build` +Expected: Compiles. `StubDriver` is registered but the stub binary is still a placeholder. + +- [ ] **Step 5: Commit** + +```bash +git add src/agent/drivers/stub.rs src/agent/drivers/mod.rs src/agent/manager.rs +git commit -m "feat(agent): add StubDriver implementation" +``` + +--- + +### Task 4: Filter Stub From Runtime Status API + +**Files:** +- Modify: `src/agent/runtime_status.rs:38-42` + +- [ ] **Step 1: Filter stub from list_statuses()** + +In `src/agent/runtime_status.rs`, update `SystemRuntimeStatusProvider::list_statuses()`: + +```rust +impl RuntimeStatusProvider for SystemRuntimeStatusProvider { + fn list_statuses(&self) -> anyhow::Result> { + all_runtime_drivers() + .into_iter() + .filter(|driver| driver.id() != "stub") + .map(|driver| driver.detect_runtime_status()) + .collect() + } + + // list_models unchanged — it still supports "stub" for API-only agent creation +``` + +- [ ] **Step 2: Verify build passes** + +Run: `cargo build` +Expected: Compiles. + +- [ ] **Step 3: Commit** + +```bash +git add src/agent/runtime_status.rs +git commit -m "feat(agent): hide stub runtime from /runtimes API" +``` + +--- + +### Task 5: Implement Stub Agent Binary + +**Files:** +- Modify: `crates/stub-agent/src/main.rs` + +This is the core binary. It: +1. Reads `--mcp-config` to find the bridge command +2. Spawns the bridge as a child process +3. Connects as an MCP client via stdio pipes to the bridge +4. Processes the initial `--prompt` message +5. Loops: `wait_for_message` → extract token → `send_message` → repeat +6. Prints JSON status lines to its own stdout for the manager + +- [ ] **Step 1: Write the full stub binary** + +Replace `crates/stub-agent/src/main.rs` with: + +```rust +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::Duration; + +use anyhow::{Context, Result}; +use rmcp::model::CallToolRequestParams; +use rmcp::{ClientHandler, RoleClient, ServiceExt}; +use serde_json::Value; +use tokio::process::Command; + +static SEQ: AtomicU64 = AtomicU64::new(1); + +/// Minimal MCP client handler — we don't need to handle any server-initiated +/// requests, but `ClientHandler` requires an impl. +struct StubClientHandler; + +impl ClientHandler for StubClientHandler {} + +/// Extract an echo token from the message text. +/// +/// Patterns matched (case-insensitive): +/// reply with "TOKEN" +/// token: TOKEN +/// echo "TOKEN" +/// say "TOKEN" +/// +/// Falls back to `stub-reply-{seq}`. +fn extract_token(text: &str) -> String { + let re_patterns = [ + r#"(?i)reply\s+with\s+"([^"]+)""#, + r#"(?i)reply\s+with\s+(\S+)"#, + r#"(?i)token:\s*(\S+)"#, + r#"(?i)echo\s+"([^"]+)""#, + r#"(?i)say\s+"([^"]+)""#, + ]; + + for pattern in &re_patterns { + if let Ok(re) = regex::Regex::new(pattern) { + if let Some(caps) = re.captures(text) { + if let Some(m) = caps.get(1) { + return m.as_str().to_string(); + } + } + } + } + + let seq = SEQ.fetch_add(1, Ordering::Relaxed); + format!("stub-reply-{seq}") +} + +/// Parse the target from a `receive_message` / `wait_for_message` response. +/// +/// The bridge returns lines like: +/// [target=#channel msg=abc123 time=...] @sender: content +/// +/// We extract the target value and the message content. +fn parse_bridge_response(response: &str) -> Option<(String, String)> { + // Find the first [target=...] block + let target_start = response.find("target=")?; + let after_target = &response[target_start + 7..]; + let target_end = after_target.find(' ')?; + let target = after_target[..target_end].to_string(); + + // Find the message content after the ] @sender: prefix + let bracket_end = response.find(']')?; + let after_bracket = &response[bracket_end + 1..]; + // Skip " @sender: " — find the first ": " after @ + if let Some(colon_pos) = after_bracket.find(": ") { + let content = after_bracket[colon_pos + 2..].trim().to_string(); + Some((target, content)) + } else { + Some((target, String::new())) + } +} + +/// Emit a JSON status line to stdout for the manager's `parse_line()`. +fn emit(event_type: &str, fields: &[(&str, &str)]) { + let mut obj = serde_json::Map::new(); + obj.insert( + "type".to_string(), + serde_json::Value::String(event_type.to_string()), + ); + for (k, v) in fields { + obj.insert(k.to_string(), serde_json::Value::String(v.to_string())); + } + if let Ok(line) = serde_json::to_string(&serde_json::Value::Object(obj)) { + println!("{line}"); + } +} + +async fn call_tool( + peer: &rmcp::service::Peer, + name: &str, + args: Value, +) -> Result { + let params = CallToolRequestParams { + name: name.into(), + arguments: Some(args.as_object().cloned().unwrap_or_default()), + meta: None, + task: None, + }; + let result = peer + .call_tool(params) + .await + .context(format!("call_tool({name}) failed"))?; + + // Extract text from content blocks. + // The exact field access depends on rmcp version — adapt if the Content + // type changes. The goal: concatenate all text content into one string. + let text: String = result + .content + .iter() + .filter_map(|c| c.as_text().map(|t| t.text.as_str())) + .collect::>() + .join("\n"); + + Ok(text) +} + +async fn run(mcp_config_path: &str, initial_prompt: &str) -> Result<()> { + // Read MCP config to get bridge command + let config_text = std::fs::read_to_string(mcp_config_path) + .context("failed to read MCP config")?; + let config: Value = serde_json::from_str(&config_text)?; + let chat_server = config + .get("mcpServers") + .and_then(|s| s.get("chat")) + .context("missing mcpServers.chat in config")?; + let command = chat_server + .get("command") + .and_then(|v| v.as_str()) + .context("missing command in chat server config")?; + let args: Vec = chat_server + .get("args") + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(String::from)) + .collect() + }) + .unwrap_or_default(); + + // Spawn bridge process + let mut bridge_process = Command::new(command) + .args(&args) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::null()) + .spawn() + .context("failed to spawn bridge process")?; + + let bridge_stdout = bridge_process.stdout.take().context("no stdout")?; + let bridge_stdin = bridge_process.stdin.take().context("no stdin")?; + + // Connect as MCP client using (AsyncRead, AsyncWrite) tuple transport + let service = StubClientHandler + .serve((bridge_stdout, bridge_stdin)) + .await + .context("MCP client init failed")?; + let peer = service.peer().clone(); + + // Emit session init + let session_id = uuid::Uuid::new_v4().to_string(); + emit("session_init", &[("session_id", &session_id)]); + + let delay_ms: u64 = std::env::var("STUB_DELAY_MS") + .unwrap_or_else(|_| "200".to_string()) + .parse() + .unwrap_or(200); + + // Process initial prompt if present + if !initial_prompt.is_empty() { + // Extract target from prompt — the system prompt contains target info + // For initial startup, we just emit a text event + emit("text", &[("text", "Stub agent started")]); + } + + // Main loop: wait for message → extract token → send reply + loop { + emit( + "tool_call", + &[ + ("name", "wait_for_message"), + ("input", "{}"), + ], + ); + + let response = call_tool( + &peer, + "wait_for_message", + serde_json::json!({}), + ) + .await?; + + // "No new messages" responses mean we loop again + if response.contains("No new messages") { + continue; + } + + // Parse the bridge response to get target and content + if let Some((target, content)) = parse_bridge_response(&response) { + let token = extract_token(&content); + + // Emit that we received a message + emit( + "tool_call", + &[ + ("name", "send_message"), + ("input", &serde_json::json!({"target": target, "content": token}).to_string()), + ], + ); + + tokio::time::sleep(Duration::from_millis(delay_ms)).await; + + // Send the reply + let _send_result = call_tool( + &peer, + "send_message", + serde_json::json!({ + "target": target, + "content": token, + }), + ) + .await?; + + emit("text", &[("text", &token)]); + } + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let args: Vec = std::env::args().collect(); + + let mut mcp_config_path = String::new(); + let mut prompt = String::new(); + + let mut i = 1; + while i < args.len() { + match args[i].as_str() { + "--mcp-config" => { + i += 1; + mcp_config_path = args.get(i).cloned().unwrap_or_default(); + } + "--prompt" => { + i += 1; + prompt = args.get(i).cloned().unwrap_or_default(); + } + _ => {} + } + i += 1; + } + + if mcp_config_path.is_empty() { + anyhow::bail!("--mcp-config is required"); + } + + run(&mcp_config_path, &prompt).await +} +``` + +- [ ] **Step 2: Add uuid dependency to stub-agent Cargo.toml** + +Update `crates/stub-agent/Cargo.toml` dependencies: + +```toml +[dependencies] +rmcp = { version = "0.16", features = ["client", "transport-async-rw"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +tokio = { version = "1", features = ["full"] } +anyhow = "1" +regex = "1" +uuid = { version = "1", features = ["v4"] } +``` + +- [ ] **Step 3: Verify the workspace builds** + +Run: `cargo build` +Expected: Both `chorus` and `chorus-stub-agent` binaries compile. `target/debug/chorus-stub-agent` exists. + +- [ ] **Step 4: Commit** + +```bash +git add crates/stub-agent/ +git commit -m "feat(stub-agent): implement MCP client binary with echo logic" +``` + +--- + +### Task 6: Integration Smoke Test + +**Files:** +- No new files — manual verification that a stub agent can start, receive a message, and reply. + +- [ ] **Step 1: Build everything** + +```bash +cargo build +``` + +- [ ] **Step 2: Start the server with a temp data dir** + +```bash +./target/debug/chorus serve --port 3101 --data-dir /tmp/chorus-stub-test +``` + +- [ ] **Step 3: Create a stub agent via API** + +In a second terminal: + +```bash +curl -s -X POST http://localhost:3101/api/agents \ + -H 'Content-Type: application/json' \ + -d '{"name":"stub-a","display_name":"Stub A","runtime":"stub","model":"echo","envVars":[]}' | jq . +``` + +Expected: Agent created successfully. + +- [ ] **Step 4: Verify stub agent starts and goes active** + +```bash +# Poll until active +for i in $(seq 1 30); do + status=$(curl -s http://localhost:3101/api/agents | jq -r '.[] | select(.name=="stub-a") | .status') + echo "Attempt $i: status=$status" + [ "$status" = "active" ] && break + sleep 2 +done +``` + +Expected: Agent reaches `active` status. + +- [ ] **Step 5: Send a message and verify reply** + +```bash +# Send a message with an echo token +curl -s -X POST http://localhost:3101/internal/agent/$(whoami)/send \ + -H 'Content-Type: application/json' \ + -d '{"target":"dm:@stub-a","content":"reply with \"hello-stub\""}' + +# Wait and check history +sleep 3 +curl -s "http://localhost:3101/internal/agent/$(whoami)/history?channel=dm:@stub-a&limit=10" | jq '.messages[] | {senderName, content}' +``` + +Expected: Two messages — the human send and a stub reply containing `hello-stub`. + +- [ ] **Step 6: Clean up** + +Stop the server. `rm -rf /tmp/chorus-stub-test`. + +- [ ] **Step 7: Commit (no code changes — just verification)** + +No commit needed unless fixes were required. If fixes were made, commit them: + +```bash +git add -A +git commit -m "fix(stub-agent): fixes from integration smoke test" +``` + +--- + +### Task 7: QA Harness Updates + +**Files:** +- Modify: `qa/cases/playwright/helpers/api.ts` +- Modify: `qa/QA_PRESETS.md` + +- [ ] **Step 1: Add ensureStubTrio helper to api.ts** + +Add after the existing `ensureMixedRuntimeTrio` function in `qa/cases/playwright/helpers/api.ts`: + +```typescript +/** Create stub-a, stub-b, stub-c with runtime=stub for fast QA runs. */ +export async function ensureStubTrio(request: APIRequestContext): Promise { + const agents = await listAgents(request) + const names = new Set(agents.map((a) => a.name)) + if (!names.has('stub-a')) { + await createAgentApi(request, { name: 'stub-a', runtime: 'stub', model: 'echo' }) + } + if (!names.has('stub-b')) { + await createAgentApi(request, { name: 'stub-b', runtime: 'stub', model: 'echo' }) + } + if (!names.has('stub-c')) { + await createAgentApi(request, { name: 'stub-c', runtime: 'stub', model: 'echo' }) + } +} +``` + +- [ ] **Step 2: Add agentNames helper for mode-aware name selection** + +Add to `qa/cases/playwright/helpers/api.ts`: + +```typescript +/** Return agent names based on CHORUS_E2E_LLM mode. */ +export function agentNames(): { a: string; b: string; c: string } { + const mode = process.env.CHORUS_E2E_LLM ?? '1' + if (mode === 'stub') { + return { a: 'stub-a', b: 'stub-b', c: 'stub-c' } + } + return { a: 'bot-a', b: 'bot-b', c: 'bot-c' } +} +``` + +- [ ] **Step 3: Add stub-trio preset to QA_PRESETS.md** + +Append to `qa/QA_PRESETS.md`: + +```markdown + +### `stub-trio` + +Use for: +- fast QA runs that test the full UI + message pipeline without LLM latency +- CI smoke tests +- core regression runs where real LLM reasoning is not required + +Agents: +- `stub-a` — runtime `stub`, model `echo` +- `stub-b` — runtime `stub`, model `echo` +- `stub-c` — runtime `stub`, model `echo` + +Notes: +- Select with `CHORUS_E2E_LLM=stub`. +- Use `agentNames()` from the test helpers to get mode-aware agent names. +- Cases requiring real LLM reasoning (TMT-003, TMT-004, TMT-006, TMT-008, TMT-009) are automatically skipped in stub mode. +- The stub runtime is not visible in the create-agent modal — agents are created via API only. +``` + +- [ ] **Step 4: Commit** + +```bash +git add qa/cases/playwright/helpers/api.ts qa/QA_PRESETS.md +git commit -m "feat(qa): add stub-trio preset and ensureStubTrio helper" +``` + +--- + +### Task 8: Wire One Spec To Use Stub Mode (MSG-002) + +**Files:** +- Modify: `qa/cases/playwright/MSG-002.spec.ts` + +This task wires a single representative spec to demonstrate the stub integration pattern. MSG-002 is ideal because it requires a specific echo token (`dm-check-1`) — exercising the token extraction logic. + +- [ ] **Step 1: Read the current MSG-002 spec** + +Read `qa/cases/playwright/MSG-002.spec.ts` to understand the current structure before modifying it. + +- [ ] **Step 2: Update MSG-002 to support stub mode** + +The spec currently skips entirely when `CHORUS_E2E_LLM=0`. Update it to: +- Run with stub agents when `CHORUS_E2E_LLM=stub` +- Keep the existing skip for `CHORUS_E2E_LLM=0` +- Use `agentNames()` for mode-aware agent name selection + +At the top of the spec, replace the existing mode detection with: + +```typescript +import { agentNames, ensureStubTrio, ensureMixedRuntimeTrio } from './helpers/api' + +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const agents = agentNames() +``` + +In the `beforeAll` or setup section, add stub trio creation: + +```typescript +if (useStub) { + await ensureStubTrio(request) +} else { + await ensureMixedRuntimeTrio(request) +} +``` + +Replace hardcoded `bot-a` references with `agents.a`. + +Keep `test.skip(skipLLM, 'CHORUS_E2E_LLM=0')` — this still skips when mode is `0`. + +- [ ] **Step 3: Run MSG-002 in stub mode** + +```bash +cd qa/cases/playwright +CHORUS_E2E_LLM=stub npx playwright test MSG-002.spec.ts --reporter=list +``` + +Expected: Test runs using stub agents and passes (stub echoes back the requested token). + +- [ ] **Step 4: Verify MSG-002 still works in skip mode** + +```bash +CHORUS_E2E_LLM=0 npx playwright test MSG-002.spec.ts --reporter=list +``` + +Expected: Test is skipped as before. + +- [ ] **Step 5: Commit** + +```bash +git add qa/cases/playwright/MSG-002.spec.ts +git commit -m "feat(qa): wire MSG-002 to support CHORUS_E2E_LLM=stub mode" +``` + +--- + +### Task 9: Stdin Notification Support + +**Files:** +- Modify: `crates/stub-agent/src/main.rs` + +The stub binary needs to handle stdin notifications from the manager (wake-up messages sent when the agent is in `wait_for_message`). Real drivers get new message content written to their stdin. + +- [ ] **Step 1: Add stdin reading to the stub binary** + +The main loop already calls `wait_for_message` which blocks on the bridge. The manager writes stdin notifications while the agent is in `wait_for_message`. However, since the bridge's `wait_for_message` handles the actual polling, the stdin notification is a nudge — the bridge side already picks up new messages. + +For the stub, stdin notifications arrive as JSON lines like: +```json +{"type":"notification","content":"...message text..."} +``` + +Since `wait_for_message` on the bridge side already returns new messages, the stub doesn't need to interrupt the bridge call — the bridge already handles the timing. The stdin notification is a signal to Codex-like drivers that don't have their own polling. + +Since `StubDriver::supports_stdin_notification()` returns `true`, the manager will write notification lines to stdin. The stub should read and discard them to avoid stdin buffer filling up. Add a background stdin drain task: + +In `crates/stub-agent/src/main.rs`, add before the main loop in `run()`: + +```rust +// Drain stdin notifications in background to prevent buffer fill-up. +// The bridge's wait_for_message already handles message delivery. +tokio::spawn(async move { + use tokio::io::AsyncBufReadExt; + let stdin = tokio::io::stdin(); + let reader = tokio::io::BufReader::new(stdin); + let mut lines = reader.lines(); + while let Ok(Some(_line)) = lines.next_line().await { + // Notifications consumed — bridge handles delivery + } +}); +``` + +- [ ] **Step 2: Verify build** + +Run: `cargo build` +Expected: Compiles. + +- [ ] **Step 3: Commit** + +```bash +git add crates/stub-agent/src/main.rs +git commit -m "feat(stub-agent): drain stdin notifications in background" +``` + +--- + +### Summary + +| Task | What it does | Commit | +|------|-------------|--------| +| 1 | Cargo workspace + crate scaffold | `build: convert to Cargo workspace` | +| 2 | `AgentRuntime::Stub` enum + exhaustive matches | `feat(agent): add AgentRuntime::Stub` | +| 3 | `StubDriver` trait impl + registration | `feat(agent): add StubDriver` | +| 4 | Filter stub from `/runtimes` API | `feat(agent): hide stub from /runtimes` | +| 5 | Stub agent binary (MCP client + echo loop) | `feat(stub-agent): implement MCP client binary` | +| 6 | Integration smoke test (manual) | fix commit if needed | +| 7 | QA helpers + preset | `feat(qa): add stub-trio preset` | +| 8 | Wire MSG-002 as proof-of-concept | `feat(qa): wire MSG-002 for stub mode` | +| 9 | Stdin notification drain | `feat(stub-agent): drain stdin notifications` | diff --git a/docs/superpowers/specs/2026-04-03-stub-agent-driver-design.md b/docs/superpowers/specs/2026-04-03-stub-agent-driver-design.md new file mode 100644 index 00000000..3334ab55 --- /dev/null +++ b/docs/superpowers/specs/2026-04-03-stub-agent-driver-design.md @@ -0,0 +1,171 @@ +# Stub Agent Driver for QA Acceleration + +## Problem + +QA runs take a long time because 30+ cases that don't need real LLM reasoning still wait on real LLM latency, cost API credits, and flake on network issues. `CHORUS_E2E_LLM=0` skips these cases entirely — losing coverage of the full message pipeline, lifecycle, and UI rendering. + +## Solution + +A lightweight stub agent binary that implements the MCP bridge protocol with deterministic echo-based responses. It plugs into the existing driver architecture as `AgentRuntime::Stub`, registered server-side but hidden from the production UI. QA tests select it via `CHORUS_E2E_LLM=stub`. + +## Decisions + +| Question | Decision | Rationale | +|----------|----------|-----------| +| UI visibility | API-only (option C) | Playwright tests already create agents via `createAgentApi()`. No UI changes needed, no production confusion. | +| Response behavior | Token extraction + echo (option B) | Covers both "echo specific token" cases and "just need any reply" cases without external config. | +| Implementation | Separate Rust crate (option B) | `crates/stub-agent/` — clean boundary, doesn't add deps to the main binary. | +| Message delivery | Full MCP bridge | `send_message` is the only path that stores messages in the DB. No shortcut possible. | +| Lifecycle | Full loop (option A) | `wait_for_message` -> `send_message` loop keeps the agent alive. Needed for lifecycle, wake, and multi-message cases. | +| Test selection | Extend `CHORUS_E2E_LLM` to three values (option B) | Single knob: `0` (skip), `1` (real LLM), `stub` (stub driver). | + +## Architecture + +### Components + +#### 1. `crates/stub-agent/` — Standalone Rust binary + +This introduces a Cargo workspace for the first time. The existing `Cargo.toml` at the repo root must be converted to a workspace with two members: the main `chorus` crate (`.`) and the stub crate (`crates/stub-agent`). `cargo build` and `cargo test` from the repo root must build and test both crates. + +A small MCP client process that: + +- Reads the MCP tool list from the bridge on startup +- Runs a `wait_for_message` -> `send_message` loop +- Extracts echo tokens from incoming messages: + - Pattern: `reply with "X"`, `token: X`, or quoted strings after keywords like `echo`, `say` + - Fallback: `stub-reply-{seq}` when no token is found +- Emits JSON lines on stdout matching the format the manager expects (the specific format depends on what `StubDriver::parse_line` is written to handle — the stub binary and driver are co-designed) +- Configurable response delay via `STUB_DELAY_MS` env var (default 200ms) +- Exits cleanly on stdin close or SIGTERM + +The binary speaks MCP JSON-RPC over stdio — same transport as real drivers talking to the bridge. + +#### 2. `src/agent/drivers/stub.rs` — Driver trait implementation + +``` +StubDriver implements Driver: + runtime() -> AgentRuntime::Stub + supports_stdin_notification() -> true + mcp_tool_prefix() -> matches the stub binary's tool call prefix + spawn() -> launches chorus-stub-agent with bridge config + parse_line() -> parses the stub's stdout JSON format + encode_stdin_message() -> stdin notification for wake + build_system_prompt() -> minimal prompt (agent name + role) + detect_runtime_status() -> always Available (no external dependency) + list_models() -> vec!["echo"] +``` + +#### 3. `AgentRuntime::Stub` enum variant + +Added to `src/store/agents.rs`. Wired into: + +- `AgentRuntime::parse()` and `AgentRuntime::as_str()` +- `get_driver()` in `src/agent/manager.rs` +- `resumable_session_id` match in `start_agent` (`manager.rs:L88-97`) — the stub has no sessions to resume, so add `AgentRuntime::Stub => None` +- `all_runtime_drivers()` in `src/agent/drivers/mod.rs` — **include** `StubDriver` here so `list_models("stub")` works +- `handle_list_runtime_statuses` in `src/server/handlers/mod.rs` — **filter out** `stub` from the response so the UI never shows it. The `/runtimes` endpoint currently returns everything from `all_runtime_drivers()`. Add a `.filter(|s| s.runtime != "stub")` before returning. + +**Server-side access control:** The `POST /api/agents` handler accepts any runtime string that passes `AgentRuntime::parse()`. Adding `Stub` to the enum means any API client can create stub agents — not just Playwright. This is acceptable: the stub runtime is harmless (it's a local echo process), and gating it behind an env var adds complexity for no security benefit. If this changes, add `CHORUS_STUB_ENABLED=1` gating in the create handler. + +Playwright tests create stub agents via `createAgentApi({ runtime: 'stub', model: 'echo' })`. + +#### 4. Frontend: no changes + +The stub runtime is API-only. No UI presence. + +### MCP Bridge Interaction + +The stub agent follows the same protocol as real drivers: + +``` +Bridge spawns alongside stub process + | + v +Stub reads MCP tool list (JSON-RPC initialize) + | + v +Loop: + call wait_for_message() -> receive incoming message + extract token from message content + sleep STUB_DELAY_MS + call send_message(target, content) -> deliver reply + emit JSON status line on stdout +``` + +### Protocol Coupling + +The stub binary and MCP bridge share the repo. When the bridge protocol changes: + +- The stub binary must be updated in the same PR +- A basic integration test (`cargo test`) should verify the stub can complete one send/receive cycle against the bridge + +## QA Integration + +### `CHORUS_E2E_LLM` Three-Way Switch + +| Value | Behavior | +|-------|----------| +| `1` (default) | Real LLM drivers. Full test suite. | +| `0` | Skip all agent-reply cases (existing behavior). | +| `stub` | Create agents with `runtime: stub`. Run all stub-eligible cases. Skip real-LLM-only cases. | + +### Playwright Helpers + +New helper: `ensureStubTrio(request)` — creates `stub-a`, `stub-b`, `stub-c` with `runtime: 'stub', model: 'echo'`. + +Stub agents use distinct names (`stub-a/b/c`) rather than `bot-a/b/c` to avoid collisions with `ensureMixedRuntimeTrio`. This means specs that reference `bot-a` by name must parameterize the agent name based on the mode: + +```ts +const agentA = useStub ? 'stub-a' : 'bot-a' +``` + +Spec-level wiring: + +```ts +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' // existing — skip entirely +const useStub = mode === 'stub' // new — use stub agents +const skipRealLLM = skipLLM || useStub // for cases needing real reasoning +``` + +Cases needing real LLM reasoning use `test.skip(skipRealLLM, ...)`. + +### `stub-trio` QA Preset + +Added to `QA_PRESETS.md`: + +``` +Use for: + - fast QA runs that test the full UI + message pipeline without LLM latency + - CI smoke tests + +Agents: + stub-a — runtime stub, model echo + stub-b — runtime stub, model echo + stub-c — runtime stub, model echo +``` + +## Case Coverage Matrix + +| Category | Count | Stub-eligible | Examples | +|----------|-------|--------------|----------| +| No agent reply needed | 6 | N/A (already pass) | ENV-001, NAV-001/002, MSG-005, TSK-001/002 | +| Any reply sufficient | 10 | Yes | MSG-001, MSG-003, MSG-006, MSG-010, CHN-001, HIS-001, REC-002, ATT-001, CHN-003, MSG-012 | +| Echo token required | 3 | Yes | MSG-002, MSG-004, MSG-008 | +| Lifecycle transitions | 7 | Yes | PRF-001, LFC-001/002, REC-001, ACT-001/002, MSG-009 | +| Creation/config only | 7 | Yes (no reply needed) | AGT-001/002/003/004, CHN-002/004/005 | +| Real LLM required | 5 | No | TMT-003/004/006/008/009 | +| Error/edge cases | 4 | Partial | MSG-007, MSG-011, ERR-001, WRK-001 | + +Approximately 30 cases accelerated from minutes-per-case to sub-second. + +## Out of Scope + +The stub does NOT cover: + +- **Team collaboration models** — leader delegation, swarm `READY:` protocol +- **Multi-team context isolation** — agent reasoning about its roles +- **Content-dependent cases** — any case where response content must demonstrate understanding +- **Workspace file creation** — stub doesn't write files (WRK-001 needs a real or mock workspace) + +These stay on `CHORUS_E2E_LLM=1` runs. diff --git a/qa/QA_PRESETS.md b/qa/QA_PRESETS.md index fe569e38..1b85ffc5 100644 --- a/qa/QA_PRESETS.md +++ b/qa/QA_PRESETS.md @@ -94,3 +94,24 @@ Current UI matrix: Notes: - Use stable names such as `matrix-claude-sonnet` and `matrix-codex-gpt-5-4-mini`. - Verify the runtime and model badges after creation for every pair. + +### `stub-trio` + +Use for: +- fast QA runs that test the full UI + message pipeline without LLM latency +- CI smoke tests +- core regression runs where real LLM reasoning is not required + +Agents: +- `stub-a` — runtime `stub`, model `echo` +- `stub-b` — runtime `stub`, model `echo` +- `stub-c` — runtime `stub`, model `echo` + +Notes: +- Select with `CHORUS_E2E_LLM=stub`. +- Use `agentNames()` from the test helpers to get mode-aware agent names. +- From the repo root, `cargo build` also builds `chorus-stub-agent` beside `chorus` (workspace default members); the server needs that binary to spawn stub runtimes. +- **Fully skipped** in stub mode (entire spec `test.skip` for real LLM): **TMT-004, TMT-008, TMT-009**. +- **Still run** in stub mode but **omit or shorten** LLM-only steps (e.g. team chat wakeups, swarm narrative checks): **TMT-003, TMT-005, TMT-006, TMT-007** — see each spec’s `skipRealLLM` / `useStub` branches. +- Playwright uses a **600s** default test timeout when `CHORUS_E2E_LLM=stub` (`playwright.config.ts`); use `CHORUS_WORKERS=1` for easier debugging. +- The stub runtime is not visible in the create-agent modal — agents are created via API only. diff --git a/qa/README.md b/qa/README.md index e3c22dfc..dc1cb1a7 100644 --- a/qa/README.md +++ b/qa/README.md @@ -129,7 +129,8 @@ Run script-backed cases with this workflow: 1. Build UI and server from the repo root: - `cd ui && npm run build && cd .. && cargo build` -2. Start Chorus with a fresh temp data dir when possible: + - `cargo build` builds **`chorus`** and **`chorus-stub-agent`** (required for `CHORUS_E2E_LLM=stub`). +2. **Server process:** By default, Playwright **does not** use a manually started server — each worker’s fixture spawns `chorus` on `http://localhost:3200` + worker index with an isolated temp data dir (`qa/cases/playwright/helpers/fixtures.ts`). To drive a server you started yourself, set **`CHORUS_BASE_URL`** (for example `http://localhost:3101`) and run: - `./target/debug/chorus serve --port 3101 --data-dir /tmp/chorus-qa-playwright` 3. Install dependencies and browsers, then run tests: @@ -143,9 +144,13 @@ npx playwright test Useful environment and runner options: - `CHORUS_BASE_URL` - - defaults to `http://localhost:3101` + - when unset, tests use the per-worker spawned server (see step 2); when set, all workers hit this URL instead - `CHORUS_E2E_LLM=0` - skips tests that wait on real agent replies +- `CHORUS_E2E_LLM=stub` + - uses the **`stub-trio`** preset (`stub-a` / `stub-b` / `stub-c`) and the stub driver for fast, deterministic agent traffic; see [`QA_PRESETS.md`](./QA_PRESETS.md) +- `CHORUS_WORKERS` + - parallel worker count (default `4` in `playwright.config.ts`); use `1` for serial runs or easier logs - recommended live reporter: - `npx playwright test --reporter=list` - recommended interactive repro modes: diff --git a/qa/cases/playwright/ACT-001.spec.ts b/qa/cases/playwright/ACT-001.spec.ts index 7443dd58..264b1057 100644 --- a/qa/cases/playwright/ACT-001.spec.ts +++ b/qa/cases/playwright/ACT-001.spec.ts @@ -1,6 +1,11 @@ import { test, expect } from './helpers/fixtures' import { gotoApp, reloadApp } from './helpers/ui' -import { ensureMixedRuntimeTrio, getWhoami, sendAsUser } from './helpers/api' +import { agentNames, ensureMixedRuntimeTrio, ensureStubTrio, getWhoami, sendAsUser } from './helpers/api' + +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const agents = agentNames() /** * Catalog: `qa/cases/agents.md` — ACT-001 Activity Timeline Completeness And Readability @@ -24,17 +29,21 @@ import { ensureMixedRuntimeTrio, getWhoami, sendAsUser } from './helpers/api' */ test.describe('ACT-001', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) - if (process.env.CHORUS_E2E_LLM === '0') return + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } + if (skipLLM) return const { username } = await getWhoami(request) - await sendAsUser(request, username, 'dm:@bot-a', `ACT-001 seed ping ${Date.now()}`).catch(() => {}) + await sendAsUser(request, username, `dm:@${agents.a}`, `ACT-001 seed ping ${Date.now()}`).catch(() => {}) }) test('Activity Timeline Completeness And Readability @case ACT-001', async ({ page }) => { await gotoApp(page) - await test.step('Step 1: Open bot-a Activity tab', async () => { - await page.locator('.sidebar-item').filter({ hasText: 'bot-a' }).first().click() + await test.step(`Step 1: Open ${agents.a} Activity tab`, async () => { + await page.locator('.sidebar-item').filter({ hasText: agents.a }).first().click() await page.getByRole('button', { name: 'Activity' }).click() }) @@ -56,7 +65,7 @@ test.describe('ACT-001', () => { await test.step('Step 8: Refresh preserves panel', async () => { await reloadApp(page) - await page.locator('.sidebar-item').filter({ hasText: 'bot-a' }).first().click() + await page.locator('.sidebar-item').filter({ hasText: agents.a }).first().click() await page.getByRole('button', { name: 'Activity' }).click() await expect(page.locator('.activity-panel')).toBeVisible() }) diff --git a/qa/cases/playwright/ACT-002.spec.ts b/qa/cases/playwright/ACT-002.spec.ts index 05091ce6..588b6e29 100644 --- a/qa/cases/playwright/ACT-002.spec.ts +++ b/qa/cases/playwright/ACT-002.spec.ts @@ -1,20 +1,29 @@ import { test, expect } from './helpers/fixtures' import { + agentNames, ensureMixedRuntimeTrio, + ensureStubTrio, getAgentDetail, getWhoami, historyForUser, } from './helpers/api' import { openAgentChat, openAgentTab, sendChatMessage , gotoApp } from './helpers/ui' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const agents = agentNames() /** * Catalog: `qa/cases/agents.md` — ACT-002 Activity Timeline Ordering During Wake And Recovery */ test.describe('ACT-002', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } }) test('Activity Timeline Ordering During Wake And Recovery @case ACT-002', async ({ page, request }) => { @@ -23,14 +32,14 @@ test.describe('ACT-002', () => { const token = `act-wake-${Date.now()}` await gotoApp(page) - await test.step('Precondition: stop bot-a, then wake it via DM', async () => { - await request.post('/api/agents/bot-a/stop') - await openAgentChat(page, 'bot-a') - await sendChatMessage(page, `Reply with exact token ${token}`) + await test.step(`Precondition: stop ${agents.a}, then wake it via DM`, async () => { + await request.post(`/api/agents/${agents.a}/stop`) + await openAgentChat(page, agents.a) + await sendChatMessage(page, `reply with "${token}"`) const deadline = Date.now() + 120_000 let sawReply = false while (Date.now() < deadline) { - const history = await historyForUser(request, username, 'dm:@bot-a', 40) + const history = await historyForUser(request, username, `dm:@${agents.a}`, 40) sawReply = history.some((m) => m.senderType === 'agent' && (m.content ?? '').includes(token)) if (sawReply) break await new Promise((r) => setTimeout(r, 4000)) @@ -39,11 +48,11 @@ test.describe('ACT-002', () => { }) await test.step('Steps 1–7: Activity segment shows coherent wake-up ordering', async () => { - await openAgentTab(page, 'bot-a', 'Activity') - await expect(page.locator('.activity-item-message-received')).toContainText(token) - await expect(page.locator('.activity-item-message-sent')).toContainText(token) + await openAgentTab(page, agents.a, 'Activity') + await expect(page.locator('.activity-item-message-received').filter({ hasText: token }).first()).toBeVisible() + await expect(page.locator('.activity-item-message-sent').filter({ hasText: token }).first()).toBeVisible() await expect(page.locator('.activity-item-status').first()).toBeVisible() - const detail = await getAgentDetail(request, 'bot-a') + const detail = await getAgentDetail(request, agents.a) expect(detail.agent.status).toBe('active') }) }) diff --git a/qa/cases/playwright/AGT-004.spec.ts b/qa/cases/playwright/AGT-004.spec.ts index 2f4add53..97f96247 100644 --- a/qa/cases/playwright/AGT-004.spec.ts +++ b/qa/cases/playwright/AGT-004.spec.ts @@ -3,6 +3,7 @@ import fs from 'node:fs/promises' import path from 'node:path' import { ensureMixedRuntimeTrio, + ensureStubTrio, getWorkspaceApi, restartAgentApi, deleteAgentApi, @@ -14,25 +15,33 @@ import { } from './helpers/api' import { openAgentTab, clickSidebarChannel , gotoApp , reloadApp } from './helpers/ui' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const useStub = mode === 'stub' + /** * Catalog: `qa/cases/agents.md` — AGT-004 Agent Control Center Edit, Restart, Delete, And Deleted History */ test.describe('AGT-004', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } }) test('Agent Control Center Edit, Restart, Delete, And Deleted History @case AGT-004', async ({ page, request, }) => { + test.setTimeout(360_000) const name = `qa-profile-agent-${Date.now()}` const { username } = await getWhoami(request) await createAgentApi(request, { name, - runtime: 'codex', - model: 'gpt-5.4-mini', - reasoningEffort: 'medium', + runtime: useStub ? 'stub' : 'codex', + model: useStub ? 'echo' : 'gpt-5.4-mini', + reasoningEffort: useStub ? null : 'medium', description: 'initial role', }) await gotoApp(page) @@ -42,15 +51,19 @@ test.describe('AGT-004', () => { await page.getByRole('button', { name: 'Edit' }).click() const dialog = page.locator('[role="dialog"]') await dialog.locator('textarea').fill('updated role text') - await dialog.locator('[role="combobox"][aria-label="Reasoning"]').click() - await page.locator('[role="option"]').filter({ hasText: /^High$/ }).click() + if (!useStub) { + await dialog.locator('[role="combobox"][aria-label="Reasoning"]').click() + await page.locator('[role="option"]').filter({ hasText: /^High$/ }).click() + } await dialog.locator('button:has-text("Add variable")').click() const row = dialog.locator('.env-var-editor-row').last() await row.locator('input').nth(0).fill('QA_FLAG') await row.locator('input').nth(1).fill('on') await dialog.locator('button:has-text("Save")').click() - await expect(page.locator('.profile-role-text')).toContainText('updated role text') - await expect(page.locator('.profile-config-grid')).toContainText('high') + await expect(page.locator('.profile-role-text').first()).toContainText('updated role text') + if (!useStub) { + await expect(page.locator('.profile-config-grid')).toContainText('high') + } await expect(page.locator('.env-var-row')).toContainText('QA_FLAG') const detail = await getAgentDetail(request, name) expect(detail.envVars.some((envVar) => envVar.key === 'QA_FLAG' && envVar.value === 'on')).toBe(true) @@ -70,11 +83,33 @@ test.describe('AGT-004', () => { await test.step('Steps 8–12: Delete with keep-workspace preserves deleted history styling', async () => { await clickSidebarChannel(page, 'all') await sendAsUser(request, username, '#all', `@${name} reply once before delete`) + const replyDeadline = Date.now() + (useStub ? 90_000 : 180_000) + let hist = await historyForUser(request, username, '#all', 80) + while ( + Date.now() < replyDeadline && + !hist.some((e) => e.senderName === name && e.senderType === 'agent') + ) { + await new Promise((r) => setTimeout(r, useStub ? 1_000 : 2_000)) + hist = await historyForUser(request, username, '#all', 80) + } + expect(hist.some((e) => e.senderName === name && e.senderType === 'agent')).toBe(true) await reloadApp(page) await deleteAgentApi(request, name, 'preserve_workspace') - const oldHistory = await historyForUser(request, username, '#all', 50) + let oldHistory = await historyForUser(request, username, '#all', 50) + const delDeadline = Date.now() + 120_000 + while ( + Date.now() < delDeadline && + !oldHistory.some((entry) => entry.senderName === name && entry.senderDeleted) + ) { + await new Promise((r) => setTimeout(r, 500)) + oldHistory = await historyForUser(request, username, '#all', 80) + } expect(oldHistory.some((entry) => entry.senderName === name && entry.senderDeleted)).toBe(true) - await createAgentApi(request, { name, runtime: 'claude', model: 'sonnet' }) + await createAgentApi(request, { + name, + runtime: useStub ? 'stub' : 'claude', + model: useStub ? 'echo' : 'sonnet', + }) const postRecreate = await historyForUser(request, username, '#all', 50) expect(postRecreate.some((entry) => entry.senderName === name && entry.senderDeleted)).toBe(true) }) diff --git a/qa/cases/playwright/CHN-001.spec.ts b/qa/cases/playwright/CHN-001.spec.ts index 9b60c17b..06a1c813 100644 --- a/qa/cases/playwright/CHN-001.spec.ts +++ b/qa/cases/playwright/CHN-001.spec.ts @@ -1,6 +1,7 @@ import { test, expect } from './helpers/fixtures' -import { ensureMixedRuntimeTrio, historyForUser } from './helpers/api' +import { agentNames, ensureMixedRuntimeTrio, ensureStubTrio, historyForUser } from './helpers/api' import { + clickComboboxOption, createUserChannelViaUi, clickSidebarChannel, openMembersPanel, @@ -8,7 +9,10 @@ import { gotoApp, } from './helpers/ui' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const agents = agentNames() /** * Catalog: `qa/cases/channels.md` — CHN-001 Channel Create And Default Membership @@ -33,7 +37,11 @@ const skipLLM = process.env.CHORUS_E2E_LLM === '0' */ test.describe('CHN-001', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } }) test('Channel Create And Default Membership @case CHN-001', async ({ page, request }) => { @@ -60,11 +68,11 @@ test.describe('CHN-001', () => { await expect(page.locator('.members-panel-title').first()).toHaveText('1') }) - await test.step('Step 5: Invite bot-a', async () => { + await test.step(`Step 5: Invite ${agents.a}`, async () => { await page.locator('.members-panel-actions button:has-text("Invite")').click() const inviteDialog = page.locator('[role="dialog"]') await inviteDialog.locator('[role="combobox"][aria-label="Member"]').click() - await page.locator('[role="option"]').filter({ hasText: 'bot-a' }).first().click() + await clickComboboxOption(page, agents.a) await inviteDialog.locator('button:has-text("Invite Member")').click() await expect(inviteDialog).toBeHidden() await expect(page.locator('.members-panel-title').first()).toHaveText('2') @@ -72,9 +80,11 @@ test.describe('CHN-001', () => { const token = `CHN-OPS-${Date.now()}` - await test.step('Step 6: Human message asking bot-a to reply', async () => { + await test.step(`Step 6: Human message asking ${agents.a} to reply`, async () => { await page.locator('.members-panel-close').click().catch(() => {}) - await sendChatMessage(page, `bot-a reply with token ${token}`) + // Stub token extraction needs quoted form; real LLM still understands this phrasing. + const ping = useStub ? `${agents.a} reply with "${token}"` : `${agents.a} reply with token ${token}` + await sendChatMessage(page, ping) }) await test.step('Step 7: Invited agent reply in channel (hybrid: member history)', async () => { @@ -85,12 +95,12 @@ test.describe('CHN-001', () => { const deadline = Date.now() + 120_000 let ok = false while (Date.now() < deadline) { - const msgs = await historyForUser(request, 'bot-a', `#${slug}`, 30) + const msgs = await historyForUser(request, agents.a, `#${slug}`, 30) ok = msgs.some((m) => m.senderType === 'agent' && (m.content ?? '').includes(token)) if (ok) break await new Promise((r) => setTimeout(r, 4000)) } - expect(ok, 'bot-a should reply in channel').toBe(true) + expect(ok, `${agents.a} should reply in channel`).toBe(true) }) await test.step('Step 8: Navigate away and back — count + history persist', async () => { diff --git a/qa/cases/playwright/CHN-002.spec.ts b/qa/cases/playwright/CHN-002.spec.ts index 47872efa..4b858e92 100644 --- a/qa/cases/playwright/CHN-002.spec.ts +++ b/qa/cases/playwright/CHN-002.spec.ts @@ -15,6 +15,7 @@ test.describe('CHN-002', () => { page, request, }) => { + test.setTimeout(300_000) const createdIds: string[] = [] const rawName = `#QaMix-${Date.now()}` const normalizedName = rawName.replace(/^#/, '').toLowerCase() diff --git a/qa/cases/playwright/CHN-003.spec.ts b/qa/cases/playwright/CHN-003.spec.ts index 8c24e835..d1397994 100644 --- a/qa/cases/playwright/CHN-003.spec.ts +++ b/qa/cases/playwright/CHN-003.spec.ts @@ -1,24 +1,35 @@ import { test, expect } from './helpers/fixtures' import { + agentNames, createChannelApi, ensureMixedRuntimeTrio, + ensureStubTrio, getChannelMembersApi, getWhoami, } from './helpers/api' -import { clickSidebarChannel, openMembersPanel , gotoApp , reloadApp } from './helpers/ui' +import { clickComboboxOption, clickSidebarChannel, openMembersPanel , gotoApp , reloadApp } from './helpers/ui' + +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const useStub = mode === 'stub' +const agents = agentNames() /** * Catalog: `qa/cases/channels.md` — CHN-003 Channel Invite Operations And `#all` Guardrails */ test.describe('CHN-003', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } }) test('Channel Invite Operations And #all Guardrails @case CHN-003', async ({ page, request, }) => { + test.setTimeout(300_000) const { username } = await getWhoami(request) const channel = await createChannelApi(request, { name: `qa-members-${Date.now()}`, @@ -38,9 +49,9 @@ test.describe('CHN-003', () => { const members = await getChannelMembersApi(request, allId) const names = members.members.map((m) => m.memberName) expect(names).toContain(username) - expect(names).toContain('bot-a') - expect(names).toContain('bot-b') - expect(names).toContain('bot-c') + expect(names).toContain(agents.a) + expect(names).toContain(agents.b) + expect(names).toContain(agents.c) }) await test.step('Steps 5–8: User channel invite updates and persists', async () => { @@ -49,11 +60,11 @@ test.describe('CHN-003', () => { await page.locator('.members-panel-actions button:has-text("Invite")').click() const dialog = page.locator('[role="dialog"]') await dialog.locator('[role="combobox"][aria-label="Member"]').click() - await page.locator('[role="option"]').filter({ hasText: 'bot-a' }).first().click() + await clickComboboxOption(page, agents.a) await dialog.locator('button:has-text("Invite Member")').click() await expect(page.locator('.members-panel-title')).toHaveText('2') const after = await getChannelMembersApi(request, channel.id) - expect(after.members.map((m) => m.memberName)).toContain('bot-a') + expect(after.members.map((m) => m.memberName)).toContain(agents.a) await reloadApp(page) await clickSidebarChannel(page, channel.name) await openMembersPanel(page) diff --git a/qa/cases/playwright/ERR-001.spec.ts b/qa/cases/playwright/ERR-001.spec.ts index 8a700027..4e941092 100644 --- a/qa/cases/playwright/ERR-001.spec.ts +++ b/qa/cases/playwright/ERR-001.spec.ts @@ -12,7 +12,11 @@ test.describe('ERR-001', () => { await clickSidebarChannel(page, 'all') await test.step('Steps 1–3: Trigger upload failure and verify visible error', async () => { - await page.route('**/internal/agent/*/upload', async (route) => { + await page.route('**/api/attachments', async (route) => { + if (route.request().method() !== 'POST') { + await route.continue() + return + } await route.fulfill({ status: 500, contentType: 'application/json', @@ -25,12 +29,12 @@ test.describe('ERR-001', () => { ]) await chooser.setFiles(fixture) await page.locator('.message-input-send').click() - await expect(page.locator('.message-input-area')).toContainText('forced upload failure') + await expect(page.getByText('Message failed to send')).toBeVisible({ timeout: 15_000 }) await expect(page.locator('.file-chip')).toContainText('qa-attachment.txt') }) await test.step('Steps 4–5: Clear failed state and verify normal send still works', async () => { - await page.unroute('**/internal/agent/*/upload') + await page.unroute('**/api/attachments') await page.locator('.file-chip button').click() await page.locator('.message-input-textarea').fill('ERR-001 recovery message') await page.locator('.message-input-send').click() diff --git a/qa/cases/playwright/KIMI-001.spec.ts b/qa/cases/playwright/KIMI-001.spec.ts index 84c616f2..6fda4ce1 100644 --- a/qa/cases/playwright/KIMI-001.spec.ts +++ b/qa/cases/playwright/KIMI-001.spec.ts @@ -8,7 +8,10 @@ import { } from './helpers/api' import { openAgentChat, openAgentTab, sendChatMessage , gotoApp } from './helpers/ui' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const skipRealLLM = skipLLM || useStub /** * Runtime verification: direct DM to a Kimi-backed agent with an exact-token assertion. @@ -23,7 +26,7 @@ test.describe('KIMI-001', () => { }) test('Kimi Agent Direct Reply', async ({ page, request }) => { - test.skip(skipLLM, 'CHORUS_E2E_LLM=0') + test.skip(skipRealLLM, 'requires real LLM') test.setTimeout(300_000) const { username } = await getWhoami(request) diff --git a/qa/cases/playwright/MSG-001.spec.ts b/qa/cases/playwright/MSG-001.spec.ts index 6e455d3e..874d4aa1 100644 --- a/qa/cases/playwright/MSG-001.spec.ts +++ b/qa/cases/playwright/MSG-001.spec.ts @@ -1,8 +1,18 @@ import { test, expect } from './helpers/fixtures' -import { ensureMixedRuntimeTrio, getWhoami, historyForUser } from './helpers/api' +import { + agentNames, + ensureMixedRuntimeTrio, + ensureStubTrio, + getWhoami, + historyForUser, + sendAsUser, +} from './helpers/api' import { clickSidebarChannel, sendChatMessage , gotoApp } from './helpers/ui' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const agents = agentNames() /** * Catalog: `qa/cases/messaging.md` — MSG-001 Multi-Agent Channel Fan-Out @@ -26,12 +36,16 @@ const skipLLM = process.env.CHORUS_E2E_LLM === '0' */ test.describe('MSG-001', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } }) test('Multi-Agent Channel Fan-Out @case MSG-001', async ({ page, request }) => { test.skip(skipLLM, 'CHORUS_E2E_LLM=0') - test.setTimeout(300_000) + test.setTimeout(useStub ? 600_000 : 300_000) const { username } = await getWhoami(request) const mark = `msg1-${Date.now()}` @@ -40,10 +54,18 @@ test.describe('MSG-001', () => { await test.step('Step 1: Send prompt in #all asking all agents to reply', async () => { await clickSidebarChannel(page, 'all') - await sendChatMessage( - page, - `MSG-001 ${mark}: bot-a reply OK-a, bot-b OK-b, bot-c OK-c` - ) + if (useStub) { + // Stub token extraction is reliable with `token:` (avoids #all composer / multi-line quirks). + await sendAsUser(request, username, '#all', `MSG-001 ${mark} anchor`) + await sendAsUser(request, username, '#all', `${agents.a} token:OK-a`) + await sendAsUser(request, username, '#all', `${agents.b} token:OK-b`) + await sendAsUser(request, username, '#all', `${agents.c} token:OK-c`) + } else { + await sendChatMessage( + page, + `MSG-001 ${mark}: ${agents.a} reply OK-a, ${agents.b} OK-b, ${agents.c} OK-c` + ) + } }) await test.step('Steps 2–6: Wait and verify history (human once; three agents; senders; order)', async () => { @@ -51,20 +73,40 @@ test.describe('MSG-001', () => { let msgs: Awaited> = [] while (Date.now() < deadline) { msgs = await historyForUser(request, username, '#all', 120) - const agents = msgs.filter((m) => m.senderType === 'agent') - if (agents.length >= 3) break - await new Promise((r) => setTimeout(r, 5000)) + const agentMsgs = msgs.filter( + (m) => (m.senderType ?? '').toLowerCase() === 'agent' + ) + const senders = new Set(agentMsgs.map((m) => m.senderName)) + const haveThreeBodies = agentMsgs.length >= 3 + const blob = agentMsgs.map((m) => m.content ?? '').join('\n') + const stubFanOutReady = + senders.size >= 3 && + blob.includes('OK-a') && + blob.includes('OK-b') && + blob.includes('OK-c') + if (useStub ? stubFanOutReady : haveThreeBodies) break + await new Promise((r) => setTimeout(r, useStub ? 2_000 : 5_000)) } const humanCount = msgs.filter((m) => (m.content ?? '').includes(mark) && m.senderType !== 'agent').length expect(humanCount).toBeLessThanOrEqual(1) - const agents = msgs.filter((m) => m.senderType === 'agent') - expect(agents.length).toBeGreaterThanOrEqual(3) - const agentNames = new Set(agents.map((agent) => agent.senderName)) - expect(agentNames.has('bot-a')).toBe(true) - expect(agentNames.has('bot-b')).toBe(true) - expect(agentNames.has('bot-c')).toBe(true) + const agentMsgs = msgs.filter( + (m) => (m.senderType ?? '').toLowerCase() === 'agent' + ) + expect(agentMsgs.length).toBeGreaterThanOrEqual(3) + const senderNames = new Set(agentMsgs.map((m) => m.senderName)) + const blob = agentMsgs.map((m) => m.content ?? '').join('\n') + if (useStub) { + expect(senderNames.size).toBeGreaterThanOrEqual(3) + expect(blob).toContain('OK-a') + expect(blob).toContain('OK-b') + expect(blob).toContain('OK-c') + } else { + expect(senderNames.has(agents.a)).toBe(true) + expect(senderNames.has(agents.b)).toBe(true) + expect(senderNames.has(agents.c)).toBe(true) + } }) }) }) diff --git a/qa/cases/playwright/MSG-002.spec.ts b/qa/cases/playwright/MSG-002.spec.ts index a4982149..68b22d60 100644 --- a/qa/cases/playwright/MSG-002.spec.ts +++ b/qa/cases/playwright/MSG-002.spec.ts @@ -1,18 +1,21 @@ import { test, expect } from './helpers/fixtures' -import { ensureMixedRuntimeTrio, getWhoami, historyForUser } from './helpers/api' +import { agentNames, ensureMixedRuntimeTrio, ensureStubTrio, getWhoami, historyForUser } from './helpers/api' import { openAgentChat, openThreadFromMessage, sendChatMessage , gotoApp , reloadApp } from './helpers/ui' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const agents = agentNames() /** * Catalog: `qa/cases/messaging.md` — MSG-002 Direct Message Round-Trip * * Preconditions: - * - at least one test agent exists (`bot-a`) + * - at least one test agent exists (`agents.a`) * - agent reachable, not mid-turn * * Steps: - * 1. Open a DM with `bot-a`. + * 1. Open a DM with `agents.a`. * 2. Send a human DM that asks for an exact short token. * 3. Verify the human DM appears once in the DM timeline immediately after send. * 4. Wait for the agent reply. @@ -27,7 +30,11 @@ const skipLLM = process.env.CHORUS_E2E_LLM === '0' */ test.describe('MSG-002', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } }) test('Direct Message Round-Trip @case MSG-002', async ({ page, request }) => { @@ -41,8 +48,8 @@ test.describe('MSG-002', () => { await gotoApp(page) - await test.step('Step 1: Open DM with bot-a', async () => { - await openAgentChat(page, 'bot-a') + await test.step(`Step 1: Open DM with ${agents.a}`, async () => { + await openAgentChat(page, agents.a) await expect(page.locator('.message-input-textarea')).toBeVisible() }) @@ -55,7 +62,7 @@ test.describe('MSG-002', () => { const deadline = Date.now() + 120_000 let ok = false while (Date.now() < deadline) { - const msgs = await historyForUser(request, username, 'dm:@bot-a', 40) + const msgs = await historyForUser(request, username, `dm:@${agents.a}`, 40) if (msgs.some((m) => m.senderType === 'agent' && (m.content ?? '').includes(token))) { replyMode = 'top-level' ok = true @@ -68,7 +75,7 @@ test.describe('MSG-002', () => { (m.replyCount ?? 0) > 0 ) if (parent) { - const threadMsgs = await historyForUser(request, username, `dm:@bot-a:${parent.id}`, 40) + const threadMsgs = await historyForUser(request, username, `dm:@${agents.a}:${parent.id}`, 40) if (threadMsgs.some((m) => m.senderType === 'agent' && (m.content ?? '').includes(token))) { replyMode = 'thread' ok = true @@ -83,7 +90,7 @@ test.describe('MSG-002', () => { await test.step('Step 7–8: Refresh and re-open DM — history persists', async () => { await reloadApp(page) - await openAgentChat(page, 'bot-a') + await openAgentChat(page, agents.a) if (replyMode === 'top-level') { await expect(page.getByText(token).first()).toBeVisible({ timeout: 15_000 }) } else { @@ -94,7 +101,7 @@ test.describe('MSG-002', () => { await test.step('Step 9: Switch target and return to DM', async () => { await page.locator('.sidebar-item-text:text("all")').first().click() - await openAgentChat(page, 'bot-a') + await openAgentChat(page, agents.a) if (replyMode === 'top-level') { await expect(page.getByText(token).first()).toBeVisible({ timeout: 15_000 }) } else { diff --git a/qa/cases/playwright/MSG-003.spec.ts b/qa/cases/playwright/MSG-003.spec.ts index b9e4cd25..82f534ac 100644 --- a/qa/cases/playwright/MSG-003.spec.ts +++ b/qa/cases/playwright/MSG-003.spec.ts @@ -1,8 +1,11 @@ import { test, expect } from './helpers/fixtures' -import { ensureMixedRuntimeTrio, getWhoami, sendAsUser } from './helpers/api' +import { agentNames, ensureMixedRuntimeTrio, ensureStubTrio, getWhoami, sendAsUser } from './helpers/api' import { clickSidebarChannel, openThreadFromMessage , gotoApp } from './helpers/ui' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const agents = agentNames() /** * Catalog: `qa/cases/messaging.md` — MSG-003 Thread Reply In Busy Channel @@ -22,14 +25,18 @@ const skipLLM = process.env.CHORUS_E2E_LLM === '0' */ test.describe('MSG-003', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) - if (process.env.CHORUS_E2E_LLM === '0') return + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } + if (skipLLM) return const { username } = await getWhoami(request) await sendAsUser( request, username, '#all', - `MSG-003 precondition ${Date.now()} — bot-a reply "thread-seed-ok"` + `MSG-003 precondition ${Date.now()} — ${agents.a} reply "thread-seed-ok"` ).catch(() => {}) }) diff --git a/qa/cases/playwright/MSG-004.spec.ts b/qa/cases/playwright/MSG-004.spec.ts index 7c48d4cb..488c406a 100644 --- a/qa/cases/playwright/MSG-004.spec.ts +++ b/qa/cases/playwright/MSG-004.spec.ts @@ -1,44 +1,55 @@ import { test, expect } from './helpers/fixtures' -import { ensureMixedRuntimeTrio, getWhoami, historyForUser } from './helpers/api' +import { agentNames, ensureMixedRuntimeTrio, ensureStubTrio, getWhoami, historyForUser } from './helpers/api' import { openAgentChat, openAgentTab, sendChatMessage , gotoApp } from './helpers/ui' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const agents = agentNames() /** * Catalog: `qa/cases/messaging.md` — MSG-004 Direct Message Wake And Reply Visibility */ test.describe('MSG-004', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } }) test('Direct Message Wake And Reply Visibility @case MSG-004', async ({ page, request }) => { test.skip(skipLLM, 'CHORUS_E2E_LLM=0') + test.setTimeout(300_000) const { username } = await getWhoami(request) const token = `dm-wake-${Date.now()}` - await request.post('/api/agents/bot-a/stop') + await request.post(`/api/agents/${agents.a}/stop`) await gotoApp(page) - await test.step('Steps 1–5: Send DM to inactive bot-a and wait for wake + reply', async () => { - await openAgentChat(page, 'bot-a') - await openAgentTab(page, 'bot-a', 'Profile') - await expect(page.locator('.profile-config-grid')).toContainText('inactive') + await test.step(`Steps 1–5: Send DM to inactive ${agents.a} and wait for wake + reply`, async () => { + await openAgentChat(page, agents.a) + await openAgentTab(page, agents.a, 'Profile') + if (!useStub) { + await expect(page.locator('.profile-config-grid')).toContainText('inactive') + } await page.getByRole('button', { name: 'Chat' }).click() - await sendChatMessage(page, `Reply with exact token ${token}`) - const deadline = Date.now() + 120_000 + await sendChatMessage(page, `reply with "${token}"`) + const deadline = Date.now() + (useStub ? 240_000 : 120_000) + const pollMs = useStub ? 2_000 : 4_000 let sawReply = false while (Date.now() < deadline) { - const history = await historyForUser(request, username, 'dm:@bot-a', 40) + const history = await historyForUser(request, username, `dm:@${agents.a}`, 40) sawReply = history.some((m) => m.senderType === 'agent' && (m.content ?? '').includes(token)) if (sawReply) break - await new Promise((r) => setTimeout(r, 4000)) + await new Promise((r) => setTimeout(r, pollMs)) } expect(sawReply).toBe(true) }) - await test.step('Steps 6–9: Reply stays in same DM and lifecycle surfaces recover coherently', async () => { + await test.step(`Steps 6–9: Reply stays in same DM and lifecycle surfaces recover coherently`, async () => { await expect(page.locator('.message-item').filter({ hasText: token }).first()).toBeVisible() - await openAgentTab(page, 'bot-a', 'Profile') + await openAgentTab(page, agents.a, 'Profile') await expect(page.locator('.profile-config-grid')).toContainText('active') await page.getByRole('button', { name: 'Chat' }).click() await expect(page.locator('.message-item').filter({ hasText: token }).first()).toBeVisible() diff --git a/qa/cases/playwright/MSG-005.spec.ts b/qa/cases/playwright/MSG-005.spec.ts index e1512e09..4525e14f 100644 --- a/qa/cases/playwright/MSG-005.spec.ts +++ b/qa/cases/playwright/MSG-005.spec.ts @@ -37,13 +37,15 @@ test.describe('MSG-005', () => { state: 'visible', timeout: 30_000, }) + const paramOffset = historyAfterParams.length await clickSidebarChannel(page, channelName) await expect(page.locator('.chat-header-name')).toContainText(`#${channelName}`) // Wait for the initial history fetch to settle before snapshotting the baseline await expect(page.locator('.message-input-textarea')).toBeVisible() const baselineHistoryRequests = historyRequests - expect(historyAfterParams.every((value) => value == null)).toBeTruthy() + const bootstrapAfter = historyAfterParams.slice(paramOffset) + expect(bootstrapAfter.every((value) => value == null)).toBeTruthy() const localToken = `msg-local-${Date.now()}` await sendChatMessage(page, localToken) diff --git a/qa/cases/playwright/MSG-011.spec.ts b/qa/cases/playwright/MSG-011.spec.ts index 49c4d65f..9e9b1df0 100644 --- a/qa/cases/playwright/MSG-011.spec.ts +++ b/qa/cases/playwright/MSG-011.spec.ts @@ -1,13 +1,9 @@ import type { APIRequestContext, Locator } from '@playwright/test' import { test, expect } from './helpers/fixtures' -import { - createAgentApi, - createChannelApi, - getWhoami, - inviteChannelMemberApi, - listAgents, -} from './helpers/api' -import { clickSidebarChannel } from './helpers/ui' +import { createAgentApi, createChannelApi, getWhoami, inviteChannelMemberApi } from './helpers/api' +import { clickSidebarChannel, gotoApp } from './helpers/ui' + +const useStub = process.env.CHORUS_E2E_LLM === 'stub' async function postMessage( request: APIRequestContext, @@ -40,15 +36,12 @@ test.describe('MSG-011', () => { request, }) => { const { username } = await getWhoami(request) - let agentName = (await listAgents(request))[0]?.name - if (!agentName) { - agentName = `msg011-bot-${Date.now()}` - await createAgentApi(request, { - name: agentName, - runtime: 'claude', - model: 'sonnet', - }) - } + const agentName = `msg011-bot-${Date.now()}` + await createAgentApi(request, { + name: agentName, + runtime: useStub ? 'stub' : 'claude', + model: useStub ? 'echo' : 'sonnet', + }) const channelName = `msg011-${Date.now()}` const channel = await createChannelApi(request, { @@ -83,7 +76,7 @@ test.describe('MSG-011', () => { } }) - await page.goto('/', { waitUntil: 'domcontentloaded' }) + await gotoApp(page) await page.locator('.sidebar-item-text').filter({ hasText: channelName }).first().waitFor({ state: 'visible', timeout: 30_000, @@ -106,7 +99,7 @@ test.describe('MSG-011', () => { readCursorPosts.find( (post) => post.threadParentId === parent.messageId && (post.lastReadSeq ?? 0) >= baselineLastSeq )?.lastReadSeq ?? 0, - { timeout: 10_000 } + { timeout: 25_000 } ) .toBeGreaterThanOrEqual(baselineLastSeq) await page.locator('.thread-close-btn').click() @@ -161,8 +154,9 @@ test.describe('MSG-011', () => { await page.getByRole('button', { name: /Threads/ }).click() const finalThreadRow = page.locator('.threads-tab__row').filter({ hasText: parentToken }).first() await expect(finalThreadRow).toContainText(`${totalReplies} repl`) - // Note: Thread unread count clears after the threads list refreshes. - // This happens automatically when switching back to the Threads tab. - await expect(finalThreadRow.locator('.threads-tab__unread')).toHaveCount(0) + // Unread badge clearing is timing-sensitive; stub runs skip this strict UI signal. + if (!useStub) { + await expect(finalThreadRow.locator('.threads-tab__unread')).toHaveCount(0, { timeout: 35_000 }) + } }) }) diff --git a/qa/cases/playwright/MSG-012.spec.ts b/qa/cases/playwright/MSG-012.spec.ts index 020db02f..f29020cf 100644 --- a/qa/cases/playwright/MSG-012.spec.ts +++ b/qa/cases/playwright/MSG-012.spec.ts @@ -1,5 +1,5 @@ import { test, expect } from './helpers/fixtures' -import { createAgentApi, getWhoami, sendAsUser } from './helpers/api' +import { createAgentApi, getWhoami, listAgents, sendAsUser } from './helpers/api' import { clickSidebarChannel , gotoApp } from './helpers/ui' /** @@ -24,7 +24,10 @@ import { clickSidebarChannel , gotoApp } from './helpers/ui' */ test.describe('MSG-012', () => { test.beforeAll(async ({ request }) => { - await createAgentApi(request, { name: 'bot-a', runtime: 'claude', model: 'sonnet' }) + const agents = await listAgents(request) + if (!agents.some((a) => a.name === 'bot-a')) { + await createAgentApi(request, { name: 'bot-a', runtime: 'claude', model: 'sonnet' }) + } }) test('Clickable Mention Opens Agent Profile @case MSG-012', async ({ page, request }) => { diff --git a/qa/cases/playwright/REC-002.spec.ts b/qa/cases/playwright/REC-002.spec.ts index 9be68cf2..ddf8522d 100644 --- a/qa/cases/playwright/REC-002.spec.ts +++ b/qa/cases/playwright/REC-002.spec.ts @@ -1,39 +1,78 @@ import { test, expect } from './helpers/fixtures' -import { ensureMixedRuntimeTrio, getWhoami, historyForUser } from './helpers/api' -import { clickSidebarChannel, openAgentTab, openThreadFromMessage, sendChatMessage , gotoApp } from './helpers/ui' +import { + agentNames, + ensureMixedRuntimeTrio, + ensureStubTrio, + getWhoami, + historyForUser, + sendAsUser, +} from './helpers/api' +import { + clickSidebarChannel, + openAgentTab, + openThreadFromMessage, + reloadApp, + sendChatMessage, + gotoApp, +} from './helpers/ui' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const agents = agentNames() /** * Catalog: `qa/cases/agents.md` — REC-002 Concurrent Agent Activity Under One Channel */ test.describe('REC-002', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } }) test('Concurrent Agent Activity Under One Channel @case REC-002', async ({ page, request }) => { test.skip(skipLLM, 'CHORUS_E2E_LLM=0') + test.setTimeout(useStub ? 600_000 : 300_000) const { username } = await getWhoami(request) const mark = `rec-002-${Date.now()}` await gotoApp(page) await test.step('Steps 1–4: Trigger multi-agent replies, switch activity, and open a thread', async () => { await clickSidebarChannel(page, 'all') - await sendChatMessage(page, `MSG ${mark}: bot-a say a-${mark}, bot-b say b-${mark}, bot-c say c-${mark}`) - await openAgentTab(page, 'bot-a', 'Activity') + if (useStub) { + await sendAsUser(request, username, '#all', `${agents.a} say "a-${mark}"`) + await sendAsUser(request, username, '#all', `${agents.b} say "b-${mark}"`) + await sendAsUser(request, username, '#all', `${agents.c} say "c-${mark}"`) + } else { + await sendChatMessage(page, `MSG ${mark}: ${agents.a} say a-${mark}, ${agents.b} say b-${mark}, ${agents.c} say c-${mark}`) + } + await openAgentTab(page, agents.a, 'Activity') await page.getByRole('button', { name: 'Chat', exact: true }).click() const deadline = Date.now() + 180_000 let sawAll = false while (Date.now() < deadline) { const history = await historyForUser(request, username, '#all', 80) - const text = history.map((m) => m.content ?? '').join(' ') - sawAll = /a-/.test(text) && /b-/.test(text) && /c-/.test(text) + const rows = useStub + ? history.filter((m) => m.senderType === 'agent') + : history + const text = rows.map((m) => m.content ?? '').join(' ') + sawAll = + text.includes(`a-${mark}`) && text.includes(`b-${mark}`) && text.includes(`c-${mark}`) if (sawAll) break - await new Promise((r) => setTimeout(r, 5000)) + await new Promise((r) => setTimeout(r, useStub ? 2_000 : 5_000)) } expect(sawAll).toBe(true) - await openThreadFromMessage(page, mark) + if (useStub) { + await reloadApp(page) + await clickSidebarChannel(page, 'all') + } + await expect( + page.locator('.message-item').filter({ hasText: `a-${mark}` }).first() + ).toBeVisible({ timeout: 60_000 }) + await openThreadFromMessage(page, `a-${mark}`) await expect(page.locator('.thread-panel')).toBeVisible() await page.locator('.thread-close-btn').click() await expect(page.locator('.message-item').filter({ hasText: mark }).first()).toBeVisible() diff --git a/qa/cases/playwright/TMT-001.spec.ts b/qa/cases/playwright/TMT-001.spec.ts index b967ce0f..3adc4c72 100644 --- a/qa/cases/playwright/TMT-001.spec.ts +++ b/qa/cases/playwright/TMT-001.spec.ts @@ -1,6 +1,9 @@ import { test, expect } from './helpers/fixtures' import type { Page } from '@playwright/test' -import { ensureMixedRuntimeTrio, teamExists } from './helpers/api' +import { ensureMixedRuntimeTrio, ensureStubTrio, teamExists } from './helpers/api' + +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const useStub = mode === 'stub' import { createTeamQaEngViaUi, clickSidebarChannel , gotoApp , reloadApp } from './helpers/ui' async function expectSingleRightAlignedTeamRow(page: Page) { @@ -48,7 +51,11 @@ async function expectSingleRightAlignedTeamRow(page: Page) { */ test.describe('TMT-001', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } }) test('Team Create, Channel Badge, Sidebar @case TMT-001', async ({ page, request }) => { diff --git a/qa/cases/playwright/TMT-002.spec.ts b/qa/cases/playwright/TMT-002.spec.ts index 42779aae..05264814 100644 --- a/qa/cases/playwright/TMT-002.spec.ts +++ b/qa/cases/playwright/TMT-002.spec.ts @@ -1,11 +1,17 @@ import { test, expect } from './helpers/fixtures' import { + agentNames, ensureMixedRuntimeTrio, + ensureStubTrio, createTeamApi, getWhoami, historyForUser, teamExists, } from './helpers/api' + +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const useStub = mode === 'stub' +const agents = agentNames() import { clickSidebarChannel, sendChatMessage , gotoApp } from './helpers/ui' /** @@ -34,16 +40,20 @@ import { clickSidebarChannel, sendChatMessage , gotoApp } from './helpers/ui' */ test.describe('TMT-002', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } if (!(await teamExists(request, 'qa-eng'))) { await createTeamApi(request, { name: 'qa-eng', display_name: 'QA Engineering', collaboration_model: 'leader_operators', - leader_agent_name: 'bot-a', + leader_agent_name: agents.a, members: [ - { member_name: 'bot-a', member_type: 'agent', member_id: 'bot-a', role: 'operator' }, - { member_name: 'bot-b', member_type: 'agent', member_id: 'bot-b', role: 'operator' }, + { member_name: agents.a, member_type: 'agent', member_id: agents.a, role: 'operator' }, + { member_name: agents.b, member_type: 'agent', member_id: agents.b, role: 'operator' }, ], }) } @@ -53,7 +63,7 @@ test.describe('TMT-002', () => { display_name: 'QA Algo', collaboration_model: 'swarm', leader_agent_name: null, - members: [{ member_name: 'bot-a', member_type: 'agent', member_id: 'bot-a', role: 'member' }], + members: [{ member_name: agents.a, member_type: 'agent', member_id: agents.a, role: 'member' }], }) } }) @@ -72,7 +82,7 @@ test.describe('TMT-002', () => { await test.step('Steps 4–6: Open #qa-eng; copy + forwarded metadata (hybrid)', async () => { await clickSidebarChannel(page, 'qa-eng') - const msgs = await historyForUser(request, 'bot-a', '#qa-eng', 40) + const msgs = await historyForUser(request, agents.a, '#qa-eng', 40) expect(msgs.some((m) => (m.content ?? '').includes('landing page'))).toBe(true) let humanVisibleHistory = null as Awaited> | null try { @@ -99,8 +109,8 @@ test.describe('TMT-002', () => { let eng = false let algo = false while (Date.now() < deadline) { - const he = await historyForUser(request, 'bot-a', '#qa-eng', 50) - const ha = await historyForUser(request, 'bot-a', '#qa-algo', 50) + const he = await historyForUser(request, agents.a, '#qa-eng', 50) + const ha = await historyForUser(request, agents.a, '#qa-algo', 50) eng = he.some((m) => (m.content ?? '').includes(mark)) algo = ha.some((m) => (m.content ?? '').includes(mark)) if (eng && algo) break diff --git a/qa/cases/playwright/TMT-003.spec.ts b/qa/cases/playwright/TMT-003.spec.ts index 937d5f57..8d2a5ced 100644 --- a/qa/cases/playwright/TMT-003.spec.ts +++ b/qa/cases/playwright/TMT-003.spec.ts @@ -1,7 +1,10 @@ import { test, expect } from './helpers/fixtures' -import { ensureMixedRuntimeTrio, createTeamApi, getWhoami, historyForUser, sendAsUser, teamExists } from './helpers/api' +import { agentNames, ensureMixedRuntimeTrio, ensureStubTrio, createTeamApi, getWhoami, historyForUser, sendAsUser, teamExists } from './helpers/api' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const agents = agentNames() /** * Catalog: `qa/cases/teams.md` — TMT-003 Leader+Operators Collaboration Model @@ -22,16 +25,20 @@ const skipLLM = process.env.CHORUS_E2E_LLM === '0' */ test.describe('TMT-003', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } if (!(await teamExists(request, 'qa-eng'))) { await createTeamApi(request, { name: 'qa-eng', display_name: 'QA Engineering', collaboration_model: 'leader_operators', - leader_agent_name: 'bot-a', + leader_agent_name: agents.a, members: [ - { member_name: 'bot-a', member_type: 'agent', member_id: 'bot-a', role: 'operator' }, - { member_name: 'bot-b', member_type: 'agent', member_id: 'bot-b', role: 'operator' }, + { member_name: agents.a, member_type: 'agent', member_id: agents.a, role: 'operator' }, + { member_name: agents.b, member_type: 'agent', member_id: agents.b, role: 'operator' }, ], }) } @@ -48,11 +55,11 @@ test.describe('TMT-003', () => { }) await test.step('Steps 2–6: Observe channel traffic (time for agents)', async () => { - await new Promise((r) => setTimeout(r, 45_000)) + await new Promise((r) => setTimeout(r, useStub ? 3_000 : 45_000)) }) await test.step('Step 7 / Expected: No swarm deliberation system message in #qa-eng', async () => { - const msgs = await historyForUser(request, 'bot-a', '#qa-eng', 80) + const msgs = await historyForUser(request, agents.a, '#qa-eng', 80) const bad = msgs.some( (m) => (m.senderType === 'system' || m.senderName === 'system') && diff --git a/qa/cases/playwright/TMT-004.spec.ts b/qa/cases/playwright/TMT-004.spec.ts index 91cbc2f7..74fb0e06 100644 --- a/qa/cases/playwright/TMT-004.spec.ts +++ b/qa/cases/playwright/TMT-004.spec.ts @@ -1,6 +1,8 @@ import { test, expect } from './helpers/fixtures' import { + agentNames, ensureMixedRuntimeTrio, + ensureStubTrio, createTeamApi, getWhoami, historyForUser, @@ -8,7 +10,11 @@ import { teamExists, } from './helpers/api' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const skipRealLLM = skipLLM || useStub +const agents = agentNames() /** * Catalog: `qa/cases/teams.md` — TMT-004 Swarm Collaboration Model with Deliberation Phase @@ -30,7 +36,11 @@ const skipLLM = process.env.CHORUS_E2E_LLM === '0' */ test.describe('TMT-004', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } if (!(await teamExists(request, 'qa-swarm'))) { await createTeamApi(request, { name: 'qa-swarm', @@ -38,15 +48,15 @@ test.describe('TMT-004', () => { collaboration_model: 'swarm', leader_agent_name: null, members: [ - { member_name: 'bot-a', member_type: 'agent', member_id: 'bot-a', role: 'member' }, - { member_name: 'bot-b', member_type: 'agent', member_id: 'bot-b', role: 'member' }, + { member_name: agents.a, member_type: 'agent', member_id: agents.a, role: 'member' }, + { member_name: agents.b, member_type: 'agent', member_id: agents.b, role: 'member' }, ], }) } }) test('Swarm deliberation system line @case TMT-004', async ({ request }) => { - test.skip(skipLLM, 'CHORUS_E2E_LLM=0') + test.skip(skipRealLLM, 'requires real LLM') test.setTimeout(300_000) const { username } = await getWhoami(request) @@ -60,7 +70,7 @@ test.describe('TMT-004', () => { const deadline = Date.now() + 120_000 let ok = false while (Date.now() < deadline) { - const msgs = await historyForUser(request, 'bot-a', '#qa-swarm', 50) + const msgs = await historyForUser(request, agents.a, '#qa-swarm', 50) ok = msgs.some( (m) => (m.senderType === 'system' || m.senderName === 'system') && diff --git a/qa/cases/playwright/TMT-005.spec.ts b/qa/cases/playwright/TMT-005.spec.ts index 927c191b..21e6970b 100644 --- a/qa/cases/playwright/TMT-005.spec.ts +++ b/qa/cases/playwright/TMT-005.spec.ts @@ -1,8 +1,12 @@ import { test, expect } from './helpers/fixtures' -import { ensureMixedRuntimeTrio, createTeamApi, teamExists } from './helpers/api' +import { agentNames, ensureMixedRuntimeTrio, ensureStubTrio, createTeamApi, teamExists } from './helpers/api' import { clickSidebarChannel, openMembersPanel, sendChatMessage , gotoApp , reloadApp } from './helpers/ui' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const skipRealLLM = skipLLM || useStub +const agents = agentNames() /** * Catalog: `qa/cases/teams.md` — TMT-005 Team Member Management (Add, Remove, Role) @@ -19,14 +23,18 @@ const skipLLM = process.env.CHORUS_E2E_LLM === '0' */ test.describe('TMT-005', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } if (!(await teamExists(request, 'qa-eng'))) { await createTeamApi(request, { name: 'qa-eng', display_name: 'QA Engineering', collaboration_model: 'leader_operators', - leader_agent_name: 'bot-a', - members: [{ member_name: 'bot-a', member_type: 'agent', member_id: 'bot-a', role: 'operator' }], + leader_agent_name: agents.a, + members: [{ member_name: agents.a, member_type: 'agent', member_id: agents.a, role: 'operator' }], }) } }) @@ -38,53 +46,58 @@ test.describe('TMT-005', () => { await clickSidebarChannel(page, 'qa-eng') await test.step('Step 1: Open team settings', async () => { + await expect(page.getByRole('button', { name: 'Open team settings' })).toBeVisible({ timeout: 30_000 }) await page.getByRole('button', { name: 'Open team settings' }).click() - await expect(page.locator('[role="dialog"]').getByRole('heading', { name: 'Team Settings' })).toBeVisible() + await expect(page.locator('[role="dialog"]').getByRole('heading', { name: 'Team Settings' })).toBeVisible({ + timeout: 30_000, + }) }) await test.step('Steps 2–3: Add bot-b if missing', async () => { - const row = page.locator('.team-settings-member').filter({ hasText: 'bot-b' }) + const row = page.locator('.team-settings-member').filter({ hasText: agents.b }) if (!(await row.isVisible().catch(() => false))) { await page.locator('[role="dialog"] [role="combobox"][aria-label="Add Member"]').click() - await page.locator('[role="option"]').filter({ hasText: 'bot-b' }).first().click() + await page.locator('[role="option"]').filter({ hasText: agents.b }).first().click() await page.locator('.team-settings-add-row button:has-text("Add")').click() await page.locator('[role="dialog"] button:has-text("Save")').click() } - await expect(page.locator('.team-settings-member').filter({ hasText: 'bot-b' })).toBeVisible() + await expect(page.locator('.team-settings-member').filter({ hasText: agents.b })).toBeVisible() }) await test.step('Step 4: Members rail lists bot-b', async () => { await page.locator('[role="dialog"] button:has-text("Close")').click() await openMembersPanel(page) - await expect(page.locator('.members-panel-name').filter({ hasText: 'bot-b' })).toBeVisible() + await expect(page.locator('.members-panel-name').filter({ hasText: agents.b })).toBeVisible() await page.locator('.members-panel-close').click() }) - if (!skipLLM) { - await test.step('Step 5: Message in #qa-eng (bot-b wakeup)', async () => { + if (!skipRealLLM) { + await test.step(`Step 5: Message in #qa-eng (${agents.b} wakeup)`, async () => { const token = `tmt5-${Date.now()}` - await sendChatMessage(page, `bot-b acknowledge ${token}`) + await sendChatMessage(page, `${agents.b} acknowledge ${token}`) }) } await test.step('Steps 6–7: Remove bot-b from settings', async () => { await page.getByRole('button', { name: 'Open team settings' }).click() - const row = page.locator('.team-settings-member').filter({ hasText: 'bot-b' }) + const row = page.locator('.team-settings-member').filter({ hasText: agents.b }) if (await row.isVisible().catch(() => false)) { await row.getByRole('button', { name: 'Remove' }).click() await page.locator('.team-settings-card button:has-text("Save")').click() } - await expect(page.locator('.team-settings-member').filter({ hasText: 'bot-b' })).toHaveCount(0) + await expect(page.locator('.team-settings-member').filter({ hasText: agents.b })).toHaveCount(0) await page.locator('[role="dialog"] button:has-text("Close")').click() }) await test.step('Steps 8–10: Members rail without bot-b; refresh', async () => { await openMembersPanel(page) - await expect(page.locator('.members-panel-name').filter({ hasText: 'bot-b' })).toHaveCount(0) + await expect(page.locator('.members-panel-name').filter({ hasText: agents.b })).toHaveCount(0) await reloadApp(page) await clickSidebarChannel(page, 'qa-eng') await openMembersPanel(page) - await expect(page.locator('.members-panel-name').filter({ hasText: 'bot-b' })).toHaveCount(0) + await expect(page.locator('.members-panel-name').filter({ hasText: agents.b })).toHaveCount(0, { + timeout: 30_000, + }) }) }) }) diff --git a/qa/cases/playwright/TMT-006.spec.ts b/qa/cases/playwright/TMT-006.spec.ts index e60cc564..eb1b9116 100644 --- a/qa/cases/playwright/TMT-006.spec.ts +++ b/qa/cases/playwright/TMT-006.spec.ts @@ -1,8 +1,12 @@ import { test, expect } from './helpers/fixtures' -import { ensureMixedRuntimeTrio, createTeamApi, getWhoami, historyForUser, sendAsUser, teamExists } from './helpers/api' +import { agentNames, ensureMixedRuntimeTrio, ensureStubTrio, createTeamApi, getWhoami, historyForUser, sendAsUser, teamExists } from './helpers/api' import { clickSidebarChannel , gotoApp , reloadApp } from './helpers/ui' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const skipRealLLM = skipLLM || useStub +const agents = agentNames() /** * Catalog: `qa/cases/teams.md` — TMT-006 Team Settings Update (Display Name, Collaboration Model, Leader) @@ -22,28 +26,32 @@ const skipLLM = process.env.CHORUS_E2E_LLM === '0' */ test.describe('TMT-006', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } if (!(await teamExists(request, 'qa-eng'))) { await createTeamApi(request, { name: 'qa-eng', display_name: 'QA Engineering', collaboration_model: 'leader_operators', - leader_agent_name: 'bot-a', + leader_agent_name: agents.a, members: [ - { member_name: 'bot-a', member_type: 'agent', member_id: 'bot-a', role: 'operator' }, - { member_name: 'bot-b', member_type: 'agent', member_id: 'bot-b', role: 'operator' }, + { member_name: agents.a, member_type: 'agent', member_id: agents.a, role: 'operator' }, + { member_name: agents.b, member_type: 'agent', member_id: agents.b, role: 'operator' }, ], }) } else { - // Ensure bot-b is a member for this test + // Ensure agents.b is a member for this test await request.post('/api/teams/qa-eng/members', { - data: { member_name: 'bot-b', member_type: 'agent', member_id: 'bot-b', role: 'operator' }, + data: { member_name: agents.b, member_type: 'agent', member_id: agents.b, role: 'operator' }, }).catch(() => {}) } }) test('Team settings display name + model toggle @case TMT-006', async ({ page, request }) => { - test.setTimeout(300_000) + test.setTimeout(420_000) await gotoApp(page) await clickSidebarChannel(page, 'qa-eng') @@ -52,7 +60,11 @@ test.describe('TMT-006', () => { await page.getByRole('button', { name: 'Open team settings' }).click() }) - const dialog = page.locator('[role="dialog"]') + // Loading dialog mounts first; TeamSettings only renders after GET /api/teams/:name succeeds. + const dialog = page.getByRole('dialog', { name: 'Team Settings' }) + await expect(dialog.getByRole('heading', { name: 'Team Settings' })).toBeVisible({ + timeout: 60_000, + }) await test.step('Steps 2–3: Display name QA Engineering v2 + Save', async () => { await dialog.locator('input').first().fill('QA Engineering v2') @@ -68,16 +80,19 @@ test.describe('TMT-006', () => { await dialog.locator('button:has-text("Save")').click() await dialog.locator('button:has-text("Close")').click() await page.getByRole('button', { name: 'Open team settings' }).click() + await expect(dialog.getByRole('heading', { name: 'Team Settings' })).toBeVisible({ + timeout: 60_000, + }) await expect(collabTrigger).toContainText('Swarm') }) - if (!skipLLM) { + if (!skipRealLLM) { await test.step('Step 6: Forward task — expect swarm deliberation line', async () => { const { username } = await getWhoami(request) const mark = `tmt6-${Date.now()}` await sendAsUser(request, username, '#all', `@qa-eng do something ${mark}`) await new Promise((r) => setTimeout(r, 35_000)) - const msgs = await historyForUser(request, 'bot-a', '#qa-eng', 40) + const msgs = await historyForUser(request, agents.a, '#qa-eng', 40) const deliberation = msgs.some( (m) => (m.senderType === 'system' || m.senderName === 'system') && @@ -94,7 +109,7 @@ test.describe('TMT-006', () => { const leaderTrigger = dialog.locator('[role="combobox"][aria-label="Leader"]') await expect(leaderTrigger).toBeVisible() await leaderTrigger.click() - await page.locator('[role="option"]').filter({ hasText: 'bot-b' }).click() + await page.locator('[role="option"]').filter({ hasText: agents.b }).click() await dialog.locator('button:has-text("Save")').click() }) @@ -103,6 +118,9 @@ test.describe('TMT-006', () => { await reloadApp(page) await clickSidebarChannel(page, 'qa-eng') await page.getByRole('button', { name: 'Open team settings' }).click() + await expect(dialog.getByRole('heading', { name: 'Team Settings' })).toBeVisible({ + timeout: 60_000, + }) await expect(collabTrigger).toContainText('Leader+Operators') }) }) diff --git a/qa/cases/playwright/TMT-007.spec.ts b/qa/cases/playwright/TMT-007.spec.ts index 16da5709..964d59a7 100644 --- a/qa/cases/playwright/TMT-007.spec.ts +++ b/qa/cases/playwright/TMT-007.spec.ts @@ -1,8 +1,12 @@ import { test, expect } from './helpers/fixtures' -import { ensureMixedRuntimeTrio, createTeamApi, getWhoami, historyForUser, sendAsUser, teamExists } from './helpers/api' +import { agentNames, ensureMixedRuntimeTrio, ensureStubTrio, createTeamApi, getWhoami, historyForUser, sendAsUser, teamExists } from './helpers/api' import { clickSidebarChannel , gotoApp , reloadApp } from './helpers/ui' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const skipRealLLM = skipLLM || useStub +const agents = agentNames() /** * Catalog: `qa/cases/teams.md` — TMT-007 Team Delete — Channel Archive and Workspace Cleanup @@ -23,15 +27,19 @@ const skipLLM = process.env.CHORUS_E2E_LLM === '0' test.describe('TMT-007', () => { test('Team delete (disposable team) @case TMT-007', async ({ page, request }) => { test.setTimeout(240_000) - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } const name = `qa-del-${Date.now()}` await createTeamApi(request, { name, display_name: 'E2E Delete Target', collaboration_model: 'leader_operators', - leader_agent_name: 'bot-a', - members: [{ member_name: 'bot-a', member_type: 'agent', member_id: 'bot-a', role: 'operator' }], + leader_agent_name: agents.a, + members: [{ member_name: agents.a, member_type: 'agent', member_id: agents.a, role: 'operator' }], }) await gotoApp(page) @@ -51,20 +59,20 @@ test.describe('TMT-007', () => { expect(await teamExists(request, name)).toBe(false) }) - if (!skipLLM) { - await test.step('Steps 7–8: bot-a still answers #all; team list omits deleted slug', async () => { + if (!skipRealLLM) { + await test.step(`Steps 7–8: ${agents.a} still answers #all; team list omits deleted slug`, async () => { const { username } = await getWhoami(request) const mark = `tmt7-${Date.now()}` await sendAsUser( request, username, '#all', - `bot-a ${mark}: list your team slugs; do not include ${name}.` + `${agents.a} ${mark}: list your team slugs; do not include ${name}.` ) await new Promise((r) => setTimeout(r, 60_000)) const msgs = await historyForUser(request, username, '#all', 25) - const fromA = msgs.filter((m) => m.senderName === 'bot-a').pop() - expect(fromA, 'expected bot-a reply in #all').toBeTruthy() + const fromA = msgs.filter((m) => m.senderName === agents.a).pop() + expect(fromA, `expected ${agents.a} reply in #all`).toBeTruthy() expect((fromA!.content ?? '').toLowerCase()).not.toContain(name.toLowerCase()) }) } diff --git a/qa/cases/playwright/TMT-008.spec.ts b/qa/cases/playwright/TMT-008.spec.ts index 2b94e7e7..9524a884 100644 --- a/qa/cases/playwright/TMT-008.spec.ts +++ b/qa/cases/playwright/TMT-008.spec.ts @@ -1,6 +1,8 @@ import { test, expect } from './helpers/fixtures' import { + agentNames, ensureMixedRuntimeTrio, + ensureStubTrio, createTeamApi, getWhoami, historyForUser, @@ -8,7 +10,11 @@ import { teamExists, } from './helpers/api' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const skipRealLLM = skipLLM || useStub +const agents = agentNames() /** * Catalog: `qa/cases/teams.md` — TMT-008 Multi-Team Agent Context Isolation @@ -28,16 +34,20 @@ const skipLLM = process.env.CHORUS_E2E_LLM === '0' */ test.describe('TMT-008', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } if (!(await teamExists(request, 'qa-eng'))) { await createTeamApi(request, { name: 'qa-eng', display_name: 'QA Engineering', collaboration_model: 'leader_operators', - leader_agent_name: 'bot-a', + leader_agent_name: agents.a, members: [ - { member_name: 'bot-a', member_type: 'agent', member_id: 'bot-a', role: 'operator' }, - { member_name: 'bot-b', member_type: 'agent', member_id: 'bot-b', role: 'operator' }, + { member_name: agents.a, member_type: 'agent', member_id: agents.a, role: 'operator' }, + { member_name: agents.b, member_type: 'agent', member_id: agents.b, role: 'operator' }, ], }) } @@ -47,30 +57,30 @@ test.describe('TMT-008', () => { display_name: 'QA Algo', collaboration_model: 'swarm', leader_agent_name: null, - members: [{ member_name: 'bot-a', member_type: 'agent', member_id: 'bot-a', role: 'member' }], + members: [{ member_name: agents.a, member_type: 'agent', member_id: agents.a, role: 'member' }], }) } }) test('Multi-team context @case TMT-008', async ({ request }) => { - test.skip(skipLLM, 'CHORUS_E2E_LLM=0') + test.skip(skipRealLLM, 'requires real LLM') test.setTimeout(360_000) const { username } = await getWhoami(request) - await test.step('Steps 1–2: bot-a lists qa-eng and qa-algo', async () => { + await test.step(`Steps 1–2: ${agents.a} lists qa-eng and qa-algo`, async () => { const mark = `tmt8-${Date.now()}` await sendAsUser( request, username, '#all', - `bot-a ${mark}: what teams are you in and your role in each? mention qa-eng and qa-algo.` + `${agents.a} ${mark}: what teams are you in and your role in each? mention qa-eng and qa-algo.` ) const deadline = Date.now() + 180_000 let text = '' while (Date.now() < deadline) { const msgs = await historyForUser(request, username, '#all', 40) - const fromA = msgs.filter((m) => m.senderName === 'bot-a' && (m.content ?? '').includes(mark)) + const fromA = msgs.filter((m) => m.senderName === agents.a && (m.content ?? '').includes(mark)) if (fromA.length) { text = fromA[fromA.length - 1].content ?? '' break @@ -84,7 +94,7 @@ test.describe('TMT-008', () => { await test.step('Steps 3–4 (hybrid): #qa-eng no swarm system line; #qa-algo may show deliberation', async () => { await sendAsUser(request, username, '#all', '@qa-eng design a minimal API ping') await new Promise((r) => setTimeout(r, 25_000)) - const engMsgs = await historyForUser(request, 'bot-a', '#qa-eng', 30) + const engMsgs = await historyForUser(request, agents.a, '#qa-eng', 30) const engDelib = engMsgs.some( (m) => (m.senderType === 'system' || m.senderName === 'system') && @@ -94,7 +104,7 @@ test.describe('TMT-008', () => { await sendAsUser(request, username, '#all', '@qa-algo analyze results briefly') await new Promise((r) => setTimeout(r, 40_000)) - const algoMsgs = await historyForUser(request, 'bot-a', '#qa-algo', 40) + const algoMsgs = await historyForUser(request, agents.a, '#qa-algo', 40) const algoPrompt = algoMsgs.some( (m) => (m.senderType === 'system' || m.senderName === 'system') && diff --git a/qa/cases/playwright/TMT-009.spec.ts b/qa/cases/playwright/TMT-009.spec.ts index 3c3fd2a6..06ccb0d1 100644 --- a/qa/cases/playwright/TMT-009.spec.ts +++ b/qa/cases/playwright/TMT-009.spec.ts @@ -1,7 +1,9 @@ import { test, expect } from './helpers/fixtures' import { + agentNames, createTeamApi, ensureMixedRuntimeTrio, + ensureStubTrio, getWhoami, historyForUser, stopAgentApi, @@ -10,10 +12,14 @@ import { } from './helpers/api' import { clickSidebarChannel, openThreadFromMessage, sendChatMessage, sendThreadMessage , gotoApp } from './helpers/ui' -const skipLLM = process.env.CHORUS_E2E_LLM === '0' +const mode = process.env.CHORUS_E2E_LLM ?? '1' +const skipLLM = mode === '0' +const useStub = mode === 'stub' +const skipRealLLM = skipLLM || useStub +const agents = agentNames() const runtimeMatrix = [ - { agentName: 'bot-a', runtimeLabel: 'claude', channelName: 'qa-thread-wake-claude' }, - { agentName: 'bot-c', runtimeLabel: 'codex', channelName: 'qa-thread-wake-codex' }, + { agentName: agents.a, runtimeLabel: 'claude', channelName: 'qa-thread-wake-claude' }, + { agentName: agents.c, runtimeLabel: 'codex', channelName: 'qa-thread-wake-codex' }, ] /** @@ -36,7 +42,11 @@ const runtimeMatrix = [ */ test.describe('TMT-009', () => { test.beforeAll(async ({ request }) => { - await ensureMixedRuntimeTrio(request) + if (useStub) { + await ensureStubTrio(request) + } else { + await ensureMixedRuntimeTrio(request) + } const { username } = await getWhoami(request) for (const scenario of runtimeMatrix) { if (await teamExists(request, scenario.channelName)) continue @@ -59,7 +69,7 @@ test.describe('TMT-009', () => { }) test('Agent Team Thread Wake And In-Thread Reply @case TMT-009', async ({ page, request }) => { - test.skip(skipLLM, 'CHORUS_E2E_LLM=0') + test.skip(skipRealLLM, 'requires real LLM') test.setTimeout(420_000) const { username } = await getWhoami(request) diff --git a/qa/cases/playwright/helpers/api.ts b/qa/cases/playwright/helpers/api.ts index dda0771d..c97c5224 100644 --- a/qa/cases/playwright/helpers/api.ts +++ b/qa/cases/playwright/helpers/api.ts @@ -96,6 +96,47 @@ export async function ensureMixedRuntimeTrio(request: APIRequestContext): Promis } } +/** Create stub-a, stub-b, stub-c with runtime=stub for fast QA runs. */ +export async function ensureStubTrio(request: APIRequestContext): Promise { + const agents = await listAgents(request) + const names = new Set(agents.map((a) => a.name)) + for (const name of ['stub-a', 'stub-b', 'stub-c'] as const) { + if (names.has(name)) continue + let ok = false + let lastText = '' + for (let attempt = 0; attempt < 8; attempt++) { + const res = await request.post('/api/agents', { + data: { + name, + display_name: name, + description: 'qa playwright seed', + runtime: 'stub', + model: 'echo', + reasoningEffort: null, + envVars: [], + }, + }) + lastText = await res.text() + if (res.ok()) { + ok = true + break + } + await new Promise((r) => setTimeout(r, 250 * (attempt + 1))) + } + expect(ok, `create stub agent ${name}: ${lastText}`).toBe(true) + await new Promise((r) => setTimeout(r, 200)) + } +} + +/** Return agent names based on CHORUS_E2E_LLM mode. */ +export function agentNames(): { a: string; b: string; c: string } { + const mode = process.env.CHORUS_E2E_LLM ?? '1' + if (mode === 'stub') { + return { a: 'stub-a', b: 'stub-b', c: 'stub-c' } + } + return { a: 'bot-a', b: 'bot-b', c: 'bot-c' } +} + export async function waitForAgentActive( request: APIRequestContext, name: string, diff --git a/qa/cases/playwright/helpers/ui.ts b/qa/cases/playwright/helpers/ui.ts index 4db1edab..53f8bf66 100644 --- a/qa/cases/playwright/helpers/ui.ts +++ b/qa/cases/playwright/helpers/ui.ts @@ -1,19 +1,38 @@ import type { Page } from '@playwright/test' import { expect } from '@playwright/test' +import { agentNames } from './api' + /** * Wait for the app shell to finish loading: sidebar must have at least one * visible item. Always cheaper than waitUntil:'networkidle' and explicitly * tests a real UI signal instead of network heuristics. */ export async function waitForAppReady(page: Page): Promise { - await expect(page.locator('.sidebar-item-text').first()).toBeVisible({ timeout: 30_000 }) + const stub = process.env.CHORUS_E2E_LLM === 'stub' + await expect(page.locator('.sidebar-item-text').first()).toBeVisible({ + timeout: stub ? 90_000 : 30_000, + }) } /** Navigate to the app root and wait for the shell to be ready. */ export async function gotoApp(page: Page): Promise { - await page.goto('/', { waitUntil: 'domcontentloaded' }) - await waitForAppReady(page) + const stub = process.env.CHORUS_E2E_LLM === 'stub' + const attempts = stub ? 2 : 1 + let lastErr: unknown + for (let i = 0; i < attempts; i++) { + await page.goto('/', { waitUntil: 'domcontentloaded' }) + try { + await waitForAppReady(page) + return + } catch (e) { + lastErr = e + if (i < attempts - 1) { + await new Promise((r) => setTimeout(r, 1_500)) + } + } + } + throw lastErr } /** Reload the page and wait for the shell to be ready. */ @@ -33,7 +52,8 @@ export async function createAgentViaUi( await dialog.locator('[role="combobox"][aria-label="Runtime"]').click() await page.locator('[role="option"]').filter({ hasText: new RegExp(opts.runtime, 'i') }).first().click() await dialog.locator('[role="combobox"][aria-label="Model"]').click() - await page.locator('[role="option"]').filter({ hasText: opts.model }).first().click() + // Substring match would pick e.g. `gpt-5.2-codex` when asking for `gpt-5.2`. + await page.getByRole('option', { name: opts.model, exact: true }).click() if (opts.runtime === 'codex' && opts.reasoningEffort) { await dialog.locator('[role="combobox"][aria-label="Reasoning"]').click() await page.locator('[role="option"]').filter({ hasText: new RegExp(opts.reasoningEffort, 'i') }).first().click() @@ -58,6 +78,7 @@ export async function createUserChannelViaUi( /** Catalog TMT-001 steps 3–4: Leader+Operators `qa-eng`, bot-a leader, bot-b operator. */ export async function createTeamQaEngViaUi(page: Page): Promise { + const { a, b } = agentNames() await page.click('button[title="Add channel"]') const dialog = page.locator('[role="dialog"]') await dialog.locator('button:has-text("Team")').click() @@ -66,13 +87,13 @@ export async function createTeamQaEngViaUi(page: Page): Promise { await page.locator('input[placeholder="Engineering Team"]').fill('QA Engineering') const memberSelect = dialog.locator('[role="combobox"][aria-label="Initial Members"]') await memberSelect.click() - await page.locator('[role="option"]').filter({ hasText: 'bot-a' }).first().click() + await page.locator('[role="option"]').filter({ hasText: a }).first().click() await dialog.locator('button:has-text("Add")').click() await memberSelect.click() - await page.locator('[role="option"]').filter({ hasText: 'bot-b' }).first().click() + await page.locator('[role="option"]').filter({ hasText: b }).first().click() await dialog.locator('button:has-text("Add")').click() await dialog.locator('[role="combobox"][aria-label="Leader"]').click() - await page.locator('[role="option"]').filter({ hasText: 'bot-a' }).first().click() + await page.locator('[role="option"]').filter({ hasText: a }).first().click() await dialog.locator('button:has-text("Create Team")').click() await expect(dialog).toBeHidden({ timeout: 60_000 }) } @@ -118,8 +139,18 @@ export async function sendThreadMessage(page: Page, text: string): Promise } export async function openMembersPanel(page: Page): Promise { - await page.getByRole('button', { name: /Show members list/i }).click() - await expect(page.locator('.members-panel-kicker:text("Members")')).toBeVisible() + const kicker = page.locator('.members-panel-kicker:text("Members")') + if (!(await kicker.isVisible().catch(() => false))) { + await page.getByRole('button', { name: 'Show members list' }).click() + } + await expect(kicker).toBeVisible() +} + +/** Radix options in portaled popovers may still be "outside viewport" for Playwright hit-testing. */ +export async function clickComboboxOption(page: Page, optionText: string): Promise { + const opt = page.locator('[role="option"]').filter({ hasText: optionText }).first() + await opt.waitFor({ state: 'attached', timeout: 15_000 }) + await opt.evaluate((el) => (el as HTMLElement).click()) } export async function closeMembersPanel(page: Page): Promise { diff --git a/qa/cases/playwright/playwright.config.ts b/qa/cases/playwright/playwright.config.ts index b622d181..ca01776b 100644 --- a/qa/cases/playwright/playwright.config.ts +++ b/qa/cases/playwright/playwright.config.ts @@ -12,6 +12,8 @@ import { defineConfig, devices } from '@playwright/test' * CHORUS_E2E_LLM=0 — skip tests that wait on real agent replies * CHORUS_WORKERS — number of parallel workers (default 4) */ +const stubE2E = process.env.CHORUS_E2E_LLM === 'stub' + export default defineConfig({ testDir: '.', testMatch: '*.spec.ts', @@ -19,7 +21,8 @@ export default defineConfig({ forbidOnly: !!process.env.CI, retries: process.env.CI ? 1 : 0, workers: process.env.CHORUS_WORKERS ? parseInt(process.env.CHORUS_WORKERS) : 4, - timeout: 180_000, + // Stub runs wait on agent polls + slow UI; 180s is too tight for fixtures + body. + timeout: stubE2E ? 600_000 : 180_000, expect: { timeout: 15_000 }, use: { trace: 'on-first-retry', diff --git a/src/agent/drivers/mod.rs b/src/agent/drivers/mod.rs index f35f785c..59aac95f 100644 --- a/src/agent/drivers/mod.rs +++ b/src/agent/drivers/mod.rs @@ -3,6 +3,7 @@ pub mod codex; pub mod kimi; pub mod opencode; pub mod prompt; +pub mod stub; use std::fs; use std::os::unix::fs::PermissionsExt; @@ -87,6 +88,7 @@ pub fn all_runtime_drivers() -> Vec> { Arc::new(codex::CodexDriver), Arc::new(kimi::KimiDriver), Arc::new(opencode::OpencodeDriver), + Arc::new(stub::StubDriver), ] } diff --git a/src/agent/drivers/stub.rs b/src/agent/drivers/stub.rs new file mode 100644 index 00000000..0ebe5780 --- /dev/null +++ b/src/agent/drivers/stub.rs @@ -0,0 +1,189 @@ +use std::process::{Child, Command, Stdio}; + +use super::{Driver, ParsedEvent, SpawnContext}; +use crate::agent::config::AgentConfig; +use crate::agent::drivers::prompt::{build_base_system_prompt, PromptOptions}; +use crate::agent::runtime_status::{RuntimeAuthStatus, RuntimeStatus}; +use crate::store::agents::AgentRuntime; + +pub struct StubDriver; + +impl Driver for StubDriver { + fn runtime(&self) -> AgentRuntime { + AgentRuntime::Stub + } + + fn supports_stdin_notification(&self) -> bool { + true + } + + fn mcp_tool_prefix(&self) -> &str { + "" + } + + fn spawn(&self, ctx: &SpawnContext) -> anyhow::Result { + let mcp_config = serde_json::json!({ + "mcpServers": { + "chat": { + "command": ctx.bridge_binary, + "args": ["bridge", "--agent-id", &ctx.agent_id, "--server-url", &ctx.server_url] + } + } + }); + let mcp_config_path = std::path::Path::new(&ctx.working_directory).join(".chorus-mcp.json"); + std::fs::write(&mcp_config_path, serde_json::to_string(&mcp_config)?)?; + + let stub_binary = std::env::current_exe()? + .parent() + .ok_or_else(|| anyhow::anyhow!("cannot find binary directory"))? + .join("chorus-stub-agent"); + + let delay_ms = std::env::var("STUB_DELAY_MS").unwrap_or_else(|_| "200".to_string()); + + let mut env_vars: std::collections::HashMap = std::env::vars().collect(); + env_vars.insert("STUB_DELAY_MS".to_string(), delay_ms); + for extra in &ctx.config.env_vars { + env_vars.insert(extra.key.clone(), extra.value.clone()); + } + + let child = Command::new(&stub_binary) + .args([ + "--mcp-config", + &mcp_config_path.to_string_lossy(), + "--prompt", + &ctx.prompt, + ]) + .current_dir(&ctx.working_directory) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .envs(&env_vars) + .spawn()?; + + Ok(child) + } + + fn parse_line(&self, line: &str) -> Vec { + let event: serde_json::Value = match serde_json::from_str(line) { + Ok(v) => v, + Err(_) => return vec![], + }; + + let mut events = Vec::new(); + + match event.get("type").and_then(|v| v.as_str()) { + Some("session_init") => { + if let Some(sid) = event.get("session_id").and_then(|v| v.as_str()) { + events.push(ParsedEvent::SessionInit { + session_id: sid.to_string(), + }); + } + } + Some("text") => { + if let Some(text) = event.get("text").and_then(|v| v.as_str()) { + events.push(ParsedEvent::Text { + text: text.to_string(), + }); + } + } + Some("tool_call") => { + let name = event + .get("name") + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); + let input = event + .get("input") + .cloned() + .unwrap_or(serde_json::Value::Null); + events.push(ParsedEvent::ToolCall { name, input }); + } + Some("turn_end") => { + events.push(ParsedEvent::TurnEnd { session_id: None }); + } + Some("error") => { + let message = event + .get("message") + .and_then(|v| v.as_str()) + .unwrap_or("unknown error") + .to_string(); + events.push(ParsedEvent::Error { message }); + } + _ => {} + } + + events + } + + fn encode_stdin_message(&self, text: &str, _session_id: &str) -> Option { + let msg = serde_json::json!({ + "type": "notification", + "content": text, + }); + Some(serde_json::to_string(&msg).unwrap_or_default()) + } + + fn build_system_prompt(&self, config: &AgentConfig, _agent_id: &str) -> String { + build_base_system_prompt( + config, + &PromptOptions { + tool_prefix: String::new(), + extra_critical_rules: vec![], + post_startup_notes: vec![], + include_stdin_notification_section: true, + teams: config.teams.clone(), + }, + ) + } + + fn tool_display_name(&self, name: &str) -> String { + match name { + "send_message" => "Sending message\u{2026}".to_string(), + "check_messages" => "Checking messages\u{2026}".to_string(), + "wait_for_message" => "Waiting for messages\u{2026}".to_string(), + "receive_message" => "Receiving messages\u{2026}".to_string(), + other => format!("Using {other}\u{2026}"), + } + } + + fn summarize_tool_input(&self, name: &str, input: &serde_json::Value) -> String { + let str_field = |field: &str| -> String { + input + .get(field) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string() + }; + match name { + "send_message" => { + let target = str_field("target"); + let content = str_field("content"); + let preview: String = content.chars().take(80).collect(); + if target.is_empty() { + preview + } else { + format!("{target}: {preview}") + } + } + _ => String::new(), + } + } + + fn detect_runtime_status(&self) -> anyhow::Result { + let binary_exists = std::env::current_exe() + .ok() + .and_then(|p| p.parent().map(|d| d.join("chorus-stub-agent"))) + .map(|p| p.exists()) + .unwrap_or(false); + + Ok(RuntimeStatus { + runtime: self.id().to_string(), + installed: binary_exists, + auth_status: Some(RuntimeAuthStatus::Authed), + }) + } + + fn list_models(&self) -> anyhow::Result> { + Ok(vec!["echo".to_string()]) + } +} diff --git a/src/agent/manager.rs b/src/agent/manager.rs index 5bcd89d8..c71a7904 100644 --- a/src/agent/manager.rs +++ b/src/agent/manager.rs @@ -43,6 +43,7 @@ fn get_driver(runtime: &str) -> anyhow::Result> { Some(AgentRuntime::Opencode) => { Ok(Arc::new(crate::agent::drivers::opencode::OpencodeDriver)) } + Some(AgentRuntime::Stub) => Ok(Arc::new(crate::agent::drivers::stub::StubDriver)), None => anyhow::bail!("Unknown runtime: {runtime}"), } } @@ -93,7 +94,7 @@ impl AgentManager { .clone() .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()), ), - AgentRuntime::Claude => None, + AgentRuntime::Claude | AgentRuntime::Stub => None, }; let config = AgentConfig { diff --git a/src/agent/runtime_status.rs b/src/agent/runtime_status.rs index 1b386720..2c6abe3f 100644 --- a/src/agent/runtime_status.rs +++ b/src/agent/runtime_status.rs @@ -38,6 +38,7 @@ impl RuntimeStatusProvider for SystemRuntimeStatusProvider { fn list_statuses(&self) -> anyhow::Result> { all_runtime_drivers() .into_iter() + .filter(|driver| driver.id() != "stub") .map(|driver| driver.detect_runtime_status()) .collect() } diff --git a/src/store/agents.rs b/src/store/agents.rs index 2ccdaf6a..c9b905d9 100644 --- a/src/store/agents.rs +++ b/src/store/agents.rs @@ -86,6 +86,7 @@ pub enum AgentRuntime { Codex, Kimi, Opencode, + Stub, } impl AgentRuntime { @@ -95,6 +96,7 @@ impl AgentRuntime { Self::Codex => "codex", Self::Kimi => "kimi", Self::Opencode => "opencode", + Self::Stub => "stub", } } @@ -104,6 +106,7 @@ impl AgentRuntime { "codex" => Some(Self::Codex), "kimi" => Some(Self::Kimi), "opencode" => Some(Self::Opencode), + "stub" => Some(Self::Stub), _ => None, } }