diff --git a/CHANGELOG.md b/CHANGELOG.md index b3f50705..fbb16515 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ Repo: https://github.com/openclaw/acpx ### Changes +- CLI: add `acpx compare` to run one prompt across multiple agents, summarize timing, token usage, stop reason, final output preview, and persisted per-agent transcripts. + ### Breaking ### Fixes diff --git a/README.md b/README.md index 5dc548ef..cbe5f014 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ One command surface for Pi, OpenClaw ACP, Codex, Claude, and other ACP-compatibl - **Structured output**: typed ACP messages (thinking, tool calls, diffs) instead of ANSI scraping - **Any ACP agent**: built-in registry + `--agent` escape hatch for custom servers - **One-shot mode**: `exec` for stateless fire-and-forget tasks +- **Compare across agents**: `acpx compare pi openclaw codex 'fix the bug'` runs the same prompt against multiple ACP-compatible agents and shows wall-clock time, token usage, and final output side by side so you can pick the right agent for a task - **Experimental flows**: `flow run ` for TypeScript workflow modules over multiple prompts - **Runtime-owned flow actions**: shell-backed action steps can prepare workspaces and other deterministic mechanics outside the agent turn - **Flow workspace isolation**: `acp` nodes can target an explicit per-step cwd, so flows can keep agent work inside disposable worktrees diff --git a/docs/compare.md b/docs/compare.md new file mode 100644 index 00000000..ae5bfd00 --- /dev/null +++ b/docs/compare.md @@ -0,0 +1,90 @@ +# Compare Command + +`acpx compare` runs the same one-shot prompt across multiple ACP-compatible agents and summarizes the results side by side. + +```bash +acpx compare pi codex claude 'fix the failing test in checkout.spec.ts' +``` + +Each agent runs independently. By default, compare uses `deny-all` permissions, which is best for review, planning, and read-only evaluation prompts. 
+ +## Usage + +```bash +acpx compare <agent>... '<prompt>' +acpx compare <agent>... -- prompt words after the delimiter +acpx compare <agent>... --prompt-file ./prompt.md +acpx compare <agent>... -f ./prompt.md +``` + +The final positional argument is treated as the prompt unless `--prompt-file` is provided. When you use `--`, every token after the delimiter is joined into the prompt. + +## Options + +| Option | Description | +| -------------------------- | ----------------------------------------------------------------------------- | +| `--cwd <dir>` | Target workspace. Defaults to the current working directory. | +| `--deny-all` | Deny all permission requests. This is the default compare permission mode. | +| `--approve-reads` | Auto-approve read/search requests and prompt for writes. | +| `--approve-all` | Auto-approve all permission requests. | +| `--timeout <seconds>` | Per-agent timeout in seconds. Defaults to `300`. Decimal seconds are allowed. | +| `--json` | Emit the full `CompareRow[]` payload instead of the text table. | +| `--diff` | Run each agent in an isolated git worktree and include diff summaries. | +| `-f, --prompt-file <path>` | Read prompt text from a file. Use `-` for stdin. | + +## Table Output + +Text output includes one row per agent: + +| Column | Meaning | +| --------------- | ---------------------------------------------------------- | +| `agent` | Agent name or raw command token. | +| `status` | `ok`, `cancelled` (includes timeouts), or `error`. | +| `wall_ms` | Wall-clock runtime in milliseconds. | +| `input` | Input token count from the latest `usage_update`, if any. | +| `output` | Output token count from the latest `usage_update`, if any. | +| `context` | Context usage from `usage_update.size` or `used`, if any. | +| `stop_reason` | ACP `session/prompt` stop reason, such as `end_turn`. | +| `final_message` | First 200 characters of assistant text output. | +| `transcript` | NDJSON transcript path. | +| `diff` | Diff summary when `--diff` is set. | +| `error` | Error preview for failed or timed-out runs. 
| + +Transcripts are persisted under: + +```text +~/.acpx/compare/<run-id>/<agent>.ndjson +``` + +## JSON Output + +`--json` emits an array of rows: + +```json +[ + { + "agent": "codex", + "status": "ok", + "stop_reason": "end_turn", + "wall_ms": 1240, + "input_tokens": 1200, + "output_tokens": 340, + "context_used": 1540, + "final_message": "The failing test is caused by...", + "transcript_path": "/Users/me/.acpx/compare/2026-05-16T12-00-00-000Z-a1b2c3/codex.ndjson", + "error": null, + "diff_stat": null, + "diff_path": null + } +] +``` + +## Diff Mode + +When `--diff` is set, each agent runs in a separate detached git worktree created from the current repository `HEAD`. Because the worktree starts from the committed `HEAD`, uncommitted changes in your working tree are not visible to the agents. After the run completes, acpx writes the full diff to the compare transcript directory and includes `git diff --stat` in the table. + +```bash +acpx compare codex claude --approve-all --diff 'implement the smallest fix' +``` + +Use diff mode for write-capable comparisons. Without `--diff`, all agents run in the same `--cwd`, which is appropriate for `deny-all` review-style prompts. 
diff --git a/src/cli-core.ts b/src/cli-core.ts index 3ab87451..ed6dbba5 100644 --- a/src/cli-core.ts +++ b/src/cli-core.ts @@ -35,6 +35,7 @@ const TOP_LEVEL_VERBS = new Set([ "prompt", "exec", "cancel", + "compare", "flow", "set-mode", "set", diff --git a/src/cli/command-registration.ts b/src/cli/command-registration.ts index bfda66ff..27383113 100644 --- a/src/cli/command-registration.ts +++ b/src/cli/command-registration.ts @@ -15,6 +15,7 @@ import { handleSetMode, parseHistoryLimit, } from "./command-handlers.js"; +import { registerCompareCommand } from "./compare-command.js"; import { registerConfigCommand } from "./config-command.js"; import type { ResolvedAcpxConfig } from "./config.js"; import { @@ -280,5 +281,6 @@ export function registerDefaultCommands(program: Command, config: ResolvedAcpxCo registerSessionsCommand(program, undefined, config); registerConfigCommand(program, config); + registerCompareCommand(program, config); registerFlowCommand(program, config); } diff --git a/src/cli/compare-command.ts b/src/cli/compare-command.ts new file mode 100644 index 00000000..af4fd12e --- /dev/null +++ b/src/cli/compare-command.ts @@ -0,0 +1,597 @@ +import { execFile } from "node:child_process"; +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { performance } from "node:perf_hooks"; +import { promisify } from "node:util"; +import { Command, InvalidArgumentError } from "commander"; +import { resolveAgentCommand } from "../agent-registry.js"; +import { TimeoutError } from "../async-control.js"; +import { mergePromptSourceWithText, textPrompt } from "../prompt-content.js"; +import { runOnce } from "../session/session.js"; +import type { + AcpJsonRpcMessage, + OutputErrorAcpPayload, + OutputErrorCode, + OutputErrorOrigin, + OutputFormatter, + OutputFormatterContext, + PermissionEscalationEvent, + PermissionMode, + PromptInput, +} from "../types.js"; +import type { ResolvedAcpxConfig } from "./config.js"; +import { 
parseNonEmptyValue, parseTimeoutSeconds, resolvePermissionMode } from "./flags.js"; + +const execFileAsync = promisify(execFile); +const DEFAULT_COMPARE_TIMEOUT_MS = 300_000; +const FINAL_MESSAGE_PREVIEW_CHARS = 200; + +export type CompareRow = { + agent: string; + status: "ok" | "cancelled" | "error"; + stop_reason: string | null; + wall_ms: number; + input_tokens: number | null; + output_tokens: number | null; + context_used: number | null; + final_message: string; + transcript_path: string; + error: string | null; + diff_stat: string | null; + diff_path: string | null; +}; + +type CompareFlags = { + cwd?: string; + approveAll?: boolean; + approveReads?: boolean; + denyAll?: boolean; + timeout?: number; + json?: boolean; + diff?: boolean; + promptFile?: string; +}; + +type CompareOptions = { + cwd: string; + runId: string; + permissionMode: PermissionMode; + timeoutMs: number; + diff: boolean; + transcriptDir: string; +}; + +type TranscriptSummary = { + stopReason: string | null; + inputTokens: number | null; + outputTokens: number | null; + contextUsed: number | null; + finalMessage: string; +}; + +type WorktreeInfo = { + cwd: string; + root: string; + worktreePath: string; +}; + +class TranscriptFormatter implements OutputFormatter { + private lines: string[] = []; + + setContext(_context: OutputFormatterContext): void { + // The raw ACP stream already carries session ids. 
/**
 * Output formatter that records a run as NDJSON: every ACP JSON-RPC message,
 * plus any error reported through `onError`, becomes one buffered line. The
 * buffer is written to disk in a single call via `writeToFile`.
 */
class TranscriptFormatter implements OutputFormatter {
  private readonly lines: string[] = [];

  setContext(_context: OutputFormatterContext): void {
    // The raw ACP stream already carries session ids.
  }

  /** Buffers one ACP message as a single NDJSON line. */
  onAcpMessage(message: AcpJsonRpcMessage): void {
    this.lines.push(`${JSON.stringify(message)}\n`);
  }

  /**
   * Buffers an error as a JSON-RPC shaped error object (code -32603) with the
   * acpx-specific details nested under `error.data`.
   */
  onError(params: {
    code: OutputErrorCode;
    detailCode?: string;
    origin?: OutputErrorOrigin;
    message: string;
    retryable?: boolean;
    acp?: OutputErrorAcpPayload;
    timestamp?: string;
  }): void {
    const record = {
      jsonrpc: "2.0",
      error: {
        code: -32603,
        message: params.message,
        data: {
          acpxCode: params.code,
          detailCode: params.detailCode,
          origin: params.origin,
          retryable: params.retryable,
          timestamp: params.timestamp,
          acp: params.acp,
        },
      },
    };
    this.lines.push(`${JSON.stringify(record)}\n`);
  }

  onPermissionEscalation(_event: PermissionEscalationEvent): void {
    // Permission details are represented by the ACP request/response messages.
  }

  flush(): void {
    // no-op
  }

  /** Writes every buffered line to `filePath` (UTF-8), replacing existing content. */
  async writeToFile(filePath: string): Promise<void> {
    await fs.writeFile(filePath, this.lines.join(""), "utf8");
  }
}

/** Returns `value` as a plain string-keyed record, or `undefined` for null, primitives and arrays. */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return undefined;
  }
  return value as Record<string, unknown>;
}

/**
 * Returns the first finite number found under `keys` (checked in order), or
 * `null` when none of the keys holds one.
 */
function readNumber(source: Record<string, unknown>, keys: string[]): number | null {
  for (const key of keys) {
    const candidate = source[key];
    if (typeof candidate === "number" && Number.isFinite(candidate)) {
      return candidate;
    }
  }
  return null;
}

/** Collapses every whitespace run to a single space and trims both ends. */
function collapseWhitespace(value: string): string {
  return value.replace(/\s+/g, " ").trim();
}

/**
 * Shortens `value` to at most `maxChars` UTF-16 code units, ending in "..."
 * when something was cut. For `maxChars < 3` there is no room for the
 * ellipsis, so the value is hard-cut instead of overshooting the limit.
 */
function truncate(value: string, maxChars: number): string {
  if (value.length <= maxChars) {
    return value;
  }
  const hasRoomForEllipsis = maxChars >= 3;
  const keep = hasRoomForEllipsis ? maxChars - 3 : Math.max(0, maxChars);
  let head = value.slice(0, keep);
  // Never leave half of a surrogate pair at the cut point.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }
  return hasRoomForEllipsis ? `${head}...` : head;
}

/**
 * Builds a run id from the current ISO timestamp (":" and "." replaced by "-")
 * plus a six-character base-36 suffix.
 */
function compareRunId(): string {
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
  // Math.random() can stringify to fewer than six fractional digits; pad so the id shape is stable.
  const suffix = Math.random().toString(36).slice(2, 8).padEnd(6, "0");
  return `${timestamp}-${suffix}`;
}

/**
 * Maps an agent token to a file-name-safe string: percent-encode, then swap
 * "%" for "_". Distinct tokens can map to the same name ("a b" and "a_20b").
 */
function safeAgentFileName(agent: string): string {
  return encodeURIComponent(agent).replace(/%/g, "_");
}
/**
 * Resolves the prompt to send to every agent.
 *
 * Without `filePath` the inline `promptText` is required. With `filePath` the
 * file (resolved against `cwd`) or stdin (`-`) is read and merged with any
 * inline text via `mergePromptSourceWithText`.
 *
 * @throws InvalidArgumentError when the resulting prompt is empty.
 */
async function readPromptInput(
  filePath: string | undefined,
  promptText: string,
  cwd: string,
): Promise<PromptInput> {
  if (!filePath) {
    if (promptText.trim().length === 0) {
      throw new InvalidArgumentError("Prompt is required unless --prompt-file is provided");
    }
    return textPrompt(promptText);
  }

  const source =
    filePath === "-" ? await readStdin() : await fs.readFile(path.resolve(cwd, filePath), "utf8");
  const prompt = mergePromptSourceWithText(source, promptText);
  if (prompt.length === 0) {
    throw new InvalidArgumentError("Prompt from --prompt-file is empty");
  }
  return prompt;
}

/**
 * Reads all of stdin as UTF-8 text. Chunks are concatenated as bytes before
 * decoding, so a multi-byte character split across two chunks stays intact.
 */
async function readStdin(): Promise<string> {
  const chunks: Buffer[] = [];
  for await (const chunk of process.stdin) {
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk, "utf8") : (chunk as Buffer));
  }
  return Buffer.concat(chunks).toString("utf8");
}

/**
 * Scans an NDJSON transcript and extracts the columns shown by compare.
 *
 * - `stopReason`: last `result.stopReason` string seen on any response.
 * - `finalMessage`: concatenated text of all `agent_message_chunk` updates,
 *   whitespace-collapsed.
 * - token counts: taken from `usage_update` notifications, reading
 *   `_meta.usage` when present and the update itself otherwise. A field
 *   missing from a later update keeps the last value seen.
 *
 * A missing transcript file yields an empty summary; unparsable lines are skipped.
 */
async function summarizeTranscript(transcriptPath: string): Promise<TranscriptSummary> {
  let content: string;
  try {
    content = await fs.readFile(transcriptPath, "utf8");
  } catch (error: unknown) {
    if ((error as NodeJS.ErrnoException).code !== "ENOENT") {
      throw error;
    }
    content = "";
  }

  let finalMessage = "";
  let stopReason: string | null = null;
  let inputTokens: number | null = null;
  let outputTokens: number | null = null;
  let contextUsed: number | null = null;

  for (const line of content.split(/\r?\n/)) {
    if (!line.trim()) {
      continue;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue;
    }

    const message = asRecord(parsed);
    if (!message) {
      continue;
    }

    if (Object.hasOwn(message, "result")) {
      const result = asRecord(message.result);
      if (typeof result?.stopReason === "string") {
        stopReason = result.stopReason;
      }
    }

    const update = asRecord(asRecord(message.params)?.update);
    if (message.method !== "session/update" || !update) {
      continue;
    }

    if (update.sessionUpdate === "agent_message_chunk") {
      const block = asRecord(update.content);
      if (block?.type === "text" && typeof block.text === "string") {
        finalMessage += block.text;
      }
      continue;
    }

    if (update.sessionUpdate === "usage_update") {
      const source = asRecord(asRecord(update._meta)?.usage) ?? update;
      inputTokens =
        readNumber(source, ["input_tokens", "inputTokens", "input", "used"]) ?? inputTokens;
      outputTokens = readNumber(source, ["output_tokens", "outputTokens", "output"]) ?? outputTokens;
      // `used` is the occupied context; `size` is the window capacity and is only a last resort.
      contextUsed =
        readNumber(source, ["context_used", "contextUsed", "used", "size"]) ?? contextUsed;
    }
  }

  return {
    stopReason,
    inputTokens,
    outputTokens,
    contextUsed,
    finalMessage: collapseWhitespace(finalMessage),
  };
}

/**
 * Returns the raw `process.argv` tokens that follow the first `--` after this
 * command's name, or an empty array when there is no such delimiter.
 */
function promptTokensAfterDoubleDash(command: Command): string[] {
  const commandName = command.name();
  const argv = process.argv;
  const commandIndex = argv.findIndex((token, index) => index >= 2 && token === commandName);
  if (commandIndex < 0) {
    return [];
  }
  const delimiterIndex = argv.findIndex((token, index) => index > commandIndex && token === "--");
  return delimiterIndex < 0 ? [] : argv.slice(delimiterIndex + 1);
}

/**
 * Splits the positional arguments into agent tokens and inline prompt text.
 *
 * - Tokens after `--` are always prompt text (joined with spaces), also when
 *   `--prompt-file` is set, so they are never mistaken for agents.
 * - Otherwise, without `--prompt-file`, the last positional is the prompt.
 *
 * @throws InvalidArgumentError when no agent (or no prompt) can be determined.
 */
function splitCompareArgs(
  args: string[],
  promptFile: string | undefined,
  command: Command,
): {
  agents: string[];
  promptText: string;
} {
  const promptTokens = promptTokensAfterDoubleDash(command);
  // Tokens after `--` arrive at the end of the positional list.
  const beforeDelimiter = promptTokens.length > 0 ? args.slice(0, -promptTokens.length) : args;

  if (promptFile || promptTokens.length > 0) {
    if (beforeDelimiter.length === 0) {
      throw new InvalidArgumentError("At least one agent is required");
    }
    return { agents: beforeDelimiter, promptText: promptTokens.join(" ") };
  }

  if (args.length < 2) {
    throw new InvalidArgumentError("Usage: acpx compare <agent>... '<prompt>'");
  }

  return {
    agents: args.slice(0, -1),
    promptText: args[args.length - 1] ?? "",
  };
}
/**
 * Picks the workspace directory: the command-level `--cwd`, then a `cwd` from
 * `optsWithGlobals()`, then `process.cwd()`. The result is always absolute.
 */
function resolveCompareCwd(command: Command, flags: CompareFlags): string {
  const globals = command.optsWithGlobals() as { cwd?: unknown };
  let cwd = process.cwd();
  if (typeof flags.cwd === "string") {
    cwd = flags.cwd;
  } else if (typeof globals.cwd === "string") {
    cwd = globals.cwd;
  }
  return path.resolve(cwd);
}

/**
 * Picks the per-agent timeout: the command-level value, then a `timeout` from
 * `optsWithGlobals()`, then `DEFAULT_COMPARE_TIMEOUT_MS`.
 */
function resolveCompareTimeout(command: Command, flags: CompareFlags): number {
  if (typeof flags.timeout === "number") {
    return flags.timeout;
  }
  const globals = command.optsWithGlobals() as { timeout?: unknown };
  if (typeof globals.timeout === "number") {
    return globals.timeout;
  }
  return DEFAULT_COMPARE_TIMEOUT_MS;
}

/**
 * Resolves the permission mode from command-level or inherited permission
 * flags, passing "deny-all" to `resolvePermissionMode` as the default.
 */
function resolveComparePermissionMode(command: Command, flags: CompareFlags): PermissionMode {
  const globals = command.optsWithGlobals() as {
    approveAll?: unknown;
    approveReads?: unknown;
    denyAll?: unknown;
  };
  // Only an explicit `true` counts; everything else is passed on as "not set".
  const isSet = (local: unknown, inherited: unknown): true | undefined =>
    local === true || inherited === true ? true : undefined;
  return resolvePermissionMode(
    {
      approveAll: isSet(flags.approveAll, globals.approveAll),
      approveReads: isSet(flags.approveReads, globals.approveReads),
      denyAll: isSet(flags.denyAll, globals.denyAll),
    },
    "deny-all",
  );
}

/**
 * Creates a detached git worktree of `HEAD` for one agent under the OS temp
 * directory and returns the directory inside it that corresponds to `cwd`.
 *
 * The position of `cwd` inside the repository comes from
 * `git rev-parse --show-prefix`, so it stays correct when `cwd` is reached
 * through a symlink and differs textually from the reported toplevel.
 */
async function prepareWorktree(agent: string, cwd: string, runId: string): Promise<WorktreeInfo> {
  const { stdout } = await execFileAsync("git", [
    "-C",
    cwd,
    "rev-parse",
    "--show-toplevel",
    "--show-prefix",
  ]);
  // Line 1 is the repository root; line 2 is cwd relative to it (empty at the root).
  const [root = "", prefix = ""] = stdout.split(/\r?\n/);
  if (root.length === 0) {
    throw new Error(`Unable to determine the git repository root for ${cwd}`);
  }

  const worktreePath = path.join(os.tmpdir(), `acpx-compare-${runId}-${safeAgentFileName(agent)}`);
  await fs.rm(worktreePath, { recursive: true, force: true });
  await execFileAsync("git", ["-C", root, "worktree", "add", "--detach", worktreePath, "HEAD"]);
  return {
    root,
    worktreePath,
    cwd: path.resolve(worktreePath, prefix),
  };
}

/**
 * Captures what the agent changed in its worktree: writes the full diff to
 * `<transcriptDir>/<agent>.diff` and returns the whitespace-collapsed
 * `git diff --stat` summary. Without a worktree both fields are `null`.
 *
 * If a git command fails, its error message is recorded in place of the
 * output rather than thrown.
 */
async function collectDiff(
  agent: string,
  transcriptDir: string,
  worktree: WorktreeInfo | undefined,
): Promise<Pick<CompareRow, "diff_stat" | "diff_path">> {
  if (!worktree) {
    return { diff_stat: null, diff_path: null };
  }

  const worktreePath = worktree.worktreePath;
  // The 1 MiB execFile default is too small for many real diffs.
  const maxBuffer = 64 * 1024 * 1024;
  const git = (args: string[]): Promise<{ stdout: string; stderr: string }> =>
    execFileAsync("git", ["-C", worktreePath, ...args], { maxBuffer }).catch(
      (error: unknown) => ({
        stdout: "",
        stderr: error instanceof Error ? error.message : String(error),
      }),
    );

  // Plain `git diff` ignores untracked files; registering them as intent-to-add
  // makes files the agent created show up. The worktree is disposable, so
  // touching its index is safe.
  await git(["add", "--intent-to-add", "--", "."]);

  const [stat, diff] = await Promise.all([git(["diff", "--stat"]), git(["diff"])]);
  const diffPath = path.join(transcriptDir, `${safeAgentFileName(agent)}.diff`);
  await fs.writeFile(diffPath, diff.stdout || diff.stderr || "", "utf8");

  return {
    diff_stat: collapseWhitespace(stat.stdout || stat.stderr || "no changes"),
    diff_path: diffPath,
  };
}

/**
 * Best-effort removal of a compare worktree; never throws. If
 * `git worktree remove` fails, the directory is deleted directly and stale
 * worktree metadata is pruned.
 */
async function removeWorktree(worktree: WorktreeInfo | undefined): Promise<void> {
  if (!worktree) {
    return;
  }
  const { root, worktreePath } = worktree;
  try {
    await execFileAsync("git", ["-C", root, "worktree", "remove", "--force", worktreePath]);
  } catch {
    await fs.rm(worktreePath, { recursive: true, force: true }).catch(() => undefined);
    await execFileAsync("git", ["-C", root, "worktree", "prune"]).catch(() => undefined);
  }
}
/**
 * Runs one agent for the comparison and builds its result row.
 *
 * Steps: optionally create an isolated worktree, run the prompt via `runOnce`
 * with a transcript-capturing formatter, persist the transcript, then derive
 * the row from the transcript and (in diff mode) the worktree diff.
 *
 * Failures of the run itself are reported in the row (`status`/`error`), not
 * thrown. A `TimeoutError` is reported as "cancelled". The worktree is removed
 * on every path, including when persisting or summarizing throws.
 */
async function runAgentForCompare(
  agent: string,
  prompt: PromptInput,
  options: CompareOptions,
  config: ResolvedAcpxConfig,
): Promise<CompareRow> {
  const transcriptPath = path.join(options.transcriptDir, `${safeAgentFileName(agent)}.ndjson`);
  await fs.mkdir(path.dirname(transcriptPath), { recursive: true });

  const formatter = new TranscriptFormatter();
  const startedAt = performance.now();
  let worktree: WorktreeInfo | undefined;
  let status: CompareRow["status"] = "ok";
  let error: string | null = null;

  try {
    try {
      if (options.diff) {
        worktree = await prepareWorktree(agent, options.cwd, options.runId);
      }
      const agentCommand = resolveAgentCommand(agent, config.agents);
      const result = await runOnce({
        agentCommand,
        cwd: worktree?.cwd ?? options.cwd,
        prompt,
        mcpServers: config.mcpServers,
        permissionMode: options.permissionMode,
        nonInteractivePermissions: config.nonInteractivePermissions,
        authCredentials: config.auth,
        authPolicy: config.authPolicy,
        outputFormatter: formatter,
        suppressSdkConsoleErrors: true,
        timeoutMs: options.timeoutMs,
      });
      if (result.stopReason === "cancelled") {
        status = "cancelled";
      }
    } catch (caught) {
      status = caught instanceof TimeoutError ? "cancelled" : "error";
      error = caught instanceof Error ? caught.message : String(caught);
    } finally {
      // Persist whatever was captured, even for failed or timed-out runs.
      await formatter.writeToFile(transcriptPath);
    }

    const wallMs = Math.round(performance.now() - startedAt);
    const [summary, diff] = await Promise.all([
      summarizeTranscript(transcriptPath),
      collectDiff(agent, options.transcriptDir, worktree),
    ]);

    return {
      agent,
      status,
      stop_reason: summary.stopReason,
      wall_ms: wallMs,
      input_tokens: summary.inputTokens,
      output_tokens: summary.outputTokens,
      context_used: summary.contextUsed,
      final_message: truncate(summary.finalMessage, FINAL_MESSAGE_PREVIEW_CHARS),
      transcript_path: transcriptPath,
      error: error ? truncate(collapseWhitespace(error), FINAL_MESSAGE_PREVIEW_CHARS) : null,
      diff_stat: diff.diff_stat,
      diff_path: diff.diff_path,
    };
  } finally {
    // Runs on every path so a failed write or summary never leaks a worktree.
    await removeWorktree(worktree);
  }
}

/**
 * Renders one table cell: "-" for null/undefined/empty, whitespace-collapsed
 * text for strings, `String(value)` for numbers and booleans, and collapsed
 * JSON for anything else.
 */
function formatCell(value: unknown): string {
  if (value == null || value === "") {
    return "-";
  }
  switch (typeof value) {
    case "string":
      return collapseWhitespace(value);
    case "number":
    case "boolean":
      return String(value);
    default:
      // JSON.stringify returns undefined for functions and symbols.
      return collapseWhitespace(JSON.stringify(value) ?? String(value));
  }
}

/**
 * Renders the rows as a plain-text table: a header line, a dashed separator,
 * and one line per row. Each column is as wide as its widest cell. The `diff`
 * column appears only when `includeDiff` is true.
 */
function renderTable(rows: CompareRow[], includeDiff: boolean): string {
  const headers = [
    "agent",
    "status",
    "wall_ms",
    "input",
    "output",
    "context",
    "stop_reason",
    "final_message",
    "transcript",
    ...(includeDiff ? ["diff"] : []),
    "error",
  ];
  const body: unknown[][] = rows.map((row) => [
    row.agent,
    row.status,
    row.wall_ms,
    row.input_tokens,
    row.output_tokens,
    row.context_used,
    row.stop_reason,
    row.final_message,
    row.transcript_path,
    ...(includeDiff ? [row.diff_stat] : []),
    row.error,
  ]);

  const widths = headers.map((header, column) =>
    body.reduce((widest, cells) => Math.max(widest, formatCell(cells[column]).length), header.length),
  );
  const formatRow = (cells: unknown[]): string =>
    cells
      .map((cell, column) => {
        const width = widths[column] ?? 24;
        return truncate(formatCell(cell), width).padEnd(width);
      })
      .join(" ")
      .trimEnd();

  return [
    formatRow(headers),
    widths.map((width) => "-".repeat(width)).join(" "),
    ...body.map(formatRow),
  ].join("\n");
}
/**
 * Registers `acpx compare <agent>... '<prompt>'` on `program`.
 *
 * The action resolves cwd, prompt, permission mode and timeout, runs every
 * agent concurrently through `runAgentForCompare`, and prints either the JSON
 * rows (`--json`) or a text table. A failure in one agent becomes an "error"
 * row and does not abort the others.
 *
 * @throws Error from the action when `config.disableExec` is set.
 * @throws InvalidArgumentError from the action when two agents map to the same
 *   transcript file name, since they would overwrite each other's transcript
 *   and worktree.
 */
export function registerCompareCommand(program: Command, config: ResolvedAcpxConfig): void {
  program
    .command("compare")
    .description("Run one prompt across multiple agents and compare the results")
    // Variadic so the action receives every positional as a string[].
    .argument("<args...>", "Agents followed by prompt text, or agents with --prompt-file")
    // Value options need a <placeholder>; without one commander treats them as boolean flags.
    .option("--cwd <dir>", "Target workspace")
    .option("--approve-all", "Auto-approve all permission requests")
    .option("--approve-reads", "Auto-approve read/search requests and prompt for writes")
    .option("--deny-all", "Deny all permission requests")
    .option("--timeout <seconds>", "Per-agent timeout in seconds", parseTimeoutSeconds)
    .option("--json", "Emit CompareRow[] as JSON")
    .option("--diff", "Run each agent in an isolated git worktree and report diff summaries")
    .option(
      "-f, --prompt-file <path>",
      "Read prompt text from file path (use - for stdin)",
      (value: string) => parseNonEmptyValue("Prompt file", value),
    )
    .action(async function (this: Command, args: string[], flags: CompareFlags) {
      if (config.disableExec) {
        throw new Error("compare subcommand is disabled by configuration (disableExec: true)");
      }

      const cwd = resolveCompareCwd(this, flags);
      const { agents, promptText } = splitCompareArgs(args, flags.promptFile, this);

      // Transcript and worktree paths are derived from the agent token, so two
      // agents with the same derived name would race on the same files.
      const claimedNames = new Set<string>();
      for (const agent of agents) {
        const fileName = safeAgentFileName(agent);
        if (claimedNames.has(fileName)) {
          throw new InvalidArgumentError(
            `Agent "${agent}" is listed more than once (or collides with another agent's transcript name)`,
          );
        }
        claimedNames.add(fileName);
      }

      const prompt = await readPromptInput(flags.promptFile, promptText, cwd);
      const runId = compareRunId();
      const transcriptDir = path.join(os.homedir(), ".acpx", "compare", runId);
      const options: CompareOptions = {
        cwd,
        runId,
        permissionMode: resolveComparePermissionMode(this, flags),
        timeoutMs: resolveCompareTimeout(this, flags),
        diff: flags.diff === true,
        transcriptDir,
      };

      const rows: CompareRow[] = await Promise.all(
        agents.map(async (agent): Promise<CompareRow> => {
          try {
            return await runAgentForCompare(agent, prompt, options, config);
          } catch (error: unknown) {
            // Only reached when the run could not produce a row at all.
            const message = error instanceof Error ? error.message : String(error);
            return {
              agent,
              status: "error",
              stop_reason: null,
              wall_ms: 0,
              input_tokens: null,
              output_tokens: null,
              context_used: null,
              final_message: "",
              transcript_path: path.join(transcriptDir, `${safeAgentFileName(agent)}.ndjson`),
              error: truncate(collapseWhitespace(message), FINAL_MESSAGE_PREVIEW_CHARS),
              diff_stat: null,
              diff_path: null,
            };
          }
        }),
      );

      if (flags.json) {
        process.stdout.write(`${JSON.stringify(rows, null, 2)}\n`);
        return;
      }

      process.stdout.write(`${renderTable(rows, options.diff)}\n`);
    });
}
// test/compare-command.test.ts — fixtures and helpers for the compare CLI tests.
import assert from "node:assert/strict";
import { spawn } from "node:child_process";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import test from "node:test";
import { fileURLToPath } from "node:url";

const CLI_PATH = fileURLToPath(new URL("../src/cli.js", import.meta.url));
const REPO_ROOT = process.cwd();

type CliRunResult = {
  code: number | null;
  stdout: string;
  stderr: string;
};

/** Runs `run` with a fresh temporary HOME directory and removes it afterwards. */
async function withTempHome(run: (homeDir: string) => Promise<void>): Promise<void> {
  const tempHome = await fs.mkdtemp(path.join(os.tmpdir(), "acpx-compare-test-home-"));
  try {
    await run(tempHome);
  } finally {
    await fs.rm(tempHome, { recursive: true, force: true, maxRetries: 3, retryDelay: 50 });
  }
}

/**
 * Spawns the CLI with `args`, using `homeDir` as HOME and `cwd` as the working
 * directory, and resolves with its exit code and captured output. Rejects if
 * the process cannot be spawned, so a broken setup fails instead of hanging.
 */
async function runCli(args: string[], homeDir: string, cwd: string): Promise<CliRunResult> {
  return await new Promise<CliRunResult>((resolve, reject) => {
    const child = spawn(process.execPath, [CLI_PATH, ...args], {
      env: {
        ...process.env,
        HOME: homeDir,
        ACPX_TEST_REPO_ROOT: REPO_ROOT,
      },
      cwd,
      stdio: ["ignore", "pipe", "pipe"],
    });

    let stdout = "";
    let stderr = "";
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });
    child.once("error", reject);
    child.once("close", (code) => {
      resolve({ code, stdout, stderr });
    });
  });
}

/**
 * Writes a minimal ACP agent script into `homeDir` and returns its path.
 * The script's first argument selects the mode: "fast" answers after 10 ms,
 * "slow" after 1200 ms, and "error" exits with code 1 before speaking ACP.
 */
async function writeCompareAgent(homeDir: string): Promise<string> {
  const agentPath = path.join(homeDir, "compare-agent.mjs");
  await fs.writeFile(
    agentPath,
    `
import { randomUUID } from "node:crypto";
import { createRequire } from "node:module";
import { Readable, Writable } from "node:stream";

const require = createRequire(process.env.ACPX_TEST_REPO_ROOT + "/package.json");
const {
  AgentSideConnection,
  PROTOCOL_VERSION,
  ndJsonStream,
} = await import(require.resolve("@agentclientprotocol/sdk"));

const mode = process.argv[2] || "fast";
if (mode === "error") {
  process.exit(1);
}

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
const promptText = (prompt) =>
  prompt
    .filter((block) => block.type === "text")
    .map((block) => block.text)
    .join("")
    .trim();

class CompareAgent {
  constructor(connection) {
    this.connection = connection;
    this.sessions = new Set();
  }

  async initialize() {
    return {
      protocolVersion: PROTOCOL_VERSION,
      authMethods: [],
      agentCapabilities: {},
    };
  }

  async authenticate() {}

  async newSession() {
    const sessionId = randomUUID();
    this.sessions.add(sessionId);
    return { sessionId };
  }

  async prompt(params) {
    const text = promptText(params.prompt);
    const delay = mode === "slow" ? 1200 : 10;
    await sleep(delay);
    await this.connection.sessionUpdate({
      sessionId: params.sessionId,
      update: {
        sessionUpdate: "usage_update",
        input_tokens: mode === "slow" ? 30 : 10,
        output_tokens: mode === "slow" ? 40 : 20,
        size: mode === "slow" ? 70 : 30,
      },
    });
    await this.connection.sessionUpdate({
      sessionId: params.sessionId,
      update: {
        sessionUpdate: "agent_message_chunk",
        content: { type: "text", text: mode + ": " + text },
      },
    });
    return { stopReason: "end_turn" };
  }

  async cancel() {}
}

const output = Writable.toWeb(process.stdout);
const input = Readable.toWeb(process.stdin);
const stream = ndJsonStream(output, input);
new AgentSideConnection((connection) => new CompareAgent(connection), stream);
`,
    "utf8",
  );
  return agentPath;
}

/** Writes `~/.acpx/config.json` registering the "fast", "slow" and "error" fixture agents. */
async function writeCompareConfig(homeDir: string, agentPath: string): Promise<void> {
  const configDir = path.join(homeDir, ".acpx");
  await fs.mkdir(configDir, { recursive: true });
  const agentEntry = (mode: string) => ({ command: process.execPath, args: [agentPath, mode] });
  await fs.writeFile(
    path.join(configDir, "config.json"),
    JSON.stringify(
      {
        defaultPermissions: "deny-all",
        agents: {
          fast: agentEntry("fast"),
          slow: agentEntry("slow"),
          error: agentEntry("error"),
        },
      },
      null,
      2,
    ),
    "utf8",
  );
}

/** Creates the workspace directory, agent script and config; returns the workspace path. */
async function setupCompareFixture(homeDir: string): Promise<string> {
  const cwd = path.join(homeDir, "workspace");
  await fs.mkdir(cwd, { recursive: true });
  const agentPath = await writeCompareAgent(homeDir);
  await writeCompareConfig(homeDir, agentPath);
  return cwd;
}

/** Subset of the CLI's JSON row shape that these tests assert on. */
type CompareRow = {
  agent: string;
  status: "ok" | "cancelled" | "error";
  stop_reason: string | null;
  input_tokens: number | null;
  output_tokens: number | null;
  context_used: number | null;
  final_message: string;
  transcript_path: string;
  error: string | null;
};
// Text mode: both agents succeed and appear as rows in the rendered table.
test("compare fast slow renders a table with both successful rows", async () => {
  await withTempHome(async (homeDir) => {
    const cwd = await setupCompareFixture(homeDir);
    const result = await runCli(["compare", "fast", "slow", "summarize"], homeDir, cwd);

    assert.equal(result.code, 0, result.stderr);
    assert.match(result.stdout, /agent\s+status\s+wall_ms/);
    assert.match(result.stdout, /fast\s+ok/);
    assert.match(result.stdout, /slow\s+ok/);
    assert.match(result.stdout, /end_turn/);
    assert.match(result.stdout, /fast: summarize/);
    assert.match(result.stdout, /slow: summarize/);
  });
});

// JSON mode: rows keep argument order, carry token counts, and point at real transcript files.
test("compare --json emits CompareRow array and persists transcripts", async () => {
  await withTempHome(async (homeDir) => {
    const cwd = await setupCompareFixture(homeDir);
    const result = await runCli(["compare", "fast", "slow", "--json", "summarize"], homeDir, cwd);

    assert.equal(result.code, 0, result.stderr);
    const rows = JSON.parse(result.stdout) as CompareRow[];
    assert.deepEqual(
      rows.map((row) => [row.agent, row.status]),
      [
        ["fast", "ok"],
        ["slow", "ok"],
      ],
    );
    assert.equal(rows[0]?.input_tokens, 10);
    assert.equal(rows[0]?.output_tokens, 20);
    assert.equal(rows[0]?.context_used, 30);
    assert.equal(rows[1]?.input_tokens, 30);
    assert.equal(rows[1]?.output_tokens, 40);

    for (const row of rows) {
      // Accept both "/" and "\" so the assertion also holds for Windows paths.
      assert.match(row.transcript_path, /\.acpx[\\/]compare[\\/].+[\\/].+\.ndjson$/);
      const transcript = await fs.readFile(row.transcript_path, "utf8");
      assert.match(transcript, /session\/update/);
      assert.match(transcript, /usage_update/);
    }
  });
});

// One failing agent must not discard the rows of the agents that succeeded.
test("compare keeps successful rows when one agent errors", async () => {
  await withTempHome(async (homeDir) => {
    const cwd = await setupCompareFixture(homeDir);
    const result = await runCli(
      ["compare", "fast", "slow", "error", "--json", "summarize"],
      homeDir,
      cwd,
    );

    assert.equal(result.code, 0, result.stderr);
    const rows = JSON.parse(result.stdout) as CompareRow[];
    const rowFor = (agent: string) => rows.find((row) => row.agent === agent);
    assert.equal(rowFor("fast")?.status, "ok");
    assert.equal(rowFor("slow")?.status, "ok");
    const errorRow = rowFor("error");
    assert.equal(errorRow?.status, "error");
    assert.equal(typeof errorRow?.error, "string");
    assert.notEqual(errorRow?.error, "");
  });
});

// A 0.5 s timeout is shorter than the slow agent's 1.2 s delay, so only "slow" is cancelled.
test("compare timeout marks slow agents as cancelled", async () => {
  await withTempHome(async (homeDir) => {
    const cwd = await setupCompareFixture(homeDir);
    const result = await runCli(
      ["compare", "fast", "slow", "--timeout", "0.5", "--json", "summarize"],
      homeDir,
      cwd,
    );

    assert.equal(result.code, 0, result.stderr);
    const rows = JSON.parse(result.stdout) as CompareRow[];
    assert.equal(rows.find((row) => row.agent === "fast")?.status, "ok");
    assert.equal(rows.find((row) => row.agent === "slow")?.status, "cancelled");
  });
});