From 4f75a8cd3d9bf5ba0fdab3f14069577482ea4551 Mon Sep 17 00:00:00 2001 From: Dustin Hilgaertner Date: Tue, 2 Jun 2026 22:14:18 -0500 Subject: [PATCH 1/2] Fix managed terminal launch race (bare-zsh orphans) (#408) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Managed Claude/Codex terminals created by /crow-workspace intermittently came up as a bare zsh with no agent. Two race modes, both because setup.sh launched the agent out-of-band: 1. Blind-paste race: setup.sh did `new-terminal --managed` + `sleep 3` + `crow send`, pasting the launch command before the shell's line editor (zle) was live, so the keystrokes were dropped. Even `--command` was racy because registerTerminal pasted immediately without waiting for readiness. 2. No-window swallowed failure: under load `new-window` exceeded its 2s timeout, threw, and the new-terminal RPC only logged it — registering a window-less terminal while reporting success. Fix — route brand-new managed terminals through the existing race-free deferred-launch path (the one already used for restored terminals): - new-terminal RPC holds the launch command in AppState.pendingLaunchCommands and registers the window with command:nil; SessionService pastes it on the `.shellReady` sentinel (zle is live then), consuming both the pending command and autoLaunch membership so launchAgent can't also fire. The deferred paste replicates the `send` RPC's hook-config + OTEL env prep. - Bounded retry of registerTerminal with a longer per-call new-window timeout; on exhaustion mark the terminal .failed and return launch_failed instead of reporting success. - setup.sh launches via `new-terminal --command` (no sleep/send), keeps its own --rc/--name (the app can't inject for a `cd && claude` form), parses launch_failed, and polls the new `readiness` field on `list-terminals` to warn (non-fatally) if the agent never started. - Conservative reaper at launch: kill only cockpit windows that no terminal references AND that sit at a bare login shell (never a window running an agent, or one a terminal still references). Tests: pure resolveLaunch/registerWithRetry/shouldReapWindow helpers, a SessionService readiness-branch test, and a concurrent N-managed-terminal real-tmux regression asserting every window runs the launched process with zero bare shells. Serialized the TmuxBackend integration suite so its timing-sensitive tests don't contend. All package + app suites green. 🐦‍⬛ Generated with Claude Code, orchestrated by Crow Co-Authored-By: Claude Crow-Session: 1B4F94D3-4045-4DB0-9442-BA35B6050D6A --- .../CrowCore/Sources/CrowCore/AppState.swift | 8 ++ .../Sources/CrowTerminal/TmuxBackend.swift | 56 +++++++- .../Sources/CrowTerminal/TmuxController.swift | 25 +++- .../OrphanWindowReaperTests.swift | 31 +++++ .../CrowTerminalTests/TmuxBackendTests.swift | 77 ++++++++++- Sources/Crow/App/AppDelegate.swift | 108 +++++++++++++-- Sources/Crow/App/SessionService.swift | 113 +++++++++++++++- Tests/CrowTests/DeferredLaunchTests.swift | 123 ++++++++++++++++++ skills/crow-batch-workspace/SKILL.md | 3 +- skills/crow-workspace/SKILL.md | 3 +- skills/crow-workspace/setup.sh | 91 +++++++++---- 11 files changed, 593 insertions(+), 45 deletions(-) create mode 100644 Packages/CrowTerminal/Tests/CrowTerminalTests/OrphanWindowReaperTests.swift create mode 100644 Tests/CrowTests/DeferredLaunchTests.swift diff --git a/Packages/CrowCore/Sources/CrowCore/AppState.swift b/Packages/CrowCore/Sources/CrowCore/AppState.swift index a3b3cfea..0865fdcc 100644 --- a/Packages/CrowCore/Sources/CrowCore/AppState.swift +++ b/Packages/CrowCore/Sources/CrowCore/AppState.swift @@ -223,6 +223,14 @@ public final class AppState { /// not brand-new terminals created via the `new-terminal` RPC. public var autoLaunchTerminals: Set = [] + /// Pending agent launch command per terminal ID, for brand-new managed + /// terminals created via `new-terminal --command`. The command is NOT + /// pasted immediately (that races the shell's line editor — issue #408); + /// it is held here until the readiness sentinel fires `.shellReady`, at + /// which point `SessionService.wireTerminalReadiness` pastes it. In-memory + /// only — never persisted, so a relaunch can't re-paste a stale command. + public var pendingLaunchCommands: [UUID: String] = [:] + // MARK: - Hook Events (per-session Observable wrappers) /// Per-session hook state. Using @Observable class wrappers so mutations to one diff --git a/Packages/CrowTerminal/Sources/CrowTerminal/TmuxBackend.swift b/Packages/CrowTerminal/Sources/CrowTerminal/TmuxBackend.swift index 851d79bc..406c7f2a 100644 --- a/Packages/CrowTerminal/Sources/CrowTerminal/TmuxBackend.swift +++ b/Packages/CrowTerminal/Sources/CrowTerminal/TmuxBackend.swift @@ -178,7 +178,8 @@ public final class TmuxBackend { name: String, cwd: String, command: String?, - trackReadiness: Bool + trackReadiness: Bool, + newWindowTimeout: TimeInterval = TmuxController.defaultTimeout ) throws -> TmuxBinding { precondition(!tmuxBinary.isEmpty, "TmuxBackend.configure(...) must be called first") let ctrl: TmuxController @@ -219,7 +220,8 @@ public final class TmuxBackend { name: name, cwd: cwd.isEmpty ? nil : cwd, env: env, - command: wrapperPath + command: wrapperPath, + timeout: newWindowTimeout ) bindings[id] = windowIndex @@ -365,6 +367,56 @@ public final class TmuxBackend { } } + /// Bare login shells we consider "orphaned" when a cockpit window is not + /// referenced by any terminal — i.e. a window left at a shell with no agent + /// running (#408). Anything else (claude/codex/node/an editor/…) is left + /// alone. tmux reports `pane_current_command` without the login-shell `-` + /// prefix, but we match both forms defensively. + nonisolated static let orphanLoginShells: Set = [ + "zsh", "-zsh", "bash", "-bash", "sh", "-sh", + "fish", "-fish", "dash", "-dash", "ksh", "tcsh", "csh", "login", + ] + + /// Decide whether a single cockpit window should be reaped. Pure so the + /// policy is unit-testable: reap only when the window is NOT referenced by a + /// live terminal AND its pane is sitting at a bare login shell. Never reaps + /// a window running an agent (or any non-shell process), so an agent that + /// exited and left the user at a shell — but whose terminal still references + /// the window — is preserved (it's in `keep`). + nonisolated static func shouldReapWindow(index: Int, command: String, keep: Set) -> Bool { + if keep.contains(index) { return false } + return orphanLoginShells.contains(command) + } + + /// Reap cockpit windows that no live terminal references AND that are + /// sitting at a bare login shell — leaked windows from a timed-out + /// `new-window` or a forgotten terminal (#408). `keepWindowIndices` is the + /// set of window indices referenced by persisted terminals; it is unioned + /// with the in-memory `bindings` so a window created/adopted this run is + /// never reaped. Best-effort; returns the count reaped. + @discardableResult + public func reapUnboundCockpitWindows(keepWindowIndices: Set) -> Int { + guard let ctrl = controller else { return 0 } + let keep = keepWindowIndices.union(bindings.values) + let windows: [(index: Int, command: String)] + do { + windows = try ctrl.listWindowCommands() + } catch { + reportIfTimeout(error) + return 0 + } + var reaped = 0 + for window in windows where Self.shouldReapWindow(index: window.index, command: window.command, keep: keep) { + ctrl.killWindow(index: window.index) + NSLog("[CrowTelemetry tmux:orphan_window_reaped index=\(window.index) command=\(window.command)]") + reaped += 1 + } + if reaped > 0 { + NSLog("[Crow] Reaped \(reaped) orphaned bare-shell cockpit window(s) (#408)") + } + return reaped + } + /// Return the shared cockpit Ghostty surface, lazily creating it the /// first time. The surface attaches to the live tmux session via /// `tmux -S … attach-session -t …` as its child command. diff --git a/Packages/CrowTerminal/Sources/CrowTerminal/TmuxController.swift b/Packages/CrowTerminal/Sources/CrowTerminal/TmuxController.swift index 9b5358d3..e294eadf 100644 --- a/Packages/CrowTerminal/Sources/CrowTerminal/TmuxController.swift +++ b/Packages/CrowTerminal/Sources/CrowTerminal/TmuxController.swift @@ -104,13 +104,34 @@ public struct TmuxController: Sendable { return out.split(separator: "\n").compactMap { Int($0.trimmingCharacters(in: .whitespaces)) } } + /// List each window's index and the command currently running in its + /// active pane (`#{window_index}` + `#{pane_current_command}`). Used by the + /// orphan-window reaper to distinguish a window running an agent from a + /// bare login shell (#408). + public func listWindowCommands() throws -> [(index: Int, command: String)] { + let out = try run(["list-windows", "-t", sessionName, + "-F", "#{window_index}\t#{pane_current_command}"]) + return out.split(separator: "\n").compactMap { line in + let parts = line.split(separator: "\t", maxSplits: 1, omittingEmptySubsequences: false) + guard parts.count == 2, let idx = Int(parts[0].trimmingCharacters(in: .whitespaces)) else { + return nil + } + return (idx, parts[1].trimmingCharacters(in: .whitespaces)) + } + } + // MARK: - Windows + /// `timeout` defaults to the per-call default. Callers spawning a window + /// while the app is under load (many concurrent hydrations, a contended + /// main actor) can pass a longer budget so a slow `new-window` doesn't + /// SIGTERM and leave the terminal window-less (issue #408). public func newWindow( name: String? = nil, cwd: String? = nil, env: [String: String] = [:], - command: String? = nil + command: String? = nil, + timeout: TimeInterval = TmuxController.defaultTimeout ) throws -> Int { var args = ["new-window", "-P", "-F", "#{window_index}", "-t", sessionName] if let name { args.append(contentsOf: ["-n", name]) } @@ -122,7 +143,7 @@ public struct TmuxController: Sendable { if let cwd, !cwd.isEmpty { args.append(contentsOf: ["-c", cwd]) } for (k, v) in env { args.append(contentsOf: ["-e", "\(k)=\(v)"]) } if let command { args.append(command) } - let out = try run(args) + let out = try run(args, timeout: timeout) guard let idx = Int(out.trimmingCharacters(in: .whitespacesAndNewlines)) else { throw TmuxError.cliFailed( args: args, diff --git a/Packages/CrowTerminal/Tests/CrowTerminalTests/OrphanWindowReaperTests.swift b/Packages/CrowTerminal/Tests/CrowTerminalTests/OrphanWindowReaperTests.swift new file mode 100644 index 00000000..ac681003 --- /dev/null +++ b/Packages/CrowTerminal/Tests/CrowTerminalTests/OrphanWindowReaperTests.swift @@ -0,0 +1,31 @@ +import Foundation +import Testing +@testable import CrowTerminal + +/// Policy tests for the conservative orphan-window reaper (#408): reap a +/// cockpit window only when NO live terminal references it AND it is sitting at +/// a bare login shell. Pure over `shouldReapWindow`, so no tmux needed. +@Suite("Orphan cockpit window reaper policy (#408)") +struct OrphanWindowReaperTests { + + @Test func reapsUnboundBareShell() { + #expect(TmuxBackend.shouldReapWindow(index: 3, command: "zsh", keep: [])) + #expect(TmuxBackend.shouldReapWindow(index: 3, command: "-zsh", keep: [])) + #expect(TmuxBackend.shouldReapWindow(index: 3, command: "bash", keep: [])) + #expect(TmuxBackend.shouldReapWindow(index: 3, command: "sh", keep: [])) + } + + @Test func keepsBoundWindowEvenWhenBareShell() { + // A terminal references it (e.g. the agent exited and the user is now at + // the shell) — must be preserved. + #expect(!TmuxBackend.shouldReapWindow(index: 3, command: "zsh", keep: [3])) + } + + @Test func keepsWindowRunningAProcess() { + // Anything that isn't a bare login shell is left alone. + #expect(!TmuxBackend.shouldReapWindow(index: 4, command: "claude", keep: [])) + #expect(!TmuxBackend.shouldReapWindow(index: 4, command: "node", keep: [])) + #expect(!TmuxBackend.shouldReapWindow(index: 4, command: "codex", keep: [])) + #expect(!TmuxBackend.shouldReapWindow(index: 4, command: "tail", keep: [])) // session anchor + } +} diff --git a/Packages/CrowTerminal/Tests/CrowTerminalTests/TmuxBackendTests.swift b/Packages/CrowTerminal/Tests/CrowTerminalTests/TmuxBackendTests.swift index 41a3c813..bf18ff8f 100644 --- a/Packages/CrowTerminal/Tests/CrowTerminalTests/TmuxBackendTests.swift +++ b/Packages/CrowTerminal/Tests/CrowTerminalTests/TmuxBackendTests.swift @@ -8,7 +8,11 @@ import Testing /// covered separately by the visual demo path. Skipped automatically /// when no tmux binary is present on the host. @MainActor -@Suite("TmuxBackend integration", .enabled(if: discoveredTmuxBinary != nil)) +// Serialized: these are real-tmux integration tests that spawn login shells and +// assert on readiness *timing* (e.g. a 0.15s watch budget). Run in parallel they +// contend for CPU during shell startup and the tight-timing cases flake — most +// visibly once the #408 concurrent-launch test spins up several shells at once. +@Suite("TmuxBackend integration", .enabled(if: discoveredTmuxBinary != nil), .serialized) struct TmuxBackendTests { private func makeBackend() -> TmuxBackend { @@ -373,4 +377,75 @@ struct TmuxBackendTests { try await Task.sleep(nanoseconds: 100_000_000) #expect(received.isEmpty) } + + // MARK: - #408 concurrent managed launch: no bare-zsh orphans + + /// The #408 repro. Spawn several managed terminals at once, each deferring + /// its launch until `.shellReady` (registered with `command: nil`, then the + /// command pasted on the readiness fire — exactly what the new-terminal RPC + /// now does). Every window must reach readiness and run the pasted process; + /// none may be left at a bare login shell. + @Test func concurrentManagedLaunchesAllReachReadyNoBareShells() async throws { + let backend = makeBackend() + defer { backend.shutdown() } + + let n = 6 + let ids = (0..() + var timedOut = Set() + + backend.onReadinessChanged = { id, state in + switch state { + case .shellReady: + guard !ready.contains(id) else { return } + ready.insert(id) + // The deferred paste: `exec` a marker process so the pane is no + // longer a bare login shell — proof the launch landed in a live + // shell rather than being dropped (the #408 failure). + try? backend.sendText(id: id, text: "exec sleep 97\n") + case .timedOut: + timedOut.insert(id) + default: + break + } + } + + for id in ids { + let binding = try backend.registerTerminal( + id: id, name: "managed-\(id.uuidString.prefix(4))", + cwd: NSHomeDirectory(), command: nil, trackReadiness: true + ) + windowIndexByID[id] = binding.windowIndex + } + + // Wait (bounded) for all to report `.shellReady`. Shell startup is + // CPU-contended with N concurrent wrappers; 30s mirrors the backend's + // default readiness budget. + let deadline = Date().addingTimeInterval(30) + while ready.count < n && timedOut.isEmpty && Date() < deadline { + try await Task.sleep(nanoseconds: 100_000_000) + } + #expect(timedOut.isEmpty) + #expect(ready.count == n) + + // Assert no managed window is left at a bare login shell. Poll briefly: + // the exec'd markers replace their shells asynchronously. + let ctrl = TmuxController( + tmuxBinary: discoveredTmuxBinary!, socketPath: backend.socketPath, + sessionName: TmuxBackend.cockpitSessionName + ) + let ourIndices = Set(windowIndexByID.values) + var bareShells: [Int] = [] + let checkDeadline = Date().addingTimeInterval(5) + repeat { + try await Task.sleep(nanoseconds: 200_000_000) + let windows = try ctrl.listWindowCommands().filter { ourIndices.contains($0.index) } + bareShells = windows + .filter { TmuxBackend.orphanLoginShells.contains($0.command) } + .map { $0.index } + } while !bareShells.isEmpty && Date() < checkDeadline + + #expect(bareShells.isEmpty) + } } diff --git a/Sources/Crow/App/AppDelegate.swift b/Sources/Crow/App/AppDelegate.swift index ab9c43a7..2c66323e 100644 --- a/Sources/Crow/App/AppDelegate.swift +++ b/Sources/Crow/App/AppDelegate.swift @@ -436,6 +436,13 @@ final class AppDelegate: NSObject, NSApplicationDelegate { // Detect orphaned worktrees (runs async, updates UI when done) Task { await service.detectOrphanedWorktrees() } + // Reap leaked orphan cockpit windows (bare shells that no terminal + // references) once rehydration's async adoptions have settled (#408). + Task { @MainActor in + try? await Task.sleep(nanoseconds: 8_000_000_000) + service.reapOrphanedCockpitWindows() + } + // Check for runtime dependencies (non-blocking) Task { let missing = await Task.detached { @@ -1310,28 +1317,73 @@ final class AppDelegate: NSObject, NSApplicationDelegate { isManaged: isManaged, backend: .tmux ) + // Brand-new managed terminals DEFER their agent launch until + // the shell signals readiness (issue #408). Pasting the launch + // command immediately races the shell's line editor (zle): if + // the prompt isn't live yet the keystrokes are dropped and the + // window is left at a bare zsh with no agent. Instead hold the + // command in `pendingLaunchCommands` and register the window + // with `command: nil`, so the deferred paste happens in + // `SessionService.wireTerminalReadiness` on `.shellReady`. + let hasCommand = !(command?.isEmpty ?? true) + let deferLaunch = trackReadiness && hasCommand + let registerCommand = deferLaunch ? nil : command + // Seed readiness + pending-launch state BEFORE registering so + // the sentinel's `.shellReady` (which can only fire on a later + // main-actor turn) always finds the pending command and the + // autoLaunch membership populated. + if trackReadiness { + capturedAppState.terminalReadiness[terminal.id] = .uninitialized + } + if deferLaunch, let command { + capturedAppState.pendingLaunchCommands[terminal.id] = command + // Membership lets the existing `.timedOut` re-arm machinery + // (`reArmStuckReadinessWatches`) recover a slow launch. + capturedAppState.autoLaunchTerminals.insert(terminal.id) + } + var launchFailed = false do { - let binding = try TmuxBackend.shared.registerTerminal( - id: terminal.id, - name: terminalName, - cwd: cwd, - command: command, - trackReadiness: trackReadiness - ) + // Bounded retry with a longer per-call `new-window` budget: + // under load the tmux subprocess can exceed the 2s default + // and get SIGTERM'd, leaving a window-less terminal (#408). + let binding = try AppDelegate.registerWithRetry(attempts: 3) { _ in + try TmuxBackend.shared.registerTerminal( + id: terminal.id, + name: terminalName, + cwd: cwd, + command: registerCommand, + trackReadiness: trackReadiness, + newWindowTimeout: 4.0 + ) + } terminal.tmuxBinding = binding } catch { - NSLog("[Crow] tmux registerTerminal failed (\(error)); terminal will not render until tmux is available") + // The tmux window never materialized. Don't pretend the + // launch succeeded (#408): surface it so the UI shows a + // Retry affordance and the CLI caller reports honestly + // instead of leaving a silent window-less terminal. + NSLog("[Crow] tmux registerTerminal failed after retries (\(error)); surfacing launch failure") + launchFailed = true + if trackReadiness { + capturedAppState.terminalReadiness[terminal.id] = .failed + } + capturedAppState.pendingLaunchCommands.removeValue(forKey: terminal.id) + capturedAppState.autoLaunchTerminals.remove(terminal.id) } capturedAppState.terminals[sessionID, default: []].append(terminal) capturedStore.mutate { $0.terminals.append(terminal) } if trackReadiness { - capturedAppState.terminalReadiness[terminal.id] = .uninitialized TerminalRouter.trackReadiness(for: terminal) } if rcInjected { capturedAppState.remoteControlActiveTerminals.insert(terminal.id) } - return ["terminal_id": .string(terminal.id.uuidString), "session_id": .string(idStr)] + var result: [String: JSONValue] = [ + "terminal_id": .string(terminal.id.uuidString), + "session_id": .string(idStr), + ] + if launchFailed { result["launch_failed"] = .bool(true) } + return result } }, "list-terminals": { @Sendable params in @@ -1339,8 +1391,18 @@ final class AppDelegate: NSObject, NSApplicationDelegate { throw RPCError.invalidParams("session_id required") } let terms = await MainActor.run { capturedAppState.terminals(for: id) } + let readiness = await MainActor.run { capturedAppState.terminalReadiness } let items: [JSONValue] = terms.map { t in - .object(["id": .string(t.id.uuidString), "name": .string(t.name), "session_id": .string(t.sessionID.uuidString), "managed": .bool(t.isManaged)]) + // `readiness` lets CLI callers (setup.sh) verify the agent + // actually started rather than assuming a launch succeeded + // (#408). Defaults to `uninitialized` for un-tracked shells. + .object([ + "id": .string(t.id.uuidString), + "name": .string(t.name), + "session_id": .string(t.sessionID.uuidString), + "managed": .bool(t.isManaged), + "readiness": .string((readiness[t.id] ?? .uninitialized).rawValue), + ]) } return ["terminals": .array(items)] }, @@ -1363,6 +1425,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate { capturedAppState.terminals[sessionID]?.removeAll { $0.id == terminalID } capturedAppState.terminalReadiness.removeValue(forKey: terminalID) capturedAppState.autoLaunchTerminals.remove(terminalID) + capturedAppState.pendingLaunchCommands.removeValue(forKey: terminalID) if capturedAppState.activeTerminalID[sessionID] == terminalID { capturedAppState.activeTerminalID[sessionID] = capturedAppState.terminals[sessionID]?.first?.id } @@ -1710,6 +1773,29 @@ final class AppDelegate: NSObject, NSApplicationDelegate { NSLog("[Crow] Cleanup complete") } + // MARK: - Terminal Registration + + /// Run `create` up to `attempts` times, returning the first success or + /// rethrowing the last error after exhausting all attempts. Window + /// registration can transiently fail under load when `new-window` exceeds + /// its subprocess timeout (issue #408); a couple of retries turn most of + /// those into successes instead of silent window-less terminals. Pure over + /// the `create` closure so the retry policy is unit-testable without tmux. + nonisolated static func registerWithRetry( + attempts: Int, + create: (_ attempt: Int) throws -> T + ) throws -> T { + var lastError: Error? + for attempt in 0.. LaunchAction { + if let pending, !pending.isEmpty { return .pastePending(pending) } + return .launchAgent + } + + /// Paste the deferred agent-launch command for a brand-new managed terminal + /// now that its shell's line editor is live (#408). Consumes BOTH the + /// pending command and the `autoLaunchTerminals` membership so `launchAgent` + /// can never also fire (e.g. a later spurious `.shellReady` after adopt), + /// preventing a double launch. + func pasteDeferredLaunch(terminalID: UUID, command: String) { + guard appState.pendingLaunchCommands.removeValue(forKey: terminalID) != nil else { return } + appState.autoLaunchTerminals.remove(terminalID) + + guard let sessionID = appState.terminals.first(where: { _, terminals in + terminals.contains(where: { $0.id == terminalID }) + })?.key, + let routedTerminal = appState.terminals[sessionID]?.first(where: { $0.id == terminalID }) else { + NSLog("[SessionService] pasteDeferredLaunch: no terminal record for \(terminalID); cannot send") + return + } + + // Apply the same prep the legacy `crow send` path got from the `send` + // RPC (hook config + OTEL env vars) so hooks route back to this session + // and Claude telemetry is exported. + let text = prepareManagedLaunchText(sessionID: sessionID, command: command) + // Ensure a trailing newline so TmuxBackend.sendText delivers Enter. + TerminalRouter.send(routedTerminal, text: text.hasSuffix("\n") ? text : text + "\n") + appState.terminalReadiness[terminalID] = .agentLaunched + } + + /// Prepare a managed terminal's agent-launch command exactly as the `send` + /// RPC does for the legacy `crow send` path (#408): write the per-worktree + /// hook config so the agent's hooks route back to this session, and (for + /// Claude) prepend the OTEL telemetry exporter env vars. Returns the final + /// text to paste, or the command unchanged if it doesn't launch the + /// session's agent. + /// + /// NOTE: the OTEL env-var list is intentionally identical to the `send` RPC + /// handler in AppDelegate — keep the two in sync. + func prepareManagedLaunchText(sessionID: UUID, command: String) -> String { + guard let session = appState.sessions.first(where: { $0.id == sessionID }), + let agent = AgentRegistry.shared.agent(for: session.agentKind), + command.contains(agent.launchCommandToken) else { + return command + } + if let worktree = appState.primaryWorktree(for: sessionID), + let crowPath = ClaudeHookConfigWriter.findCrowBinary() { + do { + try agent.hookConfigWriter.writeHookConfig( + worktreePath: worktree.worktreePath, + sessionID: sessionID, + crowPath: crowPath + ) + } catch { + NSLog("[SessionService] Failed to write hook config for session %@: %@", + sessionID.uuidString, error.localizedDescription) + } + } + guard agent.kind == .claudeCode, let port = telemetryPort else { return command } + let vars = [ + "CLAUDE_CODE_ENABLE_TELEMETRY=1", + "OTEL_METRICS_EXPORTER=otlp", + "OTEL_LOGS_EXPORTER=otlp", + "OTEL_EXPORTER_OTLP_PROTOCOL=http/json", + "OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:\(port)", + "OTEL_RESOURCE_ATTRIBUTES=crow.session.id=\(sessionID.uuidString)", + ].joined(separator: " ") + return "export \(vars) && \(command)" + } + /// Re-arm the tmux readiness watch for a terminal whose first attempt /// timed out. Reverts AppState back to `.surfaceCreated` so the UI /// transitions out of the Retry overlay, and starts a longer-budget @@ -368,6 +466,19 @@ final class SessionService { NSLog("[SessionService] copied tmux diagnostics for terminal=\(terminalID) bytes=\(bundle.utf8.count)") } + /// Reap leaked orphan cockpit windows once at launch — bare-shell windows + /// that no persisted terminal references (#408). The keep-set is built from + /// the persisted terminals' window bindings; `reapUnboundCockpitWindows` + /// additionally unions the in-memory bindings, so a window adopted or freshly + /// registered this run is never reaped. Call AFTER the async per-terminal + /// rehydration has settled. Safe: never reaps a window running an agent. + @MainActor + func reapOrphanedCockpitWindows() { + let keep = Set(appState.terminals.values.flatMap { $0 }.compactMap { $0.tmuxBinding?.windowIndex }) + let n = TmuxBackend.shared.reapUnboundCockpitWindows(keepWindowIndices: keep) + if n > 0 { NSLog("[SessionService] reaped \(n) orphaned cockpit window(s) at launch (#408)") } + } + /// Re-arm any tmux readiness watches that have stalled while the app /// was backgrounded. Called from `NSApplication.didBecomeActiveNotification` /// so a user who returns to a long-idle app doesn't have to click diff --git a/Tests/CrowTests/DeferredLaunchTests.swift b/Tests/CrowTests/DeferredLaunchTests.swift new file mode 100644 index 00000000..f29a2974 --- /dev/null +++ b/Tests/CrowTests/DeferredLaunchTests.swift @@ -0,0 +1,123 @@ +import Foundation +import Testing +import CrowCore +import CrowPersistence +import CrowTerminal +@testable import Crow + +/// Covers the #408 fix: brand-new managed terminals defer their agent launch +/// until the shell signals `.shellReady`, instead of blind-pasting into a +/// not-yet-ready shell. Pure helpers are tested directly; the readiness-handler +/// branch is exercised against a real `SessionService` with seeded state. +@Suite("Deferred managed-terminal launch (#408)") +struct DeferredLaunchTests { + + // MARK: resolveLaunch (pure) + + @Test func resolveLaunchPastesPendingCommand() { + let cmd = "cd /x && claude \"hi\"" + #expect(SessionService.resolveLaunch(pending: cmd) == .pastePending(cmd)) + } + + @Test func resolveLaunchFallsBackToLaunchAgentWhenNoPending() { + #expect(SessionService.resolveLaunch(pending: nil) == .launchAgent) + #expect(SessionService.resolveLaunch(pending: "") == .launchAgent) + } + + // MARK: registerWithRetry (pure) + + private struct Boom: Error {} + + @Test func registerWithRetrySucceedsFirstTry() throws { + var calls = 0 + let result = try AppDelegate.registerWithRetry(attempts: 3) { _ in calls += 1; return 42 } + #expect(result == 42) + #expect(calls == 1) + } + + @Test func registerWithRetryRecoversAfterTransientFailures() throws { + var calls = 0 + let result = try AppDelegate.registerWithRetry(attempts: 3) { attempt -> Int in + calls += 1 + if attempt < 2 { throw Boom() } + return 7 + } + #expect(result == 7) + #expect(calls == 3) + } + + @Test func registerWithRetryThrowsAfterExhaustion() { + var calls = 0 + #expect(throws: Boom.self) { + try AppDelegate.registerWithRetry(attempts: 3) { _ -> Int in calls += 1; throw Boom() } + } + #expect(calls == 3) + } + + // MARK: deferred paste on .shellReady (instance) + + /// When the sentinel fires `.shellReady` for a terminal staged exactly as + /// the `new-terminal` RPC stages a brand-new managed launch, the readiness + /// handler pastes the pending command and advances to `.agentLaunched`, + /// consuming BOTH the pending command and the autoLaunch membership so + /// `launchAgent` can never also fire (no double launch). + @MainActor + @Test func shellReadyPastesPendingCommandAndConsumesState() { + let appState = AppState() + let tmp = URL(fileURLWithPath: NSTemporaryDirectory()) + .appendingPathComponent("crow-defer-\(UUID().uuidString)") + let store = JSONStore(directory: tmp) + let service = SessionService(store: store, appState: appState) + + let sessionID = UUID() + let terminalID = UUID() + let command = "cd \(tmp.path) && claude \"hi\"" + appState.sessions = [Session(id: sessionID, name: "feat", kind: .work)] + appState.terminals[sessionID] = [SessionTerminal( + id: terminalID, sessionID: sessionID, name: "Claude Code", + cwd: tmp.path, command: command, isManaged: true, tmuxBinding: nil + )] + // Stage the deferred launch exactly like the new-terminal RPC. + appState.terminalReadiness[terminalID] = .uninitialized + appState.pendingLaunchCommands[terminalID] = command + appState.autoLaunchTerminals.insert(terminalID) + + service.wireTerminalReadiness() + // Fire the sentinel's `.shellReady` exactly as the backend would. + TmuxBackend.shared.onReadinessChanged?(terminalID, .shellReady) + + #expect(appState.pendingLaunchCommands[terminalID] == nil) + #expect(!appState.autoLaunchTerminals.contains(terminalID)) + #expect(appState.terminalReadiness[terminalID] == .agentLaunched) + } + + /// A `.timedOut` before `.shellReady` must NOT paste or consume the pending + /// command — the launch is recoverable via Retry / app-foreground re-arm. + @MainActor + @Test func timedOutDoesNotConsumePendingLaunch() { + let appState = AppState() + let tmp = URL(fileURLWithPath: NSTemporaryDirectory()) + .appendingPathComponent("crow-defer-timeout-\(UUID().uuidString)") + let service = SessionService(store: JSONStore(directory: tmp), appState: appState) + + let sessionID = UUID() + let terminalID = UUID() + let command = "cd \(tmp.path) && claude \"hi\"" + appState.sessions = [Session(id: sessionID, name: "feat", kind: .work)] + appState.terminals[sessionID] = [SessionTerminal( + id: terminalID, sessionID: sessionID, name: "Claude Code", + cwd: tmp.path, command: command, isManaged: true, tmuxBinding: nil + )] + appState.terminalReadiness[terminalID] = .uninitialized + appState.pendingLaunchCommands[terminalID] = command + appState.autoLaunchTerminals.insert(terminalID) + + service.wireTerminalReadiness() + TmuxBackend.shared.onReadinessChanged?(terminalID, .timedOut) + + #expect(appState.terminalReadiness[terminalID] == .timedOut) + // Pending launch survives so a later `.shellReady` (Retry) still pastes. + #expect(appState.pendingLaunchCommands[terminalID] == command) + #expect(appState.autoLaunchTerminals.contains(terminalID)) + } +} diff --git a/skills/crow-batch-workspace/SKILL.md b/skills/crow-batch-workspace/SKILL.md index 68bbfb6b..7a5d5c3b 100644 --- a/skills/crow-batch-workspace/SKILL.md +++ b/skills/crow-batch-workspace/SKILL.md @@ -217,4 +217,5 @@ https://github.com/RadiusMethod/acme-api/issues/46 ``` - 5 parallel `setup.sh` calls - Each blocks one GCD thread in the socket server (well within the 64+ thread pool) -- `sleep 3` calls in each script overlap — total wait is ~3s, not 15s +- Each script's readiness poll (up to ~15s) overlaps with the others — they wait + concurrently, and most return as soon as the agent reports `agentLaunched` diff --git a/skills/crow-workspace/SKILL.md b/skills/crow-workspace/SKILL.md index 3d3be490..3d94c706 100644 --- a/skills/crow-workspace/SKILL.md +++ b/skills/crow-workspace/SKILL.md @@ -491,8 +491,7 @@ If `setup.sh` returns a JSON error (`"status": "error"`): | `git_worktree_add` — worktree creation failed | Branch may exist; script auto-retries after cleanup | | `new_session` — crow new-session failed | Crow app may not be running. Inform user. | | `add_worktree` — crow add-worktree failed | Use full UUID from session, check paths | -| `new_terminal` — crow new-terminal failed | Session may not exist; check session_id | -| `send_launch` — crow send failed | Terminal may not be ready; retry | +| `new_terminal` — crow new-terminal failed / could not create window | Session may not exist (check session_id), or tmux couldn't spawn a window under load | | `write_prompt` — prompt file not found | Verify prompt was written before calling setup.sh | ## crow CLI Reference diff --git a/skills/crow-workspace/setup.sh b/skills/crow-workspace/setup.sh index c234f37e..bc9463b4 100755 --- a/skills/crow-workspace/setup.sh +++ b/skills/crow-workspace/setup.sh @@ -56,6 +56,15 @@ json_val() { tr -d '\n' | sed 's/[[:space:]]*:[[:space:]]*/:/g' | grep -o "\"$key\":\"[^\"]*\"" | cut -d'"' -f4 | head -1 } +# Extract the `readiness` of a specific terminal id from `crow list-terminals` +# JSON. Walks the flat per-terminal objects, finds the one carrying our id, and +# reads its readiness field. Usage: terminal_readiness "$tid" "$json" +terminal_readiness() { + local tid="$1" json="$2" + printf '%s' "$json" | tr -d '\n' | grep -o '{[^{}]*}' | grep "\"$tid\"" \ + | grep -o '"readiness":"[^"]*"' | cut -d'"' -f4 | head -1 +} + # POSIX single-quote an arg so it's safe to interpolate into a shell command. # Mirrors Swift's shellQuote() in ClaudeLaunchArgs: wraps value in '...' and # escapes embedded single-quotes as '\''. @@ -550,16 +559,44 @@ launch_claude() { fi fi - # Create terminal - log "Creating terminal..." + # Build the agent launch command and hand it to crow at terminal-creation + # time via --command. Crow holds the command and pastes it only once the + # shell's line editor is live (the .shellReady sentinel), which is the + # race-free replacement for the old `sleep 3` + `crow send` handshake that + # intermittently dropped the launch into a not-yet-ready shell, leaving a + # bare zsh with no agent (#408). + # + # The prompt stays in its file — `"$(cat $prompt_path)"` is expanded by the + # TARGET shell at paste time, so the command sent over the socket is small + # (no ARG_MAX / payload concern). + local prompt_path="$DEV_ROOT/.claude/prompts/crow-prompt-$SESSION_NAME.md" + local rc_args="" + if is_remote_control_enabled; then + # Keep building --rc here: crow's resolveClaudeInCommand only injects + # remote-control flags for a bare `claude …` command, NOT a + # `cd … && claude …` form, so setup.sh remains the source of truth. + rc_args=" --rc --name $(posix_quote "$SESSION_NAME")" + log "Remote control enabled — launching with --rc --name '$SESSION_NAME'" + fi + local launch_cmd="cd $WORKTREE_PATH && $claude_bin --permission-mode plan$rc_args \"\$(cat $prompt_path)\"" + + # Create terminal with the launch command attached. + log "Creating terminal (deferred agent launch via --command)..." local term_result if ! term_result=$(crow new-terminal --session "$SESSION_ID" \ --cwd "$WORKTREE_PATH" \ --name "Claude Code" \ - --managed 2>&1); then + --managed \ + --command "$launch_cmd" 2>&1); then die "new_terminal" "crow new-terminal failed: $term_result" fi + # The RPC reports launch_failed when the tmux window could not be created + # (e.g. new-window timed out under load) — don't pretend it launched. + if grep -qE '"launch_failed"[[:space:]]*:[[:space:]]*true' <<< "$term_result"; then + die "new_terminal" "crow could not create the terminal window: $term_result" + fi + TERMINAL_ID=$(json_val "terminal_id" <<< "$term_result") if [[ -z "$TERMINAL_ID" ]]; then die "new_terminal" "Could not parse terminal_id from: $term_result" @@ -570,28 +607,32 @@ launch_claude() { crow select-session --session "$SESSION_ID" >/dev/null 2>&1 \ || log "Warning: select-session failed" - # Wait for the shell to initialize in the new terminal. - # The terminal surface is pre-initialized by crow new-terminal, but the - # shell process needs time to start and display its prompt. - log "Waiting for shell to initialize..." - sleep 3 - - # Build the prompt file path - local prompt_path="$DEV_ROOT/.claude/prompts/crow-prompt-$SESSION_NAME.md" - - # Send launch command - log "Launching Claude Code..." - local rc_args="" - if is_remote_control_enabled; then - rc_args=" --rc --name $(posix_quote "$SESSION_NAME")" - log "Remote control enabled — launching with --rc --name '$SESSION_NAME'" - fi - local send_text="cd $WORKTREE_PATH && $claude_bin --permission-mode plan$rc_args \"\$(cat $prompt_path)\"\\n" - if ! crow send --session "$SESSION_ID" --terminal "$TERMINAL_ID" "$send_text" >/dev/null 2>&1; then - die "send_launch" "crow send failed" - fi - - log "Claude Code launched" + # Verify the agent actually started rather than assuming success. Poll the + # terminal's readiness (exposed on `crow list-terminals`) until it reaches + # agentLaunched/shellReady. This is a warning, NOT a hard failure: the + # workspace (worktree/session/metadata) is already set up, and crow's UI + # shows a Retry affordance if the agent didn't start (#408). + log "Waiting for the agent to launch..." + local readiness="" attempt + for attempt in $(seq 1 15); do + local lt_result + lt_result=$(crow list-terminals --session "$SESSION_ID" 2>/dev/null) || true + readiness=$(terminal_readiness "$TERMINAL_ID" "$lt_result") + case "$readiness" in + agentLaunched|shellReady) + log "Claude Code launched (readiness=$readiness)" + return + ;; + failed|timedOut) + log "WARNING: agent did not start (readiness=$readiness). The terminal" \ + "is up but the agent isn't running — use Retry in the Crow UI." + return + ;; + esac + sleep 1 + done + log "WARNING: agent launch not confirmed after 15s (last readiness='${readiness:-unknown}')." \ + "The workspace is set up; check the Crow terminal and use Retry if needed." } # ─── Result ────────────────────────────────────────────────────────────────── From f1eee8d9832237f789698e8c9118c96df9b23e3b Mon Sep 17 00:00:00 2001 From: Dustin Hilgaertner Date: Tue, 2 Jun 2026 23:00:08 -0500 Subject: [PATCH 2/2] Address PR #409 review: recovery-path race, retry budget, dedup, test serialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four Yellow findings from the review: - Hydrate-fresh recovery race: brand-new managed terminals no longer persist the launch command on the SessionTerminal row (persist `registerCommand`, nil when deferring). The launch lives only in in-memory pendingLaunchCommands, so the hydrate-fresh fallback can't blind-paste it into a not-yet-ready shell on relaunch. Restored managed terminals relaunch via autoLaunch/launchAgent as before — and this no longer relies on the fragile `command.contains("claude")` nil-ing for non-claude agents. - registerWithRetry main-actor stall: tightened to 2 attempts × 3s (was 3 × 4s), capping worst-case main-actor stall at ~6s instead of ~12s; common case is one fast attempt. - Duplicated launch prep: extracted AppDelegate.prepareAgentLaunchText (hook config + OTEL env + agent-token guard), returning (text, didLaunch). The `send` RPC and pasteDeferredLaunch now share it, removing the copy-paste and the "keep in sync" comment; the `.agentLaunched` gating is now consistent. - DeferredLaunchTests: marked .serialized — the instance tests overwrite the singleton onReadinessChanged closure, which can stomp under parallel execution. All package + app suites green. 🐦‍⬛ Generated with Claude Code, orchestrated by Crow Co-Authored-By: Claude Crow-Session: 1B4F94D3-4045-4DB0-9442-BA35B6050D6A --- Sources/Crow/App/AppDelegate.swift | 124 ++++++++++++++-------- Sources/Crow/App/SessionService.swift | 56 +++------- Tests/CrowTests/DeferredLaunchTests.swift | 6 +- 3 files changed, 99 insertions(+), 87 deletions(-) diff --git a/Sources/Crow/App/AppDelegate.swift b/Sources/Crow/App/AppDelegate.swift index 2c66323e..1928ba32 100644 --- a/Sources/Crow/App/AppDelegate.swift +++ b/Sources/Crow/App/AppDelegate.swift @@ -1306,17 +1306,6 @@ final class AppDelegate: NSObject, NSApplicationDelegate { && !cmd.contains("--remote-control") } let trackReadiness = isManaged - // Every session, including the Manager (#314), runs on - // tmux (#303). Register the tmux window now — its shell - // starts immediately, so there's no offscreen pre-init. - var terminal = SessionTerminal( - sessionID: sessionID, - name: terminalName, - cwd: cwd, - command: command, - isManaged: isManaged, - backend: .tmux - ) // Brand-new managed terminals DEFER their agent launch until // the shell signals readiness (issue #408). Pasting the launch // command immediately races the shell's line editor (zle): if @@ -1328,6 +1317,25 @@ final class AppDelegate: NSObject, NSApplicationDelegate { let hasCommand = !(command?.isEmpty ?? true) let deferLaunch = trackReadiness && hasCommand let registerCommand = deferLaunch ? nil : command + // Every session, including the Manager (#314), runs on + // tmux (#303). Register the tmux window now — its shell + // starts immediately, so there's no offscreen pre-init. + // + // Persist `registerCommand` (nil for a deferred launch), NOT + // the raw launch command: the launch lives in + // `pendingLaunchCommands` (in-memory) and the persisted row + // must not carry it, or the hydrate-fresh fallback would + // blind-paste it into a not-yet-ready shell on the recovery + // path — the very race this fixes (#408). A restored managed + // terminal relaunches via the autoLaunch/launchAgent path. + var terminal = SessionTerminal( + sessionID: sessionID, + name: terminalName, + cwd: cwd, + command: registerCommand, + isManaged: isManaged, + backend: .tmux + ) // Seed readiness + pending-launch state BEFORE registering so // the sentinel's `.shellReady` (which can only fire on a later // main-actor turn) always finds the pending command and the @@ -1343,17 +1351,20 @@ final class AppDelegate: NSObject, NSApplicationDelegate { } var launchFailed = false do { - // Bounded retry with a longer per-call `new-window` budget: - // under load the tmux subprocess can exceed the 2s default - // and get SIGTERM'd, leaving a window-less terminal (#408). - let binding = try AppDelegate.registerWithRetry(attempts: 3) { _ in + // Bounded retry with a modestly-longer per-call `new-window` + // budget: under load the tmux subprocess can exceed the 2s + // default and get SIGTERM'd, leaving a window-less terminal + // (#408). This runs inside `MainActor.run`, so the budget is + // kept tight (2 attempts × 3s) to cap worst-case main-actor + // stall at ~6s rather than beachballing concurrent RPCs. + let binding = try AppDelegate.registerWithRetry(attempts: 2) { _ in try TmuxBackend.shared.registerTerminal( id: terminal.id, name: terminalName, cwd: cwd, command: registerCommand, trackReadiness: trackReadiness, - newWindowTimeout: 4.0 + newWindowTimeout: 3.0 ) } terminal.tmuxBinding = binding @@ -1475,35 +1486,19 @@ final class AppDelegate: NSObject, NSApplicationDelegate { let terminal = terminals.first(where: { $0.id == terminalID }), terminal.isManaged, let session = capturedAppState.sessions.first(where: { $0.id == sessionID }), - let agent = AgentRegistry.shared.agent(for: session.agentKind), - text.contains(agent.launchCommandToken) { - if let worktree = capturedAppState.primaryWorktree(for: sessionID), - let crowPath = ClaudeHookConfigWriter.findCrowBinary() { - do { - try agent.hookConfigWriter.writeHookConfig( - worktreePath: worktree.worktreePath, - sessionID: sessionID, - crowPath: crowPath - ) - } catch { - NSLog("[AppDelegate] Failed to write hook config for session %@: %@", - sessionID.uuidString, error.localizedDescription) - } - } - // OTEL telemetry env vars are Claude-specific — - // Codex has no equivalent OTLP exporter. - if agent.kind == .claudeCode, let port = capturedTelemetryPort { - let vars = [ - "CLAUDE_CODE_ENABLE_TELEMETRY=1", - "OTEL_METRICS_EXPORTER=otlp", - "OTEL_LOGS_EXPORTER=otlp", - "OTEL_EXPORTER_OTLP_PROTOCOL=http/json", - "OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:\(port)", - "OTEL_RESOURCE_ATTRIBUTES=crow.session.id=\(sessionID.uuidString)", - ].joined(separator: " ") - text = "export \(vars) && \(text)" + let agent = AgentRegistry.shared.agent(for: session.agentKind) { + let prepared = AppDelegate.prepareAgentLaunchText( + command: text, + agent: agent, + sessionID: sessionID, + worktreePath: capturedAppState.primaryWorktree(for: sessionID)?.worktreePath, + crowPath: ClaudeHookConfigWriter.findCrowBinary(), + telemetryPort: capturedTelemetryPort + ) + text = prepared.text + if prepared.didLaunch { + capturedAppState.terminalReadiness[terminalID] = .agentLaunched } - capturedAppState.terminalReadiness[terminalID] = .agentLaunched } if let routedTerminal { @@ -1796,6 +1791,47 @@ final class AppDelegate: NSObject, NSApplicationDelegate { throw lastError ?? RPCError.applicationError("registerWithRetry: no attempts run") } + /// Prepare an agent-launching command for a managed terminal: write the + /// per-worktree hook config so the agent's hooks route back to `sessionID`, + /// and (for Claude) prepend the OTEL telemetry exporter env vars. Returns + /// the final text plus whether `command` actually launches the agent (its + /// `launchCommandToken` is present). Single source of truth shared by the + /// `send` RPC and the #408 deferred-launch paste so the two never drift. + nonisolated static func prepareAgentLaunchText( + command: String, + agent: any CodingAgent, + sessionID: UUID, + worktreePath: String?, + crowPath: String?, + telemetryPort: UInt16? + ) -> (text: String, didLaunch: Bool) { + guard command.contains(agent.launchCommandToken) else { return (command, false) } + if let worktreePath, let crowPath { + do { + try agent.hookConfigWriter.writeHookConfig( + worktreePath: worktreePath, + sessionID: sessionID, + crowPath: crowPath + ) + } catch { + NSLog("[AppDelegate] Failed to write hook config for session %@: %@", + sessionID.uuidString, error.localizedDescription) + } + } + // OTEL telemetry env vars are Claude-specific — Codex has no equivalent + // OTLP exporter. + guard agent.kind == .claudeCode, let port = telemetryPort else { return (command, true) } + let vars = [ + "CLAUDE_CODE_ENABLE_TELEMETRY=1", + "OTEL_METRICS_EXPORTER=otlp", + "OTEL_LOGS_EXPORTER=otlp", + "OTEL_EXPORTER_OTLP_PROTOCOL=http/json", + "OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:\(port)", + "OTEL_RESOURCE_ATTRIBUTES=crow.session.id=\(sessionID.uuidString)", + ].joined(separator: " ") + return ("export \(vars) && \(command)", true) + } + // MARK: - Claude Binary Resolution /// Replace bare `claude` in a command string with the full path to the real binary, diff --git a/Sources/Crow/App/SessionService.swift b/Sources/Crow/App/SessionService.swift index b3e4ac48..85084bc0 100644 --- a/Sources/Crow/App/SessionService.swift +++ b/Sources/Crow/App/SessionService.swift @@ -396,53 +396,25 @@ final class SessionService { // Apply the same prep the legacy `crow send` path got from the `send` // RPC (hook config + OTEL env vars) so hooks route back to this session - // and Claude telemetry is exported. - let text = prepareManagedLaunchText(sessionID: sessionID, command: command) + // and Claude telemetry is exported — via the shared helper so the two + // launch paths never drift. + var text = command + if let session = appState.sessions.first(where: { $0.id == sessionID }), + let agent = AgentRegistry.shared.agent(for: session.agentKind) { + text = AppDelegate.prepareAgentLaunchText( + command: command, + agent: agent, + sessionID: sessionID, + worktreePath: appState.primaryWorktree(for: sessionID)?.worktreePath, + crowPath: ClaudeHookConfigWriter.findCrowBinary(), + telemetryPort: telemetryPort + ).text + } // Ensure a trailing newline so TmuxBackend.sendText delivers Enter. TerminalRouter.send(routedTerminal, text: text.hasSuffix("\n") ? text : text + "\n") appState.terminalReadiness[terminalID] = .agentLaunched } - /// Prepare a managed terminal's agent-launch command exactly as the `send` - /// RPC does for the legacy `crow send` path (#408): write the per-worktree - /// hook config so the agent's hooks route back to this session, and (for - /// Claude) prepend the OTEL telemetry exporter env vars. Returns the final - /// text to paste, or the command unchanged if it doesn't launch the - /// session's agent. - /// - /// NOTE: the OTEL env-var list is intentionally identical to the `send` RPC - /// handler in AppDelegate — keep the two in sync. - func prepareManagedLaunchText(sessionID: UUID, command: String) -> String { - guard let session = appState.sessions.first(where: { $0.id == sessionID }), - let agent = AgentRegistry.shared.agent(for: session.agentKind), - command.contains(agent.launchCommandToken) else { - return command - } - if let worktree = appState.primaryWorktree(for: sessionID), - let crowPath = ClaudeHookConfigWriter.findCrowBinary() { - do { - try agent.hookConfigWriter.writeHookConfig( - worktreePath: worktree.worktreePath, - sessionID: sessionID, - crowPath: crowPath - ) - } catch { - NSLog("[SessionService] Failed to write hook config for session %@: %@", - sessionID.uuidString, error.localizedDescription) - } - } - guard agent.kind == .claudeCode, let port = telemetryPort else { return command } - let vars = [ - "CLAUDE_CODE_ENABLE_TELEMETRY=1", - "OTEL_METRICS_EXPORTER=otlp", - "OTEL_LOGS_EXPORTER=otlp", - "OTEL_EXPORTER_OTLP_PROTOCOL=http/json", - "OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:\(port)", - "OTEL_RESOURCE_ATTRIBUTES=crow.session.id=\(sessionID.uuidString)", - ].joined(separator: " ") - return "export \(vars) && \(command)" - } - /// Re-arm the tmux readiness watch for a terminal whose first attempt /// timed out. Reverts AppState back to `.surfaceCreated` so the UI /// transitions out of the Retry overlay, and starts a longer-budget diff --git a/Tests/CrowTests/DeferredLaunchTests.swift b/Tests/CrowTests/DeferredLaunchTests.swift index f29a2974..b6d0def2 100644 --- a/Tests/CrowTests/DeferredLaunchTests.swift +++ b/Tests/CrowTests/DeferredLaunchTests.swift @@ -9,7 +9,11 @@ import CrowTerminal /// until the shell signals `.shellReady`, instead of blind-pasting into a /// not-yet-ready shell. Pure helpers are tested directly; the readiness-handler /// branch is exercised against a real `SessionService` with seeded state. -@Suite("Deferred managed-terminal launch (#408)") +// Serialized: the instance tests overwrite the singleton +// `TmuxBackend.shared.onReadinessChanged` via `wireTerminalReadiness()` and then +// fire it directly. Run in parallel (Swift Testing's default) one test could +// stomp another's closure before its sentinel fires. +@Suite("Deferred managed-terminal launch (#408)", .serialized) struct DeferredLaunchTests { // MARK: resolveLaunch (pure)