diff --git a/Packages/CrowCore/Sources/CrowCore/AppState.swift b/Packages/CrowCore/Sources/CrowCore/AppState.swift index a3b3cfe..0865fdc 100644 --- a/Packages/CrowCore/Sources/CrowCore/AppState.swift +++ b/Packages/CrowCore/Sources/CrowCore/AppState.swift @@ -223,6 +223,14 @@ public final class AppState { /// not brand-new terminals created via the `new-terminal` RPC. public var autoLaunchTerminals: Set = [] + /// Pending agent launch command per terminal ID, for brand-new managed + /// terminals created via `new-terminal --command`. The command is NOT + /// pasted immediately (that races the shell's line editor — issue #408); + /// it is held here until the readiness sentinel fires `.shellReady`, at + /// which point `SessionService.wireTerminalReadiness` pastes it. In-memory + /// only — never persisted, so a relaunch can't re-paste a stale command. + public var pendingLaunchCommands: [UUID: String] = [:] + // MARK: - Hook Events (per-session Observable wrappers) /// Per-session hook state. Using @Observable class wrappers so mutations to one diff --git a/Packages/CrowTerminal/Sources/CrowTerminal/TmuxBackend.swift b/Packages/CrowTerminal/Sources/CrowTerminal/TmuxBackend.swift index 851d79b..406c7f2 100644 --- a/Packages/CrowTerminal/Sources/CrowTerminal/TmuxBackend.swift +++ b/Packages/CrowTerminal/Sources/CrowTerminal/TmuxBackend.swift @@ -178,7 +178,8 @@ public final class TmuxBackend { name: String, cwd: String, command: String?, - trackReadiness: Bool + trackReadiness: Bool, + newWindowTimeout: TimeInterval = TmuxController.defaultTimeout ) throws -> TmuxBinding { precondition(!tmuxBinary.isEmpty, "TmuxBackend.configure(...) must be called first") let ctrl: TmuxController @@ -219,7 +220,8 @@ public final class TmuxBackend { name: name, cwd: cwd.isEmpty ? nil : cwd, env: env, - command: wrapperPath + command: wrapperPath, + timeout: newWindowTimeout ) bindings[id] = windowIndex @@ -365,6 +367,56 @@ public final class TmuxBackend { } } + /// Bare login shells we consider "orphaned" when a cockpit window is not + /// referenced by any terminal — i.e. a window left at a shell with no agent + /// running (#408). Anything else (claude/codex/node/an editor/…) is left + /// alone. tmux reports `pane_current_command` without the login-shell `-` + /// prefix, but we match both forms defensively. + nonisolated static let orphanLoginShells: Set = [ + "zsh", "-zsh", "bash", "-bash", "sh", "-sh", + "fish", "-fish", "dash", "-dash", "ksh", "tcsh", "csh", "login", + ] + + /// Decide whether a single cockpit window should be reaped. Pure so the + /// policy is unit-testable: reap only when the window is NOT referenced by a + /// live terminal AND its pane is sitting at a bare login shell. Never reaps + /// a window running an agent (or any non-shell process), so an agent that + /// exited and left the user at a shell — but whose terminal still references + /// the window — is preserved (it's in `keep`). + nonisolated static func shouldReapWindow(index: Int, command: String, keep: Set) -> Bool { + if keep.contains(index) { return false } + return orphanLoginShells.contains(command) + } + + /// Reap cockpit windows that no live terminal references AND that are + /// sitting at a bare login shell — leaked windows from a timed-out + /// `new-window` or a forgotten terminal (#408). `keepWindowIndices` is the + /// set of window indices referenced by persisted terminals; it is unioned + /// with the in-memory `bindings` so a window created/adopted this run is + /// never reaped. Best-effort; returns the count reaped. + @discardableResult + public func reapUnboundCockpitWindows(keepWindowIndices: Set) -> Int { + guard let ctrl = controller else { return 0 } + let keep = keepWindowIndices.union(bindings.values) + let windows: [(index: Int, command: String)] + do { + windows = try ctrl.listWindowCommands() + } catch { + reportIfTimeout(error) + return 0 + } + var reaped = 0 + for window in windows where Self.shouldReapWindow(index: window.index, command: window.command, keep: keep) { + ctrl.killWindow(index: window.index) + NSLog("[CrowTelemetry tmux:orphan_window_reaped index=\(window.index) command=\(window.command)]") + reaped += 1 + } + if reaped > 0 { + NSLog("[Crow] Reaped \(reaped) orphaned bare-shell cockpit window(s) (#408)") + } + return reaped + } + /// Return the shared cockpit Ghostty surface, lazily creating it the /// first time. The surface attaches to the live tmux session via /// `tmux -S … attach-session -t …` as its child command. diff --git a/Packages/CrowTerminal/Sources/CrowTerminal/TmuxController.swift b/Packages/CrowTerminal/Sources/CrowTerminal/TmuxController.swift index 9b5358d..e294ead 100644 --- a/Packages/CrowTerminal/Sources/CrowTerminal/TmuxController.swift +++ b/Packages/CrowTerminal/Sources/CrowTerminal/TmuxController.swift @@ -104,13 +104,34 @@ public struct TmuxController: Sendable { return out.split(separator: "\n").compactMap { Int($0.trimmingCharacters(in: .whitespaces)) } } + /// List each window's index and the command currently running in its + /// active pane (`#{window_index}` + `#{pane_current_command}`). Used by the + /// orphan-window reaper to distinguish a window running an agent from a + /// bare login shell (#408). + public func listWindowCommands() throws -> [(index: Int, command: String)] { + let out = try run(["list-windows", "-t", sessionName, + "-F", "#{window_index}\t#{pane_current_command}"]) + return out.split(separator: "\n").compactMap { line in + let parts = line.split(separator: "\t", maxSplits: 1, omittingEmptySubsequences: false) + guard parts.count == 2, let idx = Int(parts[0].trimmingCharacters(in: .whitespaces)) else { + return nil + } + return (idx, parts[1].trimmingCharacters(in: .whitespaces)) + } + } + // MARK: - Windows + /// `timeout` defaults to the per-call default. Callers spawning a window + /// while the app is under load (many concurrent hydrations, a contended + /// main actor) can pass a longer budget so a slow `new-window` doesn't + /// SIGTERM and leave the terminal window-less (issue #408). public func newWindow( name: String? = nil, cwd: String? = nil, env: [String: String] = [:], - command: String? = nil + command: String? = nil, + timeout: TimeInterval = TmuxController.defaultTimeout ) throws -> Int { var args = ["new-window", "-P", "-F", "#{window_index}", "-t", sessionName] if let name { args.append(contentsOf: ["-n", name]) } @@ -122,7 +143,7 @@ public struct TmuxController: Sendable { if let cwd, !cwd.isEmpty { args.append(contentsOf: ["-c", cwd]) } for (k, v) in env { args.append(contentsOf: ["-e", "\(k)=\(v)"]) } if let command { args.append(command) } - let out = try run(args) + let out = try run(args, timeout: timeout) guard let idx = Int(out.trimmingCharacters(in: .whitespacesAndNewlines)) else { throw TmuxError.cliFailed( args: args, diff --git a/Packages/CrowTerminal/Tests/CrowTerminalTests/OrphanWindowReaperTests.swift b/Packages/CrowTerminal/Tests/CrowTerminalTests/OrphanWindowReaperTests.swift new file mode 100644 index 0000000..ac68100 --- /dev/null +++ b/Packages/CrowTerminal/Tests/CrowTerminalTests/OrphanWindowReaperTests.swift @@ -0,0 +1,31 @@ +import Foundation +import Testing +@testable import CrowTerminal + +/// Policy tests for the conservative orphan-window reaper (#408): reap a +/// cockpit window only when NO live terminal references it AND it is sitting at +/// a bare login shell. Pure over `shouldReapWindow`, so no tmux needed. +@Suite("Orphan cockpit window reaper policy (#408)") +struct OrphanWindowReaperTests { + + @Test func reapsUnboundBareShell() { + #expect(TmuxBackend.shouldReapWindow(index: 3, command: "zsh", keep: [])) + #expect(TmuxBackend.shouldReapWindow(index: 3, command: "-zsh", keep: [])) + #expect(TmuxBackend.shouldReapWindow(index: 3, command: "bash", keep: [])) + #expect(TmuxBackend.shouldReapWindow(index: 3, command: "sh", keep: [])) + } + + @Test func keepsBoundWindowEvenWhenBareShell() { + // A terminal references it (e.g. the agent exited and the user is now at + // the shell) — must be preserved. + #expect(!TmuxBackend.shouldReapWindow(index: 3, command: "zsh", keep: [3])) + } + + @Test func keepsWindowRunningAProcess() { + // Anything that isn't a bare login shell is left alone. + #expect(!TmuxBackend.shouldReapWindow(index: 4, command: "claude", keep: [])) + #expect(!TmuxBackend.shouldReapWindow(index: 4, command: "node", keep: [])) + #expect(!TmuxBackend.shouldReapWindow(index: 4, command: "codex", keep: [])) + #expect(!TmuxBackend.shouldReapWindow(index: 4, command: "tail", keep: [])) // session anchor + } +} diff --git a/Packages/CrowTerminal/Tests/CrowTerminalTests/TmuxBackendTests.swift b/Packages/CrowTerminal/Tests/CrowTerminalTests/TmuxBackendTests.swift index 41a3c81..bf18ff8 100644 --- a/Packages/CrowTerminal/Tests/CrowTerminalTests/TmuxBackendTests.swift +++ b/Packages/CrowTerminal/Tests/CrowTerminalTests/TmuxBackendTests.swift @@ -8,7 +8,11 @@ import Testing /// covered separately by the visual demo path. Skipped automatically /// when no tmux binary is present on the host. @MainActor -@Suite("TmuxBackend integration", .enabled(if: discoveredTmuxBinary != nil)) +// Serialized: these are real-tmux integration tests that spawn login shells and +// assert on readiness *timing* (e.g. a 0.15s watch budget). Run in parallel they +// contend for CPU during shell startup and the tight-timing cases flake — most +// visibly once the #408 concurrent-launch test spins up several shells at once. +@Suite("TmuxBackend integration", .enabled(if: discoveredTmuxBinary != nil), .serialized) struct TmuxBackendTests { private func makeBackend() -> TmuxBackend { @@ -373,4 +377,75 @@ struct TmuxBackendTests { try await Task.sleep(nanoseconds: 100_000_000) #expect(received.isEmpty) } + + // MARK: - #408 concurrent managed launch: no bare-zsh orphans + + /// The #408 repro. Spawn several managed terminals at once, each deferring + /// its launch until `.shellReady` (registered with `command: nil`, then the + /// command pasted on the readiness fire — exactly what the new-terminal RPC + /// now does). Every window must reach readiness and run the pasted process; + /// none may be left at a bare login shell. + @Test func concurrentManagedLaunchesAllReachReadyNoBareShells() async throws { + let backend = makeBackend() + defer { backend.shutdown() } + + let n = 6 + let ids = (0..() + var timedOut = Set() + + backend.onReadinessChanged = { id, state in + switch state { + case .shellReady: + guard !ready.contains(id) else { return } + ready.insert(id) + // The deferred paste: `exec` a marker process so the pane is no + // longer a bare login shell — proof the launch landed in a live + // shell rather than being dropped (the #408 failure). + try? backend.sendText(id: id, text: "exec sleep 97\n") + case .timedOut: + timedOut.insert(id) + default: + break + } + } + + for id in ids { + let binding = try backend.registerTerminal( + id: id, name: "managed-\(id.uuidString.prefix(4))", + cwd: NSHomeDirectory(), command: nil, trackReadiness: true + ) + windowIndexByID[id] = binding.windowIndex + } + + // Wait (bounded) for all to report `.shellReady`. Shell startup is + // CPU-contended with N concurrent wrappers; 30s mirrors the backend's + // default readiness budget. + let deadline = Date().addingTimeInterval(30) + while ready.count < n && timedOut.isEmpty && Date() < deadline { + try await Task.sleep(nanoseconds: 100_000_000) + } + #expect(timedOut.isEmpty) + #expect(ready.count == n) + + // Assert no managed window is left at a bare login shell. Poll briefly: + // the exec'd markers replace their shells asynchronously. + let ctrl = TmuxController( + tmuxBinary: discoveredTmuxBinary!, socketPath: backend.socketPath, + sessionName: TmuxBackend.cockpitSessionName + ) + let ourIndices = Set(windowIndexByID.values) + var bareShells: [Int] = [] + let checkDeadline = Date().addingTimeInterval(5) + repeat { + try await Task.sleep(nanoseconds: 200_000_000) + let windows = try ctrl.listWindowCommands().filter { ourIndices.contains($0.index) } + bareShells = windows + .filter { TmuxBackend.orphanLoginShells.contains($0.command) } + .map { $0.index } + } while !bareShells.isEmpty && Date() < checkDeadline + + #expect(bareShells.isEmpty) + } } diff --git a/Sources/Crow/App/AppDelegate.swift b/Sources/Crow/App/AppDelegate.swift index ab9c43a..1928ba3 100644 --- a/Sources/Crow/App/AppDelegate.swift +++ b/Sources/Crow/App/AppDelegate.swift @@ -436,6 +436,13 @@ final class AppDelegate: NSObject, NSApplicationDelegate { // Detect orphaned worktrees (runs async, updates UI when done) Task { await service.detectOrphanedWorktrees() } + // Reap leaked orphan cockpit windows (bare shells that no terminal + // references) once rehydration's async adoptions have settled (#408). + Task { @MainActor in + try? await Task.sleep(nanoseconds: 8_000_000_000) + service.reapOrphanedCockpitWindows() + } + // Check for runtime dependencies (non-blocking) Task { let missing = await Task.detached { @@ -1299,39 +1306,95 @@ final class AppDelegate: NSObject, NSApplicationDelegate { && !cmd.contains("--remote-control") } let trackReadiness = isManaged + // Brand-new managed terminals DEFER their agent launch until + // the shell signals readiness (issue #408). Pasting the launch + // command immediately races the shell's line editor (zle): if + // the prompt isn't live yet the keystrokes are dropped and the + // window is left at a bare zsh with no agent. Instead hold the + // command in `pendingLaunchCommands` and register the window + // with `command: nil`, so the deferred paste happens in + // `SessionService.wireTerminalReadiness` on `.shellReady`. + let hasCommand = !(command?.isEmpty ?? true) + let deferLaunch = trackReadiness && hasCommand + let registerCommand = deferLaunch ? nil : command // Every session, including the Manager (#314), runs on // tmux (#303). Register the tmux window now — its shell // starts immediately, so there's no offscreen pre-init. + // + // Persist `registerCommand` (nil for a deferred launch), NOT + // the raw launch command: the launch lives in + // `pendingLaunchCommands` (in-memory) and the persisted row + // must not carry it, or the hydrate-fresh fallback would + // blind-paste it into a not-yet-ready shell on the recovery + // path — the very race this fixes (#408). A restored managed + // terminal relaunches via the autoLaunch/launchAgent path. var terminal = SessionTerminal( sessionID: sessionID, name: terminalName, cwd: cwd, - command: command, + command: registerCommand, isManaged: isManaged, backend: .tmux ) + // Seed readiness + pending-launch state BEFORE registering so + // the sentinel's `.shellReady` (which can only fire on a later + // main-actor turn) always finds the pending command and the + // autoLaunch membership populated. + if trackReadiness { + capturedAppState.terminalReadiness[terminal.id] = .uninitialized + } + if deferLaunch, let command { + capturedAppState.pendingLaunchCommands[terminal.id] = command + // Membership lets the existing `.timedOut` re-arm machinery + // (`reArmStuckReadinessWatches`) recover a slow launch. + capturedAppState.autoLaunchTerminals.insert(terminal.id) + } + var launchFailed = false do { - let binding = try TmuxBackend.shared.registerTerminal( - id: terminal.id, - name: terminalName, - cwd: cwd, - command: command, - trackReadiness: trackReadiness - ) + // Bounded retry with a modestly-longer per-call `new-window` + // budget: under load the tmux subprocess can exceed the 2s + // default and get SIGTERM'd, leaving a window-less terminal + // (#408). This runs inside `MainActor.run`, so the budget is + // kept tight (2 attempts × 3s) to cap worst-case main-actor + // stall at ~6s rather than beachballing concurrent RPCs. + let binding = try AppDelegate.registerWithRetry(attempts: 2) { _ in + try TmuxBackend.shared.registerTerminal( + id: terminal.id, + name: terminalName, + cwd: cwd, + command: registerCommand, + trackReadiness: trackReadiness, + newWindowTimeout: 3.0 + ) + } terminal.tmuxBinding = binding } catch { - NSLog("[Crow] tmux registerTerminal failed (\(error)); terminal will not render until tmux is available") + // The tmux window never materialized. Don't pretend the + // launch succeeded (#408): surface it so the UI shows a + // Retry affordance and the CLI caller reports honestly + // instead of leaving a silent window-less terminal. + NSLog("[Crow] tmux registerTerminal failed after retries (\(error)); surfacing launch failure") + launchFailed = true + if trackReadiness { + capturedAppState.terminalReadiness[terminal.id] = .failed + } + capturedAppState.pendingLaunchCommands.removeValue(forKey: terminal.id) + capturedAppState.autoLaunchTerminals.remove(terminal.id) } capturedAppState.terminals[sessionID, default: []].append(terminal) capturedStore.mutate { $0.terminals.append(terminal) } if trackReadiness { - capturedAppState.terminalReadiness[terminal.id] = .uninitialized TerminalRouter.trackReadiness(for: terminal) } if rcInjected { capturedAppState.remoteControlActiveTerminals.insert(terminal.id) } - return ["terminal_id": .string(terminal.id.uuidString), "session_id": .string(idStr)] + var result: [String: JSONValue] = [ + "terminal_id": .string(terminal.id.uuidString), + "session_id": .string(idStr), + ] + if launchFailed { result["launch_failed"] = .bool(true) } + return result } }, "list-terminals": { @Sendable params in @@ -1339,8 +1402,18 @@ final class AppDelegate: NSObject, NSApplicationDelegate { throw RPCError.invalidParams("session_id required") } let terms = await MainActor.run { capturedAppState.terminals(for: id) } + let readiness = await MainActor.run { capturedAppState.terminalReadiness } let items: [JSONValue] = terms.map { t in - .object(["id": .string(t.id.uuidString), "name": .string(t.name), "session_id": .string(t.sessionID.uuidString), "managed": .bool(t.isManaged)]) + // `readiness` lets CLI callers (setup.sh) verify the agent + // actually started rather than assuming a launch succeeded + // (#408). Defaults to `uninitialized` for un-tracked shells. + .object([ + "id": .string(t.id.uuidString), + "name": .string(t.name), + "session_id": .string(t.sessionID.uuidString), + "managed": .bool(t.isManaged), + "readiness": .string((readiness[t.id] ?? .uninitialized).rawValue), + ]) } return ["terminals": .array(items)] }, @@ -1363,6 +1436,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate { capturedAppState.terminals[sessionID]?.removeAll { $0.id == terminalID } capturedAppState.terminalReadiness.removeValue(forKey: terminalID) capturedAppState.autoLaunchTerminals.remove(terminalID) + capturedAppState.pendingLaunchCommands.removeValue(forKey: terminalID) if capturedAppState.activeTerminalID[sessionID] == terminalID { capturedAppState.activeTerminalID[sessionID] = capturedAppState.terminals[sessionID]?.first?.id } @@ -1412,35 +1486,19 @@ final class AppDelegate: NSObject, NSApplicationDelegate { let terminal = terminals.first(where: { $0.id == terminalID }), terminal.isManaged, let session = capturedAppState.sessions.first(where: { $0.id == sessionID }), - let agent = AgentRegistry.shared.agent(for: session.agentKind), - text.contains(agent.launchCommandToken) { - if let worktree = capturedAppState.primaryWorktree(for: sessionID), - let crowPath = ClaudeHookConfigWriter.findCrowBinary() { - do { - try agent.hookConfigWriter.writeHookConfig( - worktreePath: worktree.worktreePath, - sessionID: sessionID, - crowPath: crowPath - ) - } catch { - NSLog("[AppDelegate] Failed to write hook config for session %@: %@", - sessionID.uuidString, error.localizedDescription) - } - } - // OTEL telemetry env vars are Claude-specific — - // Codex has no equivalent OTLP exporter. - if agent.kind == .claudeCode, let port = capturedTelemetryPort { - let vars = [ - "CLAUDE_CODE_ENABLE_TELEMETRY=1", - "OTEL_METRICS_EXPORTER=otlp", - "OTEL_LOGS_EXPORTER=otlp", - "OTEL_EXPORTER_OTLP_PROTOCOL=http/json", - "OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:\(port)", - "OTEL_RESOURCE_ATTRIBUTES=crow.session.id=\(sessionID.uuidString)", - ].joined(separator: " ") - text = "export \(vars) && \(text)" + let agent = AgentRegistry.shared.agent(for: session.agentKind) { + let prepared = AppDelegate.prepareAgentLaunchText( + command: text, + agent: agent, + sessionID: sessionID, + worktreePath: capturedAppState.primaryWorktree(for: sessionID)?.worktreePath, + crowPath: ClaudeHookConfigWriter.findCrowBinary(), + telemetryPort: capturedTelemetryPort + ) + text = prepared.text + if prepared.didLaunch { + capturedAppState.terminalReadiness[terminalID] = .agentLaunched } - capturedAppState.terminalReadiness[terminalID] = .agentLaunched } if let routedTerminal { @@ -1710,6 +1768,70 @@ final class AppDelegate: NSObject, NSApplicationDelegate { NSLog("[Crow] Cleanup complete") } + // MARK: - Terminal Registration + + /// Run `create` up to `attempts` times, returning the first success or + /// rethrowing the last error after exhausting all attempts. Window + /// registration can transiently fail under load when `new-window` exceeds + /// its subprocess timeout (issue #408); a couple of retries turn most of + /// those into successes instead of silent window-less terminals. Pure over + /// the `create` closure so the retry policy is unit-testable without tmux. + nonisolated static func registerWithRetry( + attempts: Int, + create: (_ attempt: Int) throws -> T + ) throws -> T { + var lastError: Error? + for attempt in 0.. (text: String, didLaunch: Bool) { + guard command.contains(agent.launchCommandToken) else { return (command, false) } + if let worktreePath, let crowPath { + do { + try agent.hookConfigWriter.writeHookConfig( + worktreePath: worktreePath, + sessionID: sessionID, + crowPath: crowPath + ) + } catch { + NSLog("[AppDelegate] Failed to write hook config for session %@: %@", + sessionID.uuidString, error.localizedDescription) + } + } + // OTEL telemetry env vars are Claude-specific — Codex has no equivalent + // OTLP exporter. + guard agent.kind == .claudeCode, let port = telemetryPort else { return (command, true) } + let vars = [ + "CLAUDE_CODE_ENABLE_TELEMETRY=1", + "OTEL_METRICS_EXPORTER=otlp", + "OTEL_LOGS_EXPORTER=otlp", + "OTEL_EXPORTER_OTLP_PROTOCOL=http/json", + "OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:\(port)", + "OTEL_RESOURCE_ATTRIBUTES=crow.session.id=\(sessionID.uuidString)", + ].joined(separator: " ") + return ("export \(vars) && \(command)", true) + } + // MARK: - Claude Binary Resolution /// Replace bare `claude` in a command string with the full path to the real binary, diff --git a/Sources/Crow/App/SessionService.swift b/Sources/Crow/App/SessionService.swift index b26e434..85084bc 100644 --- a/Sources/Crow/App/SessionService.swift +++ b/Sources/Crow/App/SessionService.swift @@ -254,6 +254,10 @@ final class SessionService { // didBecomeActive re-arm) and mark readiness terminal so // launchClaude's `== .shellReady` guard can never fire. appState.autoLaunchTerminals.remove(terminal.id) + // Defensive: a brand-new terminal's deferred launch must not + // survive into an adopt (#408). In-memory only, so normally + // empty at launch — clear anyway to be safe. + appState.pendingLaunchCommands.removeValue(forKey: terminal.id) if appState.terminalReadiness[terminal.id] != nil { appState.terminalReadiness[terminal.id] = .agentLaunched } @@ -333,7 +337,18 @@ final class SessionService { NSLog("[SessionService] tmux readiness: terminal=\(terminalID), state=\(readiness), current=\(currentState)") if readiness == .shellReady, currentState < .shellReady { self.appState.terminalReadiness[terminalID] = .shellReady - self.launchAgent(terminalID: terminalID) + // Brand-new managed terminals created via `new-terminal --command` + // hold their launch in `pendingLaunchCommands` and paste it HERE, + // now that the shell's line editor is live — the race-free + // replacement for setup.sh's old `sleep 3` + `crow send` (#408). + // Restored/recovered terminals have no pending command and fall + // through to `launchAgent` (rebuild via autoLaunchCommand). + switch SessionService.resolveLaunch(pending: self.appState.pendingLaunchCommands[terminalID]) { + case .pastePending(let command): + self.pasteDeferredLaunch(terminalID: terminalID, command: command) + case .launchAgent: + self.launchAgent(terminalID: terminalID) + } } else if readiness == .timedOut, currentState < .shellReady { // First-prompt watch expired. Do NOT advance to .shellReady or // auto-paste — the shell may still be starting and a paste now @@ -345,6 +360,61 @@ final class SessionService { } } + /// What the `.shellReady` handler should do for a managed terminal. Pure + /// so the brand-new-vs-restored branch is unit-testable without tmux or a + /// live AppState (#408). + enum LaunchAction: Equatable { + /// Brand-new terminal launched via `new-terminal --command`: paste the + /// stored command now that the shell is ready. + case pastePending(String) + /// Restored/recovered terminal: rebuild the command via the agent's + /// `autoLaunchCommand` (the existing `launchAgent` path). + case launchAgent + } + + nonisolated static func resolveLaunch(pending: String?) -> LaunchAction { + if let pending, !pending.isEmpty { return .pastePending(pending) } + return .launchAgent + } + + /// Paste the deferred agent-launch command for a brand-new managed terminal + /// now that its shell's line editor is live (#408). Consumes BOTH the + /// pending command and the `autoLaunchTerminals` membership so `launchAgent` + /// can never also fire (e.g. a later spurious `.shellReady` after adopt), + /// preventing a double launch. + func pasteDeferredLaunch(terminalID: UUID, command: String) { + guard appState.pendingLaunchCommands.removeValue(forKey: terminalID) != nil else { return } + appState.autoLaunchTerminals.remove(terminalID) + + guard let sessionID = appState.terminals.first(where: { _, terminals in + terminals.contains(where: { $0.id == terminalID }) + })?.key, + let routedTerminal = appState.terminals[sessionID]?.first(where: { $0.id == terminalID }) else { + NSLog("[SessionService] pasteDeferredLaunch: no terminal record for \(terminalID); cannot send") + return + } + + // Apply the same prep the legacy `crow send` path got from the `send` + // RPC (hook config + OTEL env vars) so hooks route back to this session + // and Claude telemetry is exported — via the shared helper so the two + // launch paths never drift. + var text = command + if let session = appState.sessions.first(where: { $0.id == sessionID }), + let agent = AgentRegistry.shared.agent(for: session.agentKind) { + text = AppDelegate.prepareAgentLaunchText( + command: command, + agent: agent, + sessionID: sessionID, + worktreePath: appState.primaryWorktree(for: sessionID)?.worktreePath, + crowPath: ClaudeHookConfigWriter.findCrowBinary(), + telemetryPort: telemetryPort + ).text + } + // Ensure a trailing newline so TmuxBackend.sendText delivers Enter. + TerminalRouter.send(routedTerminal, text: text.hasSuffix("\n") ? text : text + "\n") + appState.terminalReadiness[terminalID] = .agentLaunched + } + /// Re-arm the tmux readiness watch for a terminal whose first attempt /// timed out. Reverts AppState back to `.surfaceCreated` so the UI /// transitions out of the Retry overlay, and starts a longer-budget @@ -368,6 +438,19 @@ final class SessionService { NSLog("[SessionService] copied tmux diagnostics for terminal=\(terminalID) bytes=\(bundle.utf8.count)") } + /// Reap leaked orphan cockpit windows once at launch — bare-shell windows + /// that no persisted terminal references (#408). The keep-set is built from + /// the persisted terminals' window bindings; `reapUnboundCockpitWindows` + /// additionally unions the in-memory bindings, so a window adopted or freshly + /// registered this run is never reaped. Call AFTER the async per-terminal + /// rehydration has settled. Safe: never reaps a window running an agent. + @MainActor + func reapOrphanedCockpitWindows() { + let keep = Set(appState.terminals.values.flatMap { $0 }.compactMap { $0.tmuxBinding?.windowIndex }) + let n = TmuxBackend.shared.reapUnboundCockpitWindows(keepWindowIndices: keep) + if n > 0 { NSLog("[SessionService] reaped \(n) orphaned cockpit window(s) at launch (#408)") } + } + /// Re-arm any tmux readiness watches that have stalled while the app /// was backgrounded. Called from `NSApplication.didBecomeActiveNotification` /// so a user who returns to a long-idle app doesn't have to click diff --git a/Tests/CrowTests/DeferredLaunchTests.swift b/Tests/CrowTests/DeferredLaunchTests.swift new file mode 100644 index 0000000..b6d0def --- /dev/null +++ b/Tests/CrowTests/DeferredLaunchTests.swift @@ -0,0 +1,127 @@ +import Foundation +import Testing +import CrowCore +import CrowPersistence +import CrowTerminal +@testable import Crow + +/// Covers the #408 fix: brand-new managed terminals defer their agent launch +/// until the shell signals `.shellReady`, instead of blind-pasting into a +/// not-yet-ready shell. Pure helpers are tested directly; the readiness-handler +/// branch is exercised against a real `SessionService` with seeded state. +// Serialized: the instance tests overwrite the singleton +// `TmuxBackend.shared.onReadinessChanged` via `wireTerminalReadiness()` and then +// fire it directly. Run in parallel (Swift Testing's default) one test could +// stomp another's closure before its sentinel fires. +@Suite("Deferred managed-terminal launch (#408)", .serialized) +struct DeferredLaunchTests { + + // MARK: resolveLaunch (pure) + + @Test func resolveLaunchPastesPendingCommand() { + let cmd = "cd /x && claude \"hi\"" + #expect(SessionService.resolveLaunch(pending: cmd) == .pastePending(cmd)) + } + + @Test func resolveLaunchFallsBackToLaunchAgentWhenNoPending() { + #expect(SessionService.resolveLaunch(pending: nil) == .launchAgent) + #expect(SessionService.resolveLaunch(pending: "") == .launchAgent) + } + + // MARK: registerWithRetry (pure) + + private struct Boom: Error {} + + @Test func registerWithRetrySucceedsFirstTry() throws { + var calls = 0 + let result = try AppDelegate.registerWithRetry(attempts: 3) { _ in calls += 1; return 42 } + #expect(result == 42) + #expect(calls == 1) + } + + @Test func registerWithRetryRecoversAfterTransientFailures() throws { + var calls = 0 + let result = try AppDelegate.registerWithRetry(attempts: 3) { attempt -> Int in + calls += 1 + if attempt < 2 { throw Boom() } + return 7 + } + #expect(result == 7) + #expect(calls == 3) + } + + @Test func registerWithRetryThrowsAfterExhaustion() { + var calls = 0 + #expect(throws: Boom.self) { + try AppDelegate.registerWithRetry(attempts: 3) { _ -> Int in calls += 1; throw Boom() } + } + #expect(calls == 3) + } + + // MARK: deferred paste on .shellReady (instance) + + /// When the sentinel fires `.shellReady` for a terminal staged exactly as + /// the `new-terminal` RPC stages a brand-new managed launch, the readiness + /// handler pastes the pending command and advances to `.agentLaunched`, + /// consuming BOTH the pending command and the autoLaunch membership so + /// `launchAgent` can never also fire (no double launch). + @MainActor + @Test func shellReadyPastesPendingCommandAndConsumesState() { + let appState = AppState() + let tmp = URL(fileURLWithPath: NSTemporaryDirectory()) + .appendingPathComponent("crow-defer-\(UUID().uuidString)") + let store = JSONStore(directory: tmp) + let service = SessionService(store: store, appState: appState) + + let sessionID = UUID() + let terminalID = UUID() + let command = "cd \(tmp.path) && claude \"hi\"" + appState.sessions = [Session(id: sessionID, name: "feat", kind: .work)] + appState.terminals[sessionID] = [SessionTerminal( + id: terminalID, sessionID: sessionID, name: "Claude Code", + cwd: tmp.path, command: command, isManaged: true, tmuxBinding: nil + )] + // Stage the deferred launch exactly like the new-terminal RPC. + appState.terminalReadiness[terminalID] = .uninitialized + appState.pendingLaunchCommands[terminalID] = command + appState.autoLaunchTerminals.insert(terminalID) + + service.wireTerminalReadiness() + // Fire the sentinel's `.shellReady` exactly as the backend would. + TmuxBackend.shared.onReadinessChanged?(terminalID, .shellReady) + + #expect(appState.pendingLaunchCommands[terminalID] == nil) + #expect(!appState.autoLaunchTerminals.contains(terminalID)) + #expect(appState.terminalReadiness[terminalID] == .agentLaunched) + } + + /// A `.timedOut` before `.shellReady` must NOT paste or consume the pending + /// command — the launch is recoverable via Retry / app-foreground re-arm. + @MainActor + @Test func timedOutDoesNotConsumePendingLaunch() { + let appState = AppState() + let tmp = URL(fileURLWithPath: NSTemporaryDirectory()) + .appendingPathComponent("crow-defer-timeout-\(UUID().uuidString)") + let service = SessionService(store: JSONStore(directory: tmp), appState: appState) + + let sessionID = UUID() + let terminalID = UUID() + let command = "cd \(tmp.path) && claude \"hi\"" + appState.sessions = [Session(id: sessionID, name: "feat", kind: .work)] + appState.terminals[sessionID] = [SessionTerminal( + id: terminalID, sessionID: sessionID, name: "Claude Code", + cwd: tmp.path, command: command, isManaged: true, tmuxBinding: nil + )] + appState.terminalReadiness[terminalID] = .uninitialized + appState.pendingLaunchCommands[terminalID] = command + appState.autoLaunchTerminals.insert(terminalID) + + service.wireTerminalReadiness() + TmuxBackend.shared.onReadinessChanged?(terminalID, .timedOut) + + #expect(appState.terminalReadiness[terminalID] == .timedOut) + // Pending launch survives so a later `.shellReady` (Retry) still pastes. + #expect(appState.pendingLaunchCommands[terminalID] == command) + #expect(appState.autoLaunchTerminals.contains(terminalID)) + } +} diff --git a/skills/crow-batch-workspace/SKILL.md b/skills/crow-batch-workspace/SKILL.md index 68bbfb6..7a5d5c3 100644 --- a/skills/crow-batch-workspace/SKILL.md +++ b/skills/crow-batch-workspace/SKILL.md @@ -217,4 +217,5 @@ https://github.com/RadiusMethod/acme-api/issues/46 ``` - 5 parallel `setup.sh` calls - Each blocks one GCD thread in the socket server (well within the 64+ thread pool) -- `sleep 3` calls in each script overlap — total wait is ~3s, not 15s +- Each script's readiness poll (up to ~15s) overlaps with the others — they wait + concurrently, and most return as soon as the agent reports `agentLaunched` diff --git a/skills/crow-workspace/SKILL.md b/skills/crow-workspace/SKILL.md index 3d3be49..3d94c70 100644 --- a/skills/crow-workspace/SKILL.md +++ b/skills/crow-workspace/SKILL.md @@ -491,8 +491,7 @@ If `setup.sh` returns a JSON error (`"status": "error"`): | `git_worktree_add` — worktree creation failed | Branch may exist; script auto-retries after cleanup | | `new_session` — crow new-session failed | Crow app may not be running. Inform user. | | `add_worktree` — crow add-worktree failed | Use full UUID from session, check paths | -| `new_terminal` — crow new-terminal failed | Session may not exist; check session_id | -| `send_launch` — crow send failed | Terminal may not be ready; retry | +| `new_terminal` — crow new-terminal failed / could not create window | Session may not exist (check session_id), or tmux couldn't spawn a window under load | | `write_prompt` — prompt file not found | Verify prompt was written before calling setup.sh | ## crow CLI Reference diff --git a/skills/crow-workspace/setup.sh b/skills/crow-workspace/setup.sh index c234f37..bc9463b 100755 --- a/skills/crow-workspace/setup.sh +++ b/skills/crow-workspace/setup.sh @@ -56,6 +56,15 @@ json_val() { tr -d '\n' | sed 's/[[:space:]]*:[[:space:]]*/:/g' | grep -o "\"$key\":\"[^\"]*\"" | cut -d'"' -f4 | head -1 } +# Extract the `readiness` of a specific terminal id from `crow list-terminals` +# JSON. Walks the flat per-terminal objects, finds the one carrying our id, and +# reads its readiness field. Usage: terminal_readiness "$tid" "$json" +terminal_readiness() { + local tid="$1" json="$2" + printf '%s' "$json" | tr -d '\n' | grep -o '{[^{}]*}' | grep "\"$tid\"" \ + | grep -o '"readiness":"[^"]*"' | cut -d'"' -f4 | head -1 +} + # POSIX single-quote an arg so it's safe to interpolate into a shell command. # Mirrors Swift's shellQuote() in ClaudeLaunchArgs: wraps value in '...' and # escapes embedded single-quotes as '\''. @@ -550,16 +559,44 @@ launch_claude() { fi fi - # Create terminal - log "Creating terminal..." + # Build the agent launch command and hand it to crow at terminal-creation + # time via --command. Crow holds the command and pastes it only once the + # shell's line editor is live (the .shellReady sentinel), which is the + # race-free replacement for the old `sleep 3` + `crow send` handshake that + # intermittently dropped the launch into a not-yet-ready shell, leaving a + # bare zsh with no agent (#408). + # + # The prompt stays in its file — `"$(cat $prompt_path)"` is expanded by the + # TARGET shell at paste time, so the command sent over the socket is small + # (no ARG_MAX / payload concern). + local prompt_path="$DEV_ROOT/.claude/prompts/crow-prompt-$SESSION_NAME.md" + local rc_args="" + if is_remote_control_enabled; then + # Keep building --rc here: crow's resolveClaudeInCommand only injects + # remote-control flags for a bare `claude …` command, NOT a + # `cd … && claude …` form, so setup.sh remains the source of truth. + rc_args=" --rc --name $(posix_quote "$SESSION_NAME")" + log "Remote control enabled — launching with --rc --name '$SESSION_NAME'" + fi + local launch_cmd="cd $WORKTREE_PATH && $claude_bin --permission-mode plan$rc_args \"\$(cat $prompt_path)\"" + + # Create terminal with the launch command attached. + log "Creating terminal (deferred agent launch via --command)..." local term_result if ! term_result=$(crow new-terminal --session "$SESSION_ID" \ --cwd "$WORKTREE_PATH" \ --name "Claude Code" \ - --managed 2>&1); then + --managed \ + --command "$launch_cmd" 2>&1); then die "new_terminal" "crow new-terminal failed: $term_result" fi + # The RPC reports launch_failed when the tmux window could not be created + # (e.g. new-window timed out under load) — don't pretend it launched. + if grep -qE '"launch_failed"[[:space:]]*:[[:space:]]*true' <<< "$term_result"; then + die "new_terminal" "crow could not create the terminal window: $term_result" + fi + TERMINAL_ID=$(json_val "terminal_id" <<< "$term_result") if [[ -z "$TERMINAL_ID" ]]; then die "new_terminal" "Could not parse terminal_id from: $term_result" @@ -570,28 +607,32 @@ launch_claude() { crow select-session --session "$SESSION_ID" >/dev/null 2>&1 \ || log "Warning: select-session failed" - # Wait for the shell to initialize in the new terminal. - # The terminal surface is pre-initialized by crow new-terminal, but the - # shell process needs time to start and display its prompt. - log "Waiting for shell to initialize..." - sleep 3 - - # Build the prompt file path - local prompt_path="$DEV_ROOT/.claude/prompts/crow-prompt-$SESSION_NAME.md" - - # Send launch command - log "Launching Claude Code..." - local rc_args="" - if is_remote_control_enabled; then - rc_args=" --rc --name $(posix_quote "$SESSION_NAME")" - log "Remote control enabled — launching with --rc --name '$SESSION_NAME'" - fi - local send_text="cd $WORKTREE_PATH && $claude_bin --permission-mode plan$rc_args \"\$(cat $prompt_path)\"\\n" - if ! crow send --session "$SESSION_ID" --terminal "$TERMINAL_ID" "$send_text" >/dev/null 2>&1; then - die "send_launch" "crow send failed" - fi - - log "Claude Code launched" + # Verify the agent actually started rather than assuming success. Poll the + # terminal's readiness (exposed on `crow list-terminals`) until it reaches + # agentLaunched/shellReady. This is a warning, NOT a hard failure: the + # workspace (worktree/session/metadata) is already set up, and crow's UI + # shows a Retry affordance if the agent didn't start (#408). + log "Waiting for the agent to launch..." + local readiness="" attempt + for attempt in $(seq 1 15); do + local lt_result + lt_result=$(crow list-terminals --session "$SESSION_ID" 2>/dev/null) || true + readiness=$(terminal_readiness "$TERMINAL_ID" "$lt_result") + case "$readiness" in + agentLaunched|shellReady) + log "Claude Code launched (readiness=$readiness)" + return + ;; + failed|timedOut) + log "WARNING: agent did not start (readiness=$readiness). The terminal" \ + "is up but the agent isn't running — use Retry in the Crow UI." + return + ;; + esac + sleep 1 + done + log "WARNING: agent launch not confirmed after 15s (last readiness='${readiness:-unknown}')." \ + "The workspace is set up; check the Crow terminal and use Retry if needed." } # ─── Result ──────────────────────────────────────────────────────────────────