From beb1fd2971e101c2de6f0fa850d64db623ceca64 Mon Sep 17 00:00:00 2001 From: Ryder Freeman Date: Sun, 17 May 2026 09:03:51 -0700 Subject: [PATCH 1/2] fix(session): read result.toml for accurate wait exit code (#1442) `csa session wait` was reporting `status=failure exit=1` for sessions whose `result.toml` recorded `status=success exit_code=0`. The wait loop loaded the refreshed `result.toml` after seeing the daemon completion packet, then dropped it via an mtime-based filter that preferred the daemon completion packet whenever `result.toml` was older or had the same mtime (except for one narrow corner case). In practice this filter always lost: the agent writes `result.toml` before the daemon process exits and writes `daemon-completion.toml`, so `result.toml.mtime <= daemon-completion.toml.mtime` is the normal ordering. Any session where the daemon happened to exit non-zero for reasons unrelated to the session outcome (post-write cleanup failure, parent SIGTERM, etc.) would be reported as a failure to callers. Trust `result.toml` whenever the refresh succeeds. It is the canonical session artifact recording the actual outcome; the daemon completion packet only records the daemon process exit and is meant to be a fallback when `result.toml` is unavailable. The earlier regression test `handle_session_wait_prefers_daemon_completion_exit_code` codified the buggy behaviour and is renamed/inverted to `handle_session_wait_prefers_result_toml_over_daemon_completion_exit_code` so the result-authority contract is explicit. Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 32 +++++++++---------- Cargo.toml | 2 +- .../src/session_cmds_daemon_wait.rs | 28 ++++------------ .../src/session_cmds_tests_tail.rs | 12 +++++-- 4 files changed, 32 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 43ef6f13..0fbf9e15 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -509,7 +509,7 @@ checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "cli-sub-agent" -version = "0.1.734" +version = "0.1.735" dependencies = [ "anyhow", "chrono", @@ -701,7 +701,7 @@ dependencies = [ [[package]] name = "csa-acp" -version = "0.1.734" +version = "0.1.735" dependencies = [ "agent-client-protocol", "anyhow", @@ -721,7 +721,7 @@ dependencies = [ [[package]] name = "csa-config" -version = "0.1.734" +version = "0.1.735" dependencies = [ "anyhow", "chrono", @@ -739,7 +739,7 @@ dependencies = [ [[package]] name = "csa-core" -version = "0.1.734" +version = "0.1.735" dependencies = [ "agent-teams", "chrono", @@ -755,7 +755,7 @@ dependencies = [ [[package]] name = "csa-eval" -version = "0.1.734" +version = "0.1.735" dependencies = [ "anyhow", "chrono", @@ -769,7 +769,7 @@ dependencies = [ [[package]] name = "csa-executor" -version = "0.1.734" +version = "0.1.735" dependencies = [ "agent-teams", "anyhow", @@ -797,7 +797,7 @@ dependencies = [ [[package]] name = "csa-hooks" -version = "0.1.734" +version = "0.1.735" dependencies = [ "anyhow", "chrono", @@ -816,7 +816,7 @@ dependencies = [ [[package]] name = "csa-lock" -version = "0.1.734" +version = "0.1.735" dependencies = [ "anyhow", "chrono", @@ -830,7 +830,7 @@ dependencies = [ [[package]] name = "csa-mcp-hub" -version = "0.1.734" +version = "0.1.735" dependencies = [ "anyhow", "axum", @@ -853,7 +853,7 @@ dependencies = [ [[package]] name = "csa-memory" -version = "0.1.734" +version = "0.1.735" dependencies = [ "anyhow", "async-trait", @@ -872,7 +872,7 @@ dependencies = [ [[package]] name = "csa-process" -version = "0.1.734" +version = "0.1.735" dependencies = [ "anyhow", "chrono", @@ -891,7 +891,7 @@ dependencies = [ [[package]] name = "csa-resource" -version = "0.1.734" +version = "0.1.735" dependencies = [ "anyhow", "csa-core", @@ -907,7 +907,7 @@ dependencies = [ [[package]] name = "csa-scheduler" -version = "0.1.734" +version = "0.1.735" dependencies = [ "anyhow", "chrono", @@ -925,7 +925,7 @@ dependencies = [ [[package]] name = "csa-session" -version = "0.1.734" +version = "0.1.735" dependencies = [ "anyhow", "chrono", @@ -951,7 +951,7 @@ dependencies = [ [[package]] name = "csa-todo" -version = "0.1.734" +version = "0.1.735" dependencies = [ "anyhow", "chrono", @@ -4516,7 +4516,7 @@ dependencies = [ [[package]] name = "weave" -version = "0.1.734" +version = "0.1.735" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index a628703f..afb41a76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ default-members = [ resolver = "2" [workspace.package] -version = "0.1.734" +version = "0.1.735" edition = "2024" rust-version = "1.88" license = "Apache-2.0" diff --git a/crates/cli-sub-agent/src/session_cmds_daemon_wait.rs b/crates/cli-sub-agent/src/session_cmds_daemon_wait.rs index f05fe9e8..e3019859 100644 --- a/crates/cli-sub-agent/src/session_cmds_daemon_wait.rs +++ b/crates/cli-sub-agent/src/session_cmds_daemon_wait.rs @@ -308,28 +308,12 @@ where let refreshed_result = refreshed_result.ok().flatten(); let mut synthetic = false; let refreshed_result_available = refreshed_result.is_some(); - let mut loaded_result = refreshed_result.filter(|result| { - match ( - fs::metadata(session_dir.join(csa_session::result::RESULT_FILE_NAME)) - .ok() - .and_then(|metadata| metadata.modified().ok()), - fs::metadata(daemon_completion_path(&session_dir)) - .ok() - .and_then(|metadata| metadata.modified().ok()), - ) { - (Some(result_modified), Some(completion_modified)) - if result_modified > completion_modified => - { - true - } - (Some(result_modified), Some(completion_modified)) - if result_modified == completion_modified => - { - completion.exit_code == 0 && result.exit_code != 0 - } - _ => false, - } - }); + // result.toml is the authoritative session artifact; trust it over the daemon + // completion packet (which records the daemon process exit, not the session + // outcome). The daemon may exit non-zero after writing a successful result.toml + // (e.g., post-write cleanup failure, parent SIGTERM), so prior mtime-based + // filtering caused #1442 false failures. See #1442. + let mut loaded_result = refreshed_result; if refreshed_result_available { crate::session_cmds::retire_if_dead_with_result( effective_root, diff --git a/crates/cli-sub-agent/src/session_cmds_tests_tail.rs b/crates/cli-sub-agent/src/session_cmds_tests_tail.rs index 42a927be..d8f5b246 100644 --- a/crates/cli-sub-agent/src/session_cmds_tests_tail.rs +++ b/crates/cli-sub-agent/src/session_cmds_tests_tail.rs @@ -568,9 +568,15 @@ fn handle_session_wait_ignores_incomplete_result_while_daemon_alive() { assert!(status.success()); } +/// Regression test for #1442: result.toml is the authoritative session artifact. +/// When result.toml records success/exit_code=0 but the daemon process exited with +/// non-zero (recording failure in daemon-completion.toml), the wait command MUST +/// report the session's actual outcome from result.toml — not the daemon's process +/// exit code. The daemon may exit non-zero for reasons unrelated to the session +/// outcome (post-write cleanup failure, parent SIGTERM, etc.). #[cfg(unix)] #[test] -fn handle_session_wait_prefers_daemon_completion_exit_code() { +fn handle_session_wait_prefers_result_toml_over_daemon_completion_exit_code() { let td = tempdir().unwrap(); let _env_lock = TEST_ENV_LOCK.blocking_lock(); let state_home = td.path().join("xdg-state"); @@ -594,8 +600,8 @@ fn handle_session_wait_prefers_daemon_completion_exit_code() { handle_session_wait(session_id, Some(project.to_string_lossy().into_owned()), 1).unwrap(); assert_eq!( - exit_code, 1, - "session wait should return the daemon's final exit code, not the intermediate result.toml exit code" + exit_code, 0, + "result.toml records the session's actual outcome and must override the daemon process exit code" ); } From 2e139ece9b6ee763214e89ba5e611ef26245b9d3 Mon Sep 17 00:00:00 2001 From: Ryder Freeman Date: Sun, 17 May 2026 09:23:24 -0700 Subject: [PATCH 2/2] fix(session): distinguish KV-warm exit from real timeout (#1439) `csa session wait` previously emitted exit code 124 whenever the configured wait cap fired, regardless of whether the underlying session was still healthy. AI agents (and shell callers reading the standard Unix timeout convention) interpreted 124 as failure, even though the loop's intent has always been a polite poll cap that keeps the caller's KV cache warm. This change splits the exit-cap behaviour: * Session daemon still alive at the cap (the common case): emit `CSA:SESSION_WAIT_KV_WARM` with `status=alive` and return exit code 0. The accompanying CSA:CALLER_HINT explicitly tells callers the exit is healthy and to re-wait in a fresh Bash call. * Daemon already gone with no `result.toml` at the cap (defensive, rare): emit `CSA:SESSION_WAIT_TIMEOUT` with `status=dead` and return 124. Earlier loop branches usually catch this case with exit-1 first. Existing tests that asserted the legacy 124-on-alive behaviour are updated, and a new regression test `kv_warm_tests::handle_session_wait_ kv_warm_exit_when_daemon_alive_at_cap` pins the contract. Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 32 ++--- Cargo.toml | 2 +- .../src/session_cmds_daemon_wait.rs | 63 +++++---- .../cli-sub-agent/src/session_cmds_tests.rs | 2 + .../src/session_cmds_tests_daemon_pid_tail.rs | 7 +- .../src/session_cmds_tests_kv_warm.rs | 122 ++++++++++++++++++ .../src/session_cmds_tests_tail.rs | 8 +- .../src/session_cmds_tests_tail_wait.rs | 8 +- .../session_cmds_tests_tail_wait_liveness.rs | 10 +- 9 files changed, 199 insertions(+), 55 deletions(-) create mode 100644 crates/cli-sub-agent/src/session_cmds_tests_kv_warm.rs diff --git a/Cargo.lock b/Cargo.lock index 0fbf9e15..a1020b14 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -509,7 +509,7 @@ checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "cli-sub-agent" -version = "0.1.735" +version = "0.1.736" dependencies = [ "anyhow", "chrono", @@ -701,7 +701,7 @@ dependencies = [ [[package]] name = "csa-acp" -version = "0.1.735" +version = "0.1.736" dependencies = [ "agent-client-protocol", "anyhow", @@ -721,7 +721,7 @@ dependencies = [ [[package]] name = "csa-config" -version = "0.1.735" +version = "0.1.736" dependencies = [ "anyhow", "chrono", @@ -739,7 +739,7 @@ dependencies = [ [[package]] name = "csa-core" -version = "0.1.735" +version = "0.1.736" dependencies = [ "agent-teams", "chrono", @@ -755,7 +755,7 @@ dependencies = [ [[package]] name = "csa-eval" -version = "0.1.735" +version = "0.1.736" dependencies = [ "anyhow", "chrono", @@ -769,7 +769,7 @@ dependencies = [ [[package]] name = "csa-executor" -version = "0.1.735" +version = "0.1.736" dependencies = [ "agent-teams", "anyhow", @@ -797,7 +797,7 @@ dependencies = [ [[package]] name = "csa-hooks" -version = "0.1.735" +version = "0.1.736" dependencies = [ "anyhow", "chrono", @@ -816,7 +816,7 @@ dependencies = [ [[package]] name = "csa-lock" -version = "0.1.735" +version = "0.1.736" dependencies = [ "anyhow", "chrono", @@ -830,7 +830,7 @@ dependencies = [ [[package]] name = "csa-mcp-hub" -version = "0.1.735" +version = "0.1.736" dependencies = [ "anyhow", "axum", @@ -853,7 +853,7 @@ dependencies = [ [[package]] name = "csa-memory" -version = "0.1.735" +version = "0.1.736" dependencies = [ "anyhow", "async-trait", @@ -872,7 +872,7 @@ dependencies = [ [[package]] name = "csa-process" -version = "0.1.735" +version = "0.1.736" dependencies = [ "anyhow", "chrono", @@ -891,7 +891,7 @@ dependencies = [ [[package]] name = "csa-resource" -version = "0.1.735" +version = "0.1.736" dependencies = [ "anyhow", "csa-core", @@ -907,7 +907,7 @@ dependencies = [ [[package]] name = "csa-scheduler" -version = "0.1.735" +version = "0.1.736" dependencies = [ "anyhow", "chrono", @@ -925,7 +925,7 @@ dependencies = [ [[package]] name = "csa-session" -version = "0.1.735" +version = "0.1.736" dependencies = [ "anyhow", "chrono", @@ -951,7 +951,7 @@ dependencies = [ [[package]] name = "csa-todo" -version = "0.1.735" +version = "0.1.736" dependencies = [ "anyhow", "chrono", @@ -4516,7 +4516,7 @@ dependencies = [ [[package]] name = "weave" -version = "0.1.735" +version = "0.1.736" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index afb41a76..9c2a8be9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ default-members = [ resolver = "2" [workspace.package] -version = "0.1.735" +version = "0.1.736" edition = "2024" rust-version = "1.88" license = "Apache-2.0" diff --git a/crates/cli-sub-agent/src/session_cmds_daemon_wait.rs b/crates/cli-sub-agent/src/session_cmds_daemon_wait.rs index e3019859..f778afd7 100644 --- a/crates/cli-sub-agent/src/session_cmds_daemon_wait.rs +++ b/crates/cli-sub-agent/src/session_cmds_daemon_wait.rs @@ -7,6 +7,11 @@ use std::os::fd::AsRawFd; pub(crate) const SESSION_WAIT_MEMORY_WARN_EXIT_CODE: i32 = 33; const SESSION_WAIT_SUCCESS_EXIT_CODE: i32 = 0; const SESSION_WAIT_FAILURE_EXIT_CODE: i32 = 1; +/// Healthy poll-cap exit when the session is still alive: callers should +/// process tokens (warming their KV cache) and re-wait. See #1439. +const SESSION_WAIT_KV_WARM_EXIT_CODE: i32 = 0; +/// Reserved for the rare case where the wait cap is reached but the session +/// daemon is no longer alive and no result.toml was produced. const SESSION_WAIT_TIMEOUT_EXIT_CODE: i32 = 124; const SESSION_WAIT_MEMORY_SAMPLE_INTERVAL: std::time::Duration = std::time::Duration::from_secs(15); @@ -471,38 +476,46 @@ where let elapsed = start.elapsed().as_secs(); if elapsed >= wait_behavior.wait_timeout_secs { - eprintln!( - "Timeout: session {} did not complete within {}s", - resolved.session_id, wait_behavior.wait_timeout_secs, - ); - // Emit structured retry hint for orchestrators / agents. let cd_arg = cd .as_ref() .map(|path| format!(" --cd '{}'", path)) .unwrap_or_default(); + let session_alive = session_has_terminal_process(&session_dir) + || csa_process::ToolLiveness::is_alive(&session_dir); + if session_alive { + // KV-warm exit: session still alive at the wait cap. See #1439. + eprintln!( + "Session {} still running after {}s wait cap; returning so caller can warm its KV cache before re-waiting.", + resolved.session_id, wait_behavior.wait_timeout_secs, + ); + eprintln!( + "", + resolved.session_id, elapsed, resolved.session_id, cd_arg, + ); + eprintln!( + "", + sid = resolved.session_id, + cd = cd + .as_ref() + .map(|p| format!(" --cd '{p}'")) + .unwrap_or_default(), + ); + return Ok(SESSION_WAIT_KV_WARM_EXIT_CODE); + } + // Defensive: daemon gone with no result.toml (rare; earlier loop branches usually exit-1 first). eprintln!( - "", - resolved.session_id, elapsed, resolved.session_id, cd_arg, - ); - eprintln!( - "Hint: Call `csa session wait` again individually (not in a tight loop script). \ - The {}s timeout is designed to let the calling agent generate tokens between waits, \ - keeping its KV cache warm.", - wait_behavior.wait_timeout_secs, + "Timeout: session {} did not complete within {}s and no live daemon process remains.", + resolved.session_id, wait_behavior.wait_timeout_secs, ); eprintln!( - "", - sid = resolved.session_id, - cd = cd - .as_ref() - .map(|p| format!(" --cd '{p}'")) - .unwrap_or_default(), + "", + resolved.session_id, elapsed, resolved.session_id, cd_arg, ); return Ok(SESSION_WAIT_TIMEOUT_EXIT_CODE); } diff --git a/crates/cli-sub-agent/src/session_cmds_tests.rs b/crates/cli-sub-agent/src/session_cmds_tests.rs index 8f4a3b14..69642961 100644 --- a/crates/cli-sub-agent/src/session_cmds_tests.rs +++ b/crates/cli-sub-agent/src/session_cmds_tests.rs @@ -743,6 +743,8 @@ include!("session_cmds_tests_fork_tail.rs"); #[path = "session_cmds_tests_daemon_pid_tail.rs"] mod daemon_pid_tail_tests; +#[path = "session_cmds_tests_kv_warm.rs"] +mod kv_warm_tests; #[path = "session_cmds_tests_list_format.rs"] mod list_format_tests; #[path = "session_cmds_tests_result_cli.rs"] diff --git a/crates/cli-sub-agent/src/session_cmds_tests_daemon_pid_tail.rs b/crates/cli-sub-agent/src/session_cmds_tests_daemon_pid_tail.rs index 44dc079a..947c115e 100644 --- a/crates/cli-sub-agent/src/session_cmds_tests_daemon_pid_tail.rs +++ b/crates/cli-sub-agent/src/session_cmds_tests_daemon_pid_tail.rs @@ -68,9 +68,12 @@ fn handle_session_wait_ignores_completion_packet_while_raw_daemon_pid_is_alive() let exit_code = handle_session_wait(session_id, Some(project.to_string_lossy().into_owned()), 1).unwrap(); + // daemon.pid is still alive when the 1s wait cap fires, so the completion + // packet (which only records the daemon-process exit) must be ignored. + // Under #1439, alive-at-cap returns the KV-warm code (0) rather than 124. assert_eq!( - exit_code, 124, - "wait should time out instead of trusting daemon-completion while daemon.pid is still alive" + exit_code, 0, + "wait should emit the KV-warm exit instead of trusting daemon-completion while daemon.pid is still alive" ); child.kill().ok(); diff --git a/crates/cli-sub-agent/src/session_cmds_tests_kv_warm.rs b/crates/cli-sub-agent/src/session_cmds_tests_kv_warm.rs new file mode 100644 index 00000000..a996bea0 --- /dev/null +++ b/crates/cli-sub-agent/src/session_cmds_tests_kv_warm.rs @@ -0,0 +1,122 @@ +use super::*; +use crate::session_cmds_daemon::{ + WaitBehavior, WaitLoopTiming, WaitReconciliationOutcome, handle_session_wait_with_hooks, +}; +use crate::test_env_lock::TEST_ENV_LOCK; +use tempfile::tempdir; + +struct EnvVarGuard { + key: &'static str, + original: Option, +} + +impl EnvVarGuard { + fn set(key: &'static str, value: impl AsRef) -> Self { + let original = std::env::var(key).ok(); + // SAFETY: test-scoped env mutation guarded by a process-wide mutex. + unsafe { std::env::set_var(key, value) }; + Self { key, original } + } +} + +impl Drop for EnvVarGuard { + fn drop(&mut self) { + // SAFETY: test-scoped env mutation guarded by a process-wide mutex. + unsafe { + match self.original.as_deref() { + Some(value) => std::env::set_var(self.key, value), + None => std::env::remove_var(self.key), + } + } + } +} + +#[cfg(target_os = "linux")] +fn read_process_start_time_ticks(pid: u32) -> u64 { + let stat_path = format!("/proc/{pid}/stat"); + let content = std::fs::read_to_string(stat_path).expect("read /proc stat"); + let close_paren = content.rfind(')').expect("stat comm terminator"); + let after_comm = &content[close_paren + 1..]; + let mut parts = after_comm.split_whitespace(); + parts.next().expect("state"); + parts.next().expect("ppid"); + parts.next().expect("pgrp"); + for _ in 0..16 { + parts.next().expect("intermediate stat field"); + } + parts + .next() + .expect("starttime") + .parse::() + .expect("starttime parse") +} + +#[cfg(target_os = "linux")] +fn daemon_pid_record(pid: u32) -> String { + format!("{pid} {}\n", read_process_start_time_ticks(pid)) +} + +/// Regression test for #1439: when the wait cap fires and the session daemon is +/// still alive, the wait must exit with code 0 (KV-warm), not 124 (legacy +/// timeout). The accompanying `CSA:SESSION_WAIT_KV_WARM` marker is verified by +/// code inspection in `session_cmds_daemon_wait.rs`; this test pins the +/// exit-code contract that callers (especially AI agents in `run_in_background` +/// task-notification loops) depend on. +#[cfg(target_os = "linux")] +#[test] +fn handle_session_wait_kv_warm_exit_when_daemon_alive_at_cap() { + let td = tempdir().expect("tempdir"); + let _env_lock = TEST_ENV_LOCK.blocking_lock(); + let state_home = td.path().join("xdg-state"); + std::fs::create_dir_all(&state_home).expect("create state home"); + let _home_guard = EnvVarGuard::set("HOME", td.path()); + let _state_guard = EnvVarGuard::set("XDG_STATE_HOME", &state_home); + let project = td.path(); + + let session = + create_session(project, Some("wait-kv-warm-alive"), None, Some("codex")).expect("create"); + let session_id = session.meta_session_id; + let session_dir = get_session_dir(project, &session_id).expect("session dir"); + + let mut child = std::process::Command::new("sleep") + .arg("30") + .spawn() + .expect("spawn child"); + std::fs::write( + session_dir.join("daemon.pid"), + daemon_pid_record(child.id()), + ) + .expect("write daemon pid"); + assert!(csa_process::ToolLiveness::daemon_pid_is_alive(&session_dir)); + + let exit_code = handle_session_wait_with_hooks( + session_id, + Some(project.to_string_lossy().into_owned()), + WaitBehavior { + wait_timeout_secs: 0, + memory_warn_mb: None, + timing: WaitLoopTiming { + poll_interval: std::time::Duration::from_millis(1), + memory_sample_interval: std::time::Duration::from_secs(15), + }, + }, + |_project_root, _current_session_id, _trigger| { + Ok(WaitReconciliationOutcome { + result_became_available: false, + synthetic: false, + }) + }, + |_sid, _status, _exit_code, _synthetic, _mirror_to_stdout| { + panic!("alive-at-cap path must not emit a completion signal"); + }, + ) + .expect("wait should reach KV-warm exit"); + + let _ = child.kill(); + let _ = child.wait(); + + assert_eq!( + exit_code, 0, + "live daemon at wait cap must emit KV-warm exit (0), not legacy timeout (124)" + ); +} diff --git a/crates/cli-sub-agent/src/session_cmds_tests_tail.rs b/crates/cli-sub-agent/src/session_cmds_tests_tail.rs index d8f5b246..fbcf0bc7 100644 --- a/crates/cli-sub-agent/src/session_cmds_tests_tail.rs +++ b/crates/cli-sub-agent/src/session_cmds_tests_tail.rs @@ -649,9 +649,13 @@ fn handle_session_wait_ignores_completion_packet_while_daemon_alive() { let exit_code = handle_session_wait(session_id, Some(project.to_string_lossy().into_owned()), 1).unwrap(); + // Daemon is still alive when the 1s wait cap fires; the completion packet + // recorded the daemon process exit, not the session outcome, so the wait + // must NOT surface it. Under #1439, alive-at-cap returns the KV-warm code + // (0) rather than the legacy 124. assert_eq!( - exit_code, 124, - "wait should time out instead of reporting completion while the daemon is still alive" + exit_code, 0, + "wait should emit the KV-warm exit instead of reporting completion while the daemon is still alive" ); child.kill().ok(); diff --git a/crates/cli-sub-agent/src/session_cmds_tests_tail_wait.rs b/crates/cli-sub-agent/src/session_cmds_tests_tail_wait.rs index 4e334cba..c52d3b96 100644 --- a/crates/cli-sub-agent/src/session_cmds_tests_tail_wait.rs +++ b/crates/cli-sub-agent/src/session_cmds_tests_tail_wait.rs @@ -634,12 +634,12 @@ fn test_session_wait_memory_warn_disabled_when_zero() { panic!("disabled sampler must not emit marker"); }, ) - .expect("wait should fall back to timeout"); + .expect("wait should fall back to KV-warm exit"); let _ = child.kill(); let _ = child.wait(); - assert_eq!(exit_code, 124); + assert_eq!(exit_code, 0); assert_eq!(sample_calls, 0); } @@ -704,12 +704,12 @@ fn test_session_wait_procfs_failure_fallback() { panic!("sampling failure fallback must not emit marker"); }, ) - .expect("wait should fall back to classic timeout"); + .expect("wait should fall back to KV-warm exit"); let _ = child.kill(); let _ = child.wait(); - assert_eq!(exit_code, 124); + assert_eq!(exit_code, 0); assert_eq!(sample_calls, 1); } diff --git a/crates/cli-sub-agent/src/session_cmds_tests_tail_wait_liveness.rs b/crates/cli-sub-agent/src/session_cmds_tests_tail_wait_liveness.rs index ebb48a47..a459786c 100644 --- a/crates/cli-sub-agent/src/session_cmds_tests_tail_wait_liveness.rs +++ b/crates/cli-sub-agent/src/session_cmds_tests_tail_wait_liveness.rs @@ -33,9 +33,9 @@ impl Drop for EnvVarGuard { /// When PID-level detection misses (session_has_terminal_process=false) but broader /// filesystem liveness signals remain (is_alive=true due to recent session writes), -/// the wait must continue polling and eventually return 124 (timeout) — not 1 (failure). -/// -/// Regression test for #1396. +/// the wait must continue polling and eventually return the KV-warm exit (0) — not +/// failure (1). Original regression for #1396; updated for #1439 which reclassifies +/// the alive-at-cap exit code from 124 to 0. #[test] fn handle_session_wait_continues_polling_when_pid_missing_but_liveness_signals_present() { let td = tempdir().expect("tempdir"); @@ -95,7 +95,7 @@ fn handle_session_wait_continues_polling_when_pid_missing_but_liveness_signals_p .expect("wait should succeed"); assert_eq!( - exit_code, 124, - "wait must time out (124) not report failure (1) when liveness signals exist" + exit_code, 0, + "wait must emit the KV-warm exit (0), not report failure (1), when liveness signals exist" ); }