From 3d274a3b32378f073d7f9bf9a836428ef26b0c88 Mon Sep 17 00:00:00 2001 From: prateek310524 Date: Fri, 29 May 2026 12:04:36 +0530 Subject: [PATCH 1/5] fix(provider): restore native OpenRouter endpoint when switching back from a direct profile Selecting a built-in OpenAI-compatible profile (e.g. NVIDIA NIM via "nvidia-nim:...") calls force_apply_openai_compatible_profile_env(Some(profile)), which stamps that profile's endpoint and API key into the global JCODE_OPENROUTER_* env. Switching back to a native OpenRouter catalog model ("openrouter/owl-alpha") never cleared those overrides, so the native model was POSTed to the stale profile endpoint (https://integrate.api.nvidia.com/v1) with the wrong key and returned 404. Because the leak lives in process-global env, even brand-new sessions kept failing until the server was restarted. Fix: in the OpenRouter set_model arm, when the previous selection was a built-in direct profile (profile_id is Some) and the target is a native openrouter.ai catalog model, reset the profile env to None and rebuild the provider so it talks to the native endpoint again. Raw/custom JCODE_OPENROUTER_API_BASE endpoints (profile_id == None), @-pinned ids, and locked named profiles are deliberately left untouched. Adds a regression test that reproduces the OpenRouter -> NVIDIA -> OpenRouter switch-back and asserts the endpoint override is cleared (fails without the fix with the exact integrate.api.nvidia.com URL). --- crates/jcode-base/src/provider/mod.rs | 58 ++++++++++++ crates/jcode-base/src/provider/openrouter.rs | 8 ++ .../src/provider/tests/model_resolution.rs | 90 +++++++++++++++++++ 3 files changed, 156 insertions(+) diff --git a/crates/jcode-base/src/provider/mod.rs b/crates/jcode-base/src/provider/mod.rs index 52532fc4b..b5ec45bdb 100644 --- a/crates/jcode-base/src/provider/mod.rs +++ b/crates/jcode-base/src/provider/mod.rs @@ -612,6 +612,64 @@ impl MultiProvider { Ok(()) } ActiveProvider::OpenRouter => { + // A native OpenRouter catalog model (e.g. "openrouter/owl-alpha") + // must run on the native OpenRouter runtime. If the previous + // selection was a direct OpenAI-compatible profile (e.g. NVIDIA + // NIM via "nvidia-nim:..."), that profile's endpoint and API key + // were written into the global JCODE_OPENROUTER_* env by + // `force_apply_openai_compatible_profile_env(Some(profile))` and + // are never cleared on the way back. Without resetting them, the + // native model would be POSTed to the stale profile endpoint + // (e.g. https://integrate.api.nvidia.com/v1) with the wrong API + // key, yielding a 404. Clear the override and rebuild the + // provider so it picks up the native OpenRouter endpoint again. + // + // We must not clobber an intentionally locked named profile + // (JCODE_PROVIDER_PROFILE_ACTIVE), and only reset when the target + // is a native catalog model rather than a profile-prefixed one. + // We only reset when the *previous* selection was a built-in + // direct profile (it has a profile id, e.g. "nvidia-nim") and + // the *target* is a native openrouter.ai catalog model + // (id begins with "openrouter/"). This deliberately excludes: + // - raw/custom endpoints the user configured directly via + // JCODE_OPENROUTER_API_BASE (profile_id == None), which must + // be preserved, and + // - "@provider"-pinned or opaque model ids on forced + // OpenRouter providers. + let target_is_native_openrouter = self + .openrouter_provider() + .and_then(|existing| existing.active_profile_id().map(|id| id.to_string())) + .is_some() + && Self::openai_compatible_model_prefix(model).is_none() + && model.starts_with("openrouter/") + && openrouter_catalog_model_id(model).is_some(); + let named_profile_locked = + std::env::var_os("JCODE_PROVIDER_PROFILE_ACTIVE").is_some(); + + if target_is_native_openrouter && !named_profile_locked { + crate::logging::info( + "Resetting stale OpenAI-compatible profile env before selecting native OpenRouter model", + ); + crate::provider_catalog::force_apply_openai_compatible_profile_env(None); + if openrouter::OpenRouterProvider::has_credentials() { + match openrouter::OpenRouterProvider::new() { + Ok(rebuilt) => { + *self + .openrouter + .write() + .unwrap_or_else(|poisoned| poisoned.into_inner()) = + Some(Arc::new(rebuilt)); + } + Err(e) => { + crate::logging::warn(&format!( + "Failed to rebuild native OpenRouter provider after profile reset: {}", + e + )); + } + } + } + } + let Some(openrouter) = self.openrouter_provider() else { anyhow::bail!( "OpenRouter/OpenAI-compatible credentials not available. Set the configured API key or run `jcode login --provider openrouter` first." diff --git a/crates/jcode-base/src/provider/openrouter.rs b/crates/jcode-base/src/provider/openrouter.rs index bb93fc39b..1b5ca053b 100644 --- a/crates/jcode-base/src/provider/openrouter.rs +++ b/crates/jcode-base/src/provider/openrouter.rs @@ -1204,6 +1204,14 @@ impl OpenRouterProvider { self.supports_provider_features || self.profile_id.is_none() } + /// The active built-in OpenAI-compatible profile id (e.g. "nvidia-nim", + /// "deepseek"), if this provider was constructed from one. Returns `None` + /// for the native OpenRouter runtime and for raw/custom endpoints that the + /// user configured directly via `JCODE_OPENROUTER_API_BASE`. + pub(crate) fn active_profile_id(&self) -> Option<&str> { + self.profile_id.as_deref() + } + pub(crate) fn filter_profile_chat_supported_models(&self, models: Vec) -> Vec { let Some(profile_id) = self.profile_id.as_deref() else { return models; diff --git a/crates/jcode-base/src/provider/tests/model_resolution.rs b/crates/jcode-base/src/provider/tests/model_resolution.rs index 11d069ba3..1b794415b 100644 --- a/crates/jcode-base/src/provider/tests/model_resolution.rs +++ b/crates/jcode-base/src/provider/tests/model_resolution.rs @@ -435,6 +435,96 @@ fn test_state_space_openrouter_default_survives_switch_to_nvidia_nim() { }); } +/// Regression test for the NVIDIA-endpoint leak: selecting a direct +/// OpenAI-compatible profile (NVIDIA NIM) writes that profile's endpoint and +/// API key into the global `JCODE_OPENROUTER_*` env. Switching *back* to a +/// native OpenRouter catalog model used to leave those overrides in place, so +/// the native model was POSTed to https://integrate.api.nvidia.com/v1 with the +/// NVIDIA key and returned 404. Switching back must restore the native +/// OpenRouter runtime. +#[test] +fn test_switch_back_to_native_openrouter_restores_endpoint_after_nvidia() { + with_clean_provider_test_env(|| { + let nvidia = crate::provider_catalog::openai_compatible_profile_by_id("nvidia-nim") + .expect("NVIDIA NIM profile exists"); + + save_test_openrouter_model_cache( + "openrouter", + "https://openrouter.ai/api/v1", + &["openrouter/owl-alpha"], + ); + + crate::env::set_var("OPENROUTER_API_KEY", "test-openrouter-key"); + crate::provider_catalog::force_apply_openai_compatible_profile_env(None); + let openrouter = Arc::new( + openrouter::OpenRouterProvider::new().expect("OpenRouter provider should initialize"), + ); + openrouter + .set_model("openrouter/owl-alpha") + .expect("OpenRouter default model should be selectable"); + + let provider = MultiProvider { + claude: RwLock::new(None), + anthropic: RwLock::new(None), + openai: RwLock::new(None), + copilot_api: RwLock::new(None), + antigravity: RwLock::new(None), + gemini: RwLock::new(None), + cursor: RwLock::new(None), + bedrock: RwLock::new(None), + openrouter: RwLock::new(Some(openrouter)), + active: RwLock::new(ActiveProvider::OpenRouter), + use_claude_cli: false, + startup_notices: RwLock::new(Vec::new()), + forced_provider: None, + }; + + // 1) Switch to NVIDIA NIM. This stamps the NVIDIA endpoint + key into + // the global JCODE_OPENROUTER_* env. + crate::env::set_var(nvidia.api_key_env, "test-nvidia-key"); + provider + .set_model("nvidia-nim:nvidia/llama-3.1-nemotron-ultra-253b-v1") + .expect("NVIDIA NIM model should be selectable after OpenRouter default"); + assert_eq!( + std::env::var("JCODE_OPENROUTER_API_BASE").as_deref(), + Ok("https://integrate.api.nvidia.com/v1"), + "NVIDIA switch should set the NVIDIA endpoint in the global env" + ); + assert!( + provider + .openrouter_provider() + .expect("NVIDIA direct provider installed") + .direct_openai_compatible_route_parts() + .is_some(), + "NVIDIA NIM should be a direct OpenAI-compatible route" + ); + + // 2) Switch back to the native OpenRouter catalog model. + provider + .set_model("openrouter/owl-alpha") + .expect("native OpenRouter model should be selectable after NVIDIA"); + + assert_eq!(provider.active_provider(), ActiveProvider::OpenRouter); + assert_eq!(provider.model(), "openrouter/owl-alpha"); + + // The stale NVIDIA endpoint override must be cleared so the native + // model talks to openrouter.ai again, not integrate.api.nvidia.com. + assert!( + std::env::var("JCODE_OPENROUTER_API_BASE").is_err(), + "Switching back to a native OpenRouter model must clear the NVIDIA endpoint override; got {:?}", + std::env::var("JCODE_OPENROUTER_API_BASE") + ); + assert!( + provider + .openrouter_provider() + .expect("OpenRouter provider installed after switch-back") + .direct_openai_compatible_route_parts() + .is_none(), + "Native OpenRouter model must run on the native runtime, not a direct OpenAI-compatible profile" + ); + }); +} + #[test] fn test_set_model_accepts_bare_openai_openrouter_pin_when_openrouter_available() { with_clean_provider_test_env(|| { From 8e35c8d712b67c67cfb329b0417f135420300332 Mon Sep 17 00:00:00 2001 From: prateek310524 Date: Fri, 29 May 2026 18:40:11 +0530 Subject: [PATCH 2/5] fix(swarm): make Auto spawn reliable on headless hosts Spawned swarm agents got stuck forever at 'startup queued' because the default spawn mode was Visible: the server forks a terminal launcher (e.g. 'open -a Terminal'), the fork succeeds, but on a server/headless host (jcode serve shared server, no GUI) no interactive client ever attaches to drive the agent loop. The member sits 'running / startup queued', DMs land in an unread mailbox, and wake/resume fail because no task ever ran. Fixes: - Auto mode now verifies a visible launch actually produced a live client attachment (SwarmMember.event_txs becomes non-empty) within a short timeout; if not, it tears down the orphaned visible session and falls back to the in-process headless runner, which always executes. - register_visible_spawned_member no longer clobbers a member that a real client already attached to (avoids a race when a client connects during the Auto attach-wait window). - Default swarm_spawn_mode changed Visible -> Auto so swarm works out of the box on both desktop and headless hosts. Adds unit tests for attach detection, timeout fallback, and the non-clobber guard. --- .../jcode-app-core/src/server/comm_session.rs | 140 +++++++++++++++--- .../src/server/comm_session_tests.rs | 100 ++++++++++++- crates/jcode-base/src/config_tests.rs | 4 +- crates/jcode-config-types/src/lib.rs | 6 +- 4 files changed, 225 insertions(+), 25 deletions(-) diff --git a/crates/jcode-app-core/src/server/comm_session.rs b/crates/jcode-app-core/src/server/comm_session.rs index 263726b57..7b6699d15 100644 --- a/crates/jcode-app-core/src/server/comm_session.rs +++ b/crates/jcode-app-core/src/server/comm_session.rs @@ -191,6 +191,71 @@ fn cleanup_prepared_visible_spawn_session(session_id: &str) { } } +/// Maximum time to wait for a visibly-spawned session's interactive client to +/// attach before concluding the launch did not actually produce a live worker. +const VISIBLE_SPAWN_ATTACH_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(8); +/// Poll interval while waiting for a client attachment. +const VISIBLE_SPAWN_ATTACH_POLL: std::time::Duration = std::time::Duration::from_millis(200); + +/// Wait for a visibly-spawned session to register a live client attachment. +/// +/// A successful terminal-launch fork does not guarantee that an interactive +/// client connected (e.g. on a headless host or `jcode serve` shared server). +/// We treat the appearance of at least one live event channel +/// (`SwarmMember::event_txs`) as proof that a real client attached. Returns +/// `true` if an attachment is observed before the timeout, `false` otherwise. +async fn wait_for_live_attachment( + session_id: &str, + swarm_members: &Arc>>, +) -> bool { + wait_for_live_attachment_with( + session_id, + swarm_members, + VISIBLE_SPAWN_ATTACH_TIMEOUT, + VISIBLE_SPAWN_ATTACH_POLL, + ) + .await +} + +/// Parameterized core of [`wait_for_live_attachment`] so tests can use short +/// timeouts/poll intervals without controlling the global clock. +async fn wait_for_live_attachment_with( + session_id: &str, + swarm_members: &Arc>>, + timeout: std::time::Duration, + poll: std::time::Duration, +) -> bool { + let deadline = Instant::now() + timeout; + loop { + { + let members = swarm_members.read().await; + if let Some(member) = members.get(session_id) + && !member.event_txs.is_empty() + { + return true; + } + } + if Instant::now() >= deadline { + return false; + } + tokio::time::sleep(poll).await; + } +} + +/// Remove a visibly-spawned session that never attached a live client so its +/// swarm membership and on-disk records do not linger as a stuck "startup +/// queued" ghost after we fall back to a headless worker. +async fn cleanup_orphaned_visible_spawn_session( + session_id: &str, + swarm_members: &Arc>>, +) { + { + let mut members = swarm_members.write().await; + members.remove(session_id); + } + cleanup_prepared_visible_spawn_session(session_id); +} + fn prepare_visible_spawn_session( working_dir: Option<&str>, model_override: Option<&str>, @@ -262,26 +327,36 @@ async fn register_visible_spawned_member( { let mut members = swarm_members.write().await; - members.insert( - session_id.to_string(), - SwarmMember { - session_id: session_id.to_string(), - event_tx, - event_txs: HashMap::new(), - working_dir: working_dir.map(PathBuf::from), - swarm_id: Some(swarm_id.to_string()), - swarm_enabled: true, - status, - detail, - friendly_name: Some(friendly_name), - report_back_to_session_id: report_back_to_session_id.map(str::to_string), - latest_completion_report: None, - role: "agent".to_string(), - joined_at: now, - last_status_change: now, - is_headless: false, - }, - ); + // If a real interactive client has already attached and registered this + // member (possible under Auto mode, where we wait for an attachment + // before registering), do not clobber its live event channels. + if members + .get(session_id) + .is_some_and(|member| !member.event_txs.is_empty()) + { + // Client already owns this member; nothing to register. + } else { + members.insert( + session_id.to_string(), + SwarmMember { + session_id: session_id.to_string(), + event_tx, + event_txs: HashMap::new(), + working_dir: working_dir.map(PathBuf::from), + swarm_id: Some(swarm_id.to_string()), + swarm_enabled: true, + status, + detail, + friendly_name: Some(friendly_name), + report_back_to_session_id: report_back_to_session_id.map(str::to_string), + latest_completion_report: None, + role: "agent".to_string(), + joined_at: now, + last_status_change: now, + is_headless: false, + }, + ); + } } { @@ -371,6 +446,31 @@ pub(super) async fn spawn_swarm_agent( ), }; + // In Auto mode a visible launch only *forks* a terminal launcher; it does + // not guarantee that an interactive client actually attached and started + // driving the session. On a server/headless host (no GUI terminal, or a + // `jcode serve` shared server) the launcher fork succeeds but no client + // ever connects, leaving the agent stuck at "startup queued" forever. + // + // To make Auto reliable, after a "successful" visible launch we wait a + // short window for the spawned session to register a live client + // attachment. If none arrives we tear down the orphaned visible session and + // fall back to the in-process headless runner (which always executes). + let visible_spawn = match (resolved_spawn_mode, visible_spawn) { + (SwarmSpawnMode::Auto, Ok((candidate_session_id, true))) => { + if wait_for_live_attachment(&candidate_session_id, swarm_members).await { + Ok((candidate_session_id, true)) + } else { + crate::logging::warn(&format!( + "Auto swarm spawn: visible client did not attach within timeout for session {candidate_session_id}; falling back to headless" + )); + cleanup_orphaned_visible_spawn_session(&candidate_session_id, swarm_members).await; + Ok((candidate_session_id, false)) + } + } + (_, other) => other, + }; + let (new_session_id, is_headless_fallback) = match visible_spawn { Ok((new_session_id, true)) => Ok((new_session_id, false)), Ok((_, false)) | Err(_) => { diff --git a/crates/jcode-app-core/src/server/comm_session_tests.rs b/crates/jcode-app-core/src/server/comm_session_tests.rs index 1c87492a1..af29f860f 100644 --- a/crates/jcode-app-core/src/server/comm_session_tests.rs +++ b/crates/jcode-app-core/src/server/comm_session_tests.rs @@ -1,7 +1,8 @@ use super::{ ensure_spawn_coordinator_swarm, prepare_visible_spawn_session, register_visible_spawned_member, require_coordinator_swarm, resolve_spawn_working_dir, resolve_stop_target_session, - resolve_swarm_spawn_model_and_provider, swarm_stop_allowed_by_owner, + resolve_swarm_spawn_model_and_provider, swarm_stop_allowed_by_owner, wait_for_live_attachment, + wait_for_live_attachment_with, }; use crate::agent::Agent; use crate::message::{Message, ToolDefinition}; @@ -231,6 +232,103 @@ async fn register_visible_spawned_member_marks_startup_as_running() { })); } +#[tokio::test] +async fn wait_for_live_attachment_times_out_when_no_client_attaches() { + let swarm_members = Arc::new(RwLock::new(HashMap::new())); + // Member exists but has no live event channels (the visible-launch ghost). + { + let (ghost, _rx) = member("child-ghost", Some("swarm-1"), "agent"); + swarm_members + .write() + .await + .insert("child-ghost".to_string(), ghost); + } + // Use short timeout/poll so the test is fast; a session that never attaches + // must return false. + let attached = wait_for_live_attachment_with( + "child-ghost", + &swarm_members, + std::time::Duration::from_millis(120), + std::time::Duration::from_millis(20), + ) + .await; + assert!( + !attached, + "no client ever attached, so wait_for_live_attachment must return false" + ); +} + +#[tokio::test] +async fn wait_for_live_attachment_detects_attached_client() { + let swarm_members = Arc::new(RwLock::new(HashMap::new())); + let (client_tx, _client_rx_keep_alive) = mpsc::unbounded_channel(); + { + let (mut attached, _rx) = member("child-live", Some("swarm-1"), "agent"); + attached + .event_txs + .insert("conn-1".to_string(), client_tx); + swarm_members + .write() + .await + .insert("child-live".to_string(), attached); + } + let attached = wait_for_live_attachment("child-live", &swarm_members).await; + assert!( + attached, + "a member with a live event channel must be detected as attached" + ); +} + +#[tokio::test] +async fn register_visible_spawned_member_does_not_clobber_attached_client() { + let swarm_members = Arc::new(RwLock::new(HashMap::new())); + let swarms_by_id = Arc::new(RwLock::new(HashMap::new())); + let event_history = Arc::new(RwLock::new(VecDeque::new())); + let event_counter = Arc::new(AtomicU64::new(0)); + let (swarm_event_tx, _swarm_event_rx) = broadcast::channel(8); + + // Simulate a real interactive client having already attached and registered + // this member with a live event channel before registration runs. Keep the + // receiver alive so the sender is not considered closed (and pruned). + let (client_tx, _client_rx_keep_alive) = mpsc::unbounded_channel(); + { + let (mut attached, _rx) = member("child-1", Some("swarm-1"), "agent"); + attached + .event_txs + .insert("conn-1".to_string(), client_tx); + attached.status = "ready".to_string(); + swarm_members + .write() + .await + .insert("child-1".to_string(), attached); + } + + register_visible_spawned_member( + "child-1", + "swarm-1", + Some("/tmp/worktree"), + true, + Some("owner"), + &swarm_members, + &swarms_by_id, + &event_history, + &event_counter, + &swarm_event_tx, + ) + .await; + + let members = swarm_members.read().await; + let member = members.get("child-1").expect("member should still exist"); + assert!( + !member.event_txs.is_empty(), + "registration must not clobber the live client's event channels" + ); + assert_eq!( + member.status, "ready", + "registration must not reset the attached client's status to startup queued" + ); +} + #[test] fn prepare_visible_spawn_session_persists_startup_before_launch() { let _guard = crate::storage::lock_test_env(); diff --git a/crates/jcode-base/src/config_tests.rs b/crates/jcode-base/src/config_tests.rs index f10d61844..c9de77893 100644 --- a/crates/jcode-base/src/config_tests.rs +++ b/crates/jcode-base/src/config_tests.rs @@ -35,10 +35,10 @@ fn preserve_reasoning_context_defaults_to_enabled() { } #[test] -fn swarm_spawn_mode_defaults_to_visible() { +fn swarm_spawn_mode_defaults_to_auto() { assert_eq!( Config::default().agents.swarm_spawn_mode, - SwarmSpawnMode::Visible + SwarmSpawnMode::Auto ); } diff --git a/crates/jcode-config-types/src/lib.rs b/crates/jcode-config-types/src/lib.rs index 9be027f17..14a3f13b7 100644 --- a/crates/jcode-config-types/src/lib.rs +++ b/crates/jcode-config-types/src/lib.rs @@ -370,11 +370,13 @@ pub struct AgentsConfig { #[serde(rename_all = "lowercase")] pub enum SwarmSpawnMode { /// Open a visible/headed terminal window. This preserves historical behavior. - #[default] Visible, /// Create the worker in-process without opening a terminal window. Headless, - /// Try visible first and fall back to headless if a window cannot be opened. + /// Try visible first and fall back to headless if a window cannot be opened + /// or if a launched window never attaches a live client. This is the + /// default because it is reliable on both desktop and headless/server hosts. + #[default] Auto, } From 1a9bb87794bac08de8a5c9b59588caaa1216aaec Mon Sep 17 00:00:00 2001 From: prateek310524 Date: Fri, 29 May 2026 19:32:48 +0530 Subject: [PATCH 3/5] chore(scripts): add sync_upstream.sh to rebase fork fixes onto 1jehuang/master Provides a one-command, safe way to pull upstream (origin/master) updates timely while keeping local fix commits on top. Tags a backup before rewriting history and aborts cleanly on conflict. --- scripts/sync_upstream.sh | 78 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100755 scripts/sync_upstream.sh diff --git a/scripts/sync_upstream.sh b/scripts/sync_upstream.sh new file mode 100755 index 000000000..37403be23 --- /dev/null +++ b/scripts/sync_upstream.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# sync_upstream.sh +# ----------------------------------------------------------------------------- +# Pull 1jehuang/jcode's latest (origin/master) and replay YOUR local fix commits +# on top of it, then rebuild + install + push to your fork. +# +# Why this exists: +# `jcode update` runs `git pull --ff-only` against your branch's upstream +# (your fork). With your own commits on top, you can never fast-forward to +# 1jehuang's master directly. This script does the safe rebase for you so you +# get upstream updates *timely* while keeping your fixes. +# +# Usage: +# scripts/sync_upstream.sh # rebase onto origin/master, build, install +# scripts/sync_upstream.sh --no-build # just rebase + push, skip build/install +# +# It is safe: it tags a backup before rewriting history and aborts a rebase on +# conflict instead of leaving you in a broken state. +# ----------------------------------------------------------------------------- +set -euo pipefail + +repo_root="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" +cd "$repo_root" + +UPSTREAM_REMOTE="${JCODE_UPSTREAM_REMOTE:-origin}" # 1jehuang/jcode +UPSTREAM_BRANCH="${JCODE_UPSTREAM_BRANCH:-master}" +FORK_REMOTE="${JCODE_FORK_REMOTE:-fork}" # your fork +do_build=1 +[[ "${1:-}" == "--no-build" ]] && do_build=0 + +branch="$(git rev-parse --abbrev-ref HEAD)" +echo "==> Current branch: $branch" + +if [[ -n "$(git status --porcelain)" ]]; then + echo "ERROR: working tree is dirty. Commit or stash first." >&2 + git status --short >&2 + exit 1 +fi + +backup_tag="sync-backup-$(date +%Y%m%d%H%M%S)" +git tag -f "$backup_tag" HEAD >/dev/null +echo "==> Backup tag: $backup_tag -> $(git rev-parse --short HEAD)" + +echo "==> Fetching $UPSTREAM_REMOTE ($UPSTREAM_BRANCH) ..." +git fetch "$UPSTREAM_REMOTE" "$UPSTREAM_BRANCH" --quiet + +ahead="$(git rev-list --count "$UPSTREAM_REMOTE/$UPSTREAM_BRANCH"..HEAD)" +behind="$(git rev-list --count HEAD.."$UPSTREAM_REMOTE/$UPSTREAM_BRANCH")" +echo "==> You have $ahead local commit(s); upstream is $behind commit(s) ahead." + +if [[ "$behind" -eq 0 ]]; then + echo "==> Already up to date with $UPSTREAM_REMOTE/$UPSTREAM_BRANCH. Nothing to rebase." +else + echo "==> Rebasing your $ahead commit(s) onto $UPSTREAM_REMOTE/$UPSTREAM_BRANCH ..." + if ! git rebase "$UPSTREAM_REMOTE/$UPSTREAM_BRANCH"; then + echo "ERROR: rebase hit conflicts. Aborting and restoring your previous state." >&2 + git rebase --abort || true + echo "Your branch is unchanged. Resolve manually with:" >&2 + echo " git rebase $UPSTREAM_REMOTE/$UPSTREAM_BRANCH" >&2 + echo "Backup is at tag: $backup_tag" >&2 + exit 1 + fi +fi + +if [[ "$do_build" -eq 1 ]]; then + echo "==> Building (release) ..." + cargo build --release --bin jcode + echo "==> Installing into ~/.jcode builds ..." + JCODE_RELEASE_PROFILE=release bash "$repo_root/scripts/install_release.sh" --fast +fi + +echo "==> Force-pushing rebased branch to $FORK_REMOTE/$branch ..." +git push "$FORK_REMOTE" "+HEAD:$branch" + +echo "" +echo "Done. You now have $UPSTREAM_REMOTE/$UPSTREAM_BRANCH + your fixes on top." +echo "Backup of the pre-sync state is tag: $backup_tag" +echo "Restart jcode (pkill -f 'jcode.*serve' then launch) to run the new binary." From e60908322a08602678abdb367efcaa0aa795dafe Mon Sep 17 00:00:00 2001 From: prateek310524 Date: Fri, 29 May 2026 19:53:34 +0530 Subject: [PATCH 4/5] fix(swarm): skip orphan visible spawn on headless server + fix streaming "action missing" label MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two swarm UX bugs surfaced when running a research swarm on a headless \`jcode serve\` shared server: 1. Auto spawn opened a useless bare-jcode Terminal window per child and then waited out the 8s attach timeout before falling back to headless. Now Auto checks up-front whether the requesting coordinator itself has a live interactive client (event_txs). If not (headless server), it skips the visible attempt entirely and spawns the child headless immediately, eliminating the orphan window and the per-spawn delay. The post-launch wait_for_live_attachment safety net is retained for the case where an attached coordinator opens a child window that fails to attach. 2. The TUI rendered swarm/memory/initiative/side_panel tool calls as "action missing" (with a warning logged) whenever the streamed tool input had not yet populated its arguments (empty object). This flashed "swarm action missing" for every spawned agent. Added tool_input_is_unpopulated + resolve_tool_action_for_display so an unpopulated/streaming call shows a neutral "…" without logging, while a genuinely malformed (populated-but-action-less) call still surfaces the diagnostic. Adds 6 unit tests (3 in jcode-app-core, 3 in jcode-tui). --- .../jcode-app-core/src/server/comm_session.rs | 44 ++++++++++++ .../src/server/comm_session_tests.rs | 51 +++++++++++++- crates/jcode-tui/src/tui/ui_tests/tools.rs | 55 +++++++++++++++ crates/jcode-tui/src/tui/ui_tools.rs | 69 ++++++++++--------- 4 files changed, 185 insertions(+), 34 deletions(-) diff --git a/crates/jcode-app-core/src/server/comm_session.rs b/crates/jcode-app-core/src/server/comm_session.rs index 7b6699d15..22dd6aae5 100644 --- a/crates/jcode-app-core/src/server/comm_session.rs +++ b/crates/jcode-app-core/src/server/comm_session.rs @@ -217,6 +217,26 @@ async fn wait_for_live_attachment( .await } +/// Whether a session currently has at least one live interactive client +/// attached (i.e. an active TUI is draining its event stream). +/// +/// Used as a fast, deployment-agnostic proxy for "can a visible spawn actually +/// attach a driver?". If the coordinator that requested the spawn is itself +/// running headless inside a `jcode serve` shared server (no attached TUI), +/// then opening a fresh Terminal for a child will not produce a live worker in +/// this deployment, so Auto mode should skip the visible attempt entirely +/// rather than open an orphan window and wait out the attach timeout. +async fn session_has_live_attachment( + session_id: &str, + swarm_members: &Arc>>, +) -> bool { + let members = swarm_members.read().await; + members + .get(session_id) + .map(|member| !member.event_txs.is_empty()) + .unwrap_or(false) +} + /// Parameterized core of [`wait_for_live_attachment`] so tests can use short /// timeouts/poll intervals without controlling the global clock. async fn wait_for_live_attachment_with( @@ -434,8 +454,32 @@ pub(super) async fn spawn_swarm_agent( .as_deref() .map(append_swarm_completion_report_instructions); + // In Auto mode, decide up-front whether a visible spawn can possibly work. + // A visible spawn only helps when an interactive client can attach and drive + // the child. We use the requesting coordinator's own attachment state as a + // fast, deployment-agnostic proxy: if the coordinator is itself running + // headless inside a `jcode serve` shared server (no attached TUI), opening a + // fresh Terminal for the child would just orphan a window and stall on the + // attach timeout, so we skip straight to the in-process headless runner. + let auto_should_try_visible = match resolved_spawn_mode { + SwarmSpawnMode::Auto => { + let coordinator_attached = + session_has_live_attachment(req_session_id, swarm_members).await; + if !coordinator_attached { + crate::logging::info( + "Auto swarm spawn: coordinator has no live interactive client (headless server); spawning child headless directly", + ); + } + coordinator_attached + } + _ => true, + }; + let visible_spawn = match resolved_spawn_mode { SwarmSpawnMode::Headless => Err(anyhow::anyhow!("headless spawn requested")), + SwarmSpawnMode::Auto if !auto_should_try_visible => { + Err(anyhow::anyhow!("auto spawn: headless server, skipping visible")) + } SwarmSpawnMode::Visible | SwarmSpawnMode::Auto => prepare_visible_spawn_session( resolved_working_dir.as_deref(), spawn_model.as_deref(), diff --git a/crates/jcode-app-core/src/server/comm_session_tests.rs b/crates/jcode-app-core/src/server/comm_session_tests.rs index af29f860f..77cf47f86 100644 --- a/crates/jcode-app-core/src/server/comm_session_tests.rs +++ b/crates/jcode-app-core/src/server/comm_session_tests.rs @@ -1,8 +1,8 @@ use super::{ ensure_spawn_coordinator_swarm, prepare_visible_spawn_session, register_visible_spawned_member, require_coordinator_swarm, resolve_spawn_working_dir, resolve_stop_target_session, - resolve_swarm_spawn_model_and_provider, swarm_stop_allowed_by_owner, wait_for_live_attachment, - wait_for_live_attachment_with, + resolve_swarm_spawn_model_and_provider, session_has_live_attachment, + swarm_stop_allowed_by_owner, wait_for_live_attachment, wait_for_live_attachment_with, }; use crate::agent::Agent; use crate::message::{Message, ToolDefinition}; @@ -279,6 +279,53 @@ async fn wait_for_live_attachment_detects_attached_client() { ); } +#[tokio::test] +async fn session_has_live_attachment_false_for_headless_coordinator() { + let swarm_members = Arc::new(RwLock::new(HashMap::new())); + { + // Coordinator member present but with no live event channels: this is a + // coordinator running headless inside a `jcode serve` shared server. + let (coordinator, _rx) = member("coordinator-headless", Some("swarm-1"), "coordinator"); + swarm_members + .write() + .await + .insert("coordinator-headless".to_string(), coordinator); + } + assert!( + !session_has_live_attachment("coordinator-headless", &swarm_members).await, + "a coordinator with no live event channels must report no attachment so Auto skips the visible spawn" + ); +} + +#[tokio::test] +async fn session_has_live_attachment_false_for_unknown_session() { + let swarm_members = Arc::new(RwLock::new(HashMap::new())); + assert!( + !session_has_live_attachment("does-not-exist", &swarm_members).await, + "an unknown session must report no attachment" + ); +} + +#[tokio::test] +async fn session_has_live_attachment_true_for_attached_coordinator() { + let swarm_members = Arc::new(RwLock::new(HashMap::new())); + let (client_tx, _client_rx_keep_alive) = mpsc::unbounded_channel(); + { + let (mut coordinator, _rx) = member("coordinator-live", Some("swarm-1"), "coordinator"); + coordinator + .event_txs + .insert("conn-1".to_string(), client_tx); + swarm_members + .write() + .await + .insert("coordinator-live".to_string(), coordinator); + } + assert!( + session_has_live_attachment("coordinator-live", &swarm_members).await, + "a coordinator with a live event channel must report an attachment so Auto tries the visible spawn" + ); +} + #[tokio::test] async fn register_visible_spawned_member_does_not_clobber_attached_client() { let swarm_members = Arc::new(RwLock::new(HashMap::new())); diff --git a/crates/jcode-tui/src/tui/ui_tests/tools.rs b/crates/jcode-tui/src/tui/ui_tests/tools.rs index 7c4f29e62..02be4f7a4 100644 --- a/crates/jcode-tui/src/tui/ui_tests/tools.rs +++ b/crates/jcode-tui/src/tui/ui_tests/tools.rs @@ -348,6 +348,61 @@ fn test_render_tool_message_batch_all_failed_marks_all_children_failed() { ); } +#[test] +fn test_swarm_summary_shows_placeholder_while_streaming_before_args_arrive() { + // Streaming providers surface a tool call as soon as the name is known, + // before any argument tokens arrive (input is an empty object). The summary + // must not flash "action missing"; it should show a neutral placeholder. + for input in [ + serde_json::json!({}), + serde_json::Value::Null, + serde_json::json!(""), + ] { + let tool = ToolCall { + id: "call_swarm_streaming".to_string(), + name: "swarm".to_string(), + input, + intent: None, + }; + let summary = tools_ui::get_tool_summary_with_budget(&tool, 50, Some(40)); + assert!( + !summary.contains("action missing"), + "streaming swarm call must not show 'action missing'; summary={summary:?}" + ); + assert_eq!(summary, "…", "summary={summary:?}"); + } +} + +#[test] +fn test_swarm_summary_reports_missing_action_when_args_present_but_no_action() { + // If the call has real arguments but genuinely lacks `action`, the + // diagnostic label is still surfaced (this is a real malformed call). + let tool = ToolCall { + id: "call_swarm_no_action".to_string(), + name: "swarm".to_string(), + input: serde_json::json!({ "target_session": "abc" }), + intent: None, + }; + let summary = tools_ui::get_tool_summary_with_budget(&tool, 50, Some(40)); + assert!( + summary.contains("action missing"), + "a populated-but-action-less swarm call should surface the diagnostic; summary={summary:?}" + ); +} + +#[test] +fn test_swarm_summary_renders_spawn_action_with_prompt() { + let tool = ToolCall { + id: "call_swarm_spawn".to_string(), + name: "swarm".to_string(), + input: serde_json::json!({ "action": "spawn", "message": "do the thing" }), + intent: None, + }; + let summary = tools_ui::get_tool_summary_with_budget(&tool, 50, Some(40)); + assert!(summary.starts_with("spawn"), "summary={summary:?}"); + assert!(!summary.contains("action missing"), "summary={summary:?}"); +} + #[test] fn test_tool_summary_read_supports_start_line_end_line() { let tool = ToolCall { diff --git a/crates/jcode-tui/src/tui/ui_tools.rs b/crates/jcode-tui/src/tui/ui_tools.rs index 76dbc12c7..19a26ac2a 100644 --- a/crates/jcode-tui/src/tui/ui_tools.rs +++ b/crates/jcode-tui/src/tui/ui_tools.rs @@ -59,6 +59,39 @@ fn infer_selfdev_action_from_display_text(text: Option<&str>) -> Option<&'static } } +/// Returns true when a tool call's input has no streamed arguments yet. +/// +/// Streaming providers surface a tool call as soon as the tool *name* is known, +/// before any argument tokens arrive, so `input` is momentarily an empty object +/// (`{}`), `null`, or otherwise non-populated. Action-keyed summaries must treat +/// this as "still streaming" rather than "field missing" to avoid flashing a +/// bogus "action missing" label (and spamming warnings) for every spawned agent. +fn tool_input_is_unpopulated(tool: &ToolCall) -> bool { + match &tool.input { + serde_json::Value::Null => true, + serde_json::Value::Object(map) => map.is_empty(), + serde_json::Value::String(s) => s.trim().is_empty(), + _ => false, + } +} + +/// Resolves a tool's `action` field for display. +/// +/// When the field is present, returns it. When the call is still streaming +/// (no args yet), returns a neutral placeholder without logging. Only when the +/// call has real arguments but genuinely lacks `action` do we log and surface +/// the diagnostic "action missing" label. +fn resolve_tool_action_for_display<'a>(tool: &'a ToolCall, display_context: &str) -> &'a str { + if let Some(action) = tool.input.get("action").and_then(|v| v.as_str()) { + return action; + } + if tool_input_is_unpopulated(tool) { + return "…"; + } + log_missing_tool_action_for_display(tool, display_context); + "action missing" +} + fn log_missing_tool_action_for_display(tool: &ToolCall, display_context: &str) { log_missing_tool_field_for_display(tool, display_context, "action"); } @@ -307,14 +340,7 @@ fn truncate_swarm_text(value: &str, max_width: usize) -> String { } fn summarize_swarm_tool_action(tool: &ToolCall, bounded: &dyn Fn(usize) -> usize) -> String { - let action = tool - .input - .get("action") - .and_then(|v| v.as_str()) - .unwrap_or_else(|| { - log_missing_tool_action_for_display(tool, "swarm"); - "action missing" - }); + let action = resolve_tool_action_for_display(tool, "swarm"); let target = tool .input .get("to_session") @@ -1134,14 +1160,7 @@ pub(super) fn get_tool_summary_with_budget( }) .unwrap_or_default(), "memory" => { - let action = tool - .input - .get("action") - .and_then(|v| v.as_str()) - .unwrap_or_else(|| { - log_missing_tool_action_for_display(tool, "memory"); - "action missing" - }); + let action = resolve_tool_action_for_display(tool, "memory"); match action { "remember" => { let content = tool @@ -1202,14 +1221,7 @@ pub(super) fn get_tool_summary_with_budget( } } "initiative" => { - let action = tool - .input - .get("action") - .and_then(|v| v.as_str()) - .unwrap_or_else(|| { - log_missing_tool_action_for_display(tool, "initiative"); - "action missing" - }); + let action = resolve_tool_action_for_display(tool, "initiative"); let id = tool.input.get("id").and_then(|v| v.as_str()); let title = tool.input.get("title").and_then(|v| v.as_str()); match (action, id, title) { @@ -1249,14 +1261,7 @@ pub(super) fn get_tool_summary_with_budget( action.to_string() } "side_panel" => { - let action = tool - .input - .get("action") - .and_then(|v| v.as_str()) - .unwrap_or_else(|| { - log_missing_tool_action_for_display(tool, "side_panel"); - "action missing" - }); + let action = resolve_tool_action_for_display(tool, "side_panel"); let target = tool .input .get("title") From 144e2252b297919304805bb4529944b218b869ee Mon Sep 17 00:00:00 2001 From: prateek310524 Date: Fri, 29 May 2026 20:08:29 +0530 Subject: [PATCH 5/5] fix(swarm): use controlling-TTY check to skip visible spawn on detached serve server The previous Auto gate keyed off the coordinator session having a live event channel, but an interactive coordinator attached to a *detached* \`jcode serve\` shared server still reports as attached, so visible spawns were still attempted (orphan window + 8s attach-timeout per child). Switch the signal to whether THIS process has a controlling TTY: a detached \`jcode serve\` server has none (ps TTY \`??\`), while an interactive jcode/desktop run by a user does. When detached, Auto spawns children headless directly. Add JCODE_SWARM_FORCE_VISIBLE=1 escape hatch. session_has_live_attachment is retained under #[cfg(test)]. Adds running_as_detached_server_respects_force_visible_override test. --- .../jcode-app-core/src/server/comm_session.rs | 75 ++++++++++++++----- .../src/server/comm_session_tests.rs | 26 ++++++- 2 files changed, 80 insertions(+), 21 deletions(-) diff --git a/crates/jcode-app-core/src/server/comm_session.rs b/crates/jcode-app-core/src/server/comm_session.rs index 22dd6aae5..09e751ef3 100644 --- a/crates/jcode-app-core/src/server/comm_session.rs +++ b/crates/jcode-app-core/src/server/comm_session.rs @@ -218,14 +218,9 @@ async fn wait_for_live_attachment( } /// Whether a session currently has at least one live interactive client -/// attached (i.e. an active TUI is draining its event stream). -/// -/// Used as a fast, deployment-agnostic proxy for "can a visible spawn actually -/// attach a driver?". If the coordinator that requested the spawn is itself -/// running headless inside a `jcode serve` shared server (no attached TUI), -/// then opening a fresh Terminal for a child will not produce a live worker in -/// this deployment, so Auto mode should skip the visible attempt entirely -/// rather than open an orphan window and wait out the attach timeout. +/// attached (i.e. an active TUI is draining its event stream). Retained as a +/// tested building block for attachment-state checks. +#[cfg(test)] async fn session_has_live_attachment( session_id: &str, swarm_members: &Arc>>, @@ -237,6 +232,48 @@ async fn session_has_live_attachment( .unwrap_or(false) } +/// Whether this process is running as a detached server (no controlling TTY). +/// +/// A persistent `jcode serve` shared server is started detached and has no +/// controlling terminal (`ps` shows TTY `??`). In that mode, optimistically +/// opening a fresh Terminal window for a swarm child and hoping it attaches +/// back over the socket just orphans an empty window and stalls on the attach +/// timeout, because nothing wires that interactive client to the spawned +/// session. An interactive `jcode` (or desktop app) run by a user *does* have a +/// controlling TTY. So "no controlling TTY" is a reliable, deployment-agnostic +/// signal that Auto should spawn swarm children headless directly. +/// +/// Overridable via `JCODE_SWARM_FORCE_VISIBLE=1` for power users on exotic +/// setups who really do want the visible path attempted regardless. +fn running_as_detached_server() -> bool { + if std::env::var("JCODE_SWARM_FORCE_VISIBLE") + .map(|v| v == "1" || v.eq_ignore_ascii_case("true")) + .unwrap_or(false) + { + return false; + } + !process_has_controlling_tty() +} + +/// Returns true if the current process has a controlling terminal. +#[cfg(unix)] +fn process_has_controlling_tty() -> bool { + // Opening /dev/tty succeeds only when the process has a controlling + // terminal; on a detached server it fails with ENXIO/ENODEV. + std::fs::OpenOptions::new() + .read(true) + .write(true) + .open("/dev/tty") + .is_ok() +} + +#[cfg(not(unix))] +fn process_has_controlling_tty() -> bool { + // On non-unix we conservatively assume an interactive context; the + // post-launch attach-wait still guards against a failed attach. + true +} + /// Parameterized core of [`wait_for_live_attachment`] so tests can use short /// timeouts/poll intervals without controlling the global clock. async fn wait_for_live_attachment_with( @@ -455,22 +492,20 @@ pub(super) async fn spawn_swarm_agent( .map(append_swarm_completion_report_instructions); // In Auto mode, decide up-front whether a visible spawn can possibly work. - // A visible spawn only helps when an interactive client can attach and drive - // the child. We use the requesting coordinator's own attachment state as a - // fast, deployment-agnostic proxy: if the coordinator is itself running - // headless inside a `jcode serve` shared server (no attached TUI), opening a - // fresh Terminal for the child would just orphan a window and stall on the - // attach timeout, so we skip straight to the in-process headless runner. + // A visible spawn only helps when a freshly-opened Terminal can attach and + // drive the child. When this process is a detached `jcode serve` shared + // server (no controlling TTY), opening that window just orphans it and + // stalls on the attach timeout, so we skip straight to the in-process + // headless runner. See `running_as_detached_server`. let auto_should_try_visible = match resolved_spawn_mode { SwarmSpawnMode::Auto => { - let coordinator_attached = - session_has_live_attachment(req_session_id, swarm_members).await; - if !coordinator_attached { + let detached_server = running_as_detached_server(); + if detached_server { crate::logging::info( - "Auto swarm spawn: coordinator has no live interactive client (headless server); spawning child headless directly", + "Auto swarm spawn: detached server (no controlling TTY); spawning child headless directly", ); } - coordinator_attached + !detached_server } _ => true, }; @@ -478,7 +513,7 @@ pub(super) async fn spawn_swarm_agent( let visible_spawn = match resolved_spawn_mode { SwarmSpawnMode::Headless => Err(anyhow::anyhow!("headless spawn requested")), SwarmSpawnMode::Auto if !auto_should_try_visible => { - Err(anyhow::anyhow!("auto spawn: headless server, skipping visible")) + Err(anyhow::anyhow!("auto spawn: detached server, skipping visible")) } SwarmSpawnMode::Visible | SwarmSpawnMode::Auto => prepare_visible_spawn_session( resolved_working_dir.as_deref(), diff --git a/crates/jcode-app-core/src/server/comm_session_tests.rs b/crates/jcode-app-core/src/server/comm_session_tests.rs index 77cf47f86..aeb50a84b 100644 --- a/crates/jcode-app-core/src/server/comm_session_tests.rs +++ b/crates/jcode-app-core/src/server/comm_session_tests.rs @@ -1,7 +1,7 @@ use super::{ ensure_spawn_coordinator_swarm, prepare_visible_spawn_session, register_visible_spawned_member, require_coordinator_swarm, resolve_spawn_working_dir, resolve_stop_target_session, - resolve_swarm_spawn_model_and_provider, session_has_live_attachment, + resolve_swarm_spawn_model_and_provider, running_as_detached_server, session_has_live_attachment, swarm_stop_allowed_by_owner, wait_for_live_attachment, wait_for_live_attachment_with, }; use crate::agent::Agent; @@ -326,6 +326,30 @@ async fn session_has_live_attachment_true_for_attached_coordinator() { ); } +#[test] +fn running_as_detached_server_respects_force_visible_override() { + // The JCODE_SWARM_FORCE_VISIBLE escape hatch must force the visible path + // (i.e. report "not a detached server") regardless of TTY state. This is + // the only branch we can assert deterministically across CI/interactive + // environments without controlling the process's controlling terminal. + let prev = std::env::var("JCODE_SWARM_FORCE_VISIBLE").ok(); + // SAFETY: single-threaded test; restored below. + unsafe { std::env::set_var("JCODE_SWARM_FORCE_VISIBLE", "1") }; + assert!( + !running_as_detached_server(), + "JCODE_SWARM_FORCE_VISIBLE=1 must force the visible path" + ); + unsafe { std::env::set_var("JCODE_SWARM_FORCE_VISIBLE", "true") }; + assert!( + !running_as_detached_server(), + "JCODE_SWARM_FORCE_VISIBLE=true must force the visible path" + ); + match prev { + Some(value) => unsafe { std::env::set_var("JCODE_SWARM_FORCE_VISIBLE", value) }, + None => unsafe { std::env::remove_var("JCODE_SWARM_FORCE_VISIBLE") }, + } +} + #[tokio::test] async fn register_visible_spawned_member_does_not_clobber_attached_client() { let swarm_members = Arc::new(RwLock::new(HashMap::new()));