Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
219 changes: 199 additions & 20 deletions crates/jcode-app-core/src/server/comm_session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,128 @@ fn cleanup_prepared_visible_spawn_session(session_id: &str) {
}
}

/// Maximum time to wait for a visibly-spawned session's interactive client to
/// attach before concluding the launch did not actually produce a live worker.
///
/// Passed as the timeout by `wait_for_live_attachment`.
const VISIBLE_SPAWN_ATTACH_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(8);
/// Poll interval while waiting for a client attachment.
///
/// Passed as the poll interval by `wait_for_live_attachment`; the swarm member
/// map is re-checked once per interval until the timeout above elapses.
const VISIBLE_SPAWN_ATTACH_POLL: std::time::Duration = std::time::Duration::from_millis(200);

/// Wait, using the default attach window, for the visibly-spawned session
/// `session_id` to show a live client attachment.
///
/// A terminal-launch fork that succeeds is not evidence that an interactive
/// client ever connected (e.g. on a headless host or a `jcode serve` shared
/// server). At least one entry in the session's `SwarmMember::event_txs` is
/// treated as proof that a real client attached.
///
/// This is a convenience wrapper around [`wait_for_live_attachment_with`]
/// that supplies [`VISIBLE_SPAWN_ATTACH_TIMEOUT`] and
/// [`VISIBLE_SPAWN_ATTACH_POLL`].
///
/// Returns `true` if an attachment is observed before the timeout, `false`
/// otherwise.
async fn wait_for_live_attachment(
    session_id: &str,
    swarm_members: &Arc<RwLock<HashMap<String, SwarmMember>>>,
) -> bool {
    let (timeout, poll) = (VISIBLE_SPAWN_ATTACH_TIMEOUT, VISIBLE_SPAWN_ATTACH_POLL);
    wait_for_live_attachment_with(session_id, swarm_members, timeout, poll).await
}

/// Report whether `session_id` currently has at least one live interactive
/// client attached.
///
/// "Attached" means the session's entry in `swarm_members` exists and its
/// `event_txs` map is non-empty. A session that is not registered at all is
/// reported as not attached. Only compiled for tests, where it serves as a
/// building block for attachment-state checks.
#[cfg(test)]
async fn session_has_live_attachment(
    session_id: &str,
    swarm_members: &Arc<RwLock<HashMap<String, SwarmMember>>>,
) -> bool {
    let registry = swarm_members.read().await;
    match registry.get(session_id) {
        Some(entry) => !entry.event_txs.is_empty(),
        None => false,
    }
}

/// Decide whether this process should be treated as a detached server, i.e.
/// one without a controlling TTY.
///
/// The intent, per the original design notes: a persistent `jcode serve`
/// shared server is started detached and has no controlling terminal, whereas
/// an interactive `jcode` (or desktop app) run by a user does. Opening a fresh
/// Terminal window for a swarm child from a detached server just orphans an
/// empty window and stalls on the attach timeout, so Auto mode uses this
/// signal to spawn swarm children headless directly.
///
/// Setting `JCODE_SWARM_FORCE_VISIBLE` to `1` or (case-insensitively) `true`
/// overrides the check and makes this function return `false`, so the visible
/// path is attempted regardless. When the override is active the TTY probe is
/// not performed at all.
fn running_as_detached_server() -> bool {
    let force_visible = match std::env::var("JCODE_SWARM_FORCE_VISIBLE") {
        Ok(value) => value == "1" || value.eq_ignore_ascii_case("true"),
        // Unset (or not valid Unicode) means no override was requested.
        Err(_) => false,
    };
    // `&&` short-circuits, so the TTY probe only runs without the override.
    !force_visible && !process_has_controlling_tty()
}

/// Probe whether the current process has a controlling terminal.
///
/// The probe attempts to open `/dev/tty` for reading and writing and reports
/// whether that open succeeded; the handle is dropped immediately.
#[cfg(unix)]
fn process_has_controlling_tty() -> bool {
    // `/dev/tty` can only be opened when the process has a controlling
    // terminal; on a detached server the open fails (ENXIO/ENODEV).
    let mut probe = std::fs::OpenOptions::new();
    probe.read(true).write(true);
    probe.open("/dev/tty").is_ok()
}

/// Non-unix fallback for the controlling-terminal probe: always returns
/// `true`, because no `/dev/tty`-style check is performed on these targets.
#[cfg(not(unix))]
fn process_has_controlling_tty() -> bool {
// On non-unix we conservatively assume an interactive context; the
// post-launch attach-wait still guards against a failed attach.
true
}

/// Parameterized core of [`wait_for_live_attachment`], taking an explicit
/// `timeout` and `poll` interval so tests can use short values without
/// controlling the global clock.
///
/// Each iteration checks whether `session_id` is present in `swarm_members`
/// with a non-empty `event_txs`; if so it returns `true`. Otherwise it returns
/// `false` once the deadline (`timeout` from entry) has passed, or sleeps for
/// `poll` and tries again.
///
/// The attachment check runs before the deadline check, so it is always
/// performed at least once, even with a zero `timeout`. Because a full `poll`
/// sleep is taken between checks, the call may return up to roughly one poll
/// interval after the nominal deadline.
async fn wait_for_live_attachment_with(
    session_id: &str,
    swarm_members: &Arc<RwLock<HashMap<String, SwarmMember>>>,
    timeout: std::time::Duration,
    poll: std::time::Duration,
) -> bool {
    let deadline = Instant::now() + timeout;
    loop {
        // The read guard is a statement-scoped temporary, so the lock is
        // released before we sleep below.
        let attached = swarm_members
            .read()
            .await
            .get(session_id)
            .is_some_and(|entry| !entry.event_txs.is_empty());
        if attached {
            return true;
        }
        if Instant::now() >= deadline {
            return false;
        }
        tokio::time::sleep(poll).await;
    }
}

/// Tear down a visibly-spawned session that never attached a live client.
///
/// Removes the session's entry from `swarm_members` and then calls
/// `cleanup_prepared_visible_spawn_session` for it, so that neither the swarm
/// membership nor the prepared session records linger as a stuck
/// "startup queued" ghost after falling back to a headless worker.
async fn cleanup_orphaned_visible_spawn_session(
    session_id: &str,
    swarm_members: &Arc<RwLock<HashMap<String, SwarmMember>>>,
) {
    let mut registry = swarm_members.write().await;
    registry.remove(session_id);
    // Release the write lock before running the prepared-session cleanup.
    drop(registry);

    cleanup_prepared_visible_spawn_session(session_id);
}

fn prepare_visible_spawn_session<F>(
working_dir: Option<&str>,
model_override: Option<&str>,
Expand Down Expand Up @@ -262,26 +384,36 @@ async fn register_visible_spawned_member(

{
let mut members = swarm_members.write().await;
members.insert(
session_id.to_string(),
SwarmMember {
session_id: session_id.to_string(),
event_tx,
event_txs: HashMap::new(),
working_dir: working_dir.map(PathBuf::from),
swarm_id: Some(swarm_id.to_string()),
swarm_enabled: true,
status,
detail,
friendly_name: Some(friendly_name),
report_back_to_session_id: report_back_to_session_id.map(str::to_string),
latest_completion_report: None,
role: "agent".to_string(),
joined_at: now,
last_status_change: now,
is_headless: false,
},
);
// If a real interactive client has already attached and registered this
// member (possible under Auto mode, where we wait for an attachment
// before registering), do not clobber its live event channels.
if members
.get(session_id)
.is_some_and(|member| !member.event_txs.is_empty())
{
// Client already owns this member; nothing to register.
} else {
members.insert(
session_id.to_string(),
SwarmMember {
session_id: session_id.to_string(),
event_tx,
event_txs: HashMap::new(),
working_dir: working_dir.map(PathBuf::from),
swarm_id: Some(swarm_id.to_string()),
swarm_enabled: true,
status,
detail,
friendly_name: Some(friendly_name),
report_back_to_session_id: report_back_to_session_id.map(str::to_string),
latest_completion_report: None,
role: "agent".to_string(),
joined_at: now,
last_status_change: now,
is_headless: false,
},
);
}
}

{
Expand Down Expand Up @@ -359,8 +491,30 @@ pub(super) async fn spawn_swarm_agent(
.as_deref()
.map(append_swarm_completion_report_instructions);

// In Auto mode, decide up-front whether a visible spawn can possibly work.
// A visible spawn only helps when a freshly-opened Terminal can attach and
// drive the child. When this process is a detached `jcode serve` shared
// server (no controlling TTY), opening that window just orphans it and
// stalls on the attach timeout, so we skip straight to the in-process
// headless runner. See `running_as_detached_server`.
let auto_should_try_visible = match resolved_spawn_mode {
SwarmSpawnMode::Auto => {
let detached_server = running_as_detached_server();
if detached_server {
crate::logging::info(
"Auto swarm spawn: detached server (no controlling TTY); spawning child headless directly",
);
}
!detached_server
}
_ => true,
};

let visible_spawn = match resolved_spawn_mode {
SwarmSpawnMode::Headless => Err(anyhow::anyhow!("headless spawn requested")),
SwarmSpawnMode::Auto if !auto_should_try_visible => {
Err(anyhow::anyhow!("auto spawn: detached server, skipping visible"))
}
SwarmSpawnMode::Visible | SwarmSpawnMode::Auto => prepare_visible_spawn_session(
resolved_working_dir.as_deref(),
spawn_model.as_deref(),
Expand All @@ -371,6 +525,31 @@ pub(super) async fn spawn_swarm_agent(
),
};

// In Auto mode a visible launch only *forks* a terminal launcher; it does
// not guarantee that an interactive client actually attached and started
// driving the session. On a server/headless host (no GUI terminal, or a
// `jcode serve` shared server) the launcher fork succeeds but no client
// ever connects, leaving the agent stuck at "startup queued" forever.
//
// To make Auto reliable, after a "successful" visible launch we wait a
// short window for the spawned session to register a live client
// attachment. If none arrives we tear down the orphaned visible session and
// fall back to the in-process headless runner (which always executes).
let visible_spawn = match (resolved_spawn_mode, visible_spawn) {
(SwarmSpawnMode::Auto, Ok((candidate_session_id, true))) => {
if wait_for_live_attachment(&candidate_session_id, swarm_members).await {
Ok((candidate_session_id, true))
} else {
crate::logging::warn(&format!(
"Auto swarm spawn: visible client did not attach within timeout for session {candidate_session_id}; falling back to headless"
));
cleanup_orphaned_visible_spawn_session(&candidate_session_id, swarm_members).await;
Ok((candidate_session_id, false))
}
}
(_, other) => other,
};

let (new_session_id, is_headless_fallback) = match visible_spawn {
Ok((new_session_id, true)) => Ok((new_session_id, false)),
Ok((_, false)) | Err(_) => {
Expand Down
Loading