diff --git a/Cargo.lock b/Cargo.lock index 499204bc7..6244492a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3265,7 +3265,7 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jcode" -version = "0.21.0" +version = "0.22.0" dependencies = [ "agentgrep", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index 35b32d17b..134a7fe74 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "jcode" -version = "0.21.0" +version = "0.22.0" description = "Possibly the greatest coding agent ever built — blazing-fast TUI, multi-model, swarm coordination, 30+ tools" edition = "2024" autobins = false diff --git a/crates/jcode-app-core/src/ambient/runner.rs b/crates/jcode-app-core/src/ambient/runner.rs index 733f2da38..4c49ffcb9 100644 --- a/crates/jcode-app-core/src/ambient/runner.rs +++ b/crates/jcode-app-core/src/ambient/runner.rs @@ -419,6 +419,7 @@ impl AmbientRunnerHandle { child.replace_messages(parent.messages.clone()); child.compaction = parent.compaction.clone(); child.provider_key = parent.provider_key.clone(); + child.route_api_method = parent.route_api_method.clone(); child.model = parent.model.clone(); child.subagent_model = parent.subagent_model.clone(); child.improve_mode = parent.improve_mode; diff --git a/crates/jcode-app-core/src/catchup.rs b/crates/jcode-app-core/src/catchup.rs index c3c64159d..536bd312c 100644 --- a/crates/jcode-app-core/src/catchup.rs +++ b/crates/jcode-app-core/src/catchup.rs @@ -19,6 +19,42 @@ pub fn needs_catchup(session_id: &str, updated_at: DateTime, status: &Sessi needs_catchup_with_seen(updated_at.timestamp_millis(), seen, status) } +/// Snapshot of the persisted catch-up "seen" state, so callers that need to +/// evaluate many sessions at once (e.g. the session picker building its list) +/// can avoid re-reading and re-parsing `catchup_seen.json` once per session. +#[derive(Clone, Default)] +pub struct CatchupSeenSnapshot { + state: PersistedCatchupState, +} + +impl CatchupSeenSnapshot { + /// Load the persisted seen-state once from disk. + pub fn load() -> Self { + Self { + state: load_seen_state(), + } + } + + /// Same semantics as [`needs_catchup`] but uses this preloaded snapshot + /// instead of re-reading the state file for every call. + pub fn needs_catchup( + &self, + session_id: &str, + updated_at: DateTime, + status: &SessionStatus, + ) -> bool { + if !is_attention_status(status) { + return false; + } + let seen = self + .state + .seen_at_ms_by_session + .get(session_id) + .copied(); + needs_catchup_with_seen(updated_at.timestamp_millis(), seen, status) + } +} + pub(crate) fn needs_catchup_with_seen( updated_at_ms: i64, seen_at_ms: Option, diff --git a/crates/jcode-app-core/src/external_auth.rs b/crates/jcode-app-core/src/external_auth.rs index faec2ef4c..80aa225a9 100644 --- a/crates/jcode-app-core/src/external_auth.rs +++ b/crates/jcode-app-core/src/external_auth.rs @@ -101,10 +101,59 @@ impl ExternalAuthReviewCandidate { } } +impl ExternalAuthReviewCandidate { + /// Coarse telemetry `(provider, method)` labels for the providers this + /// candidate activates on a successful import. Used by the onboarding flow + /// to record `auth_success` so auto-imported logins show up in the + /// activation funnel (they previously did not, because auto-import never + /// flows through the manual `pending_login` telemetry path). + /// + /// The method is reported as `"import"` so import-driven activation can be + /// distinguished from manual login in the funnel. + pub fn telemetry_auth_labels(&self) -> Vec<(&'static str, &'static str)> { + const METHOD: &str = "import"; + match &self.action { + ExternalAuthReviewAction::CodexLegacy => vec![("openai", METHOD)], + ExternalAuthReviewAction::ClaudeCode => vec![("claude", METHOD)], + ExternalAuthReviewAction::GeminiCli => vec![("gemini", METHOD)], + ExternalAuthReviewAction::Copilot(_) => vec![("copilot", METHOD)], + ExternalAuthReviewAction::Cursor(_) => vec![("cursor", METHOD)], + ExternalAuthReviewAction::SharedExternal(source) => { + auth::external::source_provider_labels(*source) + .into_iter() + .filter_map(|label| { + telemetry_provider_id_for_label(label).map(|id| (id, METHOD)) + }) + .collect() + } + } + } +} + +/// Map a human-facing provider label (as produced by +/// [`auth::external::source_provider_labels`]) to the canonical telemetry +/// provider id used by the activation funnel. +fn telemetry_provider_id_for_label(label: &str) -> Option<&'static str> { + match label { + "OpenAI/Codex" => Some("openai"), + "Claude" => Some("claude"), + "Gemini" => Some("gemini"), + "Antigravity" => Some("antigravity"), + "GitHub Copilot" => Some("copilot"), + "OpenRouter/API-key providers" => Some("openrouter"), + _ => None, + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct ExternalAuthAutoImportOutcome { pub imported: usize, pub messages: Vec, + /// Coarse `(provider, method)` telemetry labels for each provider that was + /// successfully imported, so callers can record `auth_success` for the + /// activation funnel. May contain more entries than `imported` when a + /// single source carries multiple providers. + pub imported_auth_labels: Vec<(&'static str, &'static str)>, } impl ExternalAuthAutoImportOutcome { @@ -535,6 +584,7 @@ pub async fn run_external_auth_auto_import_candidates( let mut outcome = ExternalAuthAutoImportOutcome { imported: 0, messages: Vec::new(), + imported_auth_labels: Vec::new(), }; for &index in selected { @@ -545,6 +595,9 @@ pub async fn run_external_auth_auto_import_candidates( match validate_external_auth_review_candidate(candidate).await { Ok(detail) => { outcome.imported += 1; + outcome + .imported_auth_labels + .extend(candidate.telemetry_auth_labels()); outcome.messages.push(format!( "✓ {} (from {}): {}", candidate.provider_summary, candidate.source_name, detail @@ -573,6 +626,7 @@ mod render_markdown_tests { let outcome = ExternalAuthAutoImportOutcome { imported: 0, messages: Vec::new(), + imported_auth_labels: Vec::new(), }; assert_eq!( outcome.render_markdown(), @@ -590,6 +644,7 @@ mod render_markdown_tests { "✓ Claude (from Claude Code): Loaded Claude credentials.".to_string(), "✕ Cursor (from Cursor native): no usable auth token.".to_string(), ], + imported_auth_labels: vec![("openai", "import"), ("claude", "import")], }; let md = outcome.render_markdown(); assert!(md.starts_with("**Logins imported**"), "got: {md}"); @@ -613,8 +668,20 @@ mod render_markdown_tests { let outcome = ExternalAuthAutoImportOutcome { imported: 1, messages: vec!["✓ Gemini (from Gemini CLI): Loaded Gemini credentials.".to_string()], + imported_auth_labels: vec![("gemini", "import")], }; let md = outcome.render_markdown(); assert!(md.contains("Reusing 1 existing login:"), "got: {md}"); } + + #[test] + fn fixture_candidate_reports_import_auth_labels() { + use super::ExternalAuthReviewCandidate; + // The fixture points at the legacy Codex action -> OpenAI provider. + let candidate = ExternalAuthReviewCandidate::fixture("OpenAI/Codex", "Codex auth.json"); + assert_eq!( + candidate.telemetry_auth_labels(), + vec![("openai", "import")] + ); + } } diff --git a/crates/jcode-app-core/src/overnight.rs b/crates/jcode-app-core/src/overnight.rs index a619cdaaf..e23c9663d 100644 --- a/crates/jcode-app-core/src/overnight.rs +++ b/crates/jcode-app-core/src/overnight.rs @@ -175,6 +175,7 @@ fn create_coordinator_session(parent: &Session, mission: &Option) -> Res child.replace_messages(parent.messages.clone()); child.compaction = parent.compaction.clone(); child.provider_key = parent.provider_key.clone(); + child.route_api_method = parent.route_api_method.clone(); child.reasoning_effort = parent.reasoning_effort.clone(); child.subagent_model = parent.subagent_model.clone(); child.improve_mode = parent.improve_mode; diff --git a/crates/jcode-app-core/src/server/client_actions.rs b/crates/jcode-app-core/src/server/client_actions.rs index 77c34abf0..ead8ff6d6 100644 --- a/crates/jcode-app-core/src/server/client_actions.rs +++ b/crates/jcode-app-core/src/server/client_actions.rs @@ -737,6 +737,7 @@ fn create_transfer_child_session( child.working_dir = parent.working_dir.clone(); child.model = parent.model.clone(); child.provider_key = parent.provider_key.clone(); + child.route_api_method = parent.route_api_method.clone(); child.subagent_model = parent.subagent_model.clone(); child.improve_mode = parent.improve_mode; child.autoreview_enabled = parent.autoreview_enabled; diff --git a/crates/jcode-app-core/src/server/util.rs b/crates/jcode-app-core/src/server/util.rs index 7a2e09a58..1d6f09243 100644 --- a/crates/jcode-app-core/src/server/util.rs +++ b/crates/jcode-app-core/src/server/util.rs @@ -758,7 +758,7 @@ mod newest_reload_candidate_integration_tests { //! a temp `JCODE_HOME`. This reproduces the field "/update -> new client, //! stale server" state and proves the fix: a self-dev daemon now reloads into //! the freshly installed release instead of its old pinned binary. - use super::newest_reload_candidate; + use super::{canonicalize_or, newer_binary_available, newest_reload_candidate}; use crate::build; use std::path::Path; use std::time::{Duration, SystemTime}; @@ -860,6 +860,82 @@ mod newest_reload_candidate_integration_tests { crate::env::remove_var("JCODE_HOME"); } } + + /// Re-implements `server_has_newer_binary`'s decision against an *injected* + /// running-daemon path + mtime, so a test can model "the daemon is still the + /// OLD binary" without spawning a real process. It scans the exact same + /// candidate set (both flavors) and uses the same `newer_binary_available` + /// core the production function uses. + fn daemon_reports_update(running: &Path, running_mtime: SystemTime) -> bool { + let running_canonical = canonicalize_or(running.to_path_buf()); + let mut candidates = std::collections::HashSet::new(); + for is_selfdev in [false, true] { + if let Some((candidate, _label)) = super::server_update_candidate(is_selfdev) { + candidates.insert(canonicalize_or(candidate)); + } + } + let with_mtimes = candidates.into_iter().map(|candidate| { + let m = std::fs::metadata(&candidate) + .ok() + .and_then(|m| m.modified().ok()); + (candidate, m) + }); + newer_binary_available( + Some(running_mtime), + Some(running_canonical.as_path()), + with_mtimes, + ) + } + + /// The question that matters for shipped users: after a NORMAL (non-self-dev) + /// `/update`, does the long-lived daemon actually advertise + apply the + /// upgrade on reconnect? + /// + /// Models a normal install: `shared-server` was tracking `stable`, the daemon + /// is running the old release, and `/update` installs a newer release and + /// advances stable/current/shared-server. We then drive the REAL + /// update-detection core and reload-target resolver and assert both: + /// (1) the daemon reports `server_has_update = true`, and + /// (2) the binary it reloads into is the freshly installed release. + #[test] + fn normal_user_daemon_detects_and_targets_update_after_update() { + let _guard = crate::storage::lock_test_env(); + let temp = tempfile::TempDir::new().expect("temp dir"); + let prev_home = std::env::var_os("JCODE_HOME"); + crate::env::set_var("JCODE_HOME", temp.path()); + + let base = SystemTime::UNIX_EPOCH + Duration::from_secs(1_000_000); + let old_release = "0.14.3"; + let new_release = "0.15.0"; + let old_path = install_versioned_binary(old_release, base); + install_versioned_binary(new_release, base + Duration::from_secs(60)); + + // Pre-update state: every channel on the old release (shared-server + // tracking stable). This is the steady state for a normal user. + build::update_stable_symlink(old_release).expect("stable old"); + build::update_current_symlink(old_release).expect("current old"); + build::update_shared_server_symlink(old_release).expect("shared old"); + + // `/update` installs the new release and advances the channels. Because + // shared-server was tracking stable, it advances too. + build::advance_shared_server_if_tracking_stable(new_release).expect("advance shared"); + build::update_stable_symlink(new_release).expect("stable new"); + build::update_current_symlink(new_release).expect("current new"); + + // (1) The daemon (still the OLD binary) must now SEE the update so it + // reports server_has_update = true to reconnecting clients. + assert!( + daemon_reports_update(&old_path, base), + "normal-user daemon should report a server update after /update advanced the channels" + ); + + // (2) The binary it reloads into must be the freshly installed release. + assert_eq!( + candidate_version_for(false).as_deref(), + Some(new_release), + "normal-user daemon should reload into the freshly installed release" + ); + } } #[cfg(test)] diff --git a/crates/jcode-app-core/src/tool/selfdev/launch.rs b/crates/jcode-app-core/src/tool/selfdev/launch.rs index f4ab3c39b..17194ebd1 100644 --- a/crates/jcode-app-core/src/tool/selfdev/launch.rs +++ b/crates/jcode-app-core/src/tool/selfdev/launch.rs @@ -20,6 +20,7 @@ pub fn enter_selfdev_session( child.compaction = parent.compaction.clone(); child.model = parent.model.clone(); child.provider_key = parent.provider_key.clone(); + child.route_api_method = parent.route_api_method.clone(); child.subagent_model = parent.subagent_model.clone(); child.improve_mode = parent.improve_mode; child.autoreview_enabled = parent.autoreview_enabled; diff --git a/crates/jcode-base/src/auth/live_provider_probes.rs b/crates/jcode-base/src/auth/live_provider_probes.rs index 1da06404b..749c43bee 100644 --- a/crates/jcode-base/src/auth/live_provider_probes.rs +++ b/crates/jcode-base/src/auth/live_provider_probes.rs @@ -1145,143 +1145,21 @@ pub async fn run_live_antigravity_native_stream_smoke( /// Stage: tool-call parse + execution loop + result follow-up. /// -/// Full two-turn round-trip: ask the model to call a tool (assert a parseable -/// tool_use), then feed a synthetic tool_result back (assert the model consumes -/// it). Gemini-3 attaches a `thought_signature` to its function call that the -/// Cloud Code backend requires replayed on the follow-up turn, so we carry it -/// onto the assistant tool_use block. Evidence for the `tool_call_parse`, -/// `tool_execution_loop`, `tool_result_followup`, and `real_jcode_tool_smoke` -/// checkpoints. +/// Delegates to the shared native tool smoke ([`run_live_native_provider_tool_smoke`]) +/// so Antigravity exercises the same two phases as every other native runtime: +/// a single round-trip plus a **multi-call signature replay** that rebuilds a +/// history of two assistant `tool_use` blocks. Gemini-3 attaches a +/// `thought_signature` to each function call that the Cloud Code backend +/// requires replayed on later turns; the multi-call phase is what actually +/// reproduces the `400 ... "Function call is missing a thought_signature ... +/// position N"` field failure (a single round-trip cannot). Evidence for the +/// `tool_call_parse`, `tool_execution_loop`, `tool_result_followup`, and +/// `real_jcode_tool_smoke` checkpoints. pub async fn run_live_antigravity_native_tool_smoke( model: &str, ) -> anyhow::Result { - let started = std::time::Instant::now(); let provider = build_native_antigravity_provider(model)?; - - let tool_name = "read"; - let tools = vec![ToolDefinition { - name: tool_name.to_string(), - description: "Reads a file from the local filesystem.".to_string(), - input_schema: serde_json::json!({ - "type": "object", - "properties": {"file_path": {"type": "string"}}, - "required": ["file_path"], - "additionalProperties": false - }), - }]; - let system = "You are a live provider tool smoke test. When asked to read a file, you MUST \ - call the read tool with the given path. Do not answer in text first."; - - let first_turn = vec![Message { - role: Role::User, - content: vec![ContentBlock::Text { - text: "Read the file at /tmp/auth_tool_probe.txt using the read tool. \ - Call the tool now; do not answer in text." - .to_string(), - cache_control: None, - }], - timestamp: None, - tool_duration_ms: None, - }]; - - let first = consume_native_stream( - &provider, - &first_turn, - &tools, - system, - std::time::Duration::from_secs(120), - ) - .await?; - - ensure!( - !first.tool_calls.is_empty(), - "native Antigravity tool smoke produced no tool call (stop_reason={:?}, text={:?})", - first.stop_reason, - crate::util::truncate_str(first.text.trim(), 200) - ); - let tool_call = first.tool_calls[0].clone(); - ensure!( - tool_call.name == tool_name, - "native Antigravity tool smoke called unexpected tool {:?} (expected {tool_name})", - tool_call.name - ); - let parsed_arguments = crate::message::ToolCall::parse_streamed_input_to_object( - if tool_call.input_json.trim().is_empty() { - "{}" - } else { - tool_call.input_json.trim() - }, - ); - ensure!( - parsed_arguments.is_object(), - "native Antigravity tool smoke produced non-object tool arguments: {:?}", - tool_call.input_json - ); - - // Second turn: replay the assistant's tool_use (carrying the Gemini-3 - // thought signature, required by the Cloud Code backend) and answer it with - // a synthetic tool_result, then assert the model consumes the result. - let mut followup = first_turn.clone(); - followup.push(Message { - role: Role::Assistant, - content: vec![ContentBlock::ToolUse { - id: tool_call.id.clone(), - name: tool_call.name.clone(), - input: parsed_arguments.clone(), - thought_signature: tool_call.thought_signature.clone(), - }], - timestamp: None, - tool_duration_ms: None, - }); - followup.push(Message { - role: Role::User, - content: vec![ContentBlock::ToolResult { - tool_use_id: tool_call.id.clone(), - content: "TOOL_RESULT_TOKEN=42. Report this token back to confirm you read it." - .to_string(), - is_error: Some(false), - }], - timestamp: None, - tool_duration_ms: None, - }); - - let second = consume_native_stream( - &provider, - &followup, - &tools, - system, - std::time::Duration::from_secs(120), - ) - .await?; - - ensure!( - second.saw_message_end, - "native Antigravity tool follow-up ended without a message_end event" - ); - ensure!( - second.text.contains("42"), - "native Antigravity tool follow-up did not reflect the tool result token: {:?}", - crate::util::truncate_str(second.text.trim(), 200) - ); - - let total_input = first.input_tokens + second.input_tokens; - let total_output = first.output_tokens + second.output_tokens; - let mut stage = crate::live_tests::LiveVerificationStage::passed( - crate::live_tests::checkpoints::TOOL_CALL_PARSE, - ) - .with_duration_ms(started.elapsed().as_millis() as u64) - .with_evidence("model", serde_json::json!(model)) - .with_evidence("tool_name", serde_json::json!(tool_call.name)) - .with_evidence("tool_arguments", parsed_arguments) - .with_evidence( - "thought_signature_present", - serde_json::json!(tool_call.thought_signature.is_some()), - ) - .with_evidence("followup_consumed_result", serde_json::json!(true)); - if total_input != 0 || total_output != 0 { - stage = stage.with_evidence("usage", usage_evidence(total_input, total_output, 0, 0)); - } - Ok(stage) + run_live_native_provider_tool_smoke(&provider, model, "Antigravity").await } // === Generic native-runtime probes ======================================== @@ -1442,11 +1320,24 @@ pub async fn run_live_native_provider_stream_smoke( /// Stage: tool-call parse + execution loop + result follow-up against an /// arbitrary native provider. /// -/// Full two-turn round-trip: ask the model to call a tool (assert a parseable -/// tool_use), then feed a synthetic tool_result back (assert the model consumes -/// it). Any provider-emitted `thought_signature` (e.g. Gemini-3 via the Cloud -/// Code backend) is carried onto the replayed assistant tool_use block, since -/// some backends reject a follow-up turn that omits it. +/// Two phases: +/// +/// 1. **Single round-trip (gating):** ask the model to call a tool (assert a +/// parseable tool_use), then feed a synthetic tool_result back (assert the +/// model consumes it). This mirrors the historical assertion so providers +/// that already passed keep passing. +/// 2. **Multi-call signature replay (best-effort):** chain a *second* tool call +/// and replay a history that now contains **two** assistant `tool_use` +/// blocks, each carrying its own provider-emitted `thought_signature`. The +/// Antigravity/Cloud Code backend validates every `functionCall` in the +/// replayed history (not just the latest), so a transcript that drops an +/// earlier signature is rejected with `400 ... "Function call is missing a +/// thought_signature ... position N"`. A single round-trip can never +/// reproduce that, so we exercise the multi-call shape here. If the model +/// declines the second tool call (common for providers that do not emit +/// signatures at all), the phase records `multi_tool_replay: "skipped"` +/// rather than failing, so it never turns a previously-green provider red +/// for a non-signature reason. pub async fn run_live_native_provider_tool_smoke( provider: &dyn Provider, model: &str, @@ -1501,49 +1392,26 @@ pub async fn run_live_native_provider_tool_smoke( "native {label} tool smoke called unexpected tool {:?} (expected {tool_name})", tool_call.name ); - let parsed_arguments = crate::message::ToolCall::parse_streamed_input_to_object( - if tool_call.input_json.trim().is_empty() { - "{}" - } else { - tool_call.input_json.trim() - }, - ); + let parsed_arguments = parse_tool_arguments(&tool_call.input_json); ensure!( parsed_arguments.is_object(), "native {label} tool smoke produced non-object tool arguments: {:?}", tool_call.input_json ); - // Second turn: replay the assistant's tool_use (carrying any thought + // Phase 1 (gating): replay the assistant's tool_use (carrying any thought // signature the backend requires) and answer it with a synthetic // tool_result, then assert the model consumes the result. - let mut followup = first_turn.clone(); - followup.push(Message { - role: Role::Assistant, - content: vec![ContentBlock::ToolUse { - id: tool_call.id.clone(), - name: tool_call.name.clone(), - input: parsed_arguments.clone(), - thought_signature: tool_call.thought_signature.clone(), - }], - timestamp: None, - tool_duration_ms: None, - }); - followup.push(Message { - role: Role::User, - content: vec![ContentBlock::ToolResult { - tool_use_id: tool_call.id.clone(), - content: "TOOL_RESULT_TOKEN=42. Report this token back to confirm you read it." - .to_string(), - is_error: Some(false), - }], - timestamp: None, - tool_duration_ms: None, - }); + let mut history = first_turn.clone(); + history.push(assistant_tool_use(&tool_call, &parsed_arguments)); + history.push(tool_result_then_text( + &tool_call.id, + "TOOL_RESULT_TOKEN=42. Report this token back to confirm you read it.", + )); let second = consume_native_stream( provider, - &followup, + &history, &tools, system, std::time::Duration::from_secs(120), @@ -1560,8 +1428,99 @@ pub async fn run_live_native_provider_tool_smoke( crate::util::truncate_str(second.text.trim(), 200) ); - let total_input = first.input_tokens + second.input_tokens; - let total_output = first.output_tokens + second.output_tokens; + // Phase 2 (best-effort): drive an agentic loop that requires reading TWO + // files so the model emits a *sequence* of tool calls. Each call is replayed + // (carrying its captured signature) and answered with a synthetic result, so + // by the final turn the request we send carries two assistant `functionCall` + // blocks. That multi-call history is the only shape that reproduces the + // Antigravity/Cloud Code `400 ... "Function call is missing a + // thought_signature ... position N"`: a backend that validates *every* + // signature rejects the request here if an earlier one was dropped, so the + // `consume_native_stream` below surfaces the regression. If the model never + // makes a second tool call (common for providers that emit no signatures at + // all), the phase records `multi_tool_replay: "skipped"` rather than failing. + let mut total_input = first.input_tokens + second.input_tokens; + let mut total_output = first.output_tokens + second.output_tokens; + let mut multi_tool_replay = "skipped"; + let mut signatures_present: Vec = Vec::new(); + + let mut convo = vec![Message { + role: Role::User, + content: vec![ContentBlock::Text { + text: "Read two files using the read tool, one tool call at a time: first read \ + /tmp/auth_tool_probe.txt, then read /tmp/auth_tool_probe_2.txt. After both \ + reads, reply with the single word DONE. Call the tool now; do not answer \ + in text first." + .to_string(), + cache_control: None, + }], + timestamp: None, + tool_duration_ms: None, + }]; + let synthetic_results = [ + "Contents of /tmp/auth_tool_probe.txt: alpha.", + "Contents of /tmp/auth_tool_probe_2.txt: bravo.", + ]; + // Cap the loop so a model that keeps calling tools cannot run forever. + const MAX_TOOL_ROUNDS: usize = 4; + let mut tool_round = 0usize; + + loop { + // Number of assistant function calls already in the history we are about + // to replay. Once this reaches two, a successful response proves the + // backend accepted a multi-`functionCall` transcript with every + // signature intact. + let prior_calls = convo + .iter() + .filter(|message| { + matches!(message.role, Role::Assistant) + && message + .content + .iter() + .any(|block| matches!(block, ContentBlock::ToolUse { .. })) + }) + .count(); + + let turn = consume_native_stream( + provider, + &convo, + &tools, + system, + std::time::Duration::from_secs(120), + ) + .await + .with_context(|| { + format!( + "native {label} multi-tool signature replay was rejected (replayed history \ + carried {prior_calls} function call(s); a backend that validates every \ + functionCall signature fails here when an earlier thought_signature is dropped)" + ) + })?; + total_input += turn.input_tokens; + total_output += turn.output_tokens; + if prior_calls >= 2 { + multi_tool_replay = "verified"; + } + + let Some(call) = turn.tool_calls.first().cloned() else { + // Model produced a final (text) answer; the loop is done. + break; + }; + signatures_present.push(call.thought_signature.is_some()); + let args = parse_tool_arguments(&call.input_json); + convo.push(assistant_tool_use(&call, &args)); + let result = synthetic_results + .get(tool_round) + .copied() + .unwrap_or("Contents: omega."); + convo.push(tool_result_then_text(&call.id, result)); + + tool_round += 1; + if tool_round >= MAX_TOOL_ROUNDS { + break; + } + } + let mut stage = crate::live_tests::LiveVerificationStage::passed( crate::live_tests::checkpoints::TOOL_CALL_PARSE, ) @@ -1573,9 +1532,57 @@ pub async fn run_live_native_provider_tool_smoke( "thought_signature_present", serde_json::json!(tool_call.thought_signature.is_some()), ) + .with_evidence("multi_tool_replay", serde_json::json!(multi_tool_replay)) + .with_evidence("multi_tool_call_count", serde_json::json!(tool_round)) + .with_evidence( + "tool_call_signatures_present", + serde_json::json!(signatures_present), + ) .with_evidence("followup_consumed_result", serde_json::json!(true)); if total_input != 0 || total_output != 0 { stage = stage.with_evidence("usage", usage_evidence(total_input, total_output, 0, 0)); } Ok(stage) } + +/// Parse a streamed tool-call argument blob into a JSON object (empty object for +/// a blank payload), shared by the native tool smoke probes. +fn parse_tool_arguments(input_json: &str) -> serde_json::Value { + crate::message::ToolCall::parse_streamed_input_to_object(if input_json.trim().is_empty() { + "{}" + } else { + input_json.trim() + }) +} + +/// Build the assistant `tool_use` replay block for a captured native tool call, +/// preserving any provider-emitted `thought_signature` so backends that require +/// it (Gemini-3 via the Cloud Code/Antigravity runtime) accept the follow-up. +fn assistant_tool_use(call: &NativeClaudeToolCall, arguments: &serde_json::Value) -> Message { + Message { + role: Role::Assistant, + content: vec![ContentBlock::ToolUse { + id: call.id.clone(), + name: call.name.clone(), + input: arguments.clone(), + thought_signature: call.thought_signature.clone(), + }], + timestamp: None, + tool_duration_ms: None, + } +} + +/// Build a user turn carrying a synthetic `tool_result` for a captured native +/// tool call, used to answer each step of the multi-call replay loop. +fn tool_result_then_text(tool_use_id: &str, result: &str) -> Message { + Message { + role: Role::User, + content: vec![ContentBlock::ToolResult { + tool_use_id: tool_use_id.to_string(), + content: result.to_string(), + is_error: Some(false), + }], + timestamp: None, + tool_duration_ms: None, + } +} diff --git a/crates/jcode-base/src/auth/provider_e2e.rs b/crates/jcode-base/src/auth/provider_e2e.rs index 0998d10da..d4356db5c 100644 --- a/crates/jcode-base/src/auth/provider_e2e.rs +++ b/crates/jcode-base/src/auth/provider_e2e.rs @@ -283,6 +283,28 @@ fn label_for(checkpoint: &str) -> &'static str { .unwrap_or("Checkpoint") } +/// Human-readable detail for a passed tool-smoke stage, surfacing whether the +/// multi-call thought-signature replay phase was exercised. The native tool +/// smoke records `multi_tool_replay` as `verified` (a two-`functionCall` +/// history was replayed and accepted, the shape that reproduces the +/// "missing a thought_signature ... position N" 400) or `skipped` (the model +/// declined a second tool call). Surfacing it keeps the coverage observable in +/// the doctor report instead of collapsing to a generic pass string. +fn tool_stage_detail(stage: &crate::live_tests::LiveVerificationStage) -> String { + match stage + .evidence + .get("multi_tool_replay") + .and_then(|value| value.as_str()) + { + Some("verified") => "tool call parsed and executed; multi-call signature replay verified".to_string(), + Some("skipped") => { + "tool call parsed and executed; multi-call signature replay skipped (no 2nd tool call)" + .to_string() + } + _ => "tool call parsed and executed".to_string(), + } +} + /// Checkpoints that require a real API response and are therefore skipped on the /// offline/catalog tiers. const API_DEPENDENT_CHECKPOINTS: &[&str] = &[ @@ -1139,6 +1161,7 @@ async fn run_native_antigravity_api_checks( match run_live_antigravity_native_tool_smoke(selected).await { Ok(stage) => { spend.accumulate(stage.evidence.get("usage"), stage.evidence.get("cost")); + let detail = tool_stage_detail(&stage); for checkpoint in [ checkpoints::TOOL_CALL_PARSE, checkpoints::TOOL_EXECUTION_LOOP, @@ -1148,7 +1171,7 @@ async fn run_native_antigravity_api_checks( checks.push(DoctorCheck::passed( checkpoint, label_for(checkpoint), - "tool call parsed and executed".to_string(), + detail.clone(), )); } } @@ -1773,6 +1796,7 @@ async fn run_generic_native_api_checks( match run_live_native_provider_tool_smoke(provider, selected, label).await { Ok(stage) => { spend.accumulate(stage.evidence.get("usage"), stage.evidence.get("cost")); + let detail = tool_stage_detail(&stage); for checkpoint in [ checkpoints::TOOL_CALL_PARSE, checkpoints::TOOL_EXECUTION_LOOP, @@ -1782,7 +1806,7 @@ async fn run_generic_native_api_checks( checks.push(DoctorCheck::passed( checkpoint, label_for(checkpoint), - "tool call parsed and executed".to_string(), + detail.clone(), )); } } diff --git a/crates/jcode-base/src/provider/gemini_tests.rs b/crates/jcode-base/src/provider/gemini_tests.rs index 9eeae7a77..4dedb2fa7 100644 --- a/crates/jcode-base/src/provider/gemini_tests.rs +++ b/crates/jcode-base/src/provider/gemini_tests.rs @@ -231,6 +231,55 @@ fn build_contents_replays_thought_signature_on_function_call() { ); } +#[test] +fn build_contents_replays_every_signature_across_multi_tool_history() { + // Regression guard for the Antigravity/Cloud Code 400 + // ("Function call is missing a thought_signature ... position 5"): the + // backend validates *every* functionCall in the replayed history, not just + // the latest one. A multi-turn transcript where an earlier tool_use drops + // its signature is exactly what triggers the field failure, so assert that + // each captured signature survives serialization onto its matching part. + let signatures = ["SIG_A", "SIG_B", "SIG_C"]; + let mut messages = Vec::new(); + for (idx, sig) in signatures.iter().enumerate() { + messages.push(Message { + role: Role::Assistant, + content: vec![ContentBlock::ToolUse { + id: format!("call_{idx}"), + name: "bash".to_string(), + input: json!({ "command": format!("echo {idx}") }), + thought_signature: Some(sig.to_string()), + }], + timestamp: None, + tool_duration_ms: None, + }); + messages.push(Message { + role: Role::User, + content: vec![ContentBlock::ToolResult { + tool_use_id: format!("call_{idx}"), + content: format!("out {idx}"), + is_error: Some(false), + }], + timestamp: None, + tool_duration_ms: None, + }); + } + + let contents = build_contents(&messages); + let replayed: Vec> = contents + .iter() + .flat_map(|content| content.parts.iter()) + .filter(|part| part.function_call.is_some()) + .map(|part| part.thought_signature.as_deref()) + .collect(); + assert_eq!( + replayed, + vec![Some("SIG_A"), Some("SIG_B"), Some("SIG_C")], + "every functionCall in the history must carry its captured thought_signature, \ + not just the most recent one" + ); +} + #[test] fn build_contents_preserves_tool_calls_and_results() { let messages = vec![ diff --git a/crates/jcode-base/src/provider/selection.rs b/crates/jcode-base/src/provider/selection.rs index b0362ada1..40a9ac453 100644 --- a/crates/jcode-base/src/provider/selection.rs +++ b/crates/jcode-base/src/provider/selection.rs @@ -192,6 +192,34 @@ impl MultiProvider { } } + /// Canonicalize a persisted session `provider_key` into the legacy + /// vocabulary the reconstruction helpers below understand. + /// + /// Two vocabularies persist into sessions and must be treated as + /// equivalent, otherwise the OAuth-vs-API-key auth mode is silently lost on + /// restore/model-switch: + /// + /// - Legacy `/model` + login path: `claude` / `claude-api` / `openai` / + /// `openai-api`. + /// - Structured model-route picker (`RouteSelection::stable_id`): + /// `claude-oauth` / `anthropic-api-key` / `openai-oauth` / + /// `openai-api-key`. + /// + /// Both encode the same auth route; we fold the picker forms back onto the + /// canonical keys so a session whose `provider_key` is `anthropic-api-key` + /// (and whose `route_api_method` was not also persisted, e.g. inherited by a + /// child/forked session) still reconstructs the Anthropic API-key route + /// instead of falling through to Auto (which prefers OAuth). + pub(crate) fn canonical_session_provider_key(provider_key: &str) -> &str { + match provider_key.trim() { + "claude-oauth" => "claude", + "anthropic-api-key" => "claude-api", + "openai-oauth" => "openai", + "openai-api-key" => "openai-api", + other => other, + } + } + fn explicit_session_provider_key_for_model_request(model_request: &str) -> Option { let model_request = model_request.trim(); if let Some((prefix, rest)) = model_request.split_once(':') { @@ -287,7 +315,7 @@ impl MultiProvider { } fn session_provider_key_matches_provider_name(provider_key: &str, provider_name: &str) -> bool { - let provider_key = provider_key.trim(); + let provider_key = Self::canonical_session_provider_key(provider_key.trim()); let Some(derived) = Self::session_provider_key_from_provider_name(provider_name) .or_else(|| crate::session::derive_session_provider_key(provider_name)) else { @@ -342,6 +370,11 @@ impl MultiProvider { else { return model.to_string(); }; + // Fold the structured-picker vocabulary (`anthropic-api-key`, + // `openai-oauth`, ...) onto the canonical keys so the OAuth-vs-API-key + // route survives even when only `provider_key` was persisted (e.g. a + // forked/child session that inherited it without `route_api_method`). + let provider_key = Self::canonical_session_provider_key(provider_key); match provider_key { "claude-api" => format!("claude-api:{model}"), @@ -615,6 +648,65 @@ mod tests { ); } + #[test] + fn session_provider_key_picker_vocabulary_preserves_auth_mode_without_route() { + // The structured model-route picker persists `RuntimeKey::stable_id()` + // values (`anthropic-api-key`, `openai-oauth`, ...). When a child/forked + // session inherits only `provider_key` without `route_api_method`, the + // reconstruction helpers must still recover the exact OAuth-vs-API-key + // route instead of dropping to Auto (which prefers OAuth) and silently + // shifting an API-key user onto the subscription. + for (model, provider_key, expected_request) in [ + ( + "claude-opus-4-8", + Some("anthropic-api-key"), + "claude-api:claude-opus-4-8", + ), + ( + "claude-opus-4-8", + Some("claude-oauth"), + "claude-oauth:claude-opus-4-8", + ), + ("gpt-5.5", Some("openai-api-key"), "openai-api:gpt-5.5"), + ("gpt-5.5", Some("openai-oauth"), "openai-oauth:gpt-5.5"), + ] { + assert_eq!( + MultiProvider::model_switch_request_for_session_model(model, provider_key), + expected_request, + "restore {model:?} with picker provider_key {provider_key:?}" + ); + } + + // The same picker vocabulary must be recognized as matching its provider + // so an auth-change rewrite keeps the persisted key instead of + // overwriting it with the canonical name (losing the auth mode). + for (model, provider_name, previous_key, expected_key) in [ + ( + "claude-opus-4-8", + "Anthropic", + Some("anthropic-api-key"), + Some("anthropic-api-key"), + ), + ( + "gpt-5.5", + "OpenAI", + Some("openai-api-key"), + Some("openai-api-key"), + ), + ] { + assert_eq!( + MultiProvider::session_provider_key_after_model_switch( + model, + provider_name, + previous_key, + ) + .as_deref(), + expected_key, + "{model:?} via {provider_name:?} keeps picker key {previous_key:?}" + ); + } + } + #[test] fn route_defaults_are_derived_consistently() { let copilot = MultiProvider::default_model_selection_from_route( diff --git a/crates/jcode-base/src/session/crash.rs b/crates/jcode-base/src/session/crash.rs index 1896c4da1..d49a7dac9 100644 --- a/crates/jcode-base/src/session/crash.rs +++ b/crates/jcode-base/src/session/crash.rs @@ -128,6 +128,7 @@ fn recover_loaded_crashed_sessions(mut crashed: Vec) -> Result, } /// Curated list of skills endorsed by jcode. Used by the `/skills` command to /// show users which recommended skills they have installed and which they are /// missing. This is the single source of truth for endorsed skills. +/// +/// The NVIDIA CUDA-X entries mirror the official NVIDIA-verified catalog at +/// ; install them with +/// `npx skills add nvidia/skills --skill --yes`. pub const ENDORSED_SKILLS: &[EndorsedSkill] = &[ EndorsedSkill { name: "optimization", description: "Improve performance, latency, throughput, memory usage, or general efficiency by defining metrics, measuring, attributing bottlenecks, and prioritizing macro-optimizations.", + category: "jcode", source: "bundled in jcode repo (.jcode/skills/optimization)", + install: None, }, EndorsedSkill { name: "todo-planning-skill", description: "Create thorough, well-structured todo lists for long tasks, including reflection, static analysis, verification, and next-step updates.", + category: "jcode", source: "bundled with jcode / Claude Code skills", + install: None, }, EndorsedSkill { name: "firefox-browser", description: "Control the user's Firefox browser with their logins and cookies intact to browse, fill forms, click, screenshot, and read authenticated pages.", + category: "jcode", source: "bundled with jcode / Claude Code skills", + install: None, + }, + // Anthropic official skills (github.com/anthropics/skills, Apache-2.0). + EndorsedSkill { + name: "frontend-design", + description: "Create distinctive, production-grade frontend interfaces with high design quality (web components, pages, apps). Generates creative, polished code that avoids generic AI aesthetics.", + category: "Anthropic Design", + source: "anthropics/skills (official Anthropic catalog)", + install: Some("npx skills add anthropics/skills --skill frontend-design --yes (or Claude Code: /plugin marketplace add anthropics/skills)"), + }, + // NVIDIA CUDA-X / GPU accelerated-computing skills from the official + // NVIDIA-verified catalog (github.com/NVIDIA/skills). + EndorsedSkill { + name: "cuopt-developer", + description: "Modify, build, test, debug, and contribute to NVIDIA cuOpt (C++/CUDA, Python, server, CI) — solver internals, PRs, DCO, and code conventions.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill cuopt-developer --yes"), + }, + EndorsedSkill { + name: "cuopt-install", + description: "Install NVIDIA cuOpt for Python, C, or server via pip, conda, or Docker, and verify the install.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill cuopt-install --yes"), + }, + EndorsedSkill { + name: "cuopt-numerical-optimization-api-c", + description: "Solve LP, MILP, and QP (beta) with the cuOpt C API for embedding optimization in C/C++.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some( + "npx skills add nvidia/skills --skill cuopt-numerical-optimization-api-c --yes", + ), + }, + EndorsedSkill { + name: "cuopt-numerical-optimization-api-cli", + description: "Solve LP, MILP, and QP (beta) with cuOpt from MPS files via the cuopt_cli command line.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some( + "npx skills add nvidia/skills --skill cuopt-numerical-optimization-api-cli --yes", + ), + }, + EndorsedSkill { + name: "cuopt-numerical-optimization-api-python", + description: "Solve LP, MILP, and QP (beta) with the cuOpt Python API — linear/quadratic objectives, integer variables, scheduling, portfolio, and least squares.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some( + "npx skills add nvidia/skills --skill cuopt-numerical-optimization-api-python --yes", + ), + }, + EndorsedSkill { + name: "cuopt-numerical-optimization-formulation", + description: "LP, MILP, and QP concepts and formulation patterns (parameters, constraints, decisions, objective). Concepts only; no API.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some( + "npx skills add nvidia/skills --skill cuopt-numerical-optimization-formulation --yes", + ), + }, + EndorsedSkill { + name: "cuopt-routing-api-python", + description: "Solve vehicle routing (VRP, TSP, PDP) with the cuOpt Python API.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill cuopt-routing-api-python --yes"), + }, + EndorsedSkill { + name: "cuopt-routing-formulation", + description: "Vehicle routing (VRP, TSP, PDP) problem types and data requirements. Domain concepts; no API or interface.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill cuopt-routing-formulation --yes"), + }, + EndorsedSkill { + name: "cuopt-server-api-python", + description: "Run the cuOpt REST server — start it, call endpoints, and use Python/curl client examples.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill cuopt-server-api-python --yes"), + }, + EndorsedSkill { + name: "cuopt-server-common", + description: "Understand what the cuOpt REST server does and how requests flow. Concepts only; no deploy or client code.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill cuopt-server-common --yes"), + }, + EndorsedSkill { + name: "cuopt-user-rules", + description: "Base rules for end users calling NVIDIA cuOpt (routing/LP/MILP/QP/install/server).", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill cuopt-user-rules --yes"), + }, + EndorsedSkill { + name: "cupynumeric-install", + description: "Install and verify NVIDIA cuPyNumeric (NumPy/SciPy on multi-node multi-GPU) for Python — requirements, commands, and verification.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill cupynumeric-install --yes"), + }, + EndorsedSkill { + name: "cupynumeric-migration-readiness", + description: "Assess NumPy code before porting to cuPyNumeric — which patterns scale on GPU, what must be refactored, and a READY/REFACTOR/NOT-RECOMMENDED verdict.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill cupynumeric-migration-readiness --yes"), + }, + EndorsedSkill { + name: "cupynumeric-hdf5", + description: "Read and write large cuPyNumeric arrays to HDF5 with Legate's parallel, distributed HDF5 I/O (legate.io.hdf5), including GPUDirect Storage.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill cupynumeric-hdf5 --yes"), + }, + EndorsedSkill { + name: "cupynumeric-parallel-data-load", + description: "Load sharded on-disk datasets (.npy, Parquet/Arrow, raw binary, sharded HDF5) into a distributed cuPyNumeric ndarray via manual partition + leaf task launch.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill cupynumeric-parallel-data-load --yes"), + }, + EndorsedSkill { + name: "accelerated-computing-cudf", + description: "Official NVIDIA guidance for cuDF GPU DataFrames, pandas acceleration, dask-cuDF, ETL, joins, groupby, CSV/Parquet I/O, and multi-GPU DataFrame workloads.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill accelerated-computing-cudf --yes"), + }, + EndorsedSkill { + name: "cudaq-guide", + description: "NVIDIA CUDA-Q (CUDA Quantum) onboarding guide for installation, test programs, GPU simulation, QPU hardware, and quantum applications.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill cudaq-guide --yes"), + }, + EndorsedSkill { + name: "tilegym-adding-cutile-kernel", + description: "Add a new cuTile GPU kernel operator to NVIDIA TileGym — dispatch registration, cuTile backend implementation, exports, tests, and benchmarks.", + category: "NVIDIA CUDA-X", + source: "NVIDIA/skills (official NVIDIA-verified catalog)", + install: Some("npx skills add nvidia/skills --skill tilegym-adding-cutile-kernel --yes"), }, ]; @@ -611,6 +770,11 @@ mod tests { "endorsed skill {} needs a description", skill.name ); + assert!( + !skill.category.is_empty(), + "endorsed skill {} needs a category", + skill.name + ); assert!( !skill.source.is_empty(), "endorsed skill {} needs a source", @@ -620,6 +784,13 @@ mod tests { !skill.name.starts_with('/'), "endorsed skill name should not include the leading slash" ); + if let Some(install) = skill.install { + assert!( + install.contains(skill.name), + "endorsed skill {} install hint should reference its name", + skill.name + ); + } assert!( seen.insert(skill.name), "duplicate endorsed skill name: {}", @@ -628,6 +799,49 @@ mod tests { } } + #[test] + fn endorsed_skills_include_nvidia_cuda_x_catalog() { + let endorsed = endorsed_skills(); + // Spot-check representative NVIDIA CUDA-X skills sourced from the + // official NVIDIA/skills catalog. + for expected in [ + "cuopt-numerical-optimization-api-python", + "cupynumeric-install", + "accelerated-computing-cudf", + "cudaq-guide", + "tilegym-adding-cutile-kernel", + ] { + let skill = endorsed + .iter() + .find(|s| s.name == expected) + .unwrap_or_else(|| panic!("expected endorsed NVIDIA skill {expected}")); + assert_eq!(skill.category, "NVIDIA CUDA-X"); + assert!( + skill + .install + .is_some_and(|cmd| cmd.contains("nvidia/skills")), + "NVIDIA skill {expected} should have an nvidia/skills install hint" + ); + } + } + + #[test] + fn endorsed_skills_include_anthropic_frontend_design() { + let skill = endorsed_skills() + .iter() + .find(|s| s.name == "frontend-design") + .expect("expected endorsed Anthropic frontend-design skill"); + assert_eq!(skill.category, "Anthropic Design"); + assert!( + skill.source.contains("anthropics/skills"), + "frontend-design should be sourced from anthropics/skills" + ); + assert!( + skill.install.is_some_and(|cmd| cmd.contains("anthropics/skills")), + "frontend-design should have an anthropics/skills install hint" + ); + } + #[test] fn registry_contains_reports_loaded_skills() { let temp = tempfile::tempdir().expect("tempdir"); diff --git a/crates/jcode-provider-gemini/src/lib.rs b/crates/jcode-provider-gemini/src/lib.rs index 30e7bbca9..a7eb062c0 100644 --- a/crates/jcode-provider-gemini/src/lib.rs +++ b/crates/jcode-provider-gemini/src/lib.rs @@ -153,7 +153,14 @@ pub struct VertexGenerateContentRequest { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct GeminiContent { + // Requests always set `role` (see `build_contents`), but `generateContent` + // responses occasionally omit it on a candidate's `content` (observed on + // Antigravity/Cloud Code Gemini-3 turns). The response-side value is never + // read, so default it rather than failing the whole decode with + // "missing field `role`". + #[serde(default)] pub role: String, + #[serde(default)] pub parts: Vec, } @@ -465,4 +472,54 @@ mod tests { ] ); } + + #[test] + fn candidate_content_decodes_without_role() { + // Antigravity/Cloud Code Gemini-3 responses occasionally omit `role` on + // a candidate's `content` (and sometimes `parts` entirely). The whole + // generateContent decode used to fail with "missing field `role`", + // which aborted the turn; assert the response now decodes and the + // function call survives. + let raw = json!({ + "response": { + "candidates": [{ + "content": { + "parts": [{ + "functionCall": {"name": "read", "args": {"file_path": "/tmp/x"}}, + "thoughtSignature": "SIG_XYZ" + }] + }, + "finishReason": "STOP" + }] + } + }) + .to_string(); + + let decoded: CodeAssistGenerateResponse = + serde_json::from_str(&raw).expect("decode response with role-less content"); + let candidates = decoded.response.unwrap().candidates.unwrap(); + let part = &candidates[0].content.as_ref().unwrap().parts[0]; + assert_eq!(part.function_call.as_ref().unwrap().name, "read"); + assert_eq!(part.thought_signature.as_deref(), Some("SIG_XYZ")); + } + + #[test] + fn candidate_content_decodes_without_parts() { + // A bare `content: {}` (no `role`, no `parts`) must not abort the decode. + let raw = json!({ + "response": { + "candidates": [{ "content": {}, "finishReason": "STOP" }] + } + }) + .to_string(); + + let decoded: CodeAssistGenerateResponse = + serde_json::from_str(&raw).expect("decode response with empty content"); + let content = decoded.response.unwrap().candidates.unwrap()[0] + .content + .clone() + .unwrap(); + assert!(content.role.is_empty()); + assert!(content.parts.is_empty()); + } } diff --git a/crates/jcode-tui/src/tui/app/auth.rs b/crates/jcode-tui/src/tui/app/auth.rs index 0e31dfd6d..32e3437bb 100644 --- a/crates/jcode-tui/src/tui/app/auth.rs +++ b/crates/jcode-tui/src/tui/app/auth.rs @@ -2281,6 +2281,13 @@ impl App { .await { Ok(outcome) => { + // Auto-import bypasses the manual `pending_login` + // telemetry path, so record `auth_success` for each + // imported provider to keep the activation funnel + // accurate. + for (provider, method) in &outcome.imported_auth_labels { + crate::telemetry::record_auth_success(provider, method); + } Bus::global().publish(BusEvent::LoginCompleted(LoginCompleted { provider: "auto-import".to_string(), success: outcome.imported > 0, diff --git a/crates/jcode-tui/src/tui/app/inline_interactive.rs b/crates/jcode-tui/src/tui/app/inline_interactive.rs index 20373e7e9..4106e9e1a 100644 --- a/crates/jcode-tui/src/tui/app/inline_interactive.rs +++ b/crates/jcode-tui/src/tui/app/inline_interactive.rs @@ -1636,6 +1636,34 @@ impl App { }); } + /// Rebuild the picker overlay from a freshly loaded session list, applying + /// the filter for the active picker mode. Returns true when the overlay was + /// (re)built so the caller can request a redraw. + fn apply_loaded_session_picker( + &mut self, + server_groups: Vec, + orphan_sessions: Vec, + ) -> bool { + match self.session_picker_mode { + SessionPickerMode::Resume => { + let picker = SessionPicker::new_grouped(server_groups, orphan_sessions); + self.session_picker_overlay = Some(RefCell::new(picker)); + self.set_status_notice("Sessions loaded"); + true + } + SessionPickerMode::CatchUp => { + let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions); + picker.activate_catchup_filter(); + self.session_picker_overlay = Some(RefCell::new(picker)); + self.set_status_notice("Catch Up sessions loaded"); + true + } + // Onboarding loads its scoped transcript list synchronously, so it + // never flows through this async path. + SessionPickerMode::Onboarding { .. } => false, + } + } + pub(super) fn poll_session_picker_load(&mut self) -> bool { let recv_result = { let Some(pending) = self.pending_session_picker_load.as_ref() else { @@ -1644,24 +1672,23 @@ impl App { pending.receiver.try_recv() }; + let picker_active = self.session_picker_overlay.is_some() + && matches!( + self.session_picker_mode, + SessionPickerMode::Resume | SessionPickerMode::CatchUp + ); + match recv_result { Ok(Ok((server_groups, orphan_sessions))) => { self.pending_session_picker_load = None; - if self.session_picker_overlay.is_some() - && self.session_picker_mode == SessionPickerMode::Resume - { - let picker = SessionPicker::new_grouped(server_groups, orphan_sessions); - self.session_picker_overlay = Some(RefCell::new(picker)); - self.set_status_notice("Sessions loaded"); - return true; + if picker_active { + return self.apply_loaded_session_picker(server_groups, orphan_sessions); } false } Ok(Err(e)) => { self.pending_session_picker_load = None; - if self.session_picker_overlay.is_some() - && self.session_picker_mode == SessionPickerMode::Resume - { + if picker_active { self.session_picker_overlay = None; self.push_display_message(DisplayMessage::error(format!( "Failed to load sessions: {}", @@ -1675,9 +1702,7 @@ impl App { Err(std::sync::mpsc::TryRecvError::Empty) => false, Err(std::sync::mpsc::TryRecvError::Disconnected) => { self.pending_session_picker_load = None; - if self.session_picker_overlay.is_some() - && self.session_picker_mode == SessionPickerMode::Resume - { + if picker_active { self.session_picker_overlay = None; self.push_display_message(DisplayMessage::error( "Session loading stopped before returning a result.".to_string(), @@ -1700,20 +1725,26 @@ impl App { return; } - match session_picker::load_sessions_grouped() { - Ok((server_groups, orphan_sessions)) => { - let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions); - picker.activate_catchup_filter(); - self.session_picker_overlay = Some(RefCell::new(picker)); - self.session_picker_mode = SessionPickerMode::CatchUp; - } - Err(e) => { - self.push_display_message(DisplayMessage::error(format!( - "Failed to load catch-up sessions: {}", - e - ))); - } - } + // Show the picker overlay immediately (using the cached list when + // available) and load the full session list off-thread. This keeps the + // live TUI responsive instead of blocking on a multi-hundred-ms scan of + // every historical session. + let mut picker = if let Some((server_groups, orphan_sessions)) = + session_picker::load_cached_sessions_grouped() + { + let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions); + picker.activate_catchup_filter(); + picker + } else { + SessionPicker::loading() + }; + // Ensure the filter is applied even on the loading placeholder so the + // refreshed list lands in the catch-up view. + picker.activate_catchup_filter(); + self.session_picker_overlay = Some(RefCell::new(picker)); + self.session_picker_mode = SessionPickerMode::CatchUp; + self.set_status_notice("Loading Catch Up sessions..."); + self.start_session_picker_load(); } pub(super) fn handle_session_picker_selection(&mut self, targets: &[ResumeTarget]) { diff --git a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs index 4d283b3b1..0803c626c 100644 --- a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs +++ b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs @@ -585,6 +585,13 @@ impl App { return; } }; + // Auto-import bypasses the manual `pending_login` path, so record + // `auth_success` here for each imported provider. Without this the + // onboarding activation funnel undercounts every imported login + // (the happy path of the guided first-run flow). + for (provider, method) in &outcome.imported_auth_labels { + crate::telemetry::record_auth_success(provider, method); + } crate::bus::Bus::global().publish(crate::bus::BusEvent::LoginCompleted( crate::bus::LoginCompleted { provider: "auto-import".to_string(), @@ -603,17 +610,12 @@ impl App { ExternalCli::ClaudeCode => SessionFilterMode::ClaudeCode, }; - let (server_groups, orphan_sessions) = match session_picker::load_sessions_grouped() { - Ok(loaded) => loaded, - Err(err) => { - crate::logging::error(&format!( - "onboarding: failed to load {} sessions: {err}", - cli.label() - )); - self.onboarding_fallback_to_session_search(cli); - return; - } - }; + // The onboarding picker only ever shows this one external CLI's + // transcripts, so load just those instead of paying the full + // `load_sessions_grouped` cost (parsing every jcode snapshot, the other + // CLIs, and listing servers). This keeps first-run onboarding snappy. + let (server_groups, orphan_sessions) = + session_picker::load_external_cli_sessions_grouped(cli); let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions); picker.activate_external_cli_filter(filter); diff --git a/crates/jcode-tui/src/tui/app/state_ui.rs b/crates/jcode-tui/src/tui/app/state_ui.rs index ed0c543e7..03ca7df41 100644 --- a/crates/jcode-tui/src/tui/app/state_ui.rs +++ b/crates/jcode-tui/src/tui/app/state_ui.rs @@ -1514,19 +1514,50 @@ fn build_skills_report(app: &App) -> String { .collect() }; out.push_str("\nEndorsed skills (recommended by jcode)\n"); + // Group by category, preserving first-seen category order. + let mut category_order: Vec<&str> = Vec::new(); for endorsed in crate::skill::endorsed_skills() { - let status = if installed.contains(endorsed.name) { - "installed" - } else { - "not installed" - }; - out.push_str(&format!("- /{} [{}]\n", endorsed.name, status)); - out.push_str(&format!(" {}\n", endorsed.description)); - out.push_str(&format!(" source: {}\n", endorsed.source)); + if !category_order.contains(&endorsed.category) { + category_order.push(endorsed.category); + } + } + for category in category_order { + let installed_in_category = crate::skill::endorsed_skills() + .iter() + .filter(|e| e.category == category && installed.contains(e.name)) + .count(); + let total_in_category = crate::skill::endorsed_skills() + .iter() + .filter(|e| e.category == category) + .count(); + out.push_str(&format!( + "\n {} ({}/{} installed)\n", + category, installed_in_category, total_in_category + )); + for endorsed in crate::skill::endorsed_skills() + .iter() + .filter(|e| e.category == category) + { + let is_installed = installed.contains(endorsed.name); + let status = if is_installed { + "installed" + } else { + "not installed" + }; + out.push_str(&format!(" - /{} [{}]\n", endorsed.name, status)); + out.push_str(&format!(" {}\n", endorsed.description)); + out.push_str(&format!(" source: {}\n", endorsed.source)); + if !is_installed && let Some(install) = endorsed.install { + out.push_str(&format!(" install: {}\n", install)); + } + } } out.push_str("\nActivate a skill by typing its slash command (e.g. /optimization).\n"); out.push_str("Manage skills with the skill_manage tool (list/load/read/reload).\n"); + out.push_str( + "NVIDIA CUDA-X skills come from the official catalog at https://github.com/NVIDIA/skills.\n", + ); out.trim_end().to_string() } diff --git a/crates/jcode-tui/src/tui/app/tests.rs b/crates/jcode-tui/src/tui/app/tests.rs index d0f524d29..2bc3232d3 100644 --- a/crates/jcode-tui/src/tui/app/tests.rs +++ b/crates/jcode-tui/src/tui/app/tests.rs @@ -430,6 +430,20 @@ fn skills_command_lists_loaded_and_endorsed_skills() { content.contains("[installed]") || content.contains("[not installed]"), "{content}" ); + // NVIDIA CUDA-X skills are grouped under their own category with install hints. + assert!(content.contains("NVIDIA CUDA-X"), "{content}"); + assert!( + content.contains("/cuopt-numerical-optimization-api-python"), + "{content}" + ); + assert!( + content.contains("install: npx skills add nvidia/skills"), + "{content}" + ); + assert!( + content.contains("https://github.com/NVIDIA/skills"), + "{content}" + ); assert_eq!( app.display_messages().last().unwrap().title.as_deref(), Some("Skills") diff --git a/crates/jcode-tui/src/tui/session_picker.rs b/crates/jcode-tui/src/tui/session_picker.rs index ef3c3cc9e..988dfe8b1 100644 --- a/crates/jcode-tui/src/tui/session_picker.rs +++ b/crates/jcode-tui/src/tui/session_picker.rs @@ -34,6 +34,7 @@ mod render; #[cfg(test)] use loading::collect_recent_session_stems; pub(crate) use loading::latest_external_cli_session_secs; +pub(crate) use loading::load_external_cli_sessions_grouped; use loading::{build_messages_preview, build_search_index, crashed_sessions_from_all_sessions}; pub use loading::{ invalidate_session_list_cache, load_cached_sessions_grouped, load_servers, load_sessions, diff --git a/crates/jcode-tui/src/tui/session_picker/loading.rs b/crates/jcode-tui/src/tui/session_picker/loading.rs index b1f6a8343..26d5e935a 100644 --- a/crates/jcode-tui/src/tui/session_picker/loading.rs +++ b/crates/jcode-tui/src/tui/session_picker/loading.rs @@ -53,6 +53,77 @@ const SAVED_METADATA_TAIL_SCAN_BYTES: u64 = 64 * 1024; const INITIAL_TRANSCRIPT_SEARCH_BUDGET_BYTES: usize = 64 * 1024; const MESSAGE_SEARCH_EXCERPT_BYTES: usize = 8 * 1024; +/// Upper bound on worker threads used to parse/stat session files in parallel. +/// The session picker load is dominated by per-file IO + JSON parsing across +/// hundreds of snapshots; fanning that work out across cores turns the cold +/// `/resume` load from a serial slog into a roughly core-count-bounded scan. +const SESSION_LOAD_MAX_THREADS: usize = 8; + +/// Number of worker threads to use for a parallel pass over `item_count` items. +/// Returns 1 for tiny batches so we never pay thread-spawn overhead when there +/// is barely any work to do. +fn session_load_thread_count(item_count: usize) -> usize { + if item_count <= 1 { + return 1; + } + let cores = std::thread::available_parallelism() + .map(|n| n.get()) + .unwrap_or(1); + cores.clamp(1, SESSION_LOAD_MAX_THREADS).min(item_count) +} + +/// Map `f` over `items` across a bounded scoped thread pool, preserving input +/// order in the returned vector. Falls back to a plain serial map when only one +/// worker is warranted. `f` must be `Sync` because every worker shares it. +fn parallel_map(items: Vec, f: F) -> Vec +where + T: Send, + R: Send, + F: Fn(T) -> R + Sync, +{ + let thread_count = session_load_thread_count(items.len()); + if thread_count <= 1 { + return items.into_iter().map(f).collect(); + } + + // Partition the work into `thread_count` owned chunks so each worker can + // take its inputs by value (no clone, no shared mutation). We remember the + // starting offset of each chunk to stitch results back into input order. + let chunk_size = items.len().div_ceil(thread_count); + let mut chunks: Vec<(usize, Vec)> = Vec::with_capacity(thread_count); + let mut offset = 0usize; + let mut remaining = items; + while !remaining.is_empty() { + let take = chunk_size.min(remaining.len()); + let rest = remaining.split_off(take); + chunks.push((offset, remaining)); + offset += take; + remaining = rest; + } + + let f = &f; + let mut results: Vec<(usize, Vec)> = std::thread::scope(|scope| { + let mut handles = Vec::with_capacity(chunks.len()); + for (start, chunk) in chunks { + handles.push(scope.spawn(move || { + (start, chunk.into_iter().map(f).collect::>()) + })); + } + handles + .into_iter() + .filter_map(|handle| handle.join().ok()) + .collect() + }); + + results.sort_by_key(|(start, _)| *start); + let total: usize = results.iter().map(|(_, chunk)| chunk.len()).sum(); + let mut out = Vec::with_capacity(total); + for (_, chunk) in results { + out.extend(chunk); + } + out +} + #[derive(Clone)] struct SessionListCacheEntry { loaded_at: Instant, @@ -419,9 +490,8 @@ fn session_sort_key(stem: &str) -> u64 { .unwrap_or(0) } -fn entry_modified_sort_key(entry: &std::fs::DirEntry) -> u128 { - entry - .metadata() +fn path_modified_sort_key(path: &Path) -> u128 { + path.metadata() .and_then(|meta| meta.modified()) .ok() .and_then(|time| time.duration_since(std::time::UNIX_EPOCH).ok()) @@ -805,8 +875,10 @@ fn collect_recent_session_candidates( sessions_dir: &Path, candidate_limit: usize, ) -> Result> { - let mut by_stem: HashMap = HashMap::new(); - + // Phase 1: a single cheap `readdir` pass to enumerate candidate files. We + // defer the per-file `stat` (the expensive part on directories with 100k+ + // session files) to a parallel pass so it does not serialize startup. + let mut raw: Vec<(String, bool, PathBuf)> = Vec::new(); for entry in std::fs::read_dir(sessions_dir)? { let entry = entry?; let file_name = entry.file_name(); @@ -819,11 +891,20 @@ fn collect_recent_session_candidates( if stem.starts_with("imported_") { continue; } + raw.push((stem.to_string(), has_snapshot, entry.path())); + } + + // Phase 2: stat each file's modification time in parallel. + let stamped = parallel_map(raw, |(stem, has_snapshot, path)| { + (stem, has_snapshot, path_modified_sort_key(&path)) + }); - let modified = entry_modified_sort_key(&entry); + // Phase 3: merge per-stem metadata (snapshot + newest journal/snapshot mtime). + let mut by_stem: HashMap = HashMap::new(); + for (stem, has_snapshot, modified) in stamped { by_stem - .entry(stem.to_string()) - .or_insert_with(|| SessionCandidateMeta::new(stem)) + .entry(stem.clone()) + .or_insert_with(|| SessionCandidateMeta::new(&stem)) .update(modified, has_snapshot); } @@ -1424,6 +1505,99 @@ pub(super) fn crashed_sessions_from_all_sessions( }) } +/// Parse a single jcode session snapshot (+ journal) into a [`SessionInfo`], +/// returning `None` for empty/imported sessions or read/parse errors. Pulled out +/// of `load_sessions` so the summary pass can run across a scoped thread pool. +fn parse_jcode_session_info( + sessions_dir: &Path, + stem: &str, + catchup_seen: &crate::catchup::CatchupSeenSnapshot, +) -> Option { + // Imported stems are filtered out by `collect_recent_session_candidates`, but + // keep the cheap defensive check so this helper is safe to call directly. + if stem.starts_with("imported_cc_") + || stem.starts_with("imported_codex_") + || stem.starts_with("imported_pi_") + || stem.starts_with("imported_opencode_") + { + return None; + } + + let path = sessions_dir.join(format!("{stem}.json")); + let session = load_session_summary(&path).ok()?; + + let visible_message_count = session.messages.visible_message_count; + if visible_message_count == 0 { + return None; + } + + let short_name = session + .short_name + .clone() + .or_else(|| extract_session_name(stem).map(|s| s.to_string())) + .unwrap_or_else(|| stem.to_string()); + let icon = session_icon(&short_name); + + let user_message_count = session.messages.user_message_count; + let assistant_message_count = session.messages.assistant_message_count; + let estimated_tokens = session.messages.estimated_tokens; + + let status = session.status.clone(); + let needs_catchup = catchup_seen.needs_catchup(stem, session.updated_at, &status); + let source = classify_session_source( + stem, + session.provider_key.as_deref(), + session.model.as_deref(), + ); + + let title = session + .custom_title + .or(session.title) + .unwrap_or_else(|| short_name.clone()); + let search_index = build_search_index_from_summary( + stem, + &short_name, + &title, + session.working_dir.as_deref(), + session.save_label.as_deref(), + &session.messages.search_text, + ); + + Some(SessionInfo { + id: stem.to_string(), + parent_id: session.parent_id, + short_name, + icon: icon.to_string(), + title, + message_count: visible_message_count, + user_message_count, + assistant_message_count, + created_at: session.created_at, + last_message_time: session.updated_at, + last_active_at: session.last_active_at, + working_dir: session.working_dir, + model: session.model, + provider_key: session.provider_key, + is_canary: session.is_canary, + is_debug: session.is_debug, + saved: session.saved, + save_label: session.save_label, + status, + needs_catchup, + estimated_tokens, + first_user_prompt: session.messages.first_user_prompt, + messages_preview: Vec::new(), + search_index, + server_name: None, + server_icon: None, + source, + resume_target: ResumeTarget::JcodeSession { + session_id: stem.to_string(), + }, + external_path: None, + }) +} + pub fn load_sessions() -> Result> { let sessions_dir = storage::jcode_dir()?.join("sessions"); let scan_limit = session_scan_limit(); @@ -1437,8 +1611,6 @@ pub fn load_sessions() -> Result> { return Ok(entry.sessions.clone()); } - let mut sessions: Vec = Vec::new(); - let candidates = if sessions_dir.exists() { // Keep startup responsive by avoiding `session_has_history` here. That helper parses // snapshots/journals, and `load_session_summary` below parses the same files again. @@ -1459,100 +1631,65 @@ pub fn load_sessions() -> Result> { Vec::new() }; - let external_sessions = std::thread::scope(|scope| { + // Loading the catch-up "seen" state once (instead of per session) avoids + // re-reading and re-parsing `catchup_seen.json` for every candidate. + let catchup_seen = crate::catchup::CatchupSeenSnapshot::load(); + let sessions_dir_ref = &sessions_dir; + let catchup_ref = &catchup_seen; + + let (mut sessions, external_sessions) = std::thread::scope(|scope| { let claude_handle = scope.spawn(|| load_external_claude_code_sessions(scan_limit)); let codex_handle = scope.spawn(|| load_external_codex_sessions(scan_limit)); let pi_handle = scope.spawn(|| load_external_pi_sessions(scan_limit)); let opencode_handle = scope.spawn(|| load_external_opencode_sessions(scan_limit)); - for stem in candidates { - if sessions.len() >= scan_limit { - let saved = sessions_dir.join(format!("{stem}.json")); - if !session_snapshot_or_journal_has_saved_metadata(&saved) { - continue; + // Phase 1: walk the recency-ordered candidates in parallel windows until + // we have collected `scan_limit` non-empty sessions. `boundary` marks the + // candidate index where the serial fill would start applying the saved + // gate, so beyond it we only keep saved sessions (Phase 2). Parsing each + // window in parallel keeps the per-file JSON cost off the critical path. + // + // Windows are sized to `scan_limit`: only the final window (the one that + // crosses `scan_limit`) can over-parse, so wasted work is bounded to a + // single window's worth of candidates while still parallelizing widely. + let mut sessions: Vec = Vec::new(); + let mut boundary = candidates.len(); + let window = scan_limit.max(1); + let mut start = 0; + 'fill: while start < candidates.len() { + let end = (start + window).min(candidates.len()); + let batch = candidates[start..end].to_vec(); + let parsed = parallel_map(batch, move |stem| { + parse_jcode_session_info(sessions_dir_ref, &stem, catchup_ref) + }); + for (offset, parsed_session) in parsed.into_iter().enumerate() { + if let Some(info) = parsed_session { + sessions.push(info); + if sessions.len() >= scan_limit { + boundary = start + offset + 1; + break 'fill; + } } } - if stem.starts_with("imported_cc_") - || stem.starts_with("imported_codex_") - || stem.starts_with("imported_pi_") - || stem.starts_with("imported_opencode_") - { - continue; - } - let path = sessions_dir.join(format!("{stem}.json")); - if let Ok(session) = load_session_summary(&path) { - let short_name = session - .short_name - .clone() - .or_else(|| extract_session_name(&stem).map(|s| s.to_string())) - .unwrap_or_else(|| stem.clone()); - let icon = session_icon(&short_name); - - let visible_message_count = session.messages.visible_message_count; - if visible_message_count == 0 { - continue; - } - let user_message_count = session.messages.user_message_count; - let assistant_message_count = session.messages.assistant_message_count; - let estimated_tokens = session.messages.estimated_tokens; - - let status = session.status.clone(); - let needs_catchup = - crate::catchup::needs_catchup(&stem, session.updated_at, &status); - let source = classify_session_source( - &stem, - session.provider_key.as_deref(), - session.model.as_deref(), - ); - - let title = session - .custom_title - .or(session.title) - .unwrap_or_else(|| short_name.clone()); - let messages_preview: Vec = Vec::new(); - let search_index = build_search_index_from_summary( - &stem, - &short_name, - &title, - session.working_dir.as_deref(), - session.save_label.as_deref(), - &session.messages.search_text, - ); + start = end; + } - sessions.push(SessionInfo { - id: stem.to_string(), - parent_id: session.parent_id, - short_name, - icon: icon.to_string(), - title, - message_count: visible_message_count, - user_message_count, - assistant_message_count, - created_at: session.created_at, - last_message_time: session.updated_at, - last_active_at: session.last_active_at, - working_dir: session.working_dir, - model: session.model, - provider_key: session.provider_key, - is_canary: session.is_canary, - is_debug: session.is_debug, - saved: session.saved, - save_label: session.save_label, - status, - needs_catchup, - estimated_tokens, - first_user_prompt: session.messages.first_user_prompt, - messages_preview, - search_index, - server_name: None, - server_icon: None, - source, - resume_target: ResumeTarget::JcodeSession { - session_id: stem.to_string(), - }, - external_path: None, - }); - } + // Phase 2: beyond the fill boundary the serial loader only keeps saved + // sessions. Compute the cheap saved tail-gate across the remaining + // candidates in parallel, then fully parse just the gate-passers. + if boundary < candidates.len() { + let tail: Vec = candidates[boundary..].to_vec(); + let gate_passers: Vec = parallel_map(tail, move |stem| { + let path = sessions_dir_ref.join(format!("{stem}.json")); + session_snapshot_or_journal_has_saved_metadata(&path).then_some(stem) + }) + .into_iter() + .flatten() + .collect(); + let saved_sessions = parallel_map(gate_passers, move |stem| { + parse_jcode_session_info(sessions_dir_ref, &stem, catchup_ref) + }); + sessions.extend(saved_sessions.into_iter().flatten()); } let mut external = Vec::new(); @@ -1560,7 +1697,7 @@ pub fn load_sessions() -> Result> { external.extend(codex_handle.join().unwrap_or_default()); external.extend(pi_handle.join().unwrap_or_default()); external.extend(opencode_handle.join().unwrap_or_default()); - external + (sessions, external) }); sessions.extend(external_sessions); @@ -1706,9 +1843,10 @@ fn load_external_codex_sessions(scan_limit: usize) -> Vec { return Vec::new(); } - collect_recent_files_recursive(&root, "jsonl", scan_limit) + let paths = collect_recent_files_recursive(&root, "jsonl", scan_limit); + parallel_map(paths, |path| load_codex_session_stub(&path).ok().flatten()) .into_iter() - .filter_map(|path| load_codex_session_stub(&path).ok().flatten()) + .flatten() .collect() } @@ -1915,9 +2053,10 @@ fn load_external_pi_sessions(scan_limit: usize) -> Vec { return Vec::new(); } - collect_recent_files_recursive(&root, "jsonl", scan_limit) + let paths = collect_recent_files_recursive(&root, "jsonl", scan_limit); + parallel_map(paths, |path| load_pi_session_stub(&path).ok().flatten()) .into_iter() - .filter_map(|path| load_pi_session_stub(&path).ok().flatten()) + .flatten() .collect() } @@ -2169,9 +2308,10 @@ fn load_external_opencode_sessions(scan_limit: usize) -> Vec { return Vec::new(); } - collect_recent_files_recursive(&root, "json", scan_limit) + let paths = collect_recent_files_recursive(&root, "json", scan_limit); + parallel_map(paths, |path| load_opencode_session_stub(&path).ok().flatten()) .into_iter() - .filter_map(|path| load_opencode_session_stub(&path).ok().flatten()) + .flatten() .collect() } @@ -2473,6 +2613,27 @@ pub fn load_sessions_grouped() -> Result<(Vec, Vec)> { Ok((groups, orphan_sessions)) } +/// Load only the sessions for a single external CLI (Codex or Claude Code), +/// returned as orphan [`SessionInfo`] grouped output compatible with +/// `SessionPicker::new_grouped`. +/// +/// First-run onboarding's "continue where you left off" picker is filtered to a +/// single external CLI, so the full `load_sessions_grouped` work (parsing every +/// jcode snapshot, the other CLIs, and listing servers) is wasted there. This +/// scoped loader keeps onboarding responsive by touching only the relevant +/// transcripts. +pub(crate) fn load_external_cli_sessions_grouped( + cli: crate::tui::app::onboarding_flow::ExternalCli, +) -> (Vec, Vec) { + use crate::tui::app::onboarding_flow::ExternalCli; + let scan_limit = session_scan_limit(); + let sessions = match cli { + ExternalCli::Codex => load_external_codex_sessions(scan_limit), + ExternalCli::ClaudeCode => load_external_claude_code_sessions(scan_limit), + }; + (Vec::new(), sessions) +} + #[cfg(test)] #[path = "loading_tests.rs"] mod tests; diff --git a/crates/jcode-tui/src/tui/session_picker/loading_tests.rs b/crates/jcode-tui/src/tui/session_picker/loading_tests.rs index 12b285927..769a9b888 100644 --- a/crates/jcode-tui/src/tui/session_picker/loading_tests.rs +++ b/crates/jcode-tui/src/tui/session_picker/loading_tests.rs @@ -783,3 +783,122 @@ fn benchmark_resume_loading_reports_timings() { sessions.len() ); } + +#[test] +fn onboarding_scoped_loader_returns_only_codex_sessions() { + use crate::tui::app::onboarding_flow::ExternalCli; + let _env_lock = crate::storage::lock_test_env(); + let temp = tempfile::tempdir().expect("temp dir"); + let _home = EnvVarGuard::set_path("JCODE_HOME", temp.path()); + + // A Codex transcript that the onboarding picker should surface. + let codex_dir = temp.path().join("external/.codex/sessions/2026/05/01"); + std::fs::create_dir_all(&codex_dir).expect("create codex dir"); + std::fs::write( + codex_dir.join("rollout-2026-05-01T10-00-00-test.jsonl"), + "{\"timestamp\":\"2026-05-01T10:00:00Z\",\"type\":\"session_meta\",\"payload\":{\"id\":\"codex-onboarding-test\",\"timestamp\":\"2026-05-01T09:59:00Z\",\"cwd\":\"/tmp/codex-onboard\"}}\n", + ) + .expect("write codex transcript"); + + // A jcode session that must NOT appear in the scoped Codex view (the whole + // point of the scoped loader is to skip parsing these on onboarding). + let mut jcode_session = Session::create_with_id( + "session_onboarding_jcode_1780000000000".to_string(), + Some("/tmp/jcode-onboard".to_string()), + Some("Jcode Onboarding".to_string()), + ); + jcode_session.append_stored_message(crate::session::StoredMessage { + id: "msg-1".to_string(), + role: crate::message::Role::User, + content: vec![crate::message::ContentBlock::Text { + text: "should not show in codex onboarding view".to_string(), + cache_control: None, + }], + display_role: None, + timestamp: None, + tool_duration_ms: None, + token_usage: None, + }); + jcode_session.save().expect("save jcode session"); + + let (groups, orphans) = load_external_cli_sessions_grouped(ExternalCli::Codex); + assert!(groups.is_empty(), "scoped loader produces only orphans"); + assert!( + orphans + .iter() + .any(|s| s.id == "codex:codex-onboarding-test"), + "expected codex transcript in scoped onboarding load: {:?}", + orphans.iter().map(|s| &s.id).collect::>() + ); + assert!( + orphans + .iter() + .all(|s| matches!(s.resume_target, ResumeTarget::CodexSession { .. })), + "scoped Codex load must not include jcode/other-CLI sessions" + ); +} + +#[test] +fn parallel_fill_skips_many_recent_empty_sessions_to_reach_scan_limit() { + let _env_lock = crate::storage::lock_test_env(); + let temp = tempfile::tempdir().expect("temp dir"); + let _home = EnvVarGuard::set_path("JCODE_HOME", temp.path()); + let _scan_limit = EnvVarGuard::set_str("JCODE_SESSION_PICKER_MAX_SESSIONS", "50"); + + let sessions_dir = temp.path().join("sessions"); + std::fs::create_dir_all(&sessions_dir).expect("create sessions dir"); + + let push_message = |session: &mut Session, text: &str| { + session.append_stored_message(crate::session::StoredMessage { + id: format!("msg-{text}"), + role: crate::message::Role::User, + content: vec![crate::message::ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }], + display_role: None, + timestamp: None, + tool_duration_ms: None, + token_usage: None, + }); + }; + + // Many recent but empty sessions (no visible messages) that the parallel + // two-phase fill must skip while still collecting `scan_limit` real ones. + for idx in 0..200 { + let mut session = Session::create_with_id( + format!("session_empty_{}", 1_790_000_000_000u64 + idx as u64), + Some(format!("/tmp/empty-{idx:03}")), + Some(format!("Empty {idx:03}")), + ); + session.save().expect("save empty session"); + } + // Older but non-empty sessions that should fill the list despite being less + // recent than the empty stubs above. + for idx in 0..60 { + let mut session = Session::create_with_id( + format!("session_full_{}", 1_780_000_000_000u64 + idx as u64), + Some(format!("/tmp/full-{idx:03}")), + Some(format!("Full {idx:03}")), + ); + push_message(&mut session, &format!("real content {idx:03}")); + session.save().expect("save full session"); + } + + invalidate_session_list_cache(); + let sessions = load_sessions().expect("load sessions"); + let visible: Vec<&SessionInfo> = sessions + .iter() + .filter(|s| s.id.starts_with("session_full_")) + .collect(); + assert_eq!( + visible.len(), + 50, + "expected exactly scan_limit non-empty sessions, got {}", + visible.len() + ); + assert!( + !sessions.iter().any(|s| s.id.starts_with("session_empty_")), + "empty sessions must be filtered out of the loaded list" + ); +} diff --git a/telemetry-worker/README.md b/telemetry-worker/README.md index 9d7b289c8..e58b87b6a 100644 --- a/telemetry-worker/README.md +++ b/telemetry-worker/README.md @@ -2,6 +2,38 @@ Cloudflare Worker that receives anonymous telemetry events from jcode. +## Dashboard + +The worker also serves a visual dashboard so you do not have to run SQL by hand: + +- `GET /` (or `/dashboard`) - the HTML dashboard. Public page, no data until a + token is entered. +- `GET /v1/stats` - JSON aggregates (counts only, never raw event rows), gated + behind `DASHBOARD_TOKEN`. Accepts `Authorization: Bearer `, + `?token=`, or `X-Dashboard-Token`. +- `POST /v1/event` - unchanged event ingest. + +The headline number is **Total users**: distinct, non-CI `telemetry_id`s that +ever installed jcode OR did meaningful work in it. The page shows every metric +the API returns, organized into tiers (hero / key cards / diagnostic tables) so +the important numbers stand out while nothing is hidden. Each user tier (reached +> total > core) is broader than the one below it, and CI / raw figures are shown +alongside for transparency. + +Set the token once (it is a Worker secret, not in source): + +```bash +wrangler secret put DASHBOARD_TOKEN +# then open https:/// and paste the token +``` + +If `DASHBOARD_TOKEN` is unset the stats endpoint stays locked (deny by default). +The CLI equivalent of the headline number: + +```bash +wrangler d1 execute jcode-telemetry --remote --file=users.sql +``` + ## Setup 1. Install wrangler: `npm install` diff --git a/telemetry-worker/package.json b/telemetry-worker/package.json index 72ddec2dc..693f266a1 100644 --- a/telemetry-worker/package.json +++ b/telemetry-worker/package.json @@ -2,6 +2,7 @@ "name": "jcode-telemetry", "version": "1.0.0", "private": true, + "type": "module", "scripts": { "dev": "npx wrangler dev", "deploy": "npx wrangler deploy", @@ -18,7 +19,8 @@ "migrate:daily-active-backfill": "npx wrangler d1 execute jcode-telemetry --remote --file=migrations/0011_backfill_daily_active_recent.sql", "migrate:daily-active-ci": "npx wrangler d1 execute jcode-telemetry --remote --file=migrations/0012_daily_active_ci_flag.sql", "health": "npx wrangler d1 execute jcode-telemetry --remote --file=health.sql", - "dau": "npx wrangler d1 execute jcode-telemetry --remote --file=dau.sql" + "dau": "npx wrangler d1 execute jcode-telemetry --remote --file=dau.sql", + "users": "npx wrangler d1 execute jcode-telemetry --remote --file=users.sql" }, "devDependencies": { "wrangler": "^4" diff --git a/telemetry-worker/src/dashboard.js b/telemetry-worker/src/dashboard.js new file mode 100644 index 000000000..d5ec6c94e --- /dev/null +++ b/telemetry-worker/src/dashboard.js @@ -0,0 +1,494 @@ +// jcode telemetry console — "Terminal Observatory" aesthetic. +// +// Design intent (frontend-design skill): jcode is a terminal coding agent, so +// the dashboard is built as a precision instrument readout, not generic SaaS. +// - Type: JetBrains Mono (display + data) paired with a quiet grotesk for prose. +// - Palette: near-black graphite, warm phosphor amber as the dominant signal, +// a single cyan accent for the live/headline series. No purple-on-white. +// - Composition: a station-clock hero number, hairline rules, dense tabular +// instrument panels, scanline texture, staggered load-in reveals. +// +// Self-contained (HTML/CSS/inline-SVG, fonts via Google Fonts ). Fetches +// /v1/stats with the dashboard token. Every metric the API returns is shown, +// grouped by importance (HEADLINE / SIGNAL / DIAGNOSTIC). + +export const DASHBOARD_HTML = ` + + + + +jcode · telemetry console + + + + + + +
+ + + +
+ + + +`; diff --git a/telemetry-worker/src/stats.js b/telemetry-worker/src/stats.js new file mode 100644 index 000000000..a359dded4 --- /dev/null +++ b/telemetry-worker/src/stats.js @@ -0,0 +1,389 @@ +// Read-only telemetry aggregation for the dashboard. +// +// Everything here returns counts/aggregates only, never raw event rows. Metrics +// are organized into tiers (headline / secondary / diagnostic) and tagged with +// importance so the dashboard can present "the one number" prominently while +// still surfacing all available information. +// +// Accuracy rules (mirrors README "Accuracy notes"): +// - Users are distinct telemetry_id, never event counts. +// - "meaningful" = real work; see MEANINGFUL_SQL. +// - Headline numbers exclude CI traffic (is_ci = 1) and non-release channels. +// - Raw / less-filtered tiers are always reported alongside, never removed. + +// Meaningful-activity predicate, shared by every query so all windows agree. +// A row is meaningful if it is a session_end/session_crash that did real work, +// OR a turn_end (which only fires after a completed user turn) that did work. +const MEANINGFUL_SQL = `( + (event IN ('session_end','session_crash') AND ( + turns > 0 OR had_user_prompt > 0 OR had_assistant_response > 0 + OR assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 + OR duration_secs > 0 OR error_provider_timeout > 0 OR error_auth_failed > 0 + OR error_tool_error > 0 OR error_mcp_error > 0 OR error_rate_limited > 0 + OR provider_switches > 0 OR model_switches > 0 + )) + OR (event = 'turn_end' AND ( + assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 + OR file_write_calls > 0 OR tests_run > 0 OR turn_success > 0 + )) +)`; + +const LIFECYCLE_EVENTS = "('session_start','turn_end','session_end','session_crash')"; + +async function one(env, sql) { + const result = await env.DB.prepare(sql).all(); + return (result.results && result.results[0]) || {}; +} + +async function many(env, sql) { + const result = await env.DB.prepare(sql).all(); + return result.results || []; +} + +export async function getStats(env) { + // --- Headline: total users (the one number) ----------------------------- + // A user is a distinct non-CI id that ever installed OR did meaningful work. + const totals = await one(env, ` + SELECT + COUNT(DISTINCT CASE WHEN is_ci = 0 AND (event = 'install' OR ${MEANINGFUL_SQL}) THEN telemetry_id END) AS total_users, + COUNT(DISTINCT CASE WHEN is_ci = 0 AND ${MEANINGFUL_SQL} THEN telemetry_id END) AS core_users, + COUNT(DISTINCT CASE WHEN is_ci = 0 THEN telemetry_id END) AS reached_users, + COUNT(DISTINCT CASE WHEN is_ci = 0 AND event = 'install' THEN telemetry_id END) AS installed_users, + COUNT(DISTINCT telemetry_id) AS all_ids_including_ci, + COUNT(DISTINCT CASE WHEN is_ci = 1 THEN telemetry_id END) AS ci_ids + FROM events + `); + + // --- Active users from the rollup (cheap, ingest-time) ------------------- + // DAU/WAU/MAU as distinct ids, headline = meaningful + release + non-CI. + const active = await one(env, ` + SELECT + COUNT(DISTINCT CASE WHEN activity_date = date('now') THEN telemetry_id END) AS dau_raw, + COUNT(DISTINCT CASE WHEN activity_date = date('now') AND meaningful_active > 0 THEN telemetry_id END) AS dau_meaningful, + COUNT(DISTINCT CASE WHEN activity_date = date('now') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS dau, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') THEN telemetry_id END) AS wau_raw, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') AND meaningful_active > 0 THEN telemetry_id END) AS wau_meaningful, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS wau, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') THEN telemetry_id END) AS mau_raw, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND meaningful_active > 0 THEN telemetry_id END) AS mau_meaningful, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS mau, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND last_is_ci = 1 THEN telemetry_id END) AS ci_mau + FROM daily_active_users + `); + + // --- Installs and lifecycle totals -------------------------------------- + const lifecycle = await one(env, ` + SELECT + SUM(CASE WHEN event = 'install' THEN 1 ELSE 0 END) AS install_events, + SUM(CASE WHEN event = 'upgrade' THEN 1 ELSE 0 END) AS upgrade_events, + SUM(CASE WHEN event = 'session_start' THEN 1 ELSE 0 END) AS session_starts, + SUM(CASE WHEN event = 'session_end' THEN 1 ELSE 0 END) AS session_ends, + SUM(CASE WHEN event = 'session_crash' THEN 1 ELSE 0 END) AS session_crashes, + SUM(CASE WHEN event = 'turn_end' THEN 1 ELSE 0 END) AS turn_ends, + COUNT(DISTINCT CASE WHEN event = 'install' THEN telemetry_id END) AS install_ids, + COUNT(DISTINCT CASE WHEN event = 'install' AND is_ci = 0 THEN telemetry_id END) AS install_ids_noci + FROM events + WHERE event IN ('install','upgrade','session_start','turn_end','session_end','session_crash') + `); + const lifecycleCompletion = + (lifecycle.session_starts || 0) > 0 + ? Number(((lifecycle.session_ends + lifecycle.session_crashes) / lifecycle.session_starts).toFixed(3)) + : null; + const crashRate = + (lifecycle.session_ends + lifecycle.session_crashes) > 0 + ? Number((lifecycle.session_crashes / (lifecycle.session_ends + lifecycle.session_crashes)).toFixed(4)) + : null; + + // --- New vs returning (last 30d), retention ----------------------------- + const retention = await one(env, ` + WITH cohort AS ( + SELECT DISTINCT telemetry_id FROM events + WHERE event = 'install' AND is_ci = 0 + AND created_at >= datetime('now','-14 days') AND created_at < datetime('now','-7 days') + ), retained AS ( + SELECT DISTINCT telemetry_id FROM events + WHERE event IN ('session_end','session_crash') AND is_ci = 0 + AND created_at >= datetime('now','-7 days') + ) + SELECT + (SELECT COUNT(*) FROM cohort) AS d7_cohort, + (SELECT COUNT(*) FROM cohort WHERE telemetry_id IN retained) AS d7_retained + `); + const d7Retention = + (retention.d7_cohort || 0) > 0 + ? Number((retention.d7_retained / retention.d7_cohort).toFixed(3)) + : null; + + // --- 30d engagement quality --------------------------------------------- + const quality = await one(env, ` + SELECT + AVG(duration_mins) AS avg_session_mins, + AVG(turns) AS avg_turns, + AVG(CASE WHEN session_success > 0 THEN 1.0 ELSE 0.0 END) AS success_rate, + AVG(CASE WHEN abandoned_before_response > 0 THEN 1.0 ELSE 0.0 END) AS abandon_rate, + AVG(first_assistant_response_ms) AS avg_first_response_ms, + AVG(first_tool_success_ms) AS avg_first_tool_success_ms, + AVG(CASE WHEN executed_tool_calls > 0 THEN CAST(tool_latency_total_ms AS REAL)/executed_tool_calls END) AS avg_tool_latency_ms, + SUM(input_tokens + output_tokens) AS tokens_30d, + AVG(CASE WHEN multi_sessioned > 0 THEN 1.0 ELSE 0.0 END) AS multi_session_rate + FROM events + WHERE event IN ('session_end','session_crash') + AND is_ci = 0 AND created_at > datetime('now','-30 days') + `); + + // --- Token usage (all-time + 30d, full breakdown incl. cache) ----------- + const tokens = await one(env, ` + SELECT + SUM(input_tokens) AS input_all, + SUM(output_tokens) AS output_all, + SUM(cache_read_input_tokens) AS cache_read_all, + SUM(cache_creation_input_tokens) AS cache_creation_all, + SUM(total_tokens) AS total_all, + SUM(CASE WHEN created_at > datetime('now','-30 days') THEN input_tokens ELSE 0 END) AS input_30d, + SUM(CASE WHEN created_at > datetime('now','-30 days') THEN output_tokens ELSE 0 END) AS output_30d, + SUM(CASE WHEN created_at > datetime('now','-30 days') THEN cache_read_input_tokens ELSE 0 END) AS cache_read_30d, + SUM(CASE WHEN created_at > datetime('now','-30 days') THEN cache_creation_input_tokens ELSE 0 END) AS cache_creation_30d, + SUM(CASE WHEN created_at > datetime('now','-30 days') THEN total_tokens ELSE 0 END) AS total_30d + FROM events + WHERE event IN ('session_end','session_crash') AND is_ci = 0 + `); + + // --- Agent autonomy (30d): spawning, background/subagent/swarm, time split + const agent = await one(env, ` + SELECT + SUM(spawned_agent_count) AS spawned_agents, + SUM(background_task_count) AS background_tasks, + SUM(background_task_completed_count) AS background_completed, + SUM(subagent_task_count) AS subagent_tasks, + SUM(subagent_success_count) AS subagent_success, + SUM(swarm_task_count) AS swarm_tasks, + SUM(swarm_success_count) AS swarm_success, + SUM(user_cancelled_count) AS user_cancelled, + SUM(agent_active_ms_total) AS agent_active_ms, + SUM(agent_model_ms_total) AS agent_model_ms, + SUM(agent_tool_ms_total) AS agent_tool_ms, + SUM(agent_blocked_ms_total) AS agent_blocked_ms, + SUM(session_idle_ms_total) AS session_idle_ms, + AVG(time_to_first_agent_action_ms) AS avg_time_to_first_action_ms, + AVG(time_to_first_useful_action_ms) AS avg_time_to_first_useful_ms, + AVG(CASE WHEN max_concurrent_sessions > 0 THEN max_concurrent_sessions END) AS avg_max_concurrent + FROM events + WHERE event IN ('session_end','session_crash') AND is_ci = 0 + AND created_at > datetime('now','-30 days') + `); + + // --- Per-turn metrics (30d) --------------------------------------------- + const turns = await one(env, ` + SELECT + AVG(turn_active_duration_ms) AS avg_turn_ms, + AVG(CASE WHEN turn_success > 0 THEN 1.0 ELSE 0.0 END) AS turn_success_rate + FROM events + WHERE event = 'turn_end' AND is_ci = 0 AND created_at > datetime('now','-30 days') + `); + + // --- Errors (30d) -------------------------------------------------------- + const errors = await one(env, ` + SELECT + SUM(error_provider_timeout) AS provider_timeout, + SUM(error_auth_failed) AS auth_failed, + SUM(error_tool_error) AS tool_error, + SUM(error_mcp_error) AS mcp_error, + SUM(error_rate_limited) AS rate_limited + FROM events + WHERE event IN ('session_end','session_crash') AND is_ci = 0 + AND created_at > datetime('now','-30 days') + `); + + // --- Feature adoption (30d, distinct users) ----------------------------- + const features = await one(env, ` + SELECT + COUNT(DISTINCT CASE WHEN feature_memory_used > 0 THEN telemetry_id END) AS memory, + COUNT(DISTINCT CASE WHEN feature_swarm_used > 0 THEN telemetry_id END) AS swarm, + COUNT(DISTINCT CASE WHEN feature_web_used > 0 THEN telemetry_id END) AS web, + COUNT(DISTINCT CASE WHEN feature_email_used > 0 THEN telemetry_id END) AS email, + COUNT(DISTINCT CASE WHEN feature_mcp_used > 0 THEN telemetry_id END) AS mcp, + COUNT(DISTINCT CASE WHEN feature_side_panel_used > 0 THEN telemetry_id END) AS side_panel, + COUNT(DISTINCT CASE WHEN feature_goal_used > 0 THEN telemetry_id END) AS goal, + COUNT(DISTINCT CASE WHEN feature_selfdev_used > 0 THEN telemetry_id END) AS selfdev, + COUNT(DISTINCT CASE WHEN feature_background_used > 0 THEN telemetry_id END) AS background, + COUNT(DISTINCT CASE WHEN feature_subagent_used > 0 THEN telemetry_id END) AS subagent + FROM events + WHERE event IN ('session_end','session_crash') AND is_ci = 0 + AND created_at > datetime('now','-30 days') + `); + + // --- Transport mix (30d) ------------------------------------------------- + const transport = await one(env, ` + SELECT + SUM(transport_https) AS https, + SUM(transport_persistent_ws_fresh) AS ws_fresh, + SUM(transport_persistent_ws_reuse) AS ws_reuse, + SUM(transport_cli_subprocess) AS cli, + SUM(transport_native_http2) AS native_http2, + SUM(transport_other) AS other + FROM events + WHERE event IN ('session_end','session_crash') AND is_ci = 0 + AND created_at > datetime('now','-30 days') + `); + + // --- Breakdowns (distinct users) ---------------------------------------- + const versions = await many(env, ` + SELECT version, COUNT(DISTINCT telemetry_id) AS users + FROM events WHERE is_ci = 0 AND version IS NOT NULL + GROUP BY version ORDER BY users DESC LIMIT 12 + `); + const os = await many(env, ` + SELECT os, COUNT(DISTINCT telemetry_id) AS users + FROM events WHERE is_ci = 0 AND os IS NOT NULL + GROUP BY os ORDER BY users DESC + `); + const arch = await many(env, ` + SELECT (COALESCE(os,'?') || ' / ' || COALESCE(arch,'?')) AS platform, COUNT(DISTINCT telemetry_id) AS users + FROM events WHERE is_ci = 0 AND os IS NOT NULL + GROUP BY os, arch ORDER BY users DESC LIMIT 12 + `); + const channels = await many(env, ` + SELECT COALESCE(build_channel,'unknown') AS build_channel, COUNT(DISTINCT telemetry_id) AS users + FROM events WHERE event IN ('session_end','session_crash') + GROUP BY build_channel ORDER BY users DESC + `); + const providers = await many(env, ` + SELECT COALESCE(provider_end,'unknown') AS provider, COUNT(DISTINCT telemetry_id) AS users + FROM events WHERE event IN ('session_end','session_crash') AND is_ci = 0 AND ${MEANINGFUL_SQL} + GROUP BY provider_end ORDER BY users DESC LIMIT 12 + `); + const auth = await many(env, ` + SELECT COALESCE(auth_provider,'unknown') AS auth_provider, COUNT(DISTINCT telemetry_id) AS users + FROM events WHERE event = 'auth_success' AND is_ci = 0 + GROUP BY auth_provider ORDER BY users DESC LIMIT 12 + `); + const onboarding = await many(env, ` + SELECT step, COUNT(DISTINCT telemetry_id) AS users + FROM events WHERE event = 'onboarding_step' AND is_ci = 0 AND step IS NOT NULL + GROUP BY step ORDER BY users DESC + `); + + // --- Usage timing: session starts by UTC hour --------------------------- + const hours = await many(env, ` + SELECT session_start_hour_utc AS hour, COUNT(*) AS sessions + FROM events + WHERE event = 'session_start' AND is_ci = 0 AND session_start_hour_utc IS NOT NULL + GROUP BY session_start_hour_utc ORDER BY session_start_hour_utc + `); + + // --- Data health: identity reconciliation + duplicate/skew signals ------ + // These are *not* product metrics; they tell you whether the pipeline is + // healthy (events arriving, ids matching installs, no single id dominating). + const health = await one(env, ` + WITH lifecycle AS ( + SELECT telemetry_id FROM events WHERE event IN ('session_end','session_crash') + ), install_ids AS ( + SELECT DISTINCT telemetry_id FROM events WHERE event = 'install' + ) + SELECT + (SELECT COUNT(DISTINCT telemetry_id) FROM lifecycle) AS lifecycle_ids, + (SELECT COUNT(DISTINCT telemetry_id) FROM events WHERE event = 'session_start') AS session_start_ids, + (SELECT COUNT(DISTINCT l.telemetry_id) FROM lifecycle l + LEFT JOIN install_ids i ON i.telemetry_id = l.telemetry_id + WHERE i.telemetry_id IS NULL) AS lifecycle_ids_without_install + `); + const skew = await one(env, ` + SELECT + MAX(c) AS max_session_events_one_id, + SUM(c) AS total_session_events, + (SELECT SUM(c2) FROM (SELECT c AS c2 FROM ( + SELECT telemetry_id, COUNT(*) AS c FROM events + WHERE event IN ('session_end','session_crash') + GROUP BY telemetry_id ORDER BY c DESC LIMIT 5))) AS top5_session_events + FROM (SELECT telemetry_id, COUNT(*) AS c FROM events + WHERE event IN ('session_end','session_crash') GROUP BY telemetry_id) + `); + const meaningfulSessions = await one(env, ` + SELECT COUNT(*) AS meaningful_sessions + FROM events + WHERE event IN ('session_end','session_crash') AND is_ci = 0 + AND created_at > datetime('now','-30 days') AND ${MEANINGFUL_SQL} + `); + + // --- User leaderboard: most active anonymous ids ------------------------ + // Ranks by lifecycle (session_end + session_crash) volume. telemetry_id is + // anonymous, so we surface a short prefix only. Useful for spotting power + // users and dev/test skew. Includes whether the id is CI and its channel. + const leaderboard = await many(env, ` + SELECT + substr(telemetry_id, 1, 8) AS id_prefix, + COUNT(*) AS sessions, + SUM(turns) AS turns, + SUM(input_tokens + output_tokens) AS tokens, + SUM(tool_calls) AS tool_calls, + MAX(is_ci) AS is_ci, + MAX(build_channel) AS build_channel, + MAX(version) AS version, + MAX(created_at) AS last_seen + FROM events + WHERE event IN ('session_end','session_crash') + GROUP BY telemetry_id + ORDER BY sessions DESC + LIMIT 20 + `); + + // --- Daily timeseries (last 60 days) for charts ------------------------- + const daily = await many(env, ` + SELECT + activity_date AS date, + COUNT(DISTINCT telemetry_id) AS raw, + COUNT(DISTINCT CASE WHEN meaningful_active > 0 THEN telemetry_id END) AS meaningful, + COUNT(DISTINCT CASE WHEN meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS headline, + COUNT(DISTINCT CASE WHEN last_is_ci = 1 THEN telemetry_id END) AS ci + FROM daily_active_users + WHERE activity_date > date('now','-60 days') + GROUP BY activity_date ORDER BY activity_date + `); + const dailyInstalls = await many(env, ` + SELECT date(created_at) AS date, COUNT(DISTINCT telemetry_id) AS installs + FROM events + WHERE event = 'install' AND is_ci = 0 AND created_at > datetime('now','-60 days') + GROUP BY date(created_at) ORDER BY date(created_at) + `); + + // --- Recent feedback (text only, no identifiers) ------------------------ + const feedback = await many(env, ` + SELECT created_at, feedback_text, feedback_rating, feedback_reason, version + FROM events + WHERE event = 'feedback' AND feedback_text IS NOT NULL + ORDER BY created_at DESC LIMIT 25 + `); + + return { + generated_at: new Date().toISOString(), + headline: { + total_users: totals.total_users || 0, + dau: active.dau || 0, + wau: active.wau || 0, + mau: active.mau || 0, + }, + users: { + total_users: totals.total_users || 0, + core_users: totals.core_users || 0, + installed_users: totals.installed_users || 0, + reached_users: totals.reached_users || 0, + all_ids_including_ci: totals.all_ids_including_ci || 0, + ci_ids: totals.ci_ids || 0, + }, + active, + lifecycle: { ...lifecycle, lifecycle_completion_ratio: lifecycleCompletion, crash_rate: crashRate }, + retention: { ...retention, d7_retention: d7Retention }, + quality: { ...quality, meaningful_sessions_30d: meaningfulSessions.meaningful_sessions || 0 }, + tokens, + agent, + turns, + errors, + features, + transport, + breakdowns: { versions, os, arch, channels, providers, auth, onboarding, hours }, + leaderboard, + health: { ...health, ...skew }, + timeseries: { daily, installs: dailyInstalls }, + feedback, + }; +} diff --git a/telemetry-worker/src/worker.js b/telemetry-worker/src/worker.js index ecd45ae7e..b14ae2dd6 100644 --- a/telemetry-worker/src/worker.js +++ b/telemetry-worker/src/worker.js @@ -1,3 +1,6 @@ +import { getStats } from "./stats.js"; +import { DASHBOARD_HTML } from "./dashboard.js"; + let cachedEventColumns = null; let cachedSessionDetailColumns = null; let cachedTurnDetailColumns = null; @@ -10,11 +13,33 @@ export default { }); } + const url = new URL(request.url); + + // Read-only dashboard surface (GET). The HTML page is public; the JSON stats + // endpoint is gated behind DASHBOARD_TOKEN so raw aggregates are not exposed + // to anyone who finds the URL. Raw events are never returned, only counts. + if (request.method === "GET") { + if (url.pathname === "/" || url.pathname === "/dashboard") { + return htmlResponse(DASHBOARD_HTML); + } + if (url.pathname === "/v1/stats") { + if (!isAuthorized(request, env)) { + return jsonResponse({ error: "Unauthorized" }, 401); + } + try { + const stats = await getStats(env); + return jsonResponse(stats); + } catch (err) { + return jsonResponse({ error: "Internal error", detail: String(err?.message || err) }, 500); + } + } + return jsonResponse({ error: "Not found" }, 404); + } + if (request.method !== "POST") { return jsonResponse({ error: "Method not allowed" }, 405); } - const url = new URL(request.url); if (url.pathname !== "/v1/event") { return jsonResponse({ error: "Not found" }, 404); } @@ -54,6 +79,21 @@ export default { }, }; +// When DASHBOARD_TOKEN is unset the stats endpoint stays locked (deny by +// default) rather than leaking aggregates. Accepts either a Bearer header or a +// ?token= query param so it works from curl and the browser fetch alike. +function isAuthorized(request, env) { + const expected = env.DASHBOARD_TOKEN; + if (!expected) { + return false; + } + const url = new URL(request.url); + const header = request.headers.get("authorization") || ""; + const bearer = header.startsWith("Bearer ") ? header.slice(7) : null; + const provided = bearer || url.searchParams.get("token") || request.headers.get("x-dashboard-token"); + return provided != null && provided === expected; +} + async function insertEvent(env, body) { const columns = await getEventColumns(env); const sessionDetailColumns = await getSessionDetailColumns(env); @@ -593,10 +633,21 @@ function jsonResponse(data, status = 200) { }); } +function htmlResponse(html, status = 200) { + return new Response(html, { + status, + headers: { + "Content-Type": "text/html; charset=utf-8", + "Cache-Control": "no-store", + ...corsHeaders(), + }, + }); +} + function corsHeaders() { return { "Access-Control-Allow-Origin": "*", - "Access-Control-Allow-Methods": "POST, OPTIONS", - "Access-Control-Allow-Headers": "Content-Type", + "Access-Control-Allow-Methods": "GET, POST, OPTIONS", + "Access-Control-Allow-Headers": "Content-Type, Authorization, X-Dashboard-Token", }; } diff --git a/telemetry-worker/users.sql b/telemetry-worker/users.sql new file mode 100644 index 000000000..53a0e8601 --- /dev/null +++ b/telemetry-worker/users.sql @@ -0,0 +1,61 @@ +-- Canonical "total users" definitions for jcode telemetry. +-- Usage: +-- wrangler d1 execute jcode-telemetry --remote --file=users.sql +-- +-- Headline number: total_users. A "user" is a distinct, non-CI telemetry_id that +-- ever either installed jcode or did meaningful work in it. We exclude CI traffic +-- (ephemeral runners mint a fresh id per job) and exclude empty open/close +-- sessions that never did anything. Raw, less-filtered tiers are reported +-- alongside it so no signal is hidden. +-- +-- Caveats (see README "Accuracy notes"): telemetry_id is per-machine, so one +-- person on N machines counts as N; opt-outs and network-blocked clients are +-- never counted; CI rows created before the is_ci column existed default to 0 +-- and may slip in. + +SELECT + -- HEADLINE: real people who installed or meaningfully used jcode. + COUNT(DISTINCT CASE WHEN is_ci = 0 AND ( + event = 'install' + OR (event IN ('session_end', 'session_crash') AND ( + turns > 0 OR had_user_prompt > 0 OR had_assistant_response > 0 + OR assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 + OR duration_secs > 0 OR error_provider_timeout > 0 OR error_auth_failed > 0 + OR error_tool_error > 0 OR error_mcp_error > 0 OR error_rate_limited > 0 + OR provider_switches > 0 OR model_switches > 0 + )) + OR (event = 'turn_end' AND ( + assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 + OR file_write_calls > 0 OR tests_run > 0 OR turn_success > 0 + )) + ) THEN telemetry_id END) AS total_users, + + -- Core users: did meaningful work (excludes install-only, never-used ids). + COUNT(DISTINCT CASE WHEN is_ci = 0 AND ( + (event IN ('session_end', 'session_crash') AND ( + turns > 0 OR had_user_prompt > 0 OR had_assistant_response > 0 + OR assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 + OR duration_secs > 0 OR error_provider_timeout > 0 OR error_auth_failed > 0 + OR error_tool_error > 0 OR error_mcp_error > 0 OR error_rate_limited > 0 + OR provider_switches > 0 OR model_switches > 0 + )) + OR (event = 'turn_end' AND ( + assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 + OR file_write_calls > 0 OR tests_run > 0 OR turn_success > 0 + )) + ) THEN telemetry_id END) AS core_users, + + -- Reach: every distinct non-CI id that ever launched jcode (incl. empty + -- open/close sessions). Upper bound on "people who ran it at least once". + COUNT(DISTINCT CASE WHEN is_ci = 0 THEN telemetry_id END) AS reached_users, + + -- Installs only (non-CI), for comparison with total_users. + COUNT(DISTINCT CASE WHEN is_ci = 0 AND event = 'install' THEN telemetry_id END) AS installed_users, + + -- Unfiltered grand total (includes CI + dev). Never use as the headline; + -- kept for transparency and for sizing CI noise. + COUNT(DISTINCT telemetry_id) AS all_ids_including_ci, + + -- CI-only ids, so the gap between all_ids and total_users is explainable. + COUNT(DISTINCT CASE WHEN is_ci = 1 THEN telemetry_id END) AS ci_ids +FROM events;