diff --git a/coverage-thresholds.json b/coverage-thresholds.json index b2968e0b..eb22cb84 100644 --- a/coverage-thresholds.json +++ b/coverage-thresholds.json @@ -34,7 +34,7 @@ "threshold": 96 }, "vsix": { - "threshold": 84 + "threshold": 86 }, "nvim": { "threshold": 39 diff --git a/crates/basilisk-common/src/lib.rs b/crates/basilisk-common/src/lib.rs index b7fdbe7c..9b4a2bf0 100644 --- a/crates/basilisk-common/src/lib.rs +++ b/crates/basilisk-common/src/lib.rs @@ -82,6 +82,12 @@ pub mod commands { pub const MEMORY_OBJECTS_BY_TYPE: &str = "basilisk.memory.objectsByType"; /// Force garbage collection and report what was collected. pub const MEMORY_GC_COLLECT: &str = "basilisk.memory.gcCollect"; + /// Ingest the raw output of a memory injection script run by the editor in + /// the active debug session. The marker in the output (`__BASILISK_MEM__*`) + /// selects the parser; the LSP updates session state, publishes memory + /// diagnostics, and returns the structured result. This is the second leg + /// of the editor-as-courier round-trip (the LSP holds no DAP connection). + pub const MEMORY_INGEST: &str = "basilisk.memory.ingest"; /// Command names advertised via `executeCommandProvider` capabilities. /// @@ -126,6 +132,7 @@ pub mod commands { MEMORY_REFERENCES, MEMORY_OBJECTS_BY_TYPE, MEMORY_GC_COLLECT, + MEMORY_INGEST, ]; } diff --git a/crates/basilisk-lsp/src/profiler/cpuprofile.rs b/crates/basilisk-lsp/src/profiler/cpuprofile.rs new file mode 100644 index 00000000..ea1aa8b2 --- /dev/null +++ b/crates/basilisk-lsp/src/profiler/cpuprofile.rs @@ -0,0 +1,232 @@ +//! Implements [LSPPROF]. See docs/specs/LSP-PROFILING-SPEC.md#PROFILE-SPEEDSCOPE +//! +//! Export aggregated CPU samples as a V8 `.cpuprofile` (the Chrome `DevTools` +//! `Profiler.Profile` schema) so VS Code's built-in profile viewer renders it +//! natively (flame chart, bottom-up + left-heavy tables). Same UI as Node.js CPU +//! profiles. See . +//! +//! 
All threads are merged into one timeline (Python's GIL serializes execution), +//! producing a single call tree of `nodes` plus the `samples`/`timeDeltas` +//! arrays the viewer needs. + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use serde_json::{json, Value}; +use tracing::info; + +use super::aggregator::{ProfileData, SpeedscopeFrame}; + +/// One call-tree node while building the profile (`id` = index + 1). +struct CpuNode { + /// Index into `ProfileData::frames`, or `None` for the synthetic root. + frame: Option, + /// Samples whose leaf is this node. + hit_count: u64, + /// Child node indices. + children: Vec, +} + +/// Build a V8 `Profiler.Profile` (`.cpuprofile`) value from aggregated data. +/// +/// `sample_rate` (Hz) yields the per-sample interval as **integer** +/// microseconds (`1_000_000 / rate`), avoiding any float→int cast. +#[must_use] +pub fn build_cpuprofile(data: &ProfileData, sample_rate: u64) -> Value { + // Index 0 is the synthetic root. + let mut nodes = vec![CpuNode { + frame: None, + hit_count: 0, + children: Vec::new(), + }]; + let mut child_of: HashMap<(usize, usize), usize> = HashMap::new(); + let mut samples: Vec = Vec::new(); + let mut time_deltas: Vec = Vec::new(); + let micros = i64::try_from(1_000_000_u64 / sample_rate.max(1)).unwrap_or(0); + + let mut thread_ids: Vec = data.thread_stacks.keys().copied().collect(); + thread_ids.sort_unstable(); + + for tid in thread_ids { + let Some(stacks) = data.thread_stacks.get(&tid) else { + continue; + }; + for stack in stacks { + // Walk root → leaf (stacks are stored root-first), creating nodes. 
+ let mut current = 0usize; + for &frame_idx in stack { + current = if let Some(&existing) = child_of.get(&(current, frame_idx)) { + existing + } else { + let new_idx = nodes.len(); + nodes.push(CpuNode { + frame: Some(frame_idx), + hit_count: 0, + children: Vec::new(), + }); + if let Some(parent) = nodes.get_mut(current) { + parent.children.push(new_idx); + } + let _ = child_of.insert((current, frame_idx), new_idx); + new_idx + }; + } + if let Some(leaf) = nodes.get_mut(current) { + leaf.hit_count += 1; + } + samples.push(current + 1); + time_deltas.push(micros); + } + } + + let nodes_json: Vec = nodes + .iter() + .enumerate() + .map(|(idx, node)| node_to_json(idx, node, &data.frames)) + .collect(); + + json!({ + "nodes": nodes_json, + "startTime": 0, + "endTime": time_deltas.iter().sum::(), + "samples": samples, + "timeDeltas": time_deltas, + }) +} + +/// Export `ProfileData` to a `.cpuprofile` file in `output_dir`; returns the path. +/// +/// # Errors +/// +/// Returns an error string if serialization or the file write fails. +pub fn export_cpuprofile( + data: &ProfileData, + session_id: &str, + sample_rate: u64, + output_dir: &Path, +) -> Result { + let profile = build_cpuprofile(data, sample_rate); + let json = serde_json::to_string(&profile) + .map_err(|err| format!("Failed to serialize cpuprofile: {err}"))?; + let path = output_dir.join(format!("basilisk-{session_id}.cpuprofile")); + std::fs::write(&path, json) + .map_err(|err| format!("Failed to write cpuprofile {}: {err}", path.display()))?; + info!(path = %path.display(), "exported cpuprofile"); + Ok(path) +} + +/// Serialize one node to the `ProfileNode` shape. +fn node_to_json(index: usize, node: &CpuNode, frames: &[SpeedscopeFrame]) -> Value { + json!({ + "id": index + 1, + "callFrame": call_frame(node.frame, frames), + "hitCount": node.hit_count, + "children": node.children.iter().map(|&c| c + 1).collect::>(), + }) +} + +/// Build a `Runtime.CallFrame` for a node (root or a real frame; lines 0-based). 
+fn call_frame(frame: Option, frames: &[SpeedscopeFrame]) -> Value { + match frame.and_then(|idx| frames.get(idx)) { + None => json!({ + "functionName": "(root)", + "scriptId": "0", + "url": "", + "lineNumber": -1, + "columnNumber": -1, + }), + Some(frame) => json!({ + "functionName": frame.name, + "scriptId": "0", + "url": frame.file, + "lineNumber": (frame.line - 1).max(0), + "columnNumber": 0, + }), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn frame(name: &str, file: &str, line: i32) -> SpeedscopeFrame { + SpeedscopeFrame { + name: name.to_owned(), + file: file.to_owned(), + line, + } + } + + #[test] + fn cpuprofile_matches_v8_schema() -> Result<(), String> { + // Two samples, both the single-frame stack [frame 0], at 100 Hz. + let data = ProfileData { + frames: vec![frame("hot_function", "/tmp/app.py", 19)], + thread_stacks: HashMap::from([(1_u64, vec![vec![0], vec![0]])]), + thread_weights: HashMap::from([(1_u64, vec![0.01, 0.01])]), + ..ProfileData::default() + }; + + let profile = build_cpuprofile(&data, 100); + + let nodes = profile + .get("nodes") + .and_then(Value::as_array) + .ok_or("missing nodes")?; + assert_eq!(nodes.len(), 2, "root + one frame node"); + + let leaf = nodes.get(1).ok_or("missing leaf node")?; + assert_eq!(leaf.get("hitCount").and_then(Value::as_u64), Some(2)); + let call = leaf.get("callFrame").ok_or("missing callFrame")?; + assert_eq!( + call.get("functionName").and_then(Value::as_str), + Some("hot_function") + ); + assert_eq!(call.get("url").and_then(Value::as_str), Some("/tmp/app.py")); + assert_eq!(call.get("lineNumber").and_then(Value::as_i64), Some(18)); + + // The root references the leaf as a child. + let root_children = nodes + .first() + .and_then(|root| root.get("children")) + .and_then(Value::as_array) + .ok_or("missing root children")?; + assert_eq!(root_children.first().and_then(Value::as_u64), Some(2)); + + // Samples point at the leaf (id 2); 10 ms per sample at 100 Hz. 
+ assert_eq!( + profile.get("samples").and_then(Value::as_array), + Some(&vec![json!(2), json!(2)]) + ); + assert_eq!( + profile.get("timeDeltas").and_then(Value::as_array), + Some(&vec![json!(10_000), json!(10_000)]) + ); + assert_eq!(profile.get("endTime").and_then(Value::as_i64), Some(20_000)); + Ok(()) + } + + #[test] + fn export_writes_a_valid_cpuprofile_file() -> Result<(), String> { + let data = ProfileData { + frames: vec![frame("f", "/tmp/a.py", 1)], + thread_stacks: HashMap::from([(1_u64, vec![vec![0]])]), + thread_weights: HashMap::from([(1_u64, vec![0.01])]), + ..ProfileData::default() + }; + let path = export_cpuprofile(&data, "basilisk-unit-test", 100, &std::env::temp_dir())?; + assert!( + path.extension().is_some_and(|ext| ext == "cpuprofile"), + "path: {}", + path.display() + ); + let contents = std::fs::read_to_string(&path).map_err(|err| err.to_string())?; + let parsed: Value = serde_json::from_str(&contents).map_err(|err| err.to_string())?; + assert!(parsed + .get("nodes") + .and_then(Value::as_array) + .is_some_and(|nodes| nodes.len() == 2)); + let _ = std::fs::remove_file(&path); + Ok(()) + } +} diff --git a/crates/basilisk-lsp/src/profiler/memory/diagnostics.rs b/crates/basilisk-lsp/src/profiler/memory/diagnostics.rs index d153efc6..975482e1 100644 --- a/crates/basilisk-lsp/src/profiler/memory/diagnostics.rs +++ b/crates/basilisk-lsp/src/profiler/memory/diagnostics.rs @@ -81,13 +81,18 @@ pub fn generate_allocation_diagnostics( /// Feeds growths through the `LeakTracker` for confidence scoring: /// - `BSK-MEM-GROWTH` warnings for all growing allocations /// - `BSK-MEM-LEAK` for suspected leaks (Medium+ confidence) +/// +/// Returns the scored leaks alongside the diagnostics so a caller can both +/// surface the structured leaks (e.g. in a response) and publish the +/// diagnostics from a SINGLE scoring pass — scoring twice would double-count +/// consecutive growths and corrupt confidence. 
#[must_use] pub fn generate_diff_diagnostics( diff: &MemoryDiff, leak_tracker: &mut LeakTracker, -) -> DiagnosticsByUri { - let mut result: DiagnosticsByUri = HashMap::new(); +) -> (Vec, DiagnosticsByUri) { let suspected = leak_tracker.process_growths(&diff.grown_allocations); + let mut result: DiagnosticsByUri = HashMap::new(); for leak in &suspected { let Ok(uri) = Url::from_file_path(&leak.file) else { @@ -108,7 +113,7 @@ pub fn generate_diff_diagnostics( "generated memory diff diagnostics" ); - result + (suspected, result) } /// Parsed uncollectable object from gc collect output. @@ -503,7 +508,7 @@ mod tests { fn diff_diagnostics_with_large_growth() -> Result<(), String> { let diff = make_diff(); let mut tracker = LeakTracker::new(); - let diags = generate_diff_diagnostics(&diff, &mut tracker); + let (_, diags) = generate_diff_diagnostics(&diff, &mut tracker); let uri = Url::from_file_path("/tmp/cache.py").map_err(|()| "bad URI")?; let file_diags = diags @@ -541,7 +546,7 @@ mod tests { let _ = generate_diff_diagnostics(&diff, &mut tracker); let _ = generate_diff_diagnostics(&diff, &mut tracker); - let diags = generate_diff_diagnostics(&diff, &mut tracker); + let (_, diags) = generate_diff_diagnostics(&diff, &mut tracker); let uri = Url::from_file_path("/tmp/cache.py").map_err(|()| "bad URI")?; let file_diags = diags.get(&uri).ok_or("expected diagnostics")?; @@ -579,7 +584,7 @@ mod tests { }; let mut tracker = LeakTracker::new(); - let diags = generate_diff_diagnostics(&diff, &mut tracker); + let (_, diags) = generate_diff_diagnostics(&diff, &mut tracker); let uri = Url::from_file_path("/tmp/small.py").map_err(|()| "bad URI")?; let file_diags = diags.get(&uri).ok_or("expected diagnostics")?; @@ -738,7 +743,7 @@ mod tests { freed_allocations: vec![], }; let mut tracker = LeakTracker::new(); - let diags = generate_diff_diagnostics(&diff, &mut tracker); + let (_, diags) = generate_diff_diagnostics(&diff, &mut tracker); assert!(diags.is_empty()); } } diff --git 
a/crates/basilisk-lsp/src/profiler/memory/heapprofile.rs b/crates/basilisk-lsp/src/profiler/memory/heapprofile.rs new file mode 100644 index 00000000..b5e51f99 --- /dev/null +++ b/crates/basilisk-lsp/src/profiler/memory/heapprofile.rs @@ -0,0 +1,173 @@ +//! Implements [LSPPROF]. See docs/specs/LSP-PROFILING-SPEC.md#PROFILE-MEMORY +//! +//! Export a memory snapshot as a V8 `.heapprofile` (the Chrome `DevTools` +//! `HeapProfiler.SamplingHeapProfile` schema) so VS Code's built-in profile +//! viewer renders it natively (flame chart + table, Self/Total size) — the same +//! UI used for Node.js heap profiles. See +//! . +//! +//! Mapping: each `tracemalloc` allocation site becomes a child of the synthetic +//! root, with `selfSize` = bytes allocated at that line. (`statistics('lineno')` +//! yields one frame per site, so the tree is root → sites; the schema and this +//! builder also handle deeper tracebacks should the script switch to +//! `statistics('traceback')`.) + +use std::path::Path; + +use serde_json::{json, Value}; + +use super::{AllocationSite, MemorySnapshot}; + +/// Build a V8 `SamplingHeapProfile` (`.heapprofile`) JSON value from a snapshot. +#[must_use] +pub fn snapshot_to_heapprofile(snapshot: &MemorySnapshot) -> Value { + let mut next_id: u64 = 1; + let root_id = next_id; + next_id += 1; + + let mut children = Vec::with_capacity(snapshot.top_allocations.len()); + let mut samples = Vec::with_capacity(snapshot.top_allocations.len()); + + for (ordinal, alloc) in snapshot.top_allocations.iter().enumerate() { + let node_id = next_id; + next_id += 1; + children.push(json!({ + "callFrame": alloc_call_frame(alloc), + "selfSize": alloc.size, + "id": node_id, + "children": [], + })); + samples.push(json!({ + "size": alloc.size, + "nodeId": node_id, + "ordinal": ordinal, + })); + } + + json!({ + "head": { + "callFrame": root_call_frame(), + "selfSize": 0, + "id": root_id, + "children": children, + }, + "samples": samples, + }) +} + +/// Synthetic root frame. 
+fn root_call_frame() -> Value { + json!({ + "functionName": "(root)", + "scriptId": "0", + "url": "", + "lineNumber": -1, + "columnNumber": -1, + }) +} + +/// A `Runtime.CallFrame` for an allocation site. `tracemalloc` knows only the +/// file and line, so the function name is the file's basename and the URL is the +/// path; line numbers are 0-based in V8. +fn alloc_call_frame(alloc: &AllocationSite) -> Value { + json!({ + "functionName": file_basename(&alloc.file), + "scriptId": "0", + "url": alloc.file, + "lineNumber": (alloc.line - 1).max(0), + "columnNumber": 0, + }) +} + +/// Last path component of `file`, for a readable node label. +fn file_basename(file: &str) -> String { + Path::new(file).file_name().map_or_else( + || file.to_owned(), + |name| name.to_string_lossy().into_owned(), + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::profiler::memory::diff::TraceFrame; + + fn snapshot_with_site(file: &str, line: i32, size: u64) -> MemorySnapshot { + MemorySnapshot { + snapshot_id: "snap-1".to_owned(), + current_memory: size, + peak_memory: size, + gc_objects: 0, + gc_counts: vec![], + top_allocations: vec![AllocationSite { + file: file.to_owned(), + line, + size, + count: 1, + traceback: vec![TraceFrame { + file: file.to_owned(), + line, + }], + }], + } + } + + #[test] + fn heapprofile_matches_v8_schema() -> Result<(), String> { + let snapshot = snapshot_with_site("/tmp/app.py", 42, 24_567_890); + let profile = snapshot_to_heapprofile(&snapshot); + + let head = profile.get("head").ok_or("missing head")?; + assert_eq!(head.get("selfSize").and_then(Value::as_u64), Some(0)); + let children = head + .get("children") + .and_then(Value::as_array) + .ok_or("missing head.children")?; + assert_eq!(children.len(), 1); + + let site = children.first().ok_or("expected one allocation site")?; + assert_eq!( + site.get("selfSize").and_then(Value::as_u64), + Some(24_567_890) + ); + let frame = site.get("callFrame").ok_or("missing callFrame")?; + assert_eq!( + 
frame.get("url").and_then(Value::as_str), + Some("/tmp/app.py") + ); + assert_eq!(frame.get("lineNumber").and_then(Value::as_i64), Some(41)); + assert_eq!( + frame.get("functionName").and_then(Value::as_str), + Some("app.py") + ); + + // Every node id is unique (root + each site). + let sample_node = profile + .get("samples") + .and_then(Value::as_array) + .and_then(|s| s.first()) + .and_then(|s| s.get("nodeId")) + .and_then(Value::as_u64); + assert_eq!(sample_node, site.get("id").and_then(Value::as_u64)); + Ok(()) + } + + #[test] + fn empty_snapshot_yields_empty_children() { + let snapshot = MemorySnapshot { + snapshot_id: "empty".to_owned(), + current_memory: 0, + peak_memory: 0, + gc_objects: 0, + gc_counts: vec![], + top_allocations: vec![], + }; + let profile = snapshot_to_heapprofile(&snapshot); + let children = profile + .get("head") + .and_then(|h| h.get("children")) + .and_then(Value::as_array) + .map_or(usize::MAX, Vec::len); + assert_eq!(children, 0); + } +} diff --git a/crates/basilisk-lsp/src/profiler/memory/mod.rs b/crates/basilisk-lsp/src/profiler/memory/mod.rs index 2e6954f5..3954a938 100644 --- a/crates/basilisk-lsp/src/profiler/memory/mod.rs +++ b/crates/basilisk-lsp/src/profiler/memory/mod.rs @@ -15,8 +15,10 @@ pub mod diagnostics; pub mod diff; +pub mod heapprofile; pub mod leaks; pub mod scripts; +pub mod session; pub mod timeline; use serde::{Deserialize, Serialize}; @@ -161,10 +163,37 @@ fn parse_allocation_site(value: &serde_json::Value) -> AllocationSite { } } +/// Parse the `__BASILISK_MEM_REFS__` reference-graph payload. +/// +/// The reference-graph script's JSON (`{ nodes, edges, cycles }`) is passed +/// straight through to the editor's webview, so this validates the marker and +/// JSON shape and returns the parsed object verbatim rather than re-modelling +/// it in Rust (it has no server-side diagnostics). +/// +/// # Errors +/// +/// Returns an error if the marker is absent or the JSON is invalid. 
+pub fn parse_refs_output(output: &str) -> Result { + let json_str = extract_marker_json(output, REFS_MARKER)?; + serde_json::from_str(json_str).map_err(|err| format!("invalid reference-graph JSON: {err}")) +} + +/// Parse the `__BASILISK_MEM_OBJECTS__` objects-by-type payload. +/// +/// Like [`parse_refs_output`], this is a validated pass-through to the editor. +/// +/// # Errors +/// +/// Returns an error if the marker is absent or the JSON is invalid. +pub fn parse_objects_output(output: &str) -> Result { + let json_str = extract_marker_json(output, OBJECTS_MARKER)?; + serde_json::from_str(json_str).map_err(|err| format!("invalid objects-by-type JSON: {err}")) +} + /// Extract the JSON payload after a marker prefix from script output. /// /// Scans each line for the marker and returns the JSON string after it. -fn extract_marker_json<'a>(output: &'a str, marker: &str) -> Result<&'a str, String> { +pub(crate) fn extract_marker_json<'a>(output: &'a str, marker: &str) -> Result<&'a str, String> { for line in output.lines() { if let Some(json_start) = line.find(marker) { return Ok(&line[json_start + marker.len()..]); diff --git a/crates/basilisk-lsp/src/profiler/memory/session.rs b/crates/basilisk-lsp/src/profiler/memory/session.rs new file mode 100644 index 00000000..7978e3db --- /dev/null +++ b/crates/basilisk-lsp/src/profiler/memory/session.rs @@ -0,0 +1,326 @@ +//! Implements [LSPPROF]. See docs/specs/LSP-PROFILING-SPEC.md#PROFILE-MEMORY +//! +//! Memory-profiling session state — the server-side brain of the ingest +//! round-trip. +//! +//! The LSP holds no DAP connection (the editor owns it), so memory profiling is +//! a two-leg round-trip: the LSP hands the editor a Python injection script +//! (see [`super::scripts`]), the editor runs it in the debuggee via DAP +//! `evaluate`, and couriers the raw stdout back through `basilisk.memory.ingest`. +//! +//! [`MemorySessionManager`] owns the cross-call state the stateless parsers and +//! 
scorers can't hold on their own: the per-session [`LeakTracker`] that +//! escalates leak confidence across diffs, the last snapshot, and the +//! [`MemoryTimeline`]. Each ingest marker-dispatches the output to the existing +//! parser and returns both the structured outcome and the diagnostics to publish +//! — no parsing logic is duplicated here; this is thin orchestration glue. + +use std::collections::HashMap; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{Instant, SystemTime}; + +use serde_json::Value; +use tokio::sync::Mutex; +use tracing::{debug, info}; + +use super::diagnostics::{self, DiagnosticsByUri, GcCollectResult, MemoryHotspotConfig}; +use super::diff::{parse_diff_output, MemoryDiff}; +use super::leaks::{LeakTracker, SuspectedLeak}; +use super::timeline::{AutoSnapshotConfig, MemoryTimeline}; +use super::{ + extract_marker_json, parse_objects_output, parse_refs_output, parse_snapshot_output, + MemorySnapshot, DIFF_MARKER, GC_MARKER, OBJECTS_MARKER, OK_MARKER, REFS_MARKER, + SNAPSHOT_MARKER, +}; + +/// The marker-dispatched outcome of ingesting one memory-script's output. +#[derive(Debug)] +pub enum IngestOutcome { + /// A `tracemalloc` snapshot (`__BASILISK_MEM__`). + Snapshot(MemorySnapshot), + /// A snapshot diff with leak-confidence scoring (`__BASILISK_MEM_DIFF__`). + Diff { + /// The parsed growth/free diff. + diff: MemoryDiff, + /// Suspected leaks scored against this session's accumulated history. + leaks: Vec, + }, + /// A gc-collection result (`__BASILISK_MEM_GC__`). + Gc(GcCollectResult), + /// A reference-graph payload, passed through to the editor (`__BASILISK_MEM_REFS__`). + Refs(Value), + /// An objects-by-type payload, passed through to the editor (`__BASILISK_MEM_OBJECTS__`). + Objects(Value), + /// A bare acknowledgment (`__BASILISK_MEM_OK__`), e.g. from start/stop scripts. + Ack, +} + +/// The structured result of a single ingest, plus diagnostics to publish. 
+#[derive(Debug)] +pub struct IngestResult { + /// The typed outcome the editor renders. + pub outcome: IngestOutcome, + /// Diagnostics to publish via `textDocument/publishDiagnostics`, keyed by URI. + /// Empty for outcomes that produce none (refs, objects, ack). + pub diagnostics: DiagnosticsByUri, +} + +/// Per-session memory-profiling state. +struct MemorySession { + /// Unique session identifier (`mem-XXXXXXXX`). + session_id: String, + /// When the session started (for snapshot ids and timeline elapsed time). + started_at: Instant, + /// Most recent parsed snapshot (for UI summaries / future baseline use). + last_snapshot: Option, + /// Cross-diff leak-confidence accumulator. + leak_tracker: LeakTracker, + /// Rolling memory timeline for the dashboard chart. + timeline: MemoryTimeline, + /// Allocation-hotspot thresholds for diagnostics. + hotspot_config: MemoryHotspotConfig, + /// Number of snapshots ingested so far (for snapshot ids). + snapshot_count: u64, +} + +impl MemorySession { + fn new(session_id: String) -> Self { + let mut timeline = MemoryTimeline::new(AutoSnapshotConfig::default()); + timeline.start(); + Self { + session_id, + started_at: Instant::now(), + last_snapshot: None, + leak_tracker: LeakTracker::new(), + timeline, + hotspot_config: MemoryHotspotConfig::default(), + snapshot_count: 0, + } + } + + /// Marker-dispatch the raw script output to the matching parser. + fn ingest(&mut self, output: &str) -> Result { + // Dispatch to the marker that appears EARLIEST in the output. Each + // script prints exactly one marker at the start of its payload line; an + // object `repr` embedded later in the JSON could contain a marker-like + // substring, so position — not mere presence — selects the true marker. 
+ let detected = [ + (DIFF_MARKER, MarkerKind::Diff), + (GC_MARKER, MarkerKind::Gc), + (REFS_MARKER, MarkerKind::Refs), + (OBJECTS_MARKER, MarkerKind::Objects), + (SNAPSHOT_MARKER, MarkerKind::Snapshot), + (OK_MARKER, MarkerKind::Ack), + ] + .into_iter() + .filter_map(|(marker, kind)| output.find(marker).map(|idx| (idx, kind))) + .min_by_key(|(idx, _)| *idx) + .map(|(_, kind)| kind); + + match detected { + Some(MarkerKind::Diff) => self.ingest_diff(output), + Some(MarkerKind::Gc) => self.ingest_gc(output), + Some(MarkerKind::Refs) => Ok(no_diagnostics(IngestOutcome::Refs(parse_refs_output( + output, + )?))), + Some(MarkerKind::Objects) => Ok(no_diagnostics(IngestOutcome::Objects( + parse_objects_output(output)?, + ))), + Some(MarkerKind::Snapshot) => self.ingest_snapshot(output), + Some(MarkerKind::Ack) => { + debug!(session_id = %self.session_id, "ingested ack"); + Ok(no_diagnostics(IngestOutcome::Ack)) + } + None => Err("no recognized __BASILISK_MEM*__ marker in script output".to_owned()), + } + } + + fn ingest_snapshot(&mut self, output: &str) -> Result { + let snapshot_id = format!("{}-snap-{}", self.session_id, self.snapshot_count); + self.snapshot_count += 1; + let snapshot = parse_snapshot_output(output, &snapshot_id)?; + let diagnostics = + diagnostics::generate_allocation_diagnostics(&snapshot, &self.hotspot_config); + self.timeline.record(&snapshot); + self.last_snapshot = Some(snapshot.clone()); + info!( + session_id = %self.session_id, + current = snapshot.current_memory, + allocations = snapshot.top_allocations.len(), + "ingested memory snapshot" + ); + Ok(IngestResult { + outcome: IngestOutcome::Snapshot(snapshot), + diagnostics, + }) + } + + fn ingest_diff(&mut self, output: &str) -> Result { + let json = extract_marker_json(output, DIFF_MARKER)?; + let diff = match parse_diff_output(json) { + Ok(diff) => diff, + // The very first diff has no baseline yet — the injection script + // seeds it for the next call. 
Surface a clean empty diff rather than + // a hard error so the editor shows "0 leaks" instead of a scary + // message on the first "Compare Snapshots". + Err(err) if err.contains("no previous snapshot") => empty_diff(), + Err(err) => return Err(err), + }; + // `generate_diff_diagnostics` scores once and returns both the leaks + // (for the outcome) and the diagnostics, so confidence isn't corrupted. + let (leaks, diagnostics) = + diagnostics::generate_diff_diagnostics(&diff, &mut self.leak_tracker); + info!( + session_id = %self.session_id, + growths = diff.grown_allocations.len(), + suspected = leaks.len(), + "ingested memory diff" + ); + Ok(IngestResult { + outcome: IngestOutcome::Diff { diff, leaks }, + diagnostics, + }) + } + + fn ingest_gc(&mut self, output: &str) -> Result { + let json = extract_marker_json(output, GC_MARKER)?; + let gc = diagnostics::parse_gc_result(json)?; + let diagnostics = diagnostics::generate_cycle_diagnostics(&gc); + info!( + session_id = %self.session_id, + uncollectable = gc.uncollectable_count, + "ingested gc result" + ); + Ok(IngestResult { + outcome: IngestOutcome::Gc(gc), + diagnostics, + }) + } +} + +/// Which marker a script's output carries — selected by earliest position. +#[derive(Debug, Clone, Copy)] +enum MarkerKind { + Snapshot, + Diff, + Gc, + Refs, + Objects, + Ack, +} + +/// An empty diff, used when the first comparison has no baseline yet. +fn empty_diff() -> MemoryDiff { + MemoryDiff { + total_growth: 0, + total_freed: 0, + net_growth: 0, + grown_allocations: Vec::new(), + freed_allocations: Vec::new(), + } +} + +/// Wrap an outcome that produces no diagnostics. +fn no_diagnostics(outcome: IngestOutcome) -> IngestResult { + IngestResult { + outcome, + diagnostics: DiagnosticsByUri::new(), + } +} + +/// Upper bound on retained memory sessions. 
The editor tracks one at a time, +/// but repeated start-without-stop cycles would otherwise accumulate entries; +/// the oldest is evicted past this cap so growth stays bounded. +const MAX_SESSIONS: usize = 32; + +/// Manages active memory-profiling sessions for the LSP. +/// +/// One session per `memorySessionId`. Lives on `LspServer` alongside +/// `ProfileSessionManager` and `DebugSessionManager`. +pub struct MemorySessionManager { + sessions: Mutex>, + /// Monotonic counter mixed into session ids so two sessions minted in the + /// same nanosecond cannot collide. + next_seq: AtomicU64, +} + +impl std::fmt::Debug for MemorySessionManager { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MemorySessionManager") + .finish_non_exhaustive() + } +} + +impl Default for MemorySessionManager { + fn default() -> Self { + Self::new() + } +} + +impl MemorySessionManager { + /// Create a new memory session manager. + #[must_use] + pub fn new() -> Self { + Self { + sessions: Mutex::new(HashMap::new()), + next_seq: AtomicU64::new(0), + } + } + + /// Begin a new memory-tracking session and return its id. + /// + /// `traceback_depth` is the `tracemalloc` frame depth the editor will inject; + /// it is logged for diagnostics but the script itself is generated by the + /// command handler from [`super::scripts::start_tracemalloc`]. + pub async fn start_session(&self, traceback_depth: u32) -> String { + let seq = self.next_seq.fetch_add(1, Ordering::Relaxed); + let session_id = generate_memory_session_id(seq); + info!(session_id = %session_id, traceback_depth, "memory session started"); + let mut sessions = self.sessions.lock().await; + let _ = sessions.insert(session_id.clone(), MemorySession::new(session_id.clone())); + evict_oldest_over_cap(&mut sessions); + session_id + } + + /// Ingest raw script output for a session, returning the structured outcome + /// and the diagnostics to publish. 
+ /// + /// # Errors + /// + /// Returns an error if the session is unknown, no marker is present, or the + /// payload JSON is malformed. + pub async fn ingest(&self, session_id: &str, output: &str) -> Result { + let mut sessions = self.sessions.lock().await; + let session = sessions + .get_mut(session_id) + .ok_or_else(|| format!("unknown memory session: {session_id}"))?; + session.ingest(output) + } +} + +/// Evict the oldest session(s) once the cap is exceeded (keeps growth bounded). +fn evict_oldest_over_cap(sessions: &mut HashMap) { + while sessions.len() > MAX_SESSIONS { + let oldest = sessions + .values() + .min_by_key(|session| session.started_at) + .map(|session| session.session_id.clone()); + match oldest { + Some(id) => { + let _ = sessions.remove(&id); + } + None => break, + } + } +} + +/// Generate a unique memory session id (`mem-XXXXXXXX-SEQ`). +fn generate_memory_session_id(seq: u64) -> String { + let now = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap_or_default(); + let nanos = now.subsec_nanos(); + let secs_low = u32::try_from(now.as_secs()).unwrap_or(u32::MAX); + let mixed = nanos.wrapping_mul(2_654_435_761).wrapping_add(secs_low); + format!("mem-{mixed:08x}-{seq:x}") +} diff --git a/crates/basilisk-lsp/src/profiler/mod.rs b/crates/basilisk-lsp/src/profiler/mod.rs index 1b588982..ad897052 100644 --- a/crates/basilisk-lsp/src/profiler/mod.rs +++ b/crates/basilisk-lsp/src/profiler/mod.rs @@ -14,6 +14,7 @@ pub mod aggregator; pub mod commands; +pub mod cpuprofile; pub mod diagnostics; pub mod export; /// Elevated-helper-over-Unix-socket sampling path (Unix only). See [`helper_client`]. @@ -110,6 +111,8 @@ struct ProfileSession { hotspot_config: HotspotConfig, /// Seconds per sample (1.0 / `sample_rate`). sample_weight: f64, + /// Samples per second (used to emit integer microsecond timeDeltas). + sample_rate: u64, /// Whether idle threads are included. 
include_idle: bool, } @@ -163,6 +166,8 @@ pub struct StopResult { pub hotspot_config: HotspotConfig, /// Seconds per sample. pub sample_weight: f64, + /// Samples per second. + pub sample_rate: u64, } /// Manages active profiling sessions for the LSP. @@ -269,6 +274,7 @@ impl ProfileSessionManager { sampler, hotspot_config: HotspotConfig::default(), sample_weight, + sample_rate: rate, include_idle: false, }, ); @@ -313,6 +319,7 @@ impl ProfileSessionManager { hot_lines, hotspot_config: session.hotspot_config, sample_weight: session.sample_weight, + sample_rate: session.sample_rate, }) } @@ -348,6 +355,7 @@ impl ProfileSessionManager { hot_lines, hotspot_config: session.hotspot_config.clone(), sample_weight: session.sample_weight, + sample_rate: session.sample_rate, }) } diff --git a/crates/basilisk-lsp/src/server/commands.rs b/crates/basilisk-lsp/src/server/commands.rs index 024f6eb3..674a1e49 100644 --- a/crates/basilisk-lsp/src/server/commands.rs +++ b/crates/basilisk-lsp/src/server/commands.rs @@ -113,7 +113,8 @@ pub(super) async fn dispatch_execute_command( | basilisk_common::commands::MEMORY_DIFF | basilisk_common::commands::MEMORY_REFERENCES | basilisk_common::commands::MEMORY_OBJECTS_BY_TYPE - | basilisk_common::commands::MEMORY_GC_COLLECT => { + | basilisk_common::commands::MEMORY_GC_COLLECT + | basilisk_common::commands::MEMORY_INGEST => { dispatch_profiler_or_memory(server, ¶ms.command, ¶ms.arguments).await } unknown => { @@ -169,6 +170,9 @@ async fn dispatch_profiler_or_memory( basilisk_common::commands::MEMORY_GC_COLLECT => { super::memory_handlers::execute_memory_gc_collect(server, args).await } + basilisk_common::commands::MEMORY_INGEST => { + super::memory_handlers::execute_memory_ingest(server, args).await + } _ => Ok(None), } } diff --git a/crates/basilisk-lsp/src/server/memory_handlers.rs b/crates/basilisk-lsp/src/server/memory_handlers.rs index 8859414a..e8a4b6cb 100644 --- a/crates/basilisk-lsp/src/server/memory_handlers.rs +++ 
b/crates/basilisk-lsp/src/server/memory_handlers.rs @@ -8,9 +8,25 @@ use tower_lsp::jsonrpc::Result as LspResult; use tower_lsp::lsp_types::MessageType; -use tracing::info; +use tracing::{error, info}; use super::LspServer; +use crate::profiler::memory::diagnostics::DiagnosticsByUri; +use crate::profiler::memory::session::IngestOutcome; + +/// Max allocation sites a snapshot/diff script emits. Kept modest so the printed +/// `__BASILISK_MEM__` line (which surfaces in the Debug Console) stays readable — +/// the dashboard only needs the top sites. +const MAX_SNAPSHOT_STATS: usize = 100; + +/// Construct a memory-domain LSP error (`-32010`). +fn memory_error(message: impl Into) -> tower_lsp::jsonrpc::Error { + tower_lsp::jsonrpc::Error { + code: tower_lsp::jsonrpc::ErrorCode::ServerError(-32010), + message: message.into().into(), + data: None, + } +} /// Return the first command argument, or an empty JSON object if absent. /// @@ -43,21 +59,23 @@ pub(super) async fn execute_memory_start( let script = crate::profiler::memory::scripts::start_tracemalloc(traceback_depth); + // Register a real session so subsequent snapshot/diff ingests can accumulate + // cross-call leak history. The editor runs `script` in the debuggee and posts + // the output back via `basilisk.memory.ingest` (the LSP holds no DAP wire). + let memory_session_id = server.memory_manager.start_session(traceback_depth).await; + server .client .log_message( MessageType::INFO, - format!("Basilisk: Starting memory tracking (depth={traceback_depth})"), + format!( + "Basilisk: memory tracking started ({memory_session_id}, depth={traceback_depth})" + ), ) .await; - // In a real implementation, we would send this script to the debug session - // via DAP evaluate. For now, return the session info indicating readiness. 
Ok(Some(serde_json::json!({ - "memorySessionId": format!("mem-{:08x}", std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .subsec_nanos()), + "memorySessionId": memory_session_id, "tracingStarted": true, "script": script, "tracebackDepth": traceback_depth, @@ -78,7 +96,7 @@ pub(super) async fn execute_memory_snapshot( .and_then(serde_json::Value::as_str) .unwrap_or("unknown"); - let script = crate::profiler::memory::scripts::take_snapshot(500); + let script = crate::profiler::memory::scripts::take_snapshot(MAX_SNAPSHOT_STATS); server .client @@ -108,7 +126,7 @@ pub(super) async fn execute_memory_diff( .and_then(serde_json::Value::as_str) .unwrap_or("unknown"); - let script = crate::profiler::memory::scripts::diff_snapshot(500); + let script = crate::profiler::memory::scripts::diff_snapshot(MAX_SNAPSHOT_STATS); server .client @@ -232,3 +250,191 @@ pub(super) async fn execute_memory_gc_collect( "script": script, }))) } + +/// Handle `basilisk.memory.ingest` — second leg of the round-trip. +/// +/// The editor ran an injection script in the debuggee via DAP `evaluate` and +/// posts the raw stdout here. The marker in the output selects the parser; the +/// [`MemorySessionManager`](crate::profiler::memory::session::MemorySessionManager) +/// updates session state and produces diagnostics, which we publish before +/// returning the structured result to the editor. 
+pub(super) async fn execute_memory_ingest( + server: &LspServer, + args: &[serde_json::Value], +) -> LspResult<Option<serde_json::Value>> { + info!("execute_memory_ingest called"); + + let arg = first_arg_or_empty(args); + + let Some(session_id) = arg + .get("memorySessionId") + .and_then(serde_json::Value::as_str) + else { + return Err(memory_error("Missing required parameter: memorySessionId")); + }; + let output = arg + .get("output") + .and_then(serde_json::Value::as_str) + .unwrap_or_default(); + + match server.memory_manager.ingest(session_id, output).await { + Ok(result) => { + publish_memory_diagnostics(server, &result.diagnostics).await; + let mut json = ingest_outcome_to_json(session_id, &result.outcome); + // A snapshot is also exported as a V8 `.heapprofile` so the editor can + // open it in VS Code's built-in profile viewer (flame chart + table). + if let IngestOutcome::Snapshot(snapshot) = &result.outcome { + if let Some(path) = write_heapprofile(snapshot) { + if let Some(obj) = json.as_object_mut() { + let _ = obj.insert( + "heapProfilePath".to_owned(), + serde_json::Value::String(path), + ); + } + } + } + Ok(Some(json)) + } + Err(err) => { + error!(%err, "memory ingest failed"); + server + .client + .log_message( + MessageType::ERROR, + format!("Basilisk: memory ingest failed: {err}"), + ) + .await; + Err(memory_error(err)) + } + } +} + +/// Write a snapshot as a V8 `.heapprofile` to the temp dir and return its path. +/// +/// Returns `None` (logging the cause) if serialization or the write fails — the +/// snapshot result is still returned to the editor, just without a file to open.
+fn write_heapprofile(snapshot: &crate::profiler::memory::MemorySnapshot) -> Option<String> { + let profile = crate::profiler::memory::heapprofile::snapshot_to_heapprofile(snapshot); + let json = serde_json::to_string(&profile) + .map_err(|err| error!(%err, "failed to serialize heapprofile")) + .ok()?; + let path = std::env::temp_dir().join(format!("basilisk-{}.heapprofile", snapshot.snapshot_id)); + std::fs::write(&path, json) + .map_err(|err| error!(%err, "failed to write heapprofile")) + .ok()?; + info!(path = %path.display(), "wrote heapprofile"); + Some(path.display().to_string()) +} + +/// Publish memory diagnostics for every affected URI. +async fn publish_memory_diagnostics(server: &LspServer, diagnostics: &DiagnosticsByUri) { + for (uri, items) in diagnostics { + server + .client + .publish_diagnostics(uri.clone(), items.clone(), None) + .await; + } +} + +/// Serialize an ingest outcome into the editor wire format (camelCase, tagged +/// by `kind` so the editor can dispatch). +fn ingest_outcome_to_json(session_id: &str, outcome: &IngestOutcome) -> serde_json::Value { + match outcome { + IngestOutcome::Snapshot(snapshot) => serde_json::json!({ + "kind": "snapshot", + "memorySessionId": session_id, + "snapshotId": snapshot.snapshot_id, + "currentMemory": snapshot.current_memory, + "peakMemory": snapshot.peak_memory, + "gcObjects": snapshot.gc_objects, + "gcCounts": snapshot.gc_counts, + "topAllocations": allocation_sites_json(snapshot), + }), + IngestOutcome::Diff { diff, leaks } => serde_json::json!({ + "kind": "diff", + "memorySessionId": session_id, + "totalGrowth": diff.total_growth, + "totalFreed": diff.total_freed, + "netGrowth": diff.net_growth, + "suspectedLeaks": suspected_leaks_json(leaks), + }), + IngestOutcome::Gc(gc) => serde_json::json!({ + "kind": "gc", + "memorySessionId": session_id, + "collected": gc.collected, + "uncollectable": gc.uncollectable_count, + "memoryFreed": gc.memory_freed, + "uncollectableObjects": uncollectable_objects_json(gc), +
}), + IngestOutcome::Refs(graph) => serde_json::json!({ + "kind": "refs", + "memorySessionId": session_id, + "graph": graph, + }), + IngestOutcome::Objects(objects) => serde_json::json!({ + "kind": "objects", + "memorySessionId": session_id, + "objects": objects, + }), + IngestOutcome::Ack => serde_json::json!({ + "kind": "ack", + "memorySessionId": session_id, + }), + } +} + +/// Build the `topAllocations` array for a snapshot response. +fn allocation_sites_json( + snapshot: &crate::profiler::memory::MemorySnapshot, +) -> Vec<serde_json::Value> { + snapshot + .top_allocations + .iter() + .map(|alloc| { + serde_json::json!({ + "file": alloc.file, + "line": alloc.line, + "size": alloc.size, + "count": alloc.count, + }) + }) + .collect() +} + +/// Build the `suspectedLeaks` array for a diff response. +fn suspected_leaks_json( + leaks: &[crate::profiler::memory::leaks::SuspectedLeak], +) -> Vec<serde_json::Value> { + leaks + .iter() + .map(|leak| { + serde_json::json!({ + "file": leak.file, + "line": leak.line, + "sizeGrowth": leak.size_growth, + "countGrowth": leak.count_growth, + "currentSize": leak.current_size, + "currentCount": leak.current_count, + "confidence": leak.confidence.to_string(), + "reason": leak.reason, + }) + }) + .collect() +} + +/// Build the `uncollectableObjects` array for a gc response. +fn uncollectable_objects_json( + gc: &crate::profiler::memory::diagnostics::GcCollectResult, +) -> Vec<serde_json::Value> { + gc.uncollectable_objects + .iter() + .map(|obj| { + serde_json::json!({ + "typeName": obj.type_name, + "size": obj.size, + "repr": obj.repr, + "reason": obj.reason, + }) + }) + .collect() +} diff --git a/crates/basilisk-lsp/src/server/mod.rs b/crates/basilisk-lsp/src/server/mod.rs index 8909b75d..0db44b87 100644 --- a/crates/basilisk-lsp/src/server/mod.rs +++ b/crates/basilisk-lsp/src/server/mod.rs @@ -107,6 +107,9 @@ pub struct LspServer { pub(super) debug_manager: crate::debug::DebugSessionManager, /// Profiler session manager — py-spy sampling, aggregation, export.
pub(super) profiler_manager: crate::profiler::ProfileSessionManager, + /// Memory session manager — drives the editor-couriered ingest round-trip + /// (snapshot/diff/leak state) since the LSP holds no DAP connection. + pub(super) memory_manager: crate::profiler::memory::session::MemorySessionManager, /// Debounced file-watcher task. pub(super) watcher_debounce: Mutex>, /// Debounced module-changed notification task. @@ -131,6 +134,7 @@ impl LspServer { workspace_roots: RwLock::new(Vec::new()), debug_manager: crate::debug::DebugSessionManager::new(), profiler_manager: crate::profiler::ProfileSessionManager::new(), + memory_manager: crate::profiler::memory::session::MemorySessionManager::new(), watcher_debounce: Mutex::new(None), module_changed_debounce: Mutex::new(None), test_config: RwLock::new(TestExplorerConfig::default()), diff --git a/crates/basilisk-lsp/src/server/profiler_handlers.rs b/crates/basilisk-lsp/src/server/profiler_handlers.rs index f4f9c09a..48fc4986 100644 --- a/crates/basilisk-lsp/src/server/profiler_handlers.rs +++ b/crates/basilisk-lsp/src/server/profiler_handlers.rs @@ -164,6 +164,18 @@ pub(super) async fn execute_profiler_stop( } }; + // Always export a V8 `.cpuprofile` too, so the editor can open it in + // VS Code's built-in profile viewer (flame chart + tables). 
+ let cpu_profile_path = crate::profiler::cpuprofile::export_cpuprofile( + &result.data, + &result.session_id, + result.sample_rate, + &output_dir, + ) + .map_err(|err| error!(%err, "failed to export cpuprofile")) + .ok() + .map(|path| path.display().to_string()); + publish_profiler_diagnostics(server, &result.data, &result.hotspot_config).await; server @@ -185,6 +197,7 @@ pub(super) async fn execute_profiler_stop( "duration": result.duration, "totalSamples": result.total_samples, "outputFile": output_file, + "cpuProfilePath": cpu_profile_path, "hotFunctions": hot_funcs_json, "hotLines": hot_lines_json, }))) diff --git a/crates/basilisk-lsp/tests/lsp/ws_test_memory.rs b/crates/basilisk-lsp/tests/lsp/ws_test_memory.rs new file mode 100644 index 00000000..24a36df2 --- /dev/null +++ b/crates/basilisk-lsp/tests/lsp/ws_test_memory.rs @@ -0,0 +1,234 @@ +//! Tests for [LSPPROF]. See docs/specs/LSP-PROFILING-SPEC.md#PROFILE-MEMORY +// +// End-to-end memory-profiling round-trip over the real LSP. The editor is the +// DAP courier: it asks the LSP for an injection script, runs it in the debuggee, +// and posts the raw output back via `basilisk.memory.ingest`. These tests drive +// that round-trip with the exact marker-prefixed output the Python injection +// scripts emit, exercising the handlers, the `MemorySessionManager`, the +// marker dispatch, and the diagnostics-publish path. + +use super::ws_test_common::*; + +/// Marker-prefixed `tracemalloc` snapshot output with a 24 MB allocation site. +const SNAPSHOT_OUTPUT: &str = r#"__BASILISK_MEM__{"current": 45678912, "peak": 50000000, "gcObjects": 14523, "gcCounts": [712, 45, 3], "stats": [{"file": "/tmp/app.py", "line": 42, "size": 24567890, "count": 15234, "traceback": [{"file": "/tmp/app.py", "line": 42}]}]}"#; + +/// Marker-prefixed diff output with one growing site at /tmp/cache.py:34. 
+const DIFF_OUTPUT: &str = r#"__BASILISK_MEM_DIFF__{"leaks": [{"file": "/tmp/cache.py", "line": 34, "sizeDiff": 5000, "countDiff": 100, "size": 1000000, "count": 500, "traceback": [{"file": "/tmp/cache.py", "line": 34}]}], "current": 60000000, "peak": 70000000}"#; + +/// Marker-prefixed gc-collect output with one uncollectable cycle. +const GC_OUTPUT: &str = r#"__BASILISK_MEM_GC__{"collected": 42, "uncollectable": 1, "memoryFreed": 8192, "uncollectableObjects": [{"id": 1, "type": "CacheNode", "size": 4096, "repr": "", "reason": "Instance has __del__ method and is in a reference cycle"}]}"#; + +/// Marker-prefixed reference-graph output. +const REFS_OUTPUT: &str = r#"__BASILISK_MEM_REFS__{"nodes": [{"id": 1, "type": "dict", "size": 100, "repr": "{}", "depth": 0, "isTarget": true}], "edges": [], "cycles": []}"#; + +/// Run a `workspace/executeCommand` and return its `result` object. +async fn exec( + fixture: &mut WsTestFixture, + id: u64, + command: &str, + args: serde_json::Value, +) -> TestResult { + let resp = fixture + .request( + id, + "workspace/executeCommand", + serde_json::json!({ "command": command, "arguments": [args] }), + ) + .await? + .ok_or_else(|| format!("no response to {command}"))?; + let parsed: serde_json::Value = serde_json::from_str(&resp)?; + assert!( + parsed.get("error").is_none(), + "{command} returned an error: {resp}" + ); + parsed + .get("result") + .cloned() + .ok_or_else(|| format!("{command} response had no result: {resp}").into()) +} + +/// Field accessor: read a string field from a JSON object. +fn str_field<'a>(value: &'a serde_json::Value, key: &str) -> Option<&'a str> { + value.get(key).and_then(serde_json::Value::as_str) +} + +/// Start a memory session over the wire and return its id + start script. 
+async fn start_memory_session( + fixture: &mut WsTestFixture, + id: u64, +) -> TestResult<(String, String)> { + let start = exec( + fixture, + id, + "basilisk.memory.start", + serde_json::json!({ "tracebackDepth": 25 }), + ) + .await?; + let session_id = str_field(&start, "memorySessionId") + .ok_or("start should return memorySessionId")? + .to_owned(); + let script = str_field(&start, "script").unwrap_or_default().to_owned(); + Ok((session_id, script)) +} + +/// `start` mints a `mem-` session and hands out a tracemalloc script; ingesting a +/// snapshot returns the parsed allocation summary. +#[tokio::test] +async fn test_ws_memory_start_then_snapshot() -> TestResult<()> { + let mut fixture = WsTestFixture::new().await?; + let _ = fixture.initialize().await?; + + let (session_id, start_script) = start_memory_session(&mut fixture, 800).await?; + assert!(session_id.starts_with("mem-"), "session id: {session_id}"); + assert!( + start_script.contains("tracemalloc.start(25)"), + "start script: {start_script}" + ); + + let snap_cmd = exec( + &mut fixture, + 801, + "basilisk.memory.snapshot", + serde_json::json!({ "memorySessionId": session_id }), + ) + .await?; + assert!( + str_field(&snap_cmd, "script") + .unwrap_or_default() + .contains("__BASILISK_MEM__"), + "snapshot script should print the marker" + ); + + let snap = exec( + &mut fixture, + 802, + "basilisk.memory.ingest", + serde_json::json!({ "memorySessionId": session_id, "output": SNAPSHOT_OUTPUT }), + ) + .await?; + assert_eq!(str_field(&snap, "kind"), Some("snapshot")); + assert_eq!( + snap.get("currentMemory") + .and_then(serde_json::Value::as_u64), + Some(45_678_912) + ); + let allocs = snap + .get("topAllocations") + .and_then(serde_json::Value::as_array) + .ok_or("snapshot should carry topAllocations")?; + assert_eq!(allocs.len(), 1, "one allocation site expected"); + + // The snapshot is also exported as a V8 `.heapprofile` the editor opens in + // VS Code's native profile viewer — verify the file 
exists and is valid. + let heap_path = + str_field(&snap, "heapProfilePath").ok_or("snapshot should return heapProfilePath")?; + assert!( + heap_path.ends_with(".heapprofile"), + "heapProfilePath: {heap_path}" + ); + let contents = + std::fs::read_to_string(heap_path).map_err(|err| format!("read heapprofile: {err}"))?; + let profile: serde_json::Value = serde_json::from_str(&contents)?; + let self_size = profile + .get("head") + .and_then(|head| head.get("children")) + .and_then(serde_json::Value::as_array) + .and_then(|children| children.first()) + .and_then(|node| node.get("selfSize")) + .and_then(serde_json::Value::as_u64); + assert_eq!( + self_size, + Some(24_567_890), + "heapprofile self size matches the allocation" + ); + Ok(()) +} + +/// Repeated diffs of the same growing site escalate leak confidence across +/// snapshots within one session: LOW (1) → MEDIUM (2) → HIGH (3+). +#[tokio::test] +async fn test_ws_memory_diff_escalates_leak_confidence() -> TestResult<()> { + let mut fixture = WsTestFixture::new().await?; + let _ = fixture.initialize().await?; + let (session_id, _) = start_memory_session(&mut fixture, 810).await?; + + for (idx, want) in ["LOW", "MEDIUM", "HIGH"].iter().enumerate() { + let id = 820 + idx as u64; + let diff = exec( + &mut fixture, + id, + "basilisk.memory.ingest", + serde_json::json!({ "memorySessionId": session_id, "output": DIFF_OUTPUT }), + ) + .await?; + assert_eq!(str_field(&diff, "kind"), Some("diff")); + let leaks = diff + .get("suspectedLeaks") + .and_then(serde_json::Value::as_array) + .ok_or("diff should carry suspectedLeaks")?; + let first = leaks.first().ok_or("expected one suspected leak")?; + assert_eq!(str_field(first, "confidence"), Some(*want), "diff #{idx}"); + } + Ok(()) +} + +/// gc-collect and reference-graph outputs marker-dispatch to their kinds. 
+#[tokio::test] +async fn test_ws_memory_gc_and_references() -> TestResult<()> { + let mut fixture = WsTestFixture::new().await?; + let _ = fixture.initialize().await?; + let (session_id, _) = start_memory_session(&mut fixture, 840).await?; + + let gc = exec( + &mut fixture, + 841, + "basilisk.memory.ingest", + serde_json::json!({ "memorySessionId": session_id, "output": GC_OUTPUT }), + ) + .await?; + assert_eq!(str_field(&gc, "kind"), Some("gc")); + assert_eq!( + gc.get("uncollectable").and_then(serde_json::Value::as_u64), + Some(1) + ); + + let refs = exec( + &mut fixture, + 842, + "basilisk.memory.ingest", + serde_json::json!({ "memorySessionId": session_id, "output": REFS_OUTPUT }), + ) + .await?; + assert_eq!(str_field(&refs, "kind"), Some("refs")); + let graph = refs.get("graph").ok_or("refs should carry a graph")?; + assert!(graph + .get("nodes") + .and_then(serde_json::Value::as_array) + .is_some_and(|nodes| nodes.len() == 1)); + Ok(()) +} + +/// Ingesting against an unknown session id is rejected, not silently accepted. +#[tokio::test] +async fn test_ws_memory_ingest_unknown_session_errors() -> TestResult<()> { + let mut fixture = WsTestFixture::new().await?; + let _ = fixture.initialize().await?; + + let resp = fixture + .request( + 850, + "workspace/executeCommand", + serde_json::json!({ + "command": "basilisk.memory.ingest", + "arguments": [{ "memorySessionId": "mem-nope", "output": SNAPSHOT_OUTPUT }] + }), + ) + .await? + .ok_or("no response to ingest")?; + let parsed: serde_json::Value = serde_json::from_str(&resp)?; + assert!( + parsed.get("error").is_some(), + "unknown session must error: {resp}" + ); + Ok(()) +} diff --git a/crates/basilisk-lsp/tests/memory_session_manager.rs b/crates/basilisk-lsp/tests/memory_session_manager.rs new file mode 100644 index 00000000..210b1ad6 --- /dev/null +++ b/crates/basilisk-lsp/tests/memory_session_manager.rs @@ -0,0 +1,93 @@ +//! Implements [LSPPROF]. See docs/specs/LSP-PROFILING-SPEC.md#PROFILE-MEMORY +//! +//! 
Coarse e2e for the memory-profiling round-trip state machine. +//! +//! The LSP holds no DAP connection — the editor runs the injection scripts and +//! couriers their raw stdout back via `basilisk.memory.ingest`. The +//! [`MemorySessionManager`] is the server-side brain of that round-trip: it +//! marker-dispatches the output to the existing parsers, accumulates +//! cross-snapshot leak history per session, and produces diagnostics to +//! publish. These tests drive that engine with the exact marker-prefixed output +//! the Python injection scripts in `scripts.rs` emit. + +use basilisk_lsp::profiler::memory::session::{IngestOutcome, IngestResult, MemorySessionManager}; + +/// A `__BASILISK_MEM__` snapshot line with one 24 MB allocation site. +const SNAPSHOT_OUTPUT: &str = r#"noise before +__BASILISK_MEM__{"current": 45678912, "peak": 50000000, "gcObjects": 14523, "gcCounts": [712, 45, 3], "stats": [{"file": "/tmp/app.py", "line": 42, "size": 24567890, "count": 15234, "traceback": [{"file": "/tmp/app.py", "line": 42}]}]} +noise after"#; + +/// A `__BASILISK_MEM_DIFF__` line with one growing site at /tmp/cache.py:34. +const DIFF_OUTPUT: &str = r#"__BASILISK_MEM_DIFF__{"leaks": [{"file": "/tmp/cache.py", "line": 34, "sizeDiff": 5000, "countDiff": 100, "size": 1000000, "count": 500, "traceback": [{"file": "/tmp/cache.py", "line": 34}]}], "current": 60000000, "peak": 70000000}"#; + +fn leak_confidence(result: &IngestResult) -> Result<String, String> { + match &result.outcome { + IngestOutcome::Diff { leaks, .. } => { + let first = leaks + .first() + .ok_or("expected at least one suspected leak")?; + Ok(first.confidence.to_string()) + } + other => Err(format!("expected diff outcome, got {other:?}")), + } +} + +/// A snapshot is parsed, retained, and yields an allocation diagnostic; then +/// repeated diffs of the same growing site escalate leak confidence across +/// snapshots (the whole point of the per-session `LeakTracker`).
+#[tokio::test] +async fn snapshot_then_repeated_diffs_escalate_leak_confidence() -> Result<(), String> { + let manager = MemorySessionManager::new(); + let session_id = manager.start_session(25).await; + assert!( + session_id.starts_with("mem-"), + "session id should be prefixed: {session_id}" + ); + + let snap = manager.ingest(&session_id, SNAPSHOT_OUTPUT).await?; + match snap.outcome { + IngestOutcome::Snapshot(ref snapshot) => { + assert_eq!(snapshot.current_memory, 45_678_912); + assert_eq!(snapshot.peak_memory, 50_000_000); + assert_eq!(snapshot.gc_objects, 14_523); + assert_eq!(snapshot.top_allocations.len(), 1); + } + ref other => return Err(format!("expected snapshot outcome, got {other:?}")), + } + assert!( + !snap.diagnostics.is_empty(), + "a 24 MB allocation must produce BSK-MEM-ALLOC diagnostics" + ); + + // Confidence escalates Low (1) -> Medium (2 consecutive) -> High (3+). + let first = manager.ingest(&session_id, DIFF_OUTPUT).await?; + assert_eq!(leak_confidence(&first)?, "LOW"); + + let second = manager.ingest(&session_id, DIFF_OUTPUT).await?; + assert_eq!(leak_confidence(&second)?, "MEDIUM"); + + let third = manager.ingest(&session_id, DIFF_OUTPUT).await?; + assert_eq!(leak_confidence(&third)?, "HIGH"); + + Ok(()) +} + +/// Ingesting against an unknown session id is an error, not a panic. +#[tokio::test] +async fn ingest_unknown_session_is_error() { + let manager = MemorySessionManager::new(); + let result = manager.ingest("mem-does-not-exist", SNAPSHOT_OUTPUT).await; + assert!(result.is_err(), "unknown session must be rejected"); +} + +/// Output with no recognized marker is rejected with a clear error. 
+#[tokio::test] +async fn ingest_without_marker_is_error() -> Result<(), String> { + let manager = MemorySessionManager::new(); + let session_id = manager.start_session(25).await; + let result = manager + .ingest(&session_id, "just some repl noise, no marker") + .await; + assert!(result.is_err(), "missing marker must be rejected"); + Ok(()) +} diff --git a/crates/basilisk-lsp/tests/profiler_tests.rs b/crates/basilisk-lsp/tests/profiler_tests.rs index e8c5aba5..cb290d64 100644 --- a/crates/basilisk-lsp/tests/profiler_tests.rs +++ b/crates/basilisk-lsp/tests/profiler_tests.rs @@ -1558,7 +1558,7 @@ fn verify_memory_diagnostics(snap3: &MemorySnapshot, growth: &[AllocationGrowth] freed_allocations: vec![], }; let mut fresh_tracker = LeakTracker::new(); - let leak_diags = mem_diag::generate_diff_diagnostics(&diff_data, &mut fresh_tracker); + let (_leaks, leak_diags) = mem_diag::generate_diff_diagnostics(&diff_data, &mut fresh_tracker); assert!(!leak_diags.is_empty(), "should generate leak diagnostics"); let cache_leak_diag = leak_diags diff --git a/crates/basilisk-lsp/tests/ws_features_tests.rs b/crates/basilisk-lsp/tests/ws_features_tests.rs index ada57474..09c1d5e7 100644 --- a/crates/basilisk-lsp/tests/ws_features_tests.rs +++ b/crates/basilisk-lsp/tests/ws_features_tests.rs @@ -34,6 +34,8 @@ mod ws_test_execute_uv; mod ws_test_hover; #[path = "lsp/ws_test_inlay_hints.rs"] mod ws_test_inlay_hints; +#[path = "lsp/ws_test_memory.rs"] +mod ws_test_memory; #[path = "lsp/ws_test_processes.rs"] mod ws_test_processes; #[path = "lsp/ws_test_refactoring.rs"] diff --git a/docs/specs/LSP-PROFILING-SPEC.md b/docs/specs/LSP-PROFILING-SPEC.md index 4b0ec49c..41f2334c 100644 --- a/docs/specs/LSP-PROFILING-SPEC.md +++ b/docs/specs/LSP-PROFILING-SPEC.md @@ -90,15 +90,27 @@ Start profiling a Python process. | Field | Type | Required | Description | |---|---|---|---| -| `pid` | `number` | Yes¹ | Target PID. 
¹Either `pid` or `debugSession` must be supplied; there is **no** silent auto-detect — the editor obtains a PID from [`basilisk.profiler.processes`](#PROFILE-PROCESSES-LSP) (the Python Processes panel) rather than prompting the user to type one. | -| `debugSession` | `string` | Yes¹ | Attach to the debuggee of an active debug session instead of a raw PID. | +| `pid` | `number` | **Yes** | Target PID. The editor obtains it from [`basilisk.profiler.processes`](#PROFILE-PROCESSES-LSP) (the Python Processes panel) or, for the active debug session, from the captured debuggee PID (see [#PROFILE-SAME-PROCESS]). There is **no** silent auto-detect. | | `sampleRate` | `number` | No | Samples per second (default: 100) | | `includeNative` | `boolean` | No | Include C extension frames (default: false) | | `duration` | `number` | No | Auto-stop after N seconds (default: null = manual stop) | A missing `pid` is rejected with `-32001` — earlier revisions of this spec claimed an "auto-detect when omitted", but none was ever implemented (#62). PID -discovery is now an explicit, user-visible step via the process panel. +discovery is now an explicit, user-visible step. + +#### Profiling the debug session's process {#PROFILE-SAME-PROCESS} + +The profiler and debugger **use the same process**. Because the LSP holds no DAP +connection, it never learns the debuggee's OS PID directly (it spawns +`debugpy.adapter`; debugpy spawns the debuggee later). Instead the editor captures +it: the DAP proxy (`vscode-extension/src/dap-proxy.ts`) intercepts debugpy's +`process` event (`body.systemProcessId`) and stores `sessionId → pid` in the +extension store; "Profile Debug Session" (`basilisk.profileAttachToDebug`) then +calls `basilisk.profiler.start` with that concrete `pid`. 
The LSP profiler stays +PID-based — **no server-side `debugSession`→PID resolution** — and the existing +privilege layer ([#PROFILE-PERMISSIONS]) routes the attach: child/same-user → +in-process py-spy (Linux/Windows), external/grandchild → elevated helper (macOS). **Response fields:** `sessionId`, `pid`, `pythonVersion`, `startedAt`. @@ -300,6 +312,28 @@ Stacks in speedscope are root-first (callers before callees). py-spy returns lea For direct SVG flamegraph output, use the `inferno` crate (Rust port of Brendan Gregg's FlameGraph). Convert aggregated stacks to collapsed format and pipe through `inferno::flamegraph::from_lines()`. +## Native VS Code profile files {#PROFILE-NATIVE} + +Both profilers also emit **V8 profile files** that VS Code's built-in profile +viewer opens natively (flame chart + bottom-up/left-heavy tables) — the same UI +as Node.js profiling (see the VS Code documentation on Node.js performance profiling). +The editor opens them with `vscode.open`; the custom flamegraph/dashboard +webviews remain as fallbacks. + +- **CPU → `.cpuprofile`** (`Profiler.Profile` schema): + [`cpuprofile.rs`](../../crates/basilisk-lsp/src/profiler/cpuprofile.rs) merges the + per-thread py-spy stacks into one call tree (`nodes` + `samples` + integer-µs + `timeDeltas`, derived from the sample rate). Written on `profiler.stop`; + the path is returned as `cpuProfilePath`. +- **Memory → `.heapprofile`** (`HeapProfiler.SamplingHeapProfile` schema): + [`heapprofile.rs`](../../crates/basilisk-lsp/src/profiler/memory/heapprofile.rs) + maps each `tracemalloc` site to a `head`-tree node with `selfSize`. Written on + a snapshot ingest; the path is returned as `heapProfilePath`. + +Line numbers are 0-based in V8; `url` is the source file path so the viewer can +navigate. `.heapsnapshot` is intentionally not produced (the built-in editor +doesn't render it).
+ ## Visualization {#PROFILE-VIS} ### Brand Palette for Profiling {#PROFILE-VIS-PALETTE} @@ -448,25 +482,73 @@ graph TB MEM_DIAG -->|"publishDiagnostics"| MEM_INLINE ``` -### How It Works {#PROFILE-MEMORY-HOWTO} - -Memory profiling requires an active **debug session** (debugpy). The LSP injects Python code into the running process via DAP `evaluate` requests. - -1. **Start tracking**: Inject `tracemalloc.start(25)` (25-frame deep tracebacks) and `gc.set_debug(gc.DEBUG_SAVEALL)`. -2. **Take snapshots**: Inject code to call `tracemalloc.take_snapshot()` and serialize top allocations as JSON via a `__BASILISK_MEM__` marker. -3. **Diff snapshots**: Compare two snapshots to find growing allocations (suspected leaks), new allocations, and freed allocations. Lines that consistently grow across multiple diffs are flagged as suspected leaks. -4. **Walk reference graph**: Inject an introspection script that uses `gc.get_referrers()` to walk the reference graph for a target object type, building a node/edge graph with cycle detection. This answers "why won't this object die?" +### How It Works — Editor-as-Courier Round-Trip {#PROFILE-MEMORY-HOWTO} + +Memory profiling requires an active **debug session** (debugpy). Crucially, **the +LSP holds no DAP connection — the editor does** (the editor connects directly to +debugpy; see [LSP-DEBUG-INTEGRATION-SPEC]). So the LSP cannot inject Python +itself. Instead, memory analysis is a **two-leg round-trip with the editor as +courier**, and debugpy can only `evaluate` against a **stopped** frame, so the +debuggee must be paused at a breakpoint: + +1. **Leg 1 — LSP → editor (get script):** A `basilisk.memory.*` command returns a + Python injection script (e.g. `tracemalloc.take_snapshot()` printing a + `__BASILISK_MEM__`-prefixed JSON payload). The LSP performs no DAP I/O. +2. **Editor runs the script** in the paused debuggee via a DAP `evaluate` request + (`vscode-extension/src/dap-evaluate.ts`), capturing the printed marker output. +3. 
**Leg 2 — editor → LSP (ingest):** The editor posts the raw output back via + [`basilisk.memory.ingest`](#PROFILE-MEMORY-INGEST). The LSP marker-dispatches it + to the matching parser, updates per-session state (the + [`MemorySessionManager`](../../crates/basilisk-lsp/src/profiler/memory/session.rs) + holds the cross-diff [`LeakTracker`] and timeline), **publishes memory + diagnostics** via `textDocument/publishDiagnostics`, and returns the structured, + `kind`-tagged result the editor renders (decorations, dashboard, reference graph). + +The operations: **start tracking** (`tracemalloc.start(25)` + `gc.set_debug`), +**snapshots** (`__BASILISK_MEM__`), **diffs** (`__BASILISK_MEM_DIFF__`; lines that +grow across ≥3 consecutive diffs escalate to High confidence), **gc collect** +(`__BASILISK_MEM_GC__`), and **reference-graph walks** (`__BASILISK_MEM_REFS__`, +via `gc.get_referrers()` with cycle detection). The diff script self-seeds its +baseline (`tracemalloc._basilisk_prev_snapshot`) inside the debuggee, so +cross-snapshot baseline state lives in Python; the LSP keeps only leak-confidence +history and diagnostics. + +This is identical for both editors — 100% of the engine is shared. Zed reaches the +same flow through `workspace/executeCommand`; only the script-running leg is +editor-specific. ### LSP Commands {#PROFILE-MEMORY-COMMANDS} -| Command | Request Fields | Response Summary | +The `start`/`snapshot`/`diff`/`references`/`objectsByType`/`gcCollect` commands are +**leg 1** — they return `{ memorySessionId?, script }`. The editor runs the script +and posts the output to [`basilisk.memory.ingest`](#PROFILE-MEMORY-INGEST) (leg 2). 
+ +| Command | Request Fields | Leg-1 Response | |---|---|---| -| `basilisk/memory/start` | `sessionId`, `tracebackDepth` (default 25), `snapshotInterval` (optional auto-snapshot) | `memorySessionId`, `tracingStarted`, `currentMemory`, `peakMemory` | -| `basilisk/memory/snapshot` | `memorySessionId` | `snapshotId`, `currentMemory`, `peakMemory`, `gcObjects`, `gcCounts`, `topAllocations[]` | -| `basilisk/memory/diff` | `memorySessionId`, `snapshot1`, `snapshot2` | `totalGrowth`, `totalFreed`, `netGrowth`, `suspectedLeaks[]`, `grownAllocations[]`, `freedAllocations[]` | -| `basilisk/memory/references` | `memorySessionId`, `targetType`, `targetReprContains`, `maxDepth`, `maxNodes`, `direction` (`referrers`/`referents`/`both`) | `graph` with `nodes[]`, `edges[]`, `cycles[]`, `retentionPath[]` | -| `basilisk/memory/objectsByType` | `memorySessionId`, `typeName`, `sortBy`, `limit` | `objects[]` (id, type, size, refcount, repr, createdAt), `totalCount`, `totalSize`, `typeSummary` | -| `basilisk/memory/gcCollect` | `memorySessionId` | `collected`, `uncollectable`, `memoryFreed`, `uncollectableObjects[]` | +| `basilisk.memory.start` | `tracebackDepth` (default 25) | `memorySessionId`, `tracingStarted`, `script` | +| `basilisk.memory.snapshot` | `memorySessionId` | `memorySessionId`, `script` | +| `basilisk.memory.diff` | `memorySessionId` | `memorySessionId`, `script` | +| `basilisk.memory.references` | `memorySessionId`, `targetType`, `targetReprContains`, `maxDepth`, `maxNodes` | `script` | +| `basilisk.memory.objectsByType` | `memorySessionId`, `typeName`, `limit` | `script` | +| `basilisk.memory.gcCollect` | `memorySessionId` | `script` | + +#### basilisk.memory.ingest {#PROFILE-MEMORY-INGEST} + +Leg 2 of the round-trip. Request: `{ memorySessionId, output }` where `output` is +the raw stdout of a script run in the debuggee. 
The +[`MemorySessionManager`](../../crates/basilisk-lsp/src/profiler/memory/session.rs) +detects the `__BASILISK_MEM*__` marker, parses with the existing parsers, scores +leaks via the per-session `LeakTracker`, publishes diagnostics, and returns a +`kind`-tagged object: + +- `kind: "snapshot"` → `snapshotId`, `currentMemory`, `peakMemory`, `gcObjects`, `gcCounts`, `topAllocations[]` +- `kind: "diff"` → `totalGrowth`, `totalFreed`, `netGrowth`, `suspectedLeaks[]` (with `confidence`) +- `kind: "gc"` → `collected`, `uncollectable`, `memoryFreed`, `uncollectableObjects[]` +- `kind: "refs"` → `graph` with `nodes[]`, `edges[]`, `cycles[]` +- `kind: "objects"` → `objects` (`objects[]`, `totalCount`, `totalSize`, `typeSummary`) +- `kind: "ack"` → bare acknowledgment (start/stop scripts) + +An unknown session or a marker-less payload is rejected with `-32010`. ### Reference Graph Visualization {#PROFILE-MEMORY-VIS-REFGRAPH} @@ -506,13 +588,14 @@ CPU and memory profiling can run simultaneously. Dashboard shows dual heat maps | Component | Code Location | |---|---| -| tracemalloc injection scripts | `basilisk-lsp/src/profiler/memory/scripts.rs` | -| Reference graph walker script | `basilisk-lsp/src/profiler/memory/refgraph.rs` | -| Snapshot diffing | `basilisk-lsp/src/profiler/memory/diff.rs` | +| tracemalloc / gc injection scripts (incl. reference-graph walker) | `basilisk-lsp/src/profiler/memory/scripts.rs` | +| Snapshot/diff/refs/objects parsers | `basilisk-lsp/src/profiler/memory/{mod,diff}.rs` | | Leak confidence scoring | `basilisk-lsp/src/profiler/memory/leaks.rs` | | Memory diagnostics | `basilisk-lsp/src/profiler/memory/diagnostics.rs` | -| LSP memory commands | `basilisk-lsp/src/profiler/memory/commands.rs` | -| Reference graph webview | `vscode-extension/src/profiler/refgraph/` (VS Code only) | +| Session state + marker-dispatched ingest | `basilisk-lsp/src/profiler/memory/session.rs` | +| LSP memory command handlers (incl. 
`ingest`) | `basilisk-lsp/src/server/memory_handlers.rs` | +| Editor DAP `evaluate` courier bridge | `vscode-extension/src/dap-evaluate.ts` (VS Code only) | +| Memory UI (decorations, dashboard, reference graph webview) | `vscode-extension/src/memory-profiler.ts`, `memory-decorations.ts` (VS Code only) | ## Permissions Model {#PROFILE-PERMISSIONS} @@ -616,7 +699,7 @@ Works without root if `ptrace_scope=0`. Options for restricted environments: `su ### Integration Tests {#PROFILE-TESTING-INTEGRATION} - Start a known Python script, attach profiler, verify hot function matches expected bottleneck -- Profile a debug session, verify PID auto-detection +- Profile a debug session, verifying the debuggee PID captured from the DAP `process` event ([#PROFILE-SAME-PROCESS]) - Verify speedscope output opens correctly in speedscope.app - Verify diagnostics appear for hot lines and disappear after clearing diff --git a/docs/specs/VSIX-SPEC.md b/docs/specs/VSIX-SPEC.md index f2220d39..01fbc975 100644 --- a/docs/specs/VSIX-SPEC.md +++ b/docs/specs/VSIX-SPEC.md @@ -249,6 +249,31 @@ flowchart LR The LSP server spawns `debugpy.adapter --port <port>` via `basilisk/startDebugSession`. The proxy connects to that port and relays DAP messages bidirectionally, intercepting specific message patterns. +### Starting a session (zero-config) {#VSIX-PYTHON-DEBUGGER-START} + +The `basilisk-debug` debugger is **factory-based** (no `program`/`runtime` in the +manifest), so the extension must own both activation and config provisioning: + +- **Activation:** `activationEvents` includes `onDebug`, + `onDebugResolve:basilisk-debug`, and `onDebugDynamicConfigurations:basilisk-debug` + so the adapter/tracker register whenever debugging starts — not only after a + Python file is opened. +- **Config provider:** `createBasiliskDebugConfigProvider` (`debug-adapter.ts`) is + registered for `basilisk-debug` (Dynamic + default).
It makes **"Run and Debug" + / F5 work with no `launch.json`**: `provideDebugConfigurations` offers a + "Python: Current File (Basilisk)" entry, and `resolveDebugConfiguration` (pure + `applyDebugConfigDefaults`) fills an empty/partial config to launch the active + Python file (`program: ${file}`). Without this, an empty-state workspace shows + no Basilisk debug option. + +### Tracker capture {#VSIX-PYTHON-DEBUGGER-DAP-TRACKER} + +`BasiliskDebugAdapterTracker` is the single observability point for debugpy → +VS Code traffic. It captures the debuggee's `process` event (`systemProcessId`, +used by the CPU profiler — see [LSP-PROFILING-SPEC.md] `#PROFILE-SAME-PROCESS`) +and `output` events (the `__BASILISK_MEM*__` payloads the memory round-trip +recovers, since debugpy delivers `print()` output here, not in `evaluate`). + ### Debug Adapter Proxy (VS Code Implementation) {#VSIX-PYTHON-DEBUGGER-DAP-PROXY} The proxy (`vscode-extension/src/dap-proxy.ts`) implements `vscode.DebugAdapter` via `DebugAdapterInlineImplementation`. It fixes four debugpy quirks: diff --git a/vscode-extension/.vscode-test.mjs b/vscode-extension/.vscode-test.mjs index 28513d5a..5710616c 100644 --- a/vscode-extension/.vscode-test.mjs +++ b/vscode-extension/.vscode-test.mjs @@ -43,8 +43,10 @@ export default defineConfig({ // coverage signal under the VS Code extension host. 
'**/out/coverage-decorations.js', '**/out/info-panel.js', + '**/out/memory-dashboard.js', '**/out/memory-decorations.js', '**/out/memory-profiler.js', + '**/out/memory-ref-graph.js', '**/out/module-explorer.js', '**/out/profiler.js', '**/out/profiler-flamegraph-html.js', diff --git a/vscode-extension/package.json b/vscode-extension/package.json index 6d59941d..947a107b 100644 --- a/vscode-extension/package.json +++ b/vscode-extension/package.json @@ -41,7 +41,10 @@ "lsp" ], "activationEvents": [ - "onLanguage:python" + "onLanguage:python", + "onDebug", + "onDebugResolve:basilisk-debug", + "onDebugDynamicConfigurations:basilisk-debug" ], "main": "./out/extension.js", "contributes": { @@ -239,6 +242,12 @@ "category": "Basilisk", "icon": "$(diff)" }, + { + "command": "basilisk.memoryMenu", + "title": "Basilisk: Memory…", + "category": "Basilisk", + "icon": "$(database)" + }, { "command": "basilisk.memoryStart", "title": "Basilisk: Start Memory Tracking", @@ -257,6 +266,18 @@ "category": "Basilisk", "icon": "$(debug-stop)" }, + { + "command": "basilisk.memoryDiff", + "title": "Basilisk: Compare Memory Snapshots", + "category": "Basilisk", + "icon": "$(diff)" + }, + { + "command": "basilisk.memoryGcCollect", + "title": "Basilisk: Force Garbage Collection", + "category": "Basilisk", + "icon": "$(trash)" + }, { "command": "basilisk.memoryReferences", "title": "Basilisk: Show Reference Graph", @@ -348,6 +369,34 @@ { "command": "basilisk.revealProcessScript", "when": "false" + }, + { + "command": "basilisk.memoryMenu", + "when": "basilisk.debugging" + }, + { + "command": "basilisk.memoryStart", + "when": "basilisk.debugging" + }, + { + "command": "basilisk.memorySnapshot", + "when": "basilisk.debugging" + }, + { + "command": "basilisk.memoryDiff", + "when": "basilisk.debugging" + }, + { + "command": "basilisk.memoryGcCollect", + "when": "basilisk.debugging" + }, + { + "command": "basilisk.memoryReferences", + "when": "basilisk.debugging" + }, + { + "command": 
"basilisk.memoryStop", + "when": "basilisk.debugging" } ], "view/title": [ diff --git a/vscode-extension/src/dap-evaluate.ts b/vscode-extension/src/dap-evaluate.ts new file mode 100644 index 00000000..5457cb15 --- /dev/null +++ b/vscode-extension/src/dap-evaluate.ts @@ -0,0 +1,135 @@ +// Implements [LSPPROF]. See docs/specs/LSP-PROFILING-SPEC.md#PROFILE-MEMORY +/** + * DAP `evaluate` bridge for memory profiling. + * + * The LSP holds no DAP connection — the editor owns it — so memory profiling is + * a courier round-trip: the LSP hands us a Python injection script, we run it in + * the debuggee via DAP `evaluate`, and post the raw output back to the LSP + * (`basilisk.memory.ingest`). These are internal helpers, NOT registered + * commands: the LSP owns commands; the editor only shuttles bytes (CLAUDE.md + * command-ownership rule). + * + * debugpy can only `evaluate` against a *stopped* frame, so memory profiling + * requires the debuggee to be paused at a breakpoint — [`currentStoppedFrameId`] + * resolves that frame (or null when nothing is paused). + */ + +import * as vscode from "vscode"; +import { Logger } from "./logger"; +import { debugOutputCursor, debugOutputSince } from "./dap-output"; + +/** The Basilisk debug adapter type. */ +const DEBUG_TYPE = "basilisk-debug"; + +/** Prefix shared by every memory-script output marker (`__BASILISK_MEM*__`). */ +const MARKER_PREFIX = "__BASILISK_MEM"; +/** How long to wait for a script's (possibly large, chunked) marker output. */ +const MARKER_WAIT_MS = 4000; +/** Poll interval while waiting for marker output. */ +const MARKER_POLL_MS = 25; + +/** Return the active Basilisk debug session, or undefined. */ +function activeBasiliskSession(): vscode.DebugSession | undefined { + const session = vscode.debug.activeDebugSession; + return session?.type === DEBUG_TYPE ? session : undefined; +} + +/** + * Evaluate a Python expression/statement in the active Basilisk debug session + * and return its textual output. 
+ * + * Injection scripts `print()` their `__BASILISK_MEM*__` marker payloads, and + * debugpy delivers that to DAP `output` events (the debuggee's stdout is + * redirected) — **not** in the `evaluate` response. So we snapshot the output + * cursor, run the evaluate, and then recover whatever the script printed (with + * a short wait, since the `output` event can land just after the response). The + * evaluate `result` is included too, in case an adapter does echo it. Returns + * null when there is no active Basilisk session or the request fails. + */ +export async function evaluateInDebugSession( + expression: string, + frameId?: number, + context: "repl" | "watch" | "hover" = "repl", +): Promise<string | null> { + const session = activeBasiliskSession(); + if (session === undefined) { return null; } + + const cursor = debugOutputCursor(session.id); + try { + const request: Record<string, unknown> = { expression, context }; + if (frameId !== undefined) { request.frameId = frameId; } + const response = (await session.customRequest("evaluate", request)) as { result?: string }; + const direct = response.result ?? ""; + if (direct.includes(MARKER_PREFIX)) { return direct; } + const printed = await waitForMarkerOutput(session.id, cursor); + return printed.length > 0 ? printed : direct; + } catch (err: unknown) { + Logger.warn(`[Memory] evaluate failed: ${err instanceof Error ? err.message : String(err)}`); + return null; + } +} + +/** + * Wait for printed marker output to arrive via `output` events. + * + * The payload is a single `print()`ed line (`marker + json.dumps(...)` + `\n`) + * but debugpy can split it across several `output` events, so we wait until the + * marker line is **newline-terminated** — otherwise a large JSON snapshot is + * truncated mid-string. `json.dumps` (no indent) emits no embedded newlines, so + * the first `\n` after the marker reliably ends the payload.
+ */ +async function waitForMarkerOutput(sessionId: string, cursor: number): Promise<string> { + const deadline = Date.now() + MARKER_WAIT_MS; + for (;;) { + const out = debugOutputSince(sessionId, cursor); + const markerAt = out.indexOf(MARKER_PREFIX); + // The payload line is complete once a newline follows the marker (the + // `print()` terminator); `includes(.., markerAt)` searches from the marker. + const complete = markerAt !== -1 && out.includes("\n", markerAt); + if (complete || Date.now() >= deadline) { + return out; + } + await new Promise((resolve) => setTimeout(resolve, MARKER_POLL_MS)); + } +} + +/** + * Resolve a frameId for a currently-stopped thread, or null if nothing is + * paused. debugpy rejects `evaluate` without a stopped frame, so memory + * profiling requires the debuggee to be paused at a breakpoint. + */ +export async function currentStoppedFrameId(): Promise<number | null> { + const session = activeBasiliskSession(); + if (session === undefined) { return null; } + + try { + const threads = (await session.customRequest("threads")) as { threads?: { id: number }[] }; + for (const thread of threads.threads ?? []) { + const frameId = await topFrameIdIfStopped(session, thread.id); + if (frameId !== null) { return frameId; } + } + return null; + } catch (err: unknown) { + Logger.warn( + `[Memory] could not resolve a stopped frame: ${err instanceof Error ? err.message : String(err)}`, + ); + return null; + } +} + +/** Top frameId of `threadId` if it is stopped, else null (running threads error). */ +async function topFrameIdIfStopped( + session: vscode.DebugSession, + threadId: number, +): Promise<number | null> { + try { + const stack = (await session.customRequest("stackTrace", { + threadId, + startFrame: 0, + levels: 1, + })) as { stackFrames?: { id: number }[] }; + return stack.stackFrames?.[0]?.id ??
null; + } catch { + return null; // thread not suspended + } +} diff --git a/vscode-extension/src/dap-output.ts b/vscode-extension/src/dap-output.ts new file mode 100644 index 00000000..3bc5c599 --- /dev/null +++ b/vscode-extension/src/dap-output.ts @@ -0,0 +1,58 @@ +// Implements [LSPPROF]. See docs/specs/LSP-PROFILING-SPEC.md#PROFILE-MEMORY +/** + * Per-session capture of debuggee output (DAP `output` events). + * + * Memory injection scripts `print('__BASILISK_MEM*__' + json)` — and debugpy + * delivers that stdout as DAP `output` events, **not** in the `evaluate` + * response result (the debuggee's stdout is redirected). So to recover a + * marker payload after running a script, we accumulate the session's output + * here (fed by the debug adapter tracker) and slice out what arrived after the + * `evaluate` was issued. See `dap-evaluate.ts`. + */ + +/** Cap per-session buffer so a long-lived session can't grow it unbounded. */ +const MAX_BUFFER_CHARS = 1_000_000; + +/** Retained tail of a session's output plus the count of chars ever appended. */ +interface OutputBuffer { + text: string; + total: number; +} + +/** sessionId → accumulated output (tail-trimmed to `MAX_BUFFER_CHARS`). */ +const buffers = new Map<string, OutputBuffer>(); + +/** Append a chunk of debuggee output for a session (called by the DAP tracker). */ +export function appendDebugOutput(sessionId: string, text: string): void { + const prev = buffers.get(sessionId); + const combined = (prev?.text ?? "") + text; + buffers.set(sessionId, { + text: combined.length > MAX_BUFFER_CHARS + ? combined.slice(combined.length - MAX_BUFFER_CHARS) + : combined, + // Counted before trimming, so cursors stay valid once the cap is reached. + total: (prev?.total ?? 0) + text.length, + }); +} + +/** + * Chars ever appended for a session — a cursor for [`debugOutputSince`]. + * Absolute (not the retained length), so it keeps advancing after trimming. + */ +export function debugOutputCursor(sessionId: string): number { + return buffers.get(sessionId)?.total ?? 0; +} + +/** Output appended after `cursor` (everything retained, if the buffer was trimmed past it). */ +export function debugOutputSince(sessionId: string, cursor: number): string { + const buf = buffers.get(sessionId); + if (buf === undefined) { return ""; } + // Map the absolute cursor onto an index within the retained tail. + const trimmed = buf.total - buf.text.length; + return buf.text.slice(Math.max(0, cursor - trimmed)); +} + +/** Drop a session's buffer (called when the debug session ends).
*/ +export function clearDebugOutput(sessionId: string): void { + buffers.delete(sessionId); +} diff --git a/vscode-extension/src/debug-adapter.ts b/vscode-extension/src/debug-adapter.ts index a6fdf307..bb26390a 100644 --- a/vscode-extension/src/debug-adapter.ts +++ b/vscode-extension/src/debug-adapter.ts @@ -8,6 +8,7 @@ import * as net from "net"; import { type LanguageClient } from "vscode-languageclient/node"; import { Logger } from "./logger"; import { DapTcpProxy } from "./dap-proxy"; +import { appendDebugOutput, clearDebugOutput } from "./dap-output"; /** Max number of variables to log inline before switching to a count summary. */ const MAX_INLINE_VARS = 10; @@ -91,23 +92,36 @@ function summarizeCollectionFields(obj: Record, parts: string[] /** * Factory that creates per-session DAP message trackers. + * + * The tracker is the single observability point for debugpy → VS Code traffic, + * so it captures both the debuggee `process` event (the PID the CPU profiler + * targets — "same process") and `output` events (the marker payloads the + * memory round-trip recovers). `onDebuggeeProcessId`, when supplied, receives + * `(sessionId, pid)` once the `process` event arrives. 
*/ export class BasiliskDebugAdapterTrackerFactory implements vscode.DebugAdapterTrackerFactory { + constructor(private readonly onDebuggeeProcessId?: DebuggeeProcessIdCallback) {} + public createDebugAdapterTracker( session: vscode.DebugSession ): vscode.ProviderResult<vscode.DebugAdapterTracker> { - return new BasiliskDebugAdapterTracker(session); + return new BasiliskDebugAdapterTracker(session, this.onDebuggeeProcessId); } } class BasiliskDebugAdapterTracker implements vscode.DebugAdapterTracker { private readonly sessionId: string; + private readonly fullSessionId: string; private readonly sessionName: string; - constructor(session: vscode.DebugSession) { + constructor( + session: vscode.DebugSession, + private readonly onDebuggeeProcessId?: DebuggeeProcessIdCallback + ) { this.sessionId = session.id.slice(0, SESSION_ID_PREFIX_LEN); + this.fullSessionId = session.id; this.sessionName = session.name; } @@ -117,6 +131,7 @@ class BasiliskDebugAdapterTracker implements vscode.DebugAdapterTracker { public onWillStopSession(): void { Logger.info(`[DAP ${this.sessionId}] session "${this.sessionName}" stopping`); + clearDebugOutput(this.fullSessionId); } public onWillReceiveMessage(message: unknown): void { @@ -139,10 +154,35 @@ class BasiliskDebugAdapterTracker implements vscode.DebugAdapterTracker { Logger.warn(text); } } else if (msg.type === "event") { - Logger.debug(`[DAP ${this.sessionId}] <-- event:${msg.event} ${summarizeBody(msg.body)}`); - if (msg.event === "terminated") { - Logger.info(`[DAP ${this.sessionId}] program terminated`); + this.handleEvent(msg.event, msg.body); + } + } + + /** Capture profiler-relevant events; log the rest. */ + private handleEvent(event: string | undefined, body: unknown): void { + if (event === "output") { + // Capture debuggee stdout/stderr so the memory round-trip can recover + // the `__BASILISK_MEM*__` marker its injection scripts print (debugpy + // delivers print() output here, not in the evaluate result).
+ const text = (body as { output?: string } | undefined)?.output; + if (typeof text === "string") { + appendDebugOutput(this.fullSessionId, text); } + return; + } + if (event === "process") { + // The debuggee's OS PID — captured so the CPU profiler can attach to the + // SAME process the debugger drives (DAP: body.systemProcessId). + const pid = (body as { systemProcessId?: number } | undefined)?.systemProcessId; + if (typeof pid === "number" && this.onDebuggeeProcessId !== undefined) { + Logger.info(`[DAP ${this.sessionId}] debuggee systemProcessId=${pid}`); + this.onDebuggeeProcessId(this.fullSessionId, pid); + } + return; + } + Logger.debug(`[DAP ${this.sessionId}] <-- event:${event} ${summarizeBody(body)}`); + if (event === "terminated") { + Logger.info(`[DAP ${this.sessionId}] program terminated`); } } @@ -173,6 +213,9 @@ async function isPortAlive(_host: string, port: number): Promise { }); } +/** Callback that receives the debuggee OS PID once debugpy emits its `process` event. */ +export type DebuggeeProcessIdCallback = (sessionId: string, pid: number) => void; + /** Handle attach mode: connect to user-specified host:port, respawning if needed. */ async function handleAttachMode( config: vscode.DebugConfiguration, @@ -305,3 +348,72 @@ export function createDebugAdapterFactory( }, }; } + +// ── Debug configuration provider ────────────────────────────────────────── + +/** A config field is "blank" when undefined (VS Code's empty `{}`) or empty. */ +function isBlank(value: string | undefined): boolean { + return value === undefined || value === ""; +} + +/** The default launch config for the current file. */ +function defaultLaunchConfig(): vscode.DebugConfiguration { + return { + name: "Python: Current File (Basilisk)", + type: "basilisk-debug", + request: "launch", + program: "${file}", + console: "internalConsole", + redirectOutput: true, + justMyCode: true, + }; +} + +/** + * Fill in a runnable `basilisk-debug` config from an empty or partial one. 
+ * + * This is what makes "Run and Debug" / F5 work **without a launch.json**: VS + * Code calls the provider with an empty config (no type), and for a Python file + * we synthesize a launch of the current file. A partial config missing + * `program` defaults to `${file}`. Pure (no VS Code APIs) so it is unit-testable; + * the active language id is passed in. + */ +export function applyDebugConfigDefaults( + config: vscode.DebugConfiguration, + activeLanguageId: string | undefined, +): vscode.DebugConfiguration { + // Empty config (F5 / "Run and Debug" with no launch.json — VS Code passes `{}`): + // only synthesize one for a Python file, else leave it for VS Code to report + // "open a file". Falsy check also tolerates blank fields from a stub config. + if (isBlank(config.type) && isBlank(config.request) && isBlank(config.name)) { + return activeLanguageId === "python" ? defaultLaunchConfig() : config; + } + // A launch config missing `program` targets the active file. + if ( + config.type === "basilisk-debug" && + config.request === "launch" && + isBlank(config.program as string | undefined) + ) { + return { ...config, program: "${file}" }; + } + return config; +} + +/** + * Provider that lets `basilisk-debug` start with no `launch.json`: it offers a + * default configuration in the Run-and-Debug picker and resolves empty/partial + * configs to a launch of the current file. 
+ */ +export function createBasiliskDebugConfigProvider(): vscode.DebugConfigurationProvider { + return { + provideDebugConfigurations(): vscode.DebugConfiguration[] { + return [defaultLaunchConfig()]; + }, + resolveDebugConfiguration( + _folder: vscode.WorkspaceFolder | undefined, + config: vscode.DebugConfiguration, + ): vscode.DebugConfiguration { + return applyDebugConfigDefaults(config, vscode.window.activeTextEditor?.document.languageId); + }, + }; +} diff --git a/vscode-extension/src/extension.ts b/vscode-extension/src/extension.ts index ec37cd51..c5f254a0 100644 --- a/vscode-extension/src/extension.ts +++ b/vscode-extension/src/extension.ts @@ -7,13 +7,13 @@ */ import * as vscode from "vscode"; -import { execFile } from "child_process"; import * as path from "path"; import * as os from "os"; import { Logger, bindLogger, CompositeSink, FileLogSink, nullSink } from "./logger"; import type { LogSink } from "./logger"; import { startLspClient } from "./lsp-client"; -import { createDebugAdapterFactory, BasiliskDebugAdapterTrackerFactory } from "./debug-adapter"; +import { createDebugAdapterFactory, BasiliskDebugAdapterTrackerFactory, createBasiliskDebugConfigProvider } from "./debug-adapter"; +import { startSubprocessMode } from "./subprocess-mode"; import { registerTestExplorer } from "./test-explorer"; import { registerModuleExplorer } from "./module-explorer"; import { registerTypeHealth } from "./type-health"; @@ -30,9 +30,6 @@ const STATUS_BAR_PRIORITY = 100; /** Length of an abbreviated session ID prefix for logging. */ const SESSION_ID_PREFIX_LEN = 8; -/** Exit code returned by `basilisk check` on internal errors. 
*/ -const BASILISK_INTERNAL_ERROR_EXIT_CODE = 3; - let store: Store | undefined; /** @@ -260,12 +257,35 @@ function registerDebugSupport(context: vscode.ExtensionContext, s: Store): void createDebugAdapterFactory(() => s.client.value) ) ); + // Let users start debugging with NO launch.json: the Dynamic provider lists a + // "Python (Basilisk)" config in the Run-and-Debug picker, and resolve fills in + // the current file for an empty/partial config (F5 / the big Run button). + const debugConfigProvider = createBasiliskDebugConfigProvider(); + singletonDisposables.push( + vscode.debug.registerDebugConfigurationProvider( + "basilisk-debug", + debugConfigProvider, + vscode.DebugConfigurationProviderTriggerKind.Dynamic + ), + vscode.debug.registerDebugConfigurationProvider("basilisk-debug", debugConfigProvider) + ); singletonDisposables.push( vscode.debug.registerDebugAdapterTrackerFactory( "basilisk-debug", - new BasiliskDebugAdapterTrackerFactory() + // The tracker captures the debuggee's PID (from the DAP `process` event) + // so the CPU profiler can attach to the SAME process the debugger drives. + new BasiliskDebugAdapterTrackerFactory((sessionId, pid) => { + s.setDebuggeeProcessId(sessionId, pid); + }) ) ); + // Forget the debuggee PID when its session ends so stale mappings can't + // misdirect a later profile attach. + context.subscriptions.push( + vscode.debug.onDidTerminateDebugSession((session) => { + s.clearDebuggeeProcessId(session.id); + }) + ); registerDebugLifecycleLogging(context); } @@ -273,6 +293,10 @@ function registerDebugLifecycleLogging(context: vscode.ExtensionContext): void { context.subscriptions.push( vscode.debug.onDidStartDebugSession((session) => { Logger.info(`Debug session started: id=${session.id}, name=${session.name}, type=${session.type}`); + // Gate debug-only commands (memory profiling needs a paused debuggee). 
+ if (session.type === "basilisk-debug") { + void vscode.commands.executeCommand("setContext", "basilisk.debugging", true); + } }) ); context.subscriptions.push( @@ -282,6 +306,12 @@ function registerDebugLifecycleLogging(context: vscode.ExtensionContext): void { `[Lifecycle] onDidTerminateDebugSession: terminated=${session.id.slice(0, SESSION_ID_PREFIX_LEN)}, ` + `active=${activeId === "undefined" ? "correctly undefined" : `STILL SET (${activeId.slice(0, SESSION_ID_PREFIX_LEN)})`}` ); + // Clear the debug context once no Basilisk debug session remains active. + // Symmetric with the type-gated set above: stays true only while a + // basilisk-debug session is active (ignores other debuggers' sessions). + if (vscode.debug.activeDebugSession?.type !== "basilisk-debug") { + void vscode.commands.executeCommand("setContext", "basilisk.debugging", false); + } }) ); context.subscriptions.push( @@ -382,111 +412,3 @@ async function startRuntime(context: vscode.ExtensionContext, s: Store): Promise } } -function workspaceRoot(): string | undefined { - return vscode.workspace.workspaceFolders?.[0]?.uri.fsPath; -} - -// ── Subprocess mode ─────────────────────────────────────────────────────── - -/** Shape of a single diagnostic emitted by `basilisk check --output json`. 
*/ -interface BasiliskDiagnostic { - code: string; - severity: "error" | "warning"; - message: string; - path: string; - line: number; - col: number; - end_line: number; - end_col: number; -} - -function startSubprocessMode( - context: vscode.ExtensionContext, - executablePath: string -): void { - const collection = vscode.languages.createDiagnosticCollection("basilisk"); - context.subscriptions.push(collection); - - context.subscriptions.push( - vscode.workspace.onDidOpenTextDocument((doc) => { - if (doc.languageId === "python") {checkDocument(doc, collection, executablePath);} - }) - ); - context.subscriptions.push( - vscode.workspace.onDidSaveTextDocument((doc) => { - if (doc.languageId === "python") {checkDocument(doc, collection, executablePath);} - }) - ); - context.subscriptions.push( - vscode.workspace.onDidCloseTextDocument((doc) => { collection.delete(doc.uri); }) - ); - - for (const doc of vscode.workspace.textDocuments) { - if (doc.languageId === "python") {checkDocument(doc, collection, executablePath);} - } -} - -function checkDocument( - doc: vscode.TextDocument, - collection: vscode.DiagnosticCollection, - executablePath: string -): void { - const enabled = vscode.workspace.getConfiguration("basilisk").get("enabled") ?? true; - if (!enabled) { - collection.delete(doc.uri); - return; - } - if (doc.isUntitled || doc.uri.scheme !== "file") {return;} - - const filePath = doc.uri.fsPath; - execFile( - executablePath, - ["check", "--output", "json", filePath], - { cwd: workspaceRoot() }, - (error, stdout, stderr) => { - if (error?.code === BASILISK_INTERNAL_ERROR_EXIT_CODE) { - vscode.window.showWarningMessage( - `Basilisk: internal error checking ${path.basename(filePath)}: ${stderr}` - ); - return; - } - if (error && typeof error.code === "number" && error.code !== 1) { - vscode.window.showWarningMessage( - `Basilisk: failed to run '${executablePath}'. Is it on PATH? 
(${error.message})` - ); - collection.delete(doc.uri); - return; - } - collection.set(doc.uri, parseDiagnostics(stdout, doc)); - } - ); -} - -function parseDiagnostics(json: string, doc: vscode.TextDocument): vscode.Diagnostic[] { - let items: BasiliskDiagnostic[]; - try { - items = JSON.parse(json) as BasiliskDiagnostic[]; - } catch { - return []; - } - if (!Array.isArray(items)) {return [];} - - return items - .filter((item) => item.path === doc.uri.fsPath) - .map((item) => { - const range = new vscode.Range( - new vscode.Position(item.line - 1, item.col - 1), - new vscode.Position(item.end_line - 1, item.end_col - 1) - ); - const severity = item.severity === "error" - ? vscode.DiagnosticSeverity.Error - : vscode.DiagnosticSeverity.Warning; - const diag = new vscode.Diagnostic(range, `${item.message} [${item.code}]`, severity); - diag.source = "basilisk"; - diag.code = { - value: item.code, - target: vscode.Uri.parse(`https://www.basilisk-python.dev/errors/${item.code}`), - }; - return diag; - }); -} diff --git a/vscode-extension/src/memory-profiler.ts b/vscode-extension/src/memory-profiler.ts index 27c52fc8..99fadc04 100644 --- a/vscode-extension/src/memory-profiler.ts +++ b/vscode-extension/src/memory-profiler.ts @@ -15,9 +15,24 @@ import * as vscode from "vscode"; import { Logger } from "./logger"; import type { Store } from "./store"; +import { currentStoppedFrameId, evaluateInDebugSession } from "./dap-evaluate"; import { + disposeMemoryDashboard, + openMemoryDashboard, + type MemoryDashboardSnapshot, + type MemoryDiffData, +} from "./memory-dashboard"; +import { + disposeRefGraph, + openRefGraphWebview, + type ReferenceGraphResult, +} from "./memory-ref-graph"; +import { + applyLeakDecorations, + applyMemoryDecorations, clearMemoryDecorations, disposeMemoryDecorations, + type MemoryDiffResult, type MemorySnapshotResult, } from "./memory-decorations"; @@ -30,14 +45,27 @@ const LSP_MEM_CMD = { references: "basilisk.memory.references", objectsByType: 
"basilisk.memory.objectsByType", gcCollect: "basilisk.memory.gcCollect", + ingest: "basilisk.memory.ingest", } as const; +/** tracemalloc traceback depth injected at start. */ +const TRACEBACK_DEPTH = 25; +/** Reference-graph traversal bounds. */ +const REF_GRAPH_MAX_DEPTH = 5; +const REF_GRAPH_MAX_NODES = 200; + +/** A tagged ingest result returned by `basilisk.memory.ingest`. */ +interface MemoryIngestResult { + kind: "snapshot" | "diff" | "gc" | "refs" | "objects" | "ack"; + [field: string]: unknown; +} + // ── State ───────────────────────────────────────────────────────────────── let memoryStatusBarItem: vscode.StatusBarItem | undefined; let activeMemorySessionId: string | undefined; -let refGraphPanel: vscode.WebviewPanel | undefined; -let memDashboardPanel: vscode.WebviewPanel | undefined; +/** Most recent snapshot, so a later "Compare" can show it alongside the diff. */ +let lastDashboardSnapshot: MemoryDashboardSnapshot | undefined; // ── Registration ────────────────────────────────────────────────────────── @@ -53,38 +81,129 @@ export function registerMemoryProfiler( vscode.StatusBarAlignment.Left, MEMORY_STATUS_BAR_PRIORITY, ); - memoryStatusBarItem.command = "basilisk.memoryStop"; + // Click the status-bar item to open the memory action menu (no palette needed). 
+ memoryStatusBarItem.command = "basilisk.memoryMenu"; const disposables: vscode.Disposable[] = [ memoryStatusBarItem, + vscode.commands.registerCommand("basilisk.memoryMenu", async () => + handleMemoryMenu(), + ), vscode.commands.registerCommand("basilisk.memoryStart", async () => handleMemoryStart(store), ), vscode.commands.registerCommand("basilisk.memorySnapshot", async () => handleMemorySnapshot(store), ), + vscode.commands.registerCommand("basilisk.memoryDiff", async () => + handleMemoryDiff(store), + ), + vscode.commands.registerCommand("basilisk.memoryGcCollect", async () => + handleMemoryGcCollect(store), + ), vscode.commands.registerCommand("basilisk.memoryStop", () => { handleMemoryStop(store); }), vscode.commands.registerCommand("basilisk.memoryReferences", async () => handleMemoryReferences(store), ), + // Show/hide the memory status-bar entry as Basilisk debug sessions come and go. + vscode.debug.onDidChangeActiveDebugSession(() => { refreshMemoryStatusBar(); }), + vscode.debug.onDidStartDebugSession(() => { refreshMemoryStatusBar(); }), + vscode.debug.onDidTerminateDebugSession(() => { refreshMemoryStatusBar(); }), ]; + refreshMemoryStatusBar(); return disposables; } +/** Quick-pick menu of memory actions — the clickable alternative to the palette. */ +async function handleMemoryMenu(): Promise { + const tracking = activeMemorySessionId !== undefined; + const items: { label: string; command: string }[] = tracking + ? 
[ + { label: "$(device-camera) Take Memory Snapshot", command: "basilisk.memorySnapshot" }, + { label: "$(diff) Compare Memory Snapshots", command: "basilisk.memoryDiff" }, + { label: "$(type-hierarchy) Show Reference Graph", command: "basilisk.memoryReferences" }, + { label: "$(trash) Force Garbage Collection", command: "basilisk.memoryGcCollect" }, + { label: "$(debug-stop) Stop Memory Tracking", command: "basilisk.memoryStop" }, + ] + : [{ label: "$(database) Start Memory Tracking", command: "basilisk.memoryStart" }]; + const pick = await vscode.window.showQuickPick(items, { + placeHolder: tracking ? "Basilisk memory profiling" : "Pause the debugger, then start memory tracking", + }); + if (pick !== undefined) { + await vscode.commands.executeCommand(pick.command); + } +} + /** Clean up memory profiler resources. */ export function disposeMemoryProfiler(): void { clearMemoryDecorations(); disposeMemoryDecorations(); - if (refGraphPanel !== undefined) { - refGraphPanel.dispose(); - refGraphPanel = undefined; + disposeRefGraph(); + disposeMemoryDashboard(); + lastDashboardSnapshot = undefined; +} + +// ── Round-trip courier ────────────────────────────────────────────────────── + +/** + * Run one memory operation as the editor-as-courier round-trip: + * 1. ask the LSP for the injection script (`command` → `{ script }`), + * 2. run it in the paused debuggee via DAP `evaluate`, + * 3. post the raw output back to `basilisk.memory.ingest`, + * 4. return the LSP's structured, marker-dispatched result. + * + * Returns null (with an actionable message) when there is no session, nothing + * is paused, or evaluation fails — memory profiling requires the debuggee to be + * stopped at a breakpoint because debugpy cannot evaluate a running program. 
+ */ +async function runMemoryScript( + store: Store, + command: string, + extraArgs: Record = {}, +): Promise { + const client = store.client.value; + if (client?.isRunning() !== true) { + void vscode.window.showErrorMessage("Basilisk LSP not connected"); + return null; + } + if (activeMemorySessionId === undefined) { + void vscode.window.showWarningMessage("Basilisk: Start memory tracking first."); + return null; } - if (memDashboardPanel !== undefined) { - memDashboardPanel.dispose(); - memDashboardPanel = undefined; + const frameId = await currentStoppedFrameId(); + if (frameId === null) { + void vscode.window.showWarningMessage( + "Basilisk: Pause the debugger at a breakpoint to inspect memory.", + ); + return null; + } + + try { + const phase1 = await client.sendRequest<{ script?: string } | null>("workspace/executeCommand", { + command, + arguments: [{ memorySessionId: activeMemorySessionId, ...extraArgs }], + }); + const script = phase1?.script; + if (script === undefined || script === "") { return null; } + + const output = await evaluateInDebugSession(script, frameId); + if (output === null) { + void vscode.window.showWarningMessage("Basilisk: Could not run the memory script in the debuggee."); + return null; + } + + return await client.sendRequest("workspace/executeCommand", { + command: LSP_MEM_CMD.ingest, + arguments: [{ memorySessionId: activeMemorySessionId, output }], + }); + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + Logger.warn(`[Memory] ${command} round-trip failed: ${msg}`); + void vscode.window.showWarningMessage(`Basilisk: ${msg}`); + return null; } } @@ -96,21 +215,33 @@ async function handleMemoryStart(store: Store): Promise { void vscode.window.showErrorMessage("Basilisk LSP not connected"); return; } + // tracemalloc must be injected into a paused debuggee, so require a stopped + // frame before we even mint a session. 
+ const frameId = await currentStoppedFrameId(); + if (frameId === null) { + void vscode.window.showWarningMessage( + "Basilisk: Pause the debugger at a breakpoint, then start memory tracking.", + ); + return; + } try { - const TRACEBACK_DEPTH = 25; - const result = await client.sendRequest<{ memorySessionId: string } | null>("workspace/executeCommand", { + const result = await client.sendRequest<{ memorySessionId?: string; script?: string } | null>("workspace/executeCommand", { command: LSP_MEM_CMD.start, arguments: [{ tracebackDepth: TRACEBACK_DEPTH }], }); + if (result?.memorySessionId === undefined || result.script === undefined) { return; } - if (result?.memorySessionId !== undefined && result.memorySessionId !== "") { - activeMemorySessionId = result.memorySessionId; - updateMemoryStatusBar("tracking"); - Logger.info( - `Memory tracking started: session ${result.memorySessionId}`, - ); + const ack = await evaluateInDebugSession(result.script, frameId); + if (ack === null) { + void vscode.window.showWarningMessage("Basilisk: Could not start tracemalloc in the debuggee."); + return; } + + activeMemorySessionId = result.memorySessionId; + refreshMemoryStatusBar(); + Logger.info(`Memory tracking started: session ${result.memorySessionId}`); + void vscode.window.showInformationMessage("Basilisk: Memory tracking started. Take a snapshot to inspect allocations."); } catch (err) { void vscode.window.showErrorMessage( `Memory tracking failed: ${err instanceof Error ? 
err.message : String(err)}`, @@ -119,41 +250,113 @@ async function handleMemoryStart(store: Store): Promise { } async function handleMemorySnapshot(store: Store): Promise { - const client = store.client.value; - if (client?.isRunning() !== true || activeMemorySessionId === undefined) { - void vscode.window.showWarningMessage( - "No active memory tracking session", + const result = await runMemoryScript(store, LSP_MEM_CMD.snapshot); + if (result?.kind === "snapshot") { + applyMemoryDecorations(result as unknown as MemorySnapshotResult); + // Retain for a later "Compare" (the Basilisk leak-analysis dashboard). + lastDashboardSnapshot = toDashboardSnapshot(result); + Logger.info(`Memory snapshot: ${lastDashboardSnapshot.currentMemory} bytes current`); + // Open the V8 .heapprofile in VS Code's built-in profile viewer (flame chart + // + table, Self/Total size) — the same UI as Node.js heap profiles. + const heapProfilePath = asString(result.heapProfilePath); + if (heapProfilePath !== "") { + await vscode.commands.executeCommand("vscode.open", vscode.Uri.file(heapProfilePath)); + } else { + // Fall back to the Basilisk dashboard if the file wasn't produced. + openMemoryDashboard(lastDashboardSnapshot); + } + } +} + +async function handleMemoryDiff(store: Store): Promise { + const result = await runMemoryScript(store, LSP_MEM_CMD.diff); + if (result?.kind === "diff") { + applyLeakDecorations(result as unknown as MemoryDiffResult); + const leaks = Array.isArray(result.suspectedLeaks) ? result.suspectedLeaks : []; + Logger.info(`Memory diff: ${leaks.length} suspected leak(s)`); + // Refresh the dashboard with the leak analysis (needs a prior snapshot). 
+ if (lastDashboardSnapshot !== undefined) { + openMemoryDashboard(lastDashboardSnapshot, toDashboardDiff(result)); + } + void vscode.window.showInformationMessage( + `Basilisk: Compared snapshots — ${leaks.length} suspected leak(s)`, ); - return; } +} - try { - const result = await client.sendRequest("workspace/executeCommand", { - command: LSP_MEM_CMD.snapshot, - arguments: [{ memorySessionId: activeMemorySessionId }], - }); +/** Coerce an `unknown` JSON field to a string (never an object stringification). */ +function asString(value: unknown, fallback = ""): string { + return typeof value === "string" ? value : fallback; +} - if (result !== null) { - Logger.info(`Memory snapshot taken: ${activeMemorySessionId}`); - } - } catch (err) { - void vscode.window.showErrorMessage( - `Memory snapshot failed: ${err instanceof Error ? err.message : String(err)}`, +/** Coerce an `unknown` JSON field to a finite number. */ +function asNumber(value: unknown, fallback = 0): number { + return typeof value === "number" && Number.isFinite(value) ? value : fallback; +} + +/** Map an ingest snapshot result to the dashboard's snapshot shape. */ +function toDashboardSnapshot(result: MemoryIngestResult): MemoryDashboardSnapshot { + return { + memorySessionId: asString(result.memorySessionId), + snapshotId: asString(result.snapshotId), + currentMemory: asNumber(result.currentMemory), + peakMemory: asNumber(result.peakMemory), + gcObjects: asNumber(result.gcObjects), + gcCounts: Array.isArray(result.gcCounts) ? (result.gcCounts as number[]) : [], + topAllocations: (Array.isArray(result.topAllocations) + ? result.topAllocations + : []) as MemoryDashboardSnapshot["topAllocations"], + timeline: [], + }; +} + +/** Map an ingest diff result to the dashboard's diff shape (lowercasing confidence). */ +function toDashboardDiff(result: MemoryIngestResult): MemoryDiffData { + const leaks = Array.isArray(result.suspectedLeaks) ? 
result.suspectedLeaks : []; + return { + totalGrowth: asNumber(result.totalGrowth), + totalFreed: asNumber(result.totalFreed), + netGrowth: asNumber(result.netGrowth), + grownAllocations: [], + suspectedLeaks: leaks.map((raw) => { + const leak = raw as Record; + return { + file: asString(leak.file), + line: asNumber(leak.line), + sizeGrowth: asNumber(leak.sizeGrowth), + countGrowth: asNumber(leak.countGrowth), + currentSize: asNumber(leak.currentSize), + currentCount: asNumber(leak.currentCount), + confidence: asString(leak.confidence, "low").toLowerCase() as MemoryDiffData["suspectedLeaks"][number]["confidence"], + reason: asString(leak.reason), + }; + }), + }; +} + +async function handleMemoryGcCollect(store: Store): Promise { + const result = await runMemoryScript(store, LSP_MEM_CMD.gcCollect); + if (result?.kind === "gc") { + const collected = Number(result.collected ?? 0); + const uncollectable = Number(result.uncollectable ?? 0); + Logger.info(`gc.collect(): ${collected} collected, ${uncollectable} uncollectable`); + void vscode.window.showInformationMessage( + `Basilisk: gc.collect() freed ${collected} object(s); ${uncollectable} uncollectable`, ); } } function handleMemoryStop(_store: Store): void { activeMemorySessionId = undefined; - updateMemoryStatusBar("idle"); + lastDashboardSnapshot = undefined; + refreshMemoryStatusBar(); clearMemoryDecorations(); Logger.info("Memory tracking stopped"); } async function handleMemoryReferences(store: Store): Promise { - const client = store.client.value; - if (client?.isRunning() !== true) { - void vscode.window.showErrorMessage("Basilisk LSP not connected"); + if (activeMemorySessionId === undefined) { + void vscode.window.showWarningMessage("Basilisk: Start memory tracking first."); return; } @@ -161,326 +364,50 @@ async function handleMemoryReferences(store: Store): Promise { prompt: "Object type to inspect (e.g. 
DataFrame, dict, MyClass)", placeHolder: "DataFrame", }); - if (typeName === undefined || typeName.trim() === "") { return; } - try { - const REF_GRAPH_MAX_DEPTH = 5; - const REF_GRAPH_MAX_NODES = 200; - const result = await client.sendRequest("workspace/executeCommand", { - command: LSP_MEM_CMD.references, - arguments: [ - { - targetType: typeName.trim(), - maxDepth: REF_GRAPH_MAX_DEPTH, - maxNodes: REF_GRAPH_MAX_NODES, - }, - ], + const result = await runMemoryScript(store, LSP_MEM_CMD.references, { + targetType: typeName.trim(), + maxDepth: REF_GRAPH_MAX_DEPTH, + maxNodes: REF_GRAPH_MAX_NODES, + }); + if (result?.kind === "refs") { + openRefGraphWebview({ + targetType: typeName.trim(), + maxDepth: REF_GRAPH_MAX_DEPTH, + maxNodes: REF_GRAPH_MAX_NODES, + script: "", + graph: result.graph as ReferenceGraphResult["graph"], }); - - if (result !== null) { - openRefGraphWebview(result); - } - } catch (err) { - void vscode.window.showErrorMessage( - `Reference graph failed: ${err instanceof Error ? err.message : String(err)}`, - ); } } // ── Status bar ──────────────────────────────────────────────────────────── -function updateMemoryStatusBar(state: "idle" | "tracking"): void { +/** + * Show the memory status-bar entry whenever a Basilisk debug session is active + * (or tracking is on) and click it to open the action menu. Hidden otherwise. 
+ */ +function refreshMemoryStatusBar(): void { if (memoryStatusBarItem === undefined) { return; } - if (state === "tracking") { - memoryStatusBarItem.text = "$(eye) Memory Tracking"; - memoryStatusBarItem.tooltip = - "Basilisk: Memory tracking active (click to stop)"; - memoryStatusBarItem.backgroundColor = new vscode.ThemeColor( - "statusBarItem.warningBackground", - ); - memoryStatusBarItem.show(); - } else { + const debugging = vscode.debug.activeDebugSession?.type === "basilisk-debug"; + const tracking = activeMemorySessionId !== undefined; + if (!debugging && !tracking) { memoryStatusBarItem.hide(); + return; } -} -// ── Reference graph webview (Phase 5D) ──────────────────────────────────── - -interface ReferenceGraphResult { - targetType: string; - maxDepth: number; - maxNodes: number; - script: string; - graph?: { - nodes: RefGraphNode[]; - edges: RefGraphEdge[]; - cycles: number[][]; - retentionPath?: string[]; - }; -} - -interface RefGraphNode { - id: number; - type: string; - size: number; - repr: string; - depth: number; - isTarget: boolean; -} - -interface RefGraphEdge { - from: number; - to: number; - label: string; -} - -function openRefGraphWebview(result: ReferenceGraphResult): void { - if (refGraphPanel !== undefined) { - refGraphPanel.reveal(vscode.ViewColumn.Beside); + if (tracking) { + memoryStatusBarItem.text = "$(eye) Memory: tracking"; + memoryStatusBarItem.tooltip = "Basilisk: memory tracking active — click for snapshot/compare/stop"; + memoryStatusBarItem.backgroundColor = new vscode.ThemeColor("statusBarItem.warningBackground"); } else { - refGraphPanel = vscode.window.createWebviewPanel( - "basilisk.refGraph", - `Retention Graph \u2014 ${result.targetType}`, - vscode.ViewColumn.Beside, - { enableScripts: true, retainContextWhenHidden: true }, - ); - refGraphPanel.onDidDispose(() => { - refGraphPanel = undefined; - }); + memoryStatusBarItem.text = "$(database) Memory"; + memoryStatusBarItem.tooltip = "Basilisk: click to start memory 
tracking (pause at a breakpoint first)"; + memoryStatusBarItem.backgroundColor = undefined; } - - refGraphPanel.webview.html = buildRefGraphHtml(result); - - refGraphPanel.webview.onDidReceiveMessage( - (msg: { type: string; file?: string; line?: number }) => { - if ( - msg.type === "navigateToSource" && - msg.file !== undefined && - msg.line !== undefined - ) { - const uri = vscode.Uri.file(msg.file); - const position = new vscode.Position(msg.line - 1, 0); - void vscode.window.showTextDocument(uri, { - selection: new vscode.Range(position, position), - viewColumn: vscode.ViewColumn.One, - }); - } - }, - ); -} - -function buildRefGraphCss(): string { - return ` - :root { - --mem-critical: #c084fc; - --mem-hot: #a78bfa; - --mem-leak: #f87171; - --mem-freed: #34d399; - --mem-info: #60a5fa; - --bg: #0a0c12; - --surface: #141820; - --border: #1a1f2e; - --text: #f0f2f7; - --text-secondary: #8892a4; - } - * { margin: 0; padding: 0; box-sizing: border-box; } - body { background: var(--bg); color: var(--text); font-family: 'Space Grotesk', sans-serif; padding: 16px; } - h1 { font-size: 18px; font-weight: 600; margin-bottom: 12px; } - h1 .accent { color: var(--mem-critical); } - .retention-path { - background: var(--surface); - border: 1px solid var(--border); - border-radius: 8px; - padding: 12px 16px; - margin-bottom: 16px; - font-family: 'JetBrains Mono', monospace; - font-size: 12px; - line-height: 1.8; - } - .retention-path .label { - font-size: 11px; - color: var(--text-secondary); - text-transform: uppercase; - letter-spacing: 0.05em; - margin-bottom: 6px; - } - .retention-path .step { color: var(--mem-info); } - .retention-path .target { color: var(--mem-critical); font-weight: 600; } - canvas { display: block; border-radius: 8px; background: var(--surface); } - .legend { - display: flex; gap: 16px; margin-top: 12px; font-size: 11px; - color: var(--text-secondary); - } - .legend-item { display: flex; align-items: center; gap: 4px; } - .legend-dot { width: 8px; 
height: 8px; border-radius: 50%; } - .no-data { text-align: center; padding: 60px; color: var(--text-secondary); }`; -} - -function buildRetentionPathHtml(retentionPath: string[]): string { - if (retentionPath.length === 0) { return ""; } - const steps = retentionPath - .map((step, i) => `
${escapeHtml(step)}
`) - .join("\n "); - return `
-
Retention Path
- ${steps} -
`; -} - -function buildRefGraphScriptInit(nodesJson: string, edgesJson: string, cyclesJson: string): string { - return ` - const vscode = acquireVsCodeApi(); - const nodes = ${nodesJson}; - const edges = ${edgesJson}; - const cycles = ${cyclesJson}; - - if (nodes.length === 0) { - document.getElementById('graph').style.display = 'none'; - const noData = document.createElement('div'); - noData.className = 'no-data'; - noData.textContent = 'No reference graph data available. Run the memory references command with an active debug session.'; - document.body.appendChild(noData); - } else { - const canvas = document.getElementById('graph'); - const ctx = canvas.getContext('2d'); - const W = canvas.width, H = canvas.height; - const cycleNodeIds = new Set(cycles.flat()); - const nodeMap = new Map(); - nodes.forEach((n, i) => { - nodeMap.set(n.id, { - ...n, - x: W / 2 + (Math.random() - 0.5) * W * 0.6, - y: H / 2 + (Math.random() - 0.5) * H * 0.6, - vx: 0, vy: 0, - radius: Math.max(8, Math.min(30, Math.log2(Math.max(n.size, 1)) * 2)), - }); - }); - for (let iter = 0; iter < 60; iter++) { - const alpha = 0.3 * (1 - iter / 60); - const nodeList = Array.from(nodeMap.values()); - for (let i = 0; i < nodeList.length; i++) { - for (let j = i + 1; j < nodeList.length; j++) { - const a = nodeList[i], b = nodeList[j]; - let dx = b.x - a.x, dy = b.y - a.y; - const dist = Math.max(1, Math.sqrt(dx * dx + dy * dy)); - const force = 2000 / (dist * dist); - dx = (dx / dist) * force * alpha; - dy = (dy / dist) * force * alpha; - a.x -= dx; a.y -= dy; - b.x += dx; b.y += dy; - } - } - for (const edge of edges) { - const a = nodeMap.get(edge.from), b = nodeMap.get(edge.to); - if (!a || !b) continue; - let dx = b.x - a.x, dy = b.y - a.y; - const dist = Math.max(1, Math.sqrt(dx * dx + dy * dy)); - const force = (dist - 80) * 0.01 * alpha; - dx = (dx / dist) * force; - dy = (dy / dist) * force; - a.x += dx; a.y += dy; - b.x -= dx; b.y -= dy; - } - for (const n of nodeList) { - n.x = 
Math.max(40, Math.min(W - 40, n.x)); - n.y = Math.max(40, Math.min(H - 40, n.y)); - } - }`; -} - -function buildRefGraphScriptDraw(): string { - return ` - ctx.strokeStyle = 'rgba(136, 146, 164, 0.3)'; - ctx.lineWidth = 1; - for (const edge of edges) { - const a = nodeMap.get(edge.from), b = nodeMap.get(edge.to); - if (!a || !b) continue; - const isCycleEdge = cycleNodeIds.has(edge.from) && cycleNodeIds.has(edge.to); - ctx.strokeStyle = isCycleEdge ? '#f87171' : 'rgba(136, 146, 164, 0.3)'; - ctx.lineWidth = isCycleEdge ? 2 : 1; - ctx.beginPath(); - ctx.moveTo(a.x, a.y); - ctx.lineTo(b.x, b.y); - ctx.stroke(); - if (edge.label) { - const mx = (a.x + b.x) / 2, my = (a.y + b.y) / 2; - ctx.fillStyle = '#8892a4'; - ctx.font = '9px monospace'; - ctx.fillText(edge.label, mx + 4, my - 4); - } - } - for (const n of nodeMap.values()) { - const isCycle = cycleNodeIds.has(n.id); - const color = n.isTarget ? '#c084fc' - : isCycle ? '#f87171' - : n.depth <= 1 ? '#60a5fa' - : '#8892a4'; - ctx.beginPath(); - ctx.arc(n.x, n.y, n.radius, 0, Math.PI * 2); - ctx.fillStyle = color + '33'; - ctx.fill(); - ctx.strokeStyle = color; - ctx.lineWidth = n.isTarget ? 
3 : 1.5; - ctx.stroke(); - ctx.fillStyle = '#f0f2f7'; - ctx.font = '10px monospace'; - ctx.textAlign = 'center'; - ctx.fillText(n.type, n.x, n.y + n.radius + 14); - ctx.fillStyle = '#8892a4'; - ctx.font = '9px monospace'; - ctx.fillText(formatBytes(n.size), n.x, n.y + n.radius + 26); - } - } - function formatBytes(bytes) { - if (bytes >= 1073741824) return (bytes / 1073741824).toFixed(1) + ' GB'; - if (bytes >= 1048576) return (bytes / 1048576).toFixed(1) + ' MB'; - if (bytes >= 1024) return (bytes / 1024).toFixed(1) + ' KB'; - return bytes + ' B'; - }`; -} - -function buildRefGraphScript(nodesJson: string, edgesJson: string, cyclesJson: string): string { - return buildRefGraphScriptInit(nodesJson, edgesJson, cyclesJson) + - buildRefGraphScriptDraw(); -} - -function buildRefGraphHtml(result: ReferenceGraphResult): string { - const nodesJson = JSON.stringify(result.graph?.nodes ?? []); - const edgesJson = JSON.stringify(result.graph?.edges ?? []); - const cyclesJson = JSON.stringify(result.graph?.cycles ?? []); - const retentionPath = result.graph?.retentionPath ?? []; - const escapedType = escapeHtml(result.targetType); - - return ` - - - - Retention Graph \u2014 ${escapedType} - - - -

\u25C9 Retention Graph \u2014 ${escapedType}

- ${buildRetentionPathHtml(retentionPath)} - -
-
Target object
-
Root retainer
-
Intermediate
-
Cycle member
-
- - -`; -} - -function escapeHtml(text: string): string { - return text - .replace(/&/g, "&") - .replace(//g, ">") - .replace(/"/g, """); + memoryStatusBarItem.show(); } diff --git a/vscode-extension/src/memory-ref-graph.ts b/vscode-extension/src/memory-ref-graph.ts new file mode 100644 index 00000000..387f1c80 --- /dev/null +++ b/vscode-extension/src/memory-ref-graph.ts @@ -0,0 +1,297 @@ +// Implements [LSPPROF]. See docs/specs/LSP-PROFILING-SPEC.md#PROFILE-MEMORY-VIS-REFGRAPH +/** + * Reference-graph webview for memory profiling. + * + * Renders the force-directed object-retention graph (`gc.get_referrers()` walk + * parsed by the LSP) in a Canvas 2D webview. Extracted from `memory-profiler.ts` + * so that module stays focused on command routing and the courier round-trip. + */ + +import * as vscode from "vscode"; + +/** Reference-graph result returned by `basilisk.memory.ingest` (kind `refs`). */ +export interface ReferenceGraphResult { + targetType: string; + maxDepth: number; + maxNodes: number; + script: string; + graph?: { + nodes: RefGraphNode[]; + edges: RefGraphEdge[]; + cycles: number[][]; + retentionPath?: string[]; + }; +} + +interface RefGraphNode { + id: number; + type: string; + size: number; + repr: string; + depth: number; + isTarget: boolean; +} + +interface RefGraphEdge { + from: number; + to: number; + label: string; +} + +let refGraphPanel: vscode.WebviewPanel | undefined; + +/** Open (or reveal) the retention-graph webview for a parsed reference graph. 
*/
+export function openRefGraphWebview(result: ReferenceGraphResult): void {
+  const title = `Retention Graph — ${result.targetType}`;
+
+  if (refGraphPanel !== undefined) {
+    // Reusing the singleton panel: refresh the title so it names the type
+    // currently being inspected, then bring the panel to the front.
+    refGraphPanel.title = title;
+    refGraphPanel.reveal(vscode.ViewColumn.Beside);
+  } else {
+    const panel = vscode.window.createWebviewPanel(
+      "basilisk.refGraph",
+      title,
+      vscode.ViewColumn.Beside,
+      { enableScripts: true, retainContextWhenHidden: true },
+    );
+    panel.onDidDispose(() => {
+      refGraphPanel = undefined;
+    });
+    // Register the message listener exactly once per panel. Registering it on
+    // every call (as before) stacked a new listener each time the panel was
+    // reused, so a single "navigateToSource" message opened the document
+    // several times.
+    panel.webview.onDidReceiveMessage(
+      (msg: { type: string; file?: string; line?: number }) => {
+        if (
+          msg.type === "navigateToSource" &&
+          msg.file !== undefined &&
+          msg.line !== undefined
+        ) {
+          const uri = vscode.Uri.file(msg.file);
+          // Messages carry 1-based lines; clamp so a 0/negative value cannot
+          // make `vscode.Position` throw on a negative line.
+          const position = new vscode.Position(Math.max(0, msg.line - 1), 0);
+          void vscode.window.showTextDocument(uri, {
+            selection: new vscode.Range(position, position),
+            viewColumn: vscode.ViewColumn.One,
+          });
+        }
+      },
+    );
+    refGraphPanel = panel;
+  }
+
+  // Always re-render: the graph content changes with every request.
+  refGraphPanel.webview.html = buildRefGraphHtml(result);
+}
+
+/** Dispose the reference-graph webview, if open.
*/ +export function disposeRefGraph(): void { + if (refGraphPanel !== undefined) { + refGraphPanel.dispose(); + refGraphPanel = undefined; + } +} + +function buildRefGraphCss(): string { + return ` + :root { + --mem-critical: #c084fc; + --mem-hot: #a78bfa; + --mem-leak: #f87171; + --mem-freed: #34d399; + --mem-info: #60a5fa; + --bg: #0a0c12; + --surface: #141820; + --border: #1a1f2e; + --text: #f0f2f7; + --text-secondary: #8892a4; + } + * { margin: 0; padding: 0; box-sizing: border-box; } + body { background: var(--bg); color: var(--text); font-family: 'Space Grotesk', sans-serif; padding: 16px; } + h1 { font-size: 18px; font-weight: 600; margin-bottom: 12px; } + h1 .accent { color: var(--mem-critical); } + .retention-path { + background: var(--surface); + border: 1px solid var(--border); + border-radius: 8px; + padding: 12px 16px; + margin-bottom: 16px; + font-family: 'JetBrains Mono', monospace; + font-size: 12px; + line-height: 1.8; + } + .retention-path .label { + font-size: 11px; + color: var(--text-secondary); + text-transform: uppercase; + letter-spacing: 0.05em; + margin-bottom: 6px; + } + .retention-path .step { color: var(--mem-info); } + .retention-path .target { color: var(--mem-critical); font-weight: 600; } + canvas { display: block; border-radius: 8px; background: var(--surface); } + .legend { + display: flex; gap: 16px; margin-top: 12px; font-size: 11px; + color: var(--text-secondary); + } + .legend-item { display: flex; align-items: center; gap: 4px; } + .legend-dot { width: 8px; height: 8px; border-radius: 50%; } + .no-data { text-align: center; padding: 60px; color: var(--text-secondary); }`; +} + +function buildRetentionPathHtml(retentionPath: string[]): string { + if (retentionPath.length === 0) { return ""; } + const steps = retentionPath + .map((step, i) => `
${escapeHtml(step)}
`) + .join("\n "); + return `
+
Retention Path
+ ${steps} +
`; +} + +function buildRefGraphScriptInit(nodesJson: string, edgesJson: string, cyclesJson: string): string { + return ` + const vscode = acquireVsCodeApi(); + const nodes = ${nodesJson}; + const edges = ${edgesJson}; + const cycles = ${cyclesJson}; + + if (nodes.length === 0) { + document.getElementById('graph').style.display = 'none'; + const noData = document.createElement('div'); + noData.className = 'no-data'; + noData.textContent = 'No reference graph data available. Run the memory references command with an active debug session.'; + document.body.appendChild(noData); + } else { + const canvas = document.getElementById('graph'); + const ctx = canvas.getContext('2d'); + const W = canvas.width, H = canvas.height; + const cycleNodeIds = new Set(cycles.flat()); + const nodeMap = new Map(); + nodes.forEach((n, i) => { + nodeMap.set(n.id, { + ...n, + x: W / 2 + (Math.random() - 0.5) * W * 0.6, + y: H / 2 + (Math.random() - 0.5) * H * 0.6, + vx: 0, vy: 0, + radius: Math.max(8, Math.min(30, Math.log2(Math.max(n.size, 1)) * 2)), + }); + }); + for (let iter = 0; iter < 60; iter++) { + const alpha = 0.3 * (1 - iter / 60); + const nodeList = Array.from(nodeMap.values()); + for (let i = 0; i < nodeList.length; i++) { + for (let j = i + 1; j < nodeList.length; j++) { + const a = nodeList[i], b = nodeList[j]; + let dx = b.x - a.x, dy = b.y - a.y; + const dist = Math.max(1, Math.sqrt(dx * dx + dy * dy)); + const force = 2000 / (dist * dist); + dx = (dx / dist) * force * alpha; + dy = (dy / dist) * force * alpha; + a.x -= dx; a.y -= dy; + b.x += dx; b.y += dy; + } + } + for (const edge of edges) { + const a = nodeMap.get(edge.from), b = nodeMap.get(edge.to); + if (!a || !b) continue; + let dx = b.x - a.x, dy = b.y - a.y; + const dist = Math.max(1, Math.sqrt(dx * dx + dy * dy)); + const force = (dist - 80) * 0.01 * alpha; + dx = (dx / dist) * force; + dy = (dy / dist) * force; + a.x += dx; a.y += dy; + b.x -= dx; b.y -= dy; + } + for (const n of nodeList) { + n.x = 
Math.max(40, Math.min(W - 40, n.x)); + n.y = Math.max(40, Math.min(H - 40, n.y)); + } + }`; +} + +function buildRefGraphScriptDraw(): string { + return ` + ctx.strokeStyle = 'rgba(136, 146, 164, 0.3)'; + ctx.lineWidth = 1; + for (const edge of edges) { + const a = nodeMap.get(edge.from), b = nodeMap.get(edge.to); + if (!a || !b) continue; + const isCycleEdge = cycleNodeIds.has(edge.from) && cycleNodeIds.has(edge.to); + ctx.strokeStyle = isCycleEdge ? '#f87171' : 'rgba(136, 146, 164, 0.3)'; + ctx.lineWidth = isCycleEdge ? 2 : 1; + ctx.beginPath(); + ctx.moveTo(a.x, a.y); + ctx.lineTo(b.x, b.y); + ctx.stroke(); + if (edge.label) { + const mx = (a.x + b.x) / 2, my = (a.y + b.y) / 2; + ctx.fillStyle = '#8892a4'; + ctx.font = '9px monospace'; + ctx.fillText(edge.label, mx + 4, my - 4); + } + } + for (const n of nodeMap.values()) { + const isCycle = cycleNodeIds.has(n.id); + const color = n.isTarget ? '#c084fc' + : isCycle ? '#f87171' + : n.depth <= 1 ? '#60a5fa' + : '#8892a4'; + ctx.beginPath(); + ctx.arc(n.x, n.y, n.radius, 0, Math.PI * 2); + ctx.fillStyle = color + '33'; + ctx.fill(); + ctx.strokeStyle = color; + ctx.lineWidth = n.isTarget ? 
3 : 1.5; + ctx.stroke(); + ctx.fillStyle = '#f0f2f7'; + ctx.font = '10px monospace'; + ctx.textAlign = 'center'; + ctx.fillText(n.type, n.x, n.y + n.radius + 14); + ctx.fillStyle = '#8892a4'; + ctx.font = '9px monospace'; + ctx.fillText(formatBytes(n.size), n.x, n.y + n.radius + 26); + } + } + function formatBytes(bytes) { + if (bytes >= 1073741824) return (bytes / 1073741824).toFixed(1) + ' GB'; + if (bytes >= 1048576) return (bytes / 1048576).toFixed(1) + ' MB'; + if (bytes >= 1024) return (bytes / 1024).toFixed(1) + ' KB'; + return bytes + ' B'; + }`; +} + +function buildRefGraphScript(nodesJson: string, edgesJson: string, cyclesJson: string): string { + return buildRefGraphScriptInit(nodesJson, edgesJson, cyclesJson) + + buildRefGraphScriptDraw(); +} + +function buildRefGraphHtml(result: ReferenceGraphResult): string { + const nodesJson = JSON.stringify(result.graph?.nodes ?? []); + const edgesJson = JSON.stringify(result.graph?.edges ?? []); + const cyclesJson = JSON.stringify(result.graph?.cycles ?? []); + const retentionPath = result.graph?.retentionPath ?? []; + const escapedType = escapeHtml(result.targetType); + + return ` + + + + Retention Graph — ${escapedType} + + + +

Retention Graph — ${escapedType}

+ ${buildRetentionPathHtml(retentionPath)} + +
+
Target object
+
Root retainer
+
Intermediate
+
Cycle member
+
+ + +`; +} + +function escapeHtml(text: string): string { + return text + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """); +} diff --git a/vscode-extension/src/profiler-decorations.ts b/vscode-extension/src/profiler-decorations.ts index 59cdb57f..4c48430e 100644 --- a/vscode-extension/src/profiler-decorations.ts +++ b/vscode-extension/src/profiler-decorations.ts @@ -41,6 +41,8 @@ export interface ProfileResult { duration: number; totalSamples: number; outputFile: string; + /** Path to the V8 `.cpuprofile` for VS Code's built-in profile viewer. */ + cpuProfilePath?: string; hotFunctions: ProfileHotFunction[]; hotLines: ProfileHotLine[]; } diff --git a/vscode-extension/src/profiler.ts b/vscode-extension/src/profiler.ts index 40fbaf41..a08e0b73 100644 --- a/vscode-extension/src/profiler.ts +++ b/vscode-extension/src/profiler.ts @@ -101,7 +101,13 @@ export function registerProfiler( .get("profiler.profileOnLaunch", false); if (profileOnLaunch && session.type === "basilisk-debug" && activeSessionId === undefined) { Logger.info(`Profile on Launch: auto-profiling debug session ${session.id}`); - void handleProfileAttachToDebug(store); + // The debuggee PID arrives asynchronously via the DAP `process` event, + // so wait for it before attaching (avoids a "not ready yet" race). + void waitForDebuggeePid(store, session.id).then((ready) => { + if (ready && activeSessionId === undefined) { + void handleProfileAttachToDebug(store); + } + }); } }), ); @@ -119,6 +125,34 @@ export function registerProfiler( return disposables; } +// ── Debuggee PID readiness ───────────────────────────────────────────────── + +/** + * Resolve once the debuggee's PID for `sessionId` is known (captured from the + * DAP `process` event into the store), or after a bounded wait. Returns whether + * the PID became available. Used by the "Profile on Launch" auto-attach so it + * doesn't fire before debugpy reports the process. 
+ */ +async function waitForDebuggeePid(store: Store, sessionId: string): Promise { + if (store.getDebuggeeProcessId(sessionId) !== undefined) { + return true; + } + return new Promise((resolve) => { + const interval = setInterval(() => { + if (store.getDebuggeeProcessId(sessionId) !== undefined) { + clearInterval(interval); + clearTimeout(timeout); + resolve(true); + } + }, POLL_INTERVAL_MS); + // Clear BOTH timers on whichever path resolves first so neither dangles. + const timeout = setTimeout(() => { + clearInterval(interval); + resolve(store.getDebuggeeProcessId(sessionId) !== undefined); + }, STARTUP_TIMEOUT_MS); + }); +} + // ── Command handlers ────────────────────────────────────────────────────── async function handleProfileStart(): Promise { @@ -201,7 +235,14 @@ async function handleProfileStop(store: Store): Promise { if (result !== undefined && result !== null) { lastResult = result; applyProfileDecorations(result); - openFlamegraphWebview(result); + // Open the V8 .cpuprofile in VS Code's built-in profile viewer (flame + // chart + bottom-up/left-heavy tables); fall back to the speedscope-style + // webview only if the file wasn't produced. + if (result.cpuProfilePath !== undefined && result.cpuProfilePath !== "") { + await vscode.commands.executeCommand("vscode.open", vscode.Uri.file(result.cpuProfilePath)); + } else { + openFlamegraphWebview(result); + } Logger.info( `Profiling stopped: ${result.totalSamples} samples, ${result.duration.toFixed(1)}s, ` + `output: ${result.outputFile}`, @@ -266,6 +307,17 @@ async function handleProfileAttachToDebug(store: Store): Promise { return; } + // Resolve the debuggee's PID captured from the DAP `process` event so we + // profile the SAME process the debugger is attached to. The LSP profiler is + // PID-based; the privilege layer handles elevation (macOS helper) transparently. 
+ const pid = store.getDebuggeeProcessId(session.id); + if (pid === undefined) { + vscode.window.showWarningMessage( + "Basilisk: The debuggee process isn't ready yet — let it start running, then run “Profile Debug Session” again.", + ); + return; + } + const cfg = vscode.workspace.getConfiguration("basilisk"); const sampleRate = cfg.get("profiler.sampleRate", DEFAULT_SAMPLE_RATE); const includeNative = cfg.get("profiler.includeNative", false); @@ -273,7 +325,7 @@ async function handleProfileAttachToDebug(store: Store): Promise { try { const result = await client.sendRequest<{ sessionId: string; pid: number; pythonVersion: string } | undefined>("workspace/executeCommand", { command: LSP_CMD.start, - arguments: [{ debugSession: session.id, sampleRate, includeNative }], + arguments: [{ pid, sampleRate, includeNative }], }); if (result !== undefined && result !== null) { diff --git a/vscode-extension/src/store.ts b/vscode-extension/src/store.ts index 5555a8d1..93f54b78 100644 --- a/vscode-extension/src/store.ts +++ b/vscode-extension/src/store.ts @@ -51,6 +51,8 @@ export interface Store { readonly lspState: ReadonlySignal; readonly isServerReady: ReadonlySignal; readonly runtimeResolution: ReadonlySignal; + /** Map of VS Code debug session id → debuggee OS process id (from the DAP `process` event). */ + readonly sessionIdToPid: ReadonlySignal>; // Read-only access to the ready handle (for whenReady callers). readonly lspReadyPromise: ReadonlySignal | undefined>; @@ -61,6 +63,12 @@ export interface Store { setOutputChannel(ch: vscode.OutputChannel): void; setLogSink(sink: LogSink): void; setRuntimeResolution(resolution: RuntimeResolution): void; + /** Record the debuggee PID captured from a debug session's DAP `process` event. */ + setDebuggeeProcessId(sessionId: string, pid: number): void; + /** Look up the debuggee PID for a debug session, or undefined if not yet known. 
*/ + getDebuggeeProcessId(sessionId: string): number | undefined; + /** Forget a debug session's PID mapping (called when the session terminates). */ + clearDebuggeeProcessId(sessionId: string): void; isClientCommandRegistered(id: string): boolean; isServerCommandAdvertised(id: string): boolean; ensureLspReadyPromise(timeoutMs?: number): Promise>; @@ -77,6 +85,7 @@ interface StoreSignals { logSink: Signal; lspState: Signal; runtimeResolution: Signal; + sessionIdToPid: Signal>; readyHandle: Signal; /** Disposables for client-registered commands — disposed on LSP stop/restart. */ commandDisposables: vscode.Disposable[]; @@ -299,13 +308,38 @@ function resetSignals(signals: StoreSignals): void { signals.logSink.value = undefined; signals.lspState.value = "idle"; signals.runtimeResolution.value = undefined; + signals.sessionIdToPid.value = new Map(); signals.readyHandle.value = undefined; } +/** Debuggee PID actions (copy-on-write Map) — extracted to keep createStore small. */ +function debuggeePidActions(signals: StoreSignals): Pick< + Store, + "setDebuggeeProcessId" | "getDebuggeeProcessId" | "clearDebuggeeProcessId" +> { + return { + setDebuggeeProcessId(sessionId: string, pid: number): void { + const next = new Map(signals.sessionIdToPid.value); + next.set(sessionId, pid); + signals.sessionIdToPid.value = next; + }, + getDebuggeeProcessId(sessionId: string): number | undefined { + return signals.sessionIdToPid.value.get(sessionId); + }, + clearDebuggeeProcessId(sessionId: string): void { + if (!signals.sessionIdToPid.value.has(sessionId)) { return; } + const next = new Map(signals.sessionIdToPid.value); + next.delete(sessionId); + signals.sessionIdToPid.value = next; + }, + }; +} + // ── Factory ─────────────────────────────────────────────────────────────── -export function createStore(onReset?: () => void): Store { - const signals: StoreSignals = { +/** Build the fresh, mutable signal bag backing a store. 
*/ +function createStoreSignals(): StoreSignals { + return { client: signal(undefined), serverCommands: signal>(new Set()), clientCommands: signal>(new Set()), @@ -314,10 +348,15 @@ export function createStore(onReset?: () => void): Store { logSink: signal(undefined), lspState: signal("idle"), runtimeResolution: signal(undefined), + sessionIdToPid: signal>(new Map()), readyHandle: signal(undefined), commandDisposables: [], serverCommandDisposables: [], }; +} + +export function createStore(onReset?: () => void): Store { + const signals: StoreSignals = createStoreSignals(); const isServerReady = computed(() => signals.client.value?.isRunning() === true); const lspReadyPromise = computed(async () => signals.readyHandle.value?.promise); @@ -331,6 +370,7 @@ export function createStore(onReset?: () => void): Store { logSink: signals.logSink as ReadonlySignal, lspState: signals.lspState as ReadonlySignal, runtimeResolution: signals.runtimeResolution as ReadonlySignal, + sessionIdToPid: signals.sessionIdToPid as ReadonlySignal>, lspReadyPromise, isServerReady, @@ -350,6 +390,7 @@ export function createStore(onReset?: () => void): Store { setRuntimeResolution(resolution: RuntimeResolution): void { signals.runtimeResolution.value = resolution; }, + ...debuggeePidActions(signals), isClientCommandRegistered(id: string): boolean { return signals.clientCommands.value.has(id); }, diff --git a/vscode-extension/src/subprocess-mode.ts b/vscode-extension/src/subprocess-mode.ts new file mode 100644 index 00000000..387b8db1 --- /dev/null +++ b/vscode-extension/src/subprocess-mode.ts @@ -0,0 +1,123 @@ +// Implements [VSIX]. See docs/specs/VSIX-SPEC.md#VSIX +/** + * Subprocess mode: run `basilisk check --output json` on open/save and publish + * the parsed diagnostics. The fallback when the LSP is disabled + * (`basilisk.useLsp: false`). Extracted from `extension.ts` to keep activation + * focused on the LSP/debug/profiler wiring. 
+ */ + +import * as vscode from "vscode"; +import { execFile } from "child_process"; +import * as path from "path"; + +/** Exit code returned by `basilisk check` on internal errors. */ +const BASILISK_INTERNAL_ERROR_EXIT_CODE = 3; + +/** Shape of a single diagnostic emitted by `basilisk check --output json`. */ +interface BasiliskDiagnostic { + code: string; + severity: "error" | "warning"; + message: string; + path: string; + line: number; + col: number; + end_line: number; + end_col: number; +} + +/** First workspace folder path, if any. */ +function workspaceRoot(): string | undefined { + return vscode.workspace.workspaceFolders?.[0]?.uri.fsPath; +} + +/** Start subprocess mode: check Python documents on open/save via the CLI. */ +export function startSubprocessMode( + context: vscode.ExtensionContext, + executablePath: string +): void { + const collection = vscode.languages.createDiagnosticCollection("basilisk"); + context.subscriptions.push(collection); + + context.subscriptions.push( + vscode.workspace.onDidOpenTextDocument((doc) => { + if (doc.languageId === "python") {checkDocument(doc, collection, executablePath);} + }) + ); + context.subscriptions.push( + vscode.workspace.onDidSaveTextDocument((doc) => { + if (doc.languageId === "python") {checkDocument(doc, collection, executablePath);} + }) + ); + context.subscriptions.push( + vscode.workspace.onDidCloseTextDocument((doc) => { collection.delete(doc.uri); }) + ); + + for (const doc of vscode.workspace.textDocuments) { + if (doc.languageId === "python") {checkDocument(doc, collection, executablePath);} + } +} + +function checkDocument( + doc: vscode.TextDocument, + collection: vscode.DiagnosticCollection, + executablePath: string +): void { + const enabled = vscode.workspace.getConfiguration("basilisk").get("enabled") ?? 
true; + if (!enabled) { + collection.delete(doc.uri); + return; + } + if (doc.isUntitled || doc.uri.scheme !== "file") {return;} + + const filePath = doc.uri.fsPath; + execFile( + executablePath, + ["check", "--output", "json", filePath], + { cwd: workspaceRoot() }, + (error, stdout, stderr) => { + if (error?.code === BASILISK_INTERNAL_ERROR_EXIT_CODE) { + vscode.window.showWarningMessage( + `Basilisk: internal error checking ${path.basename(filePath)}: ${stderr}` + ); + return; + } + if (error && typeof error.code === "number" && error.code !== 1) { + vscode.window.showWarningMessage( + `Basilisk: failed to run '${executablePath}'. Is it on PATH? (${error.message})` + ); + collection.delete(doc.uri); + return; + } + collection.set(doc.uri, parseDiagnostics(stdout, doc)); + } + ); +} + +function parseDiagnostics(json: string, doc: vscode.TextDocument): vscode.Diagnostic[] { + let items: BasiliskDiagnostic[]; + try { + items = JSON.parse(json) as BasiliskDiagnostic[]; + } catch { + return []; + } + if (!Array.isArray(items)) {return [];} + + return items + .filter((item) => item.path === doc.uri.fsPath) + .map((item) => { + const range = new vscode.Range( + new vscode.Position(item.line - 1, item.col - 1), + new vscode.Position(item.end_line - 1, item.end_col - 1) + ); + const severity = item.severity === "error" + ? 
vscode.DiagnosticSeverity.Error + : vscode.DiagnosticSeverity.Warning; + const diag = new vscode.Diagnostic(range, `${item.message} [${item.code}]`, severity); + diag.source = "basilisk"; + diag.code = { + value: item.code, + target: vscode.Uri.parse(`https://www.basilisk-python.dev/errors/${item.code}`), + }; + return diag; + }); +} diff --git a/vscode-extension/src/test/suite/command-registration.test.ts b/vscode-extension/src/test/suite/command-registration.test.ts index 0517eeb4..8313c69b 100644 --- a/vscode-extension/src/test/suite/command-registration.test.ts +++ b/vscode-extension/src/test/suite/command-registration.test.ts @@ -121,6 +121,7 @@ const SERVER_COMMANDS = [ 'basilisk.memory.references', 'basilisk.memory.objectsByType', 'basilisk.memory.gcCollect', + 'basilisk.memory.ingest', ] as const; /** Assert that registering a command succeeds (it was NOT already registered). */ diff --git a/vscode-extension/src/test/suite/debug-integration.test.ts b/vscode-extension/src/test/suite/debug-integration.test.ts index 10915205..00e046c5 100644 --- a/vscode-extension/src/test/suite/debug-integration.test.ts +++ b/vscode-extension/src/test/suite/debug-integration.test.ts @@ -25,6 +25,9 @@ import * as net from 'net'; import { execFileSync } from 'child_process'; import { findBasiliskBinary } from './test-helpers'; +import { getStore } from '../../extension'; +import { currentStoppedFrameId, evaluateInDebugSession } from '../../dap-evaluate'; +import { applyDebugConfigDefaults } from '../../debug-adapter'; const EXTENSION_ID = 'Nimblesite.basilisk'; @@ -1636,4 +1639,128 @@ suite('Debug Integration E2E Tests', () => { ); } }); + + // ──────────────────────────────────────────────────────────────────────── + // 23. Profiler "same process": the debuggee PID is captured from the DAP + // `process` event so CPU profiling can target the same process. 
[LSPPROF] + // ──────────────────────────────────────────────────────────────────────── + + test('captures debuggee PID from the debug session for same-process profiling', async function () { + this.timeout(DEBUG_SESSION_TIMEOUT_MS + STOPPED_EVENT_TIMEOUT_MS); + + const { session } = await launchAndWaitForBreakpoint([34], pythonPath); + + // The DAP `process` event carries systemProcessId; the proxy captures it + // into the store keyed by VS Code session id. Poll briefly because the + // event can arrive shortly after the first stop. + let pid: number | undefined; + for (let i = 0; i < 40 && pid === undefined; i++) { + pid = getStore()?.getDebuggeeProcessId(session.id); + if (pid === undefined) { + await new Promise((resolve) => setTimeout(resolve, 50)); + } + } + + assert.ok( + pid !== undefined && pid > 0, + `debuggee PID should be captured for session ${session.id}, got ${String(pid)}` + ); + + await stopActiveDebugSession(); + }); + + // ──────────────────────────────────────────────────────────────────────── + // 24. Memory profiling round-trip against REAL debugpy: the editor couriers + // the LSP's injection scripts via DAP `evaluate` and posts the output + // back to `basilisk.memory.ingest`, which parses a real tracemalloc + // snapshot. [LSPPROF] PROFILE-MEMORY + // ──────────────────────────────────────────────────────────────────────── + + test('memory round-trip: tracemalloc start + snapshot via DAP evaluate', async function () { + this.timeout(DEBUG_SESSION_TIMEOUT_MS + STOPPED_EVENT_TIMEOUT_MS); + + await launchAndWaitForBreakpoint([34], pythonPath); + + // Exercise the real bridge (dap-evaluate.ts): resolve the stopped frame + // the same way the memory commands do. + const frameId = await currentStoppedFrameId(); + if (frameId === null) { + assert.fail('currentStoppedFrameId should resolve a frame while paused'); + } + + // 1. Mint a memory session + fetch the start script from the LSP. 
+ const start = await vscode.commands.executeCommand<{ memorySessionId?: string; script?: string }>( + 'basilisk.memory.start', + { tracebackDepth: 25 } + ); + assert.ok(start.memorySessionId !== undefined, 'start should return a memorySessionId'); + assert.ok(start.script?.includes('tracemalloc.start'), 'start script should start tracemalloc'); + + // 2. Inject tracemalloc into the live debuggee, then allocate ~2 MB so the + // snapshot has something concrete to report. + await evaluateInDebugSession(start.script ?? '', frameId); + await evaluateInDebugSession( + 'global _bsk_leak\n_bsk_leak = [bytearray(1024) for _ in range(2000)]', + frameId + ); + + // 3. Fetch the snapshot script, run it in the debuggee, courier output back. + const snapCmd = await vscode.commands.executeCommand<{ script?: string }>( + 'basilisk.memory.snapshot', + { memorySessionId: start.memorySessionId } + ); + assert.ok(snapCmd.script?.includes('__BASILISK_MEM__'), 'snapshot script should print the marker'); + + const output = await evaluateInDebugSession(snapCmd.script ?? '', frameId); + assert.ok(output !== null, 'evaluate should return the snapshot output'); + const result = await vscode.commands.executeCommand<{ kind?: string; currentMemory?: number; snapshotId?: string }>( + 'basilisk.memory.ingest', + { memorySessionId: start.memorySessionId, output } + ); + + assert.strictEqual(result.kind, 'snapshot', 'ingest should yield a snapshot'); + assert.ok(typeof result.snapshotId === 'string', 'snapshot should have an id'); + assert.ok( + typeof result.currentMemory === 'number' && result.currentMemory > 0, + `tracemalloc should report tracked memory, got ${String(result.currentMemory)}` + ); + + await stopActiveDebugSession(); + }); +}); + +// ── Zero-config debug start [VSIX-PYTHON-DEBUGGER-DAP] ────────────────────── +// Pure tests for the DebugConfigurationProvider's defaulting logic that lets +// "Run and Debug" / F5 start without a launch.json. 
+suite('Basilisk Debug Config Provider', () => { + test('empty config + Python file synthesizes a current-file launch', () => { + // VS Code passes a truly-empty {} when starting with no launch.json. + const resolved = applyDebugConfigDefaults({} as vscode.DebugConfiguration, 'python'); + assert.strictEqual(resolved.type, 'basilisk-debug'); + assert.strictEqual(resolved.request, 'launch'); + assert.strictEqual(resolved.program, '${file}'); + }); + + test('empty config + non-Python file is left untouched', () => { + const empty = {} as vscode.DebugConfiguration; + const resolved = applyDebugConfigDefaults(empty, 'rust'); + assert.strictEqual(resolved.type, undefined); + assert.strictEqual(resolved.program, undefined); + }); + + test('launch config missing program defaults to the current file', () => { + const resolved = applyDebugConfigDefaults( + { name: 'x', type: 'basilisk-debug', request: 'launch' } as vscode.DebugConfiguration, + 'python' + ); + assert.strictEqual(resolved.program, '${file}'); + }); + + test('a complete config passes through unchanged', () => { + const full = { + name: 'x', type: 'basilisk-debug', request: 'launch', program: '/tmp/a.py', + } as vscode.DebugConfiguration; + const resolved = applyDebugConfigDefaults(full, 'python'); + assert.strictEqual(resolved.program, '/tmp/a.py'); + }); });