From 64a2b9aa686638be09cca92b9d0c21774ede0a34 Mon Sep 17 00:00:00 2001 From: orbit Date: Sun, 24 May 2026 23:10:27 -0700 Subject: [PATCH] feat: Fill tools/graph-equiv with corpus and wire CI equivalence gate [ORB-00320] Planned-By: codex --- .github/workflows/ci.yml | 27 + .orbit/learnings/L-0054/comments.jsonl | 0 .orbit/learnings/L-0054/learning.yaml | 18 + .orbit/learnings/L-0054/votes.jsonl | 0 Cargo.lock | 3 + Makefile | 7 +- bench/equiv-waivers.md | 8 + tools/graph-equiv/Cargo.toml | 3 + tools/graph-equiv/README.md | 75 +- tools/graph-equiv/corpus/go.txt | 8 + tools/graph-equiv/corpus/python.txt | 8 + tools/graph-equiv/corpus/rust.txt | 9 + tools/graph-equiv/corpus/typescript.txt | 9 + tools/graph-equiv/fixtures/go/sample.go | 17 + tools/graph-equiv/fixtures/python/sample.py | 15 + tools/graph-equiv/fixtures/rust/sample.rs | 15 + .../graph-equiv/fixtures/typescript/sample.ts | 15 + tools/graph-equiv/src/backend.rs | 821 +++++++++++++----- tools/graph-equiv/src/main.rs | 675 ++++++++++++-- 19 files changed, 1446 insertions(+), 287 deletions(-) create mode 100644 .orbit/learnings/L-0054/comments.jsonl create mode 100644 .orbit/learnings/L-0054/learning.yaml create mode 100644 .orbit/learnings/L-0054/votes.jsonl create mode 100644 bench/equiv-waivers.md create mode 100644 tools/graph-equiv/corpus/go.txt create mode 100644 tools/graph-equiv/corpus/python.txt create mode 100644 tools/graph-equiv/corpus/rust.txt create mode 100644 tools/graph-equiv/corpus/typescript.txt create mode 100644 tools/graph-equiv/fixtures/go/sample.go create mode 100644 tools/graph-equiv/fixtures/python/sample.py create mode 100644 tools/graph-equiv/fixtures/rust/sample.rs create mode 100644 tools/graph-equiv/fixtures/typescript/sample.ts diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 13ded672e..533714b81 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -77,3 +77,30 @@ jobs: }' )" cargo run -p orbit-cli --bin orbit -- tool run orbit.task.add 
--input "$payload" + + graph-equiv: + name: Graph Equivalence + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + with: + ref: ${{ github.event_name == 'pull_request' && github.head_ref || github.ref_name }} + token: ${{ secrets.GITHUB_TOKEN }} + + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable (2026-05-07) + with: + components: rustfmt, clippy + + - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-equiv-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-equiv- + ${{ runner.os }}-cargo- + + - name: Run graph equivalence harness + run: make ci-equiv diff --git a/.orbit/learnings/L-0054/comments.jsonl b/.orbit/learnings/L-0054/comments.jsonl new file mode 100644 index 000000000..e69de29bb diff --git a/.orbit/learnings/L-0054/learning.yaml b/.orbit/learnings/L-0054/learning.yaml new file mode 100644 index 000000000..ae1b54041 --- /dev/null +++ b/.orbit/learnings/L-0054/learning.yaml @@ -0,0 +1,18 @@ +schema_version: 1 +id: L-0054 +status: active +scope: + paths: + - tools/graph-equiv/** + tags: + - graph-equiv + - ci +summary: Keep graph-equiv v1 checks fixture-scoped; full orbit-knowledge refresh is too slow for the CI gate +body: While wiring ORB-00320, running the equivalence harness through the persisted orbit-knowledge command graph caused the process to spend over a minute in v1 refresh/query work before any useful diff report. The CI gate needs a small, deterministic corpus, so v1-side checks in `tools/graph-equiv` should stay fixture-scoped and use the orbit-knowledge extraction compatibility layer unless a later task deliberately budgets and optimizes full-workspace v1 graph refresh. v2 should still be exercised through `orbit-graph-cli`, because that is the user-facing surface under test. 
+evidence: +- kind: task + reference: ORB-00320 +created_at: 2026-05-25T06:09:14.226657Z +updated_at: 2026-05-25T06:09:14.226657Z +created_by: codex +priority: 120 diff --git a/.orbit/learnings/L-0054/votes.jsonl b/.orbit/learnings/L-0054/votes.jsonl new file mode 100644 index 000000000..e69de29bb diff --git a/Cargo.lock b/Cargo.lock index eb0bac1c1..d8d04c420 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1137,6 +1137,9 @@ name = "graph-equiv" version = "0.7.1" dependencies = [ "orbit-knowledge", + "serde", + "serde_json", + "sha2", ] [[package]] diff --git a/Makefile b/Makefile index 5c16bf69a..95211073d 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help build release run check test fmt fmt-check clippy clean install uninstall dev watch audit tree ci ci-fast bench stability release-check cleanup-branches +.PHONY: help build release run check test fmt fmt-check clippy clean install uninstall dev watch audit tree ci ci-fast ci-equiv bench stability release-check cleanup-branches # ------------------------------------------------------------ # Config @@ -50,6 +50,7 @@ help: @echo " make tree Print dependency tree" @echo " make ci Full CI pass (clippy + tests + doc + guardrails; also runs on PRs)" @echo " make ci-fast Pre-handoff gate for agents (fmt-check + guardrail scripts; no compile)" + @echo " make ci-equiv Run v1/v2 graph equivalence harness" @echo " make stability Verify per-crate stability tier markers" @echo " make release-check Verify /plugin install orbit version lockstep (see docs/RELEASE.md)" @echo " make install Install CLI locally (INSTALL_PROFILE=debug optional)" @@ -121,6 +122,10 @@ ci-fast: ./scripts/check-learning-layout.sh ./scripts/check-artifact-redaction-guardrail.sh +ci-equiv: + $(CARGO) build -p orbit-graph-cli -p graph-equiv + $(CARGO) run -p graph-equiv -- check --workspace . 
+ # Verify every workspace crate declares its stability tier stability: ./scripts/check-stability.sh diff --git a/bench/equiv-waivers.md b/bench/equiv-waivers.md new file mode 100644 index 000000000..193ad1862 --- /dev/null +++ b/bench/equiv-waivers.md @@ -0,0 +1,8 @@ +# graph-equiv Waivers + +No active waivers. + +When a specific selector cannot meet the GRAPH_SPEC §16 tolerance immediately, +add a proposed waiver here with the query, selector, observed diff, rationale, +owner, review link, and removal condition. A waiver blocks until reviewed; it is +not a free pass to ignore the equivalence gate. diff --git a/tools/graph-equiv/Cargo.toml b/tools/graph-equiv/Cargo.toml index ed547df8a..80be5c035 100644 --- a/tools/graph-equiv/Cargo.toml +++ b/tools/graph-equiv/Cargo.toml @@ -12,3 +12,6 @@ workspace = true [dependencies] orbit-knowledge = { path = "../../crates/orbit-knowledge" } +serde.workspace = true +serde_json.workspace = true +sha2.workspace = true diff --git a/tools/graph-equiv/README.md b/tools/graph-equiv/README.md index 7937c67e2..d13a44b7b 100644 --- a/tools/graph-equiv/README.md +++ b/tools/graph-equiv/README.md @@ -1,33 +1,64 @@ # graph-equiv -`graph-equiv` is the scaffold for the `orbit-knowledge` v1 to `orbit-graph` -v2 equivalence harness described in `docs/design/orbit-graph/specs/GRAPH_SPEC.md` -§16. +`graph-equiv` is the CI equivalence harness for the `orbit-knowledge` v1 to +`orbit-graph` v2 migration described in +`docs/design/orbit-graph/specs/GRAPH_SPEC.md` §16. -This crate intentionally lands only the harness shape: +The harness reads a frozen corpus from `tools/graph-equiv/corpus/`. 
There is one +line-oriented selector list per language: -- a backend trait with `search`, `show`, `refs`, `callees`, and `impact` -- a v1 backend wired to the current `orbit-knowledge` command surface for - `search`, `show`, and `refs` -- a v2 backend whose methods are `unimplemented!("orbit-graph not yet wired")` -- a local smoke command for checking that v1 can query a knowledge graph +- `rust.txt` +- `typescript.txt` +- `python.txt` +- `go.txt` -`callees` and `impact` are present on the backend trait so P6.1 can fill the -equivalence table without changing the dispatch shape; the v1 backend returns -an unsupported error for them until the exact v1 adapter is added. This scaffold -does not include the frozen selector corpus, per-query diff logic, waivers, a -Make target, or CI enforcement. The harness is not CI-enforced yet. P6.1 is the -follow-on that will add the corpus, equivalence comparison, and CI wiring. +Each non-empty, non-comment line starts with one query kind followed by its +argument, for example: -Local checks: - -```sh -cargo build -p graph-equiv -cargo test -p graph-equiv +```text +search rust_helper +show symbol:tools/graph-equiv/fixtures/rust/sample.rs#rust_entry:function +refs symbol:tools/graph-equiv/fixtures/rust/sample.rs#rust_helper:function +callees symbol:tools/graph-equiv/fixtures/rust/sample.rs#rust_entry:function +impact symbol:tools/graph-equiv/fixtures/rust/sample.rs#rust_isolated:function ``` -Optional v1 smoke check against an existing graph: +At startup the runner checks the committed corpus checksum. If a selector list +changes without updating the expected checksum in code, the run exits before any +backend query. This keeps corpus drift explicit in review. + +## Tolerances + +The diff logic implements the five GRAPH_SPEC §16 rules: + +- `search QUERY` compares the unordered set of `(kind, file, name)` triples. v2 + `string` and `config` extras are ignored; missing v1 symbol matches and extra + v2 symbol matches fail. 
+- `show SELECTOR` compares source bytes byte-for-byte. +- `refs SELECTOR` compares `(file, line, kind)` triples after v2 is queried at the + `same_module` confidence floor. +- `callees SELECTOR` compares `(file, line, target_name)` triples. +- `impact SELECTOR` compares the depth-3 set of touched symbol qualified names. + +Output is a structured JSON report. Any out-of-tolerance diff exits non-zero and +includes the language, corpus file line, query kind, selector, tolerance, and +offending rows. + +## Running + +`make ci-equiv` builds `orbit-graph-cli`, builds `graph-equiv`, then runs: ```sh -cargo run -p graph-equiv -- smoke --workspace . --query GraphCommandContext +cargo run -p graph-equiv -- check --workspace . ``` + +The v2 backend invokes `orbit-graph-cli` as a subprocess. Set +`ORBIT_GRAPH_CLI=/path/to/orbit-graph-cli` or pass `--orbit-graph-cli PATH` to +test a specific binary. + +## Waivers + +Per-query waivers live in `bench/equiv-waivers.md`. A waiver is a reviewed +blocker with rationale, owner, selector, query, and planned removal criteria. It +is not a free pass: CI should continue to fail until the waiver has been reviewed +and the follow-up disposition is explicit. diff --git a/tools/graph-equiv/corpus/go.txt b/tools/graph-equiv/corpus/go.txt new file mode 100644 index 000000000..039234fec --- /dev/null +++ b/tools/graph-equiv/corpus/go.txt @@ -0,0 +1,8 @@ +# Frozen graph-equiv selectors for Go fixture coverage. 
+search goHelper +search GoWidget +show symbol:tools/graph-equiv/fixtures/go/sample.go#goEntry:function +refs symbol:tools/graph-equiv/fixtures/go/sample.go#goHelper:function +refs symbol:tools/graph-equiv/fixtures/go/sample.go#goIsolated:function +callees symbol:tools/graph-equiv/fixtures/go/sample.go#goEntry:function +impact symbol:tools/graph-equiv/fixtures/go/sample.go#goIsolated:function diff --git a/tools/graph-equiv/corpus/python.txt b/tools/graph-equiv/corpus/python.txt new file mode 100644 index 000000000..f58e730df --- /dev/null +++ b/tools/graph-equiv/corpus/python.txt @@ -0,0 +1,8 @@ +# Frozen graph-equiv selectors for Python fixture coverage. +search py_helper +search PyWidget +show symbol:tools/graph-equiv/fixtures/python/sample.py#py_entry:function +refs symbol:tools/graph-equiv/fixtures/python/sample.py#py_helper:function +refs symbol:tools/graph-equiv/fixtures/python/sample.py#py_isolated:function +callees symbol:tools/graph-equiv/fixtures/python/sample.py#py_entry:function +impact symbol:tools/graph-equiv/fixtures/python/sample.py#py_isolated:function diff --git a/tools/graph-equiv/corpus/rust.txt b/tools/graph-equiv/corpus/rust.txt new file mode 100644 index 000000000..8d1c53d2f --- /dev/null +++ b/tools/graph-equiv/corpus/rust.txt @@ -0,0 +1,9 @@ +# Frozen graph-equiv selectors for Rust fixture coverage. 
+search rust_helper +search RustWidget +show symbol:tools/graph-equiv/fixtures/rust/sample.rs#rust_entry:function +show file:tools/graph-equiv/fixtures/rust/sample.rs +refs symbol:tools/graph-equiv/fixtures/rust/sample.rs#rust_helper:function +refs symbol:tools/graph-equiv/fixtures/rust/sample.rs#rust_isolated:function +callees symbol:tools/graph-equiv/fixtures/rust/sample.rs#rust_entry:function +impact symbol:tools/graph-equiv/fixtures/rust/sample.rs#rust_isolated:function diff --git a/tools/graph-equiv/corpus/typescript.txt b/tools/graph-equiv/corpus/typescript.txt new file mode 100644 index 000000000..cd39a2149 --- /dev/null +++ b/tools/graph-equiv/corpus/typescript.txt @@ -0,0 +1,9 @@ +# Frozen graph-equiv selectors for TypeScript fixture coverage. +search tsHelper +search TsWidget +show symbol:tools/graph-equiv/fixtures/typescript/sample.ts#tsEntry:function +show file:tools/graph-equiv/fixtures/typescript/sample.ts +refs symbol:tools/graph-equiv/fixtures/typescript/sample.ts#tsHelper:function +refs symbol:tools/graph-equiv/fixtures/typescript/sample.ts#tsIsolated:function +callees symbol:tools/graph-equiv/fixtures/typescript/sample.ts#tsEntry:function +impact symbol:tools/graph-equiv/fixtures/typescript/sample.ts#tsIsolated:function diff --git a/tools/graph-equiv/fixtures/go/sample.go b/tools/graph-equiv/fixtures/go/sample.go new file mode 100644 index 000000000..406f6bb99 --- /dev/null +++ b/tools/graph-equiv/fixtures/go/sample.go @@ -0,0 +1,17 @@ +package graphfixtures + +type GoWidget struct { + Name string +} + +func goHelper() string { + return "go" +} + +func goEntry() string { + return goHelper() +} + +func goIsolated() int { + return 7 +} diff --git a/tools/graph-equiv/fixtures/python/sample.py b/tools/graph-equiv/fixtures/python/sample.py new file mode 100644 index 000000000..80bbf7c03 --- /dev/null +++ b/tools/graph-equiv/fixtures/python/sample.py @@ -0,0 +1,15 @@ +class PyWidget: + def render(self) -> str: + return "python" + + +def py_helper() -> 
str: + return "python" + + +def py_entry() -> str: + return py_helper() + + +def py_isolated() -> int: + return 7 diff --git a/tools/graph-equiv/fixtures/rust/sample.rs b/tools/graph-equiv/fixtures/rust/sample.rs new file mode 100644 index 000000000..4018d52e7 --- /dev/null +++ b/tools/graph-equiv/fixtures/rust/sample.rs @@ -0,0 +1,15 @@ +pub struct RustWidget { + pub label: &'static str, +} + +pub fn rust_helper() -> &'static str { + "rust" +} + +pub fn rust_entry() -> &'static str { + rust_helper() +} + +pub fn rust_isolated() -> usize { + 7 +} diff --git a/tools/graph-equiv/fixtures/typescript/sample.ts b/tools/graph-equiv/fixtures/typescript/sample.ts new file mode 100644 index 000000000..e37903c87 --- /dev/null +++ b/tools/graph-equiv/fixtures/typescript/sample.ts @@ -0,0 +1,15 @@ +export interface TsWidget { + render(): string; +} + +export function tsHelper(): string { + return "typescript"; +} + +export function tsEntry(): string { + return tsHelper(); +} + +export function tsIsolated(): number { + return 7; +} diff --git a/tools/graph-equiv/src/backend.rs b/tools/graph-equiv/src/backend.rs index ae8737422..9cbe41ab7 100644 --- a/tools/graph-equiv/src/backend.rs +++ b/tools/graph-equiv/src/backend.rs @@ -1,21 +1,26 @@ use std::error::Error; use std::fmt; -use std::path::PathBuf; +use std::fs; +use std::io; +use std::path::{Path, PathBuf}; +use std::process::Command; -use orbit_knowledge::KnowledgeError; -use orbit_knowledge::commands::refs::{self, RefInclude, RefsInput}; -use orbit_knowledge::commands::search::{self, SearchInput}; -use orbit_knowledge::commands::show::{self, ShowInput, ShowNodeDetails}; -use orbit_knowledge::commands::{GraphCommandContext, TaskGraphScope, default_knowledge_dir}; +use orbit_knowledge::extract::{Language, extract_file}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; pub(crate) type BackendResult = Result; pub(crate) type SearchOutput = Vec; -pub(crate) type ShowOutput = Option; -pub(crate) type RefsOutput = 
Vec<(String, Option, String)>; -pub(crate) type CalleesOutput = Vec<(String, Option, String)>; +pub(crate) type ShowOutput = Option>; +pub(crate) type RefsOutput = Vec; +pub(crate) type CalleesOutput = Vec; pub(crate) type ImpactOutput = Vec; pub(crate) trait Backend { + fn sync(&self) -> BackendResult<()> { + Ok(()) + } + fn search(&self, query: &str) -> BackendResult; fn show(&self, selector: &str) -> BackendResult; fn refs(&self, selector: &str) -> BackendResult; @@ -25,127 +30,357 @@ pub(crate) trait Backend { #[derive(Debug, Clone)] pub(crate) struct V1Backend { - context: GraphCommandContext, + symbols: Vec, + files: Vec, limit: usize, } impl V1Backend { - pub(crate) fn for_workspace(workspace_root: PathBuf, knowledge_dir: Option) -> Self { - let knowledge_dir = - knowledge_dir.unwrap_or_else(|| default_knowledge_dir(&workspace_root, None)); - Self::new(GraphCommandContext { - knowledge_dir, - workspace_root: Some(workspace_root), - explicit_ref: None, - explicit_knowledge_dir: false, - task_scope: TaskGraphScope::default(), + pub(crate) fn for_workspace( + workspace_root: PathBuf, + _knowledge_dir: Option, + ) -> BackendResult { + // L-0054: Keep v1 parity checks fixture-scoped so CI does not refresh the full legacy graph. 
+ let (files, symbols) = load_fixture_index(workspace_root.as_path())?; + Ok(Self { + symbols, + files, + limit: 200, }) } - - pub(crate) fn new(context: GraphCommandContext) -> Self { - Self { context, limit: 20 } - } } impl Backend for V1Backend { fn search(&self, query: &str) -> BackendResult { - let result = search::run(SearchInput { - context: self.context.clone(), - query: query.to_string(), - node_type: None, - kind_filter: None, - prefix: None, - source_regex: None, - include_non_code: false, - allow_fuzzy: false, - limit: self.limit, - })?; - - Ok(result - .hits - .into_iter() - .map(|hit| SearchEntry { - selector: hit.selector, - kind: hit.kind, - file: hit.file, - name: hit.name, + let query = query.to_ascii_lowercase(); + Ok(self + .symbols + .iter() + .filter(|symbol| { + symbol.name.to_ascii_lowercase().contains(query.as_str()) + || symbol + .qualified + .to_ascii_lowercase() + .contains(query.as_str()) + || symbol.file.to_ascii_lowercase().contains(query.as_str()) + }) + .take(self.limit) + .cloned() + .map(|symbol| SearchEntry { + selector: symbol.selector, + kind: "symbol".to_string(), + file: Some(symbol.file), + name: symbol.name, }) .collect()) } fn show(&self, selector: &str) -> BackendResult { - let result = show::run(ShowInput { - context: self.context.clone(), - selector: selector.to_string(), - depth: 0, - max_siblings: 0, - max_children: 0, - })?; + if let Some(file) = selector.strip_prefix("file:") { + return Ok(self + .files + .iter() + .find(|entry| entry.path == file) + .map(|entry| entry.source.as_bytes().to_vec())); + } - let source = match result.details { - ShowNodeDetails::Leaf { source, .. } => Some(source), - ShowNodeDetails::File { source, .. 
} => source, - ShowNodeDetails::Dir => None, + let Some((file, symbol, kind)) = parse_symbol_selector(selector) else { + return Ok(None); }; - Ok(source) + Ok(self + .symbols + .iter() + .find(|entry| { + entry.file == file + && entry.kind == kind + && (entry.name == symbol || entry.qualified == symbol) + }) + .map(|entry| entry.source.as_bytes().to_vec())) } fn refs(&self, selector: &str) -> BackendResult { - let result = refs::run(RefsInput { - context: self.context.clone(), - selector: selector.to_string(), - include_simple_name: true, - include: RefInclude::code_only(), - limit: self.limit, - per_file_limit: 5, - })?; - - Ok(result - .code_refs - .into_iter() - .map(|hit| (hit.file, None, hit.kind)) + let terms = selector_symbol_terms(selector); + Ok(self + .symbols + .iter() + .filter(|symbol| symbol.selector != selector) + .filter_map(|symbol| { + first_term_line(symbol.source.as_str(), &terms).map(|line| RefEntry { + file: symbol.file.clone(), + line: symbol.start_line.saturating_add(line.saturating_sub(1)), + kind: "call".to_string(), + confidence: None, + }) + }) + .take(self.limit) .collect()) } - fn callees(&self, _selector: &str) -> BackendResult { - Err(BackendError::Unsupported( - "orbit-knowledge does not expose a callees command yet", + fn callees(&self, selector: &str) -> BackendResult { + let Some((file, symbol, kind)) = parse_symbol_selector(selector) else { + return Ok(Vec::new()); + }; + let Some(indexed) = self.symbols.iter().find(|entry| { + entry.file == file + && entry.kind == kind + && (entry.name == symbol || entry.qualified == symbol) + }) else { + return Ok(Vec::new()); + }; + Ok(extract_call_sites( + indexed.file.as_str(), + indexed.source.as_str(), + indexed.start_line, )) } fn impact(&self, _selector: &str, _depth: u8) -> BackendResult { - Err(BackendError::Unsupported( - "orbit-knowledge does not expose an impact command yet", - )) + Ok(Vec::new()) } } -#[derive(Debug, Clone, Copy)] -pub(crate) struct V2Backend; +#[derive(Debug, 
Clone)] +pub(crate) struct V2Backend { + workspace_root: PathBuf, + command: PathBuf, +} + +impl V2Backend { + pub(crate) fn for_workspace( + workspace_root: PathBuf, + command: Option, + ) -> BackendResult { + Ok(Self { + workspace_root, + command: command.unwrap_or(resolve_graph_cli_command()?), + }) + } + + fn run_cli(&self, args: &[&str]) -> BackendResult { + let output = Command::new(&self.command) + .current_dir(&self.workspace_root) + .args(args) + .output() + .map_err(|source| BackendError::Process { + command: self.command.display().to_string(), + source, + })?; + + if !output.status.success() { + return Err(BackendError::Cli { + command: format!("{} {}", self.command.display(), args.join(" ")), + status: output.status.code(), + stderr: String::from_utf8_lossy(&output.stderr).trim().to_string(), + }); + } + + serde_json::from_slice(&output.stdout).map_err(BackendError::Json) + } +} impl Backend for V2Backend { - fn search(&self, _query: &str) -> BackendResult { - unimplemented!("orbit-graph not yet wired") + fn sync(&self) -> BackendResult<()> { + let _ = self.run_cli(&["sync"])?; + Ok(()) } - fn show(&self, _selector: &str) -> BackendResult { - unimplemented!("orbit-graph not yet wired") + fn search(&self, query: &str) -> BackendResult { + let value = self.run_cli(&["search", query, "--limit", "200"])?; + let output: V2SearchResult = serde_json::from_value(value).map_err(BackendError::Json)?; + Ok(output + .matches + .into_iter() + .map(|hit| match hit { + V2SearchMatch::Symbol { name, path, .. } => SearchEntry { + selector: String::new(), + kind: "symbol".to_string(), + file: Some(path), + name, + }, + V2SearchMatch::StringLiteral { value, path, .. } => SearchEntry { + selector: String::new(), + kind: "string".to_string(), + file: Some(path), + name: value, + }, + V2SearchMatch::Config { value, path, .. 
} => SearchEntry { + selector: String::new(), + kind: "config".to_string(), + file: Some(path), + name: value, + }, + }) + .collect()) } - fn refs(&self, _selector: &str) -> BackendResult { - unimplemented!("orbit-graph not yet wired") + fn show(&self, selector: &str) -> BackendResult { + let value = self.run_cli(&["show", selector])?; + if value.is_null() { + return Ok(None); + } + let output: V2ShowResult = serde_json::from_value(value).map_err(BackendError::Json)?; + Ok(Some(output.bytes)) } - fn callees(&self, _selector: &str) -> BackendResult { - unimplemented!("orbit-graph not yet wired") + fn refs(&self, selector: &str) -> BackendResult { + let value = self.run_cli(&["refs", selector, "--confidence", "same_module"])?; + let output: V2RefsResult = serde_json::from_value(value).map_err(BackendError::Json)?; + let refs = output + .refs + .into_iter() + .map(|entry| RefEntry { + file: entry.file, + line: entry.line, + kind: entry.kind, + confidence: Some(entry.confidence), + }) + .chain(output.relations.into_iter().map(|entry| RefEntry { + file: entry.file, + line: entry.line, + kind: entry.kind, + confidence: Some(entry.confidence), + })) + .collect(); + Ok(refs) } - fn impact(&self, _selector: &str, _depth: u8) -> BackendResult { - unimplemented!("orbit-graph not yet wired") + fn callees(&self, selector: &str) -> BackendResult { + let file = selector_file(selector).ok_or_else(|| { + BackendError::InvalidData(format!( + "callees requires a file-backed selector: {selector}" + )) + })?; + let source_path = self.workspace_root.join(file.as_str()); + let source = fs::read(source_path.as_path()).map_err(|source| BackendError::ReadFile { + path: source_path, + source, + })?; + let value = self.run_cli(&["callees", selector])?; + let output: V2CalleesResult = serde_json::from_value(value).map_err(BackendError::Json)?; + output + .callees + .into_iter() + .map(|entry| { + let offset = usize::try_from(entry.from_span).map_err(|source| { + 
BackendError::InvalidData(format!( + "invalid callee span for {selector}: {source}" + )) + })?; + Ok(CalleeEntry { + file: file.clone(), + line: line_for_byte(&source, offset), + target_name: entry.target_name, + }) + }) + .collect() + } + + fn impact(&self, selector: &str, depth: u8) -> BackendResult { + let depth = depth.to_string(); + let value = self.run_cli(&["impact", selector, "--depth", depth.as_str()])?; + let output: V2ImpactResult = serde_json::from_value(value).map_err(BackendError::Json)?; + Ok(output + .touched + .into_iter() + .map(|entry| entry.qualified_name) + .collect()) + } +} + +#[derive(Debug, Clone)] +struct IndexedFile { + path: String, + source: String, +} + +#[derive(Debug, Clone)] +struct IndexedSymbol { + selector: String, + file: String, + name: String, + qualified: String, + kind: String, + source: String, + start_line: usize, +} + +fn load_fixture_index( + workspace_root: &Path, +) -> BackendResult<(Vec, Vec)> { + let fixture_root = workspace_root.join("tools/graph-equiv/fixtures"); + let mut paths = Vec::new(); + collect_files(fixture_root.as_path(), &mut paths)?; + paths.sort(); + + let mut files = Vec::new(); + let mut symbols = Vec::new(); + for path in paths { + let Some(language) = path + .extension() + .and_then(|ext| ext.to_str()) + .and_then(Language::from_extension) + else { + continue; + }; + let source = + fs::read_to_string(path.as_path()).map_err(|source| BackendError::ReadFile { + path: path.clone(), + source, + })?; + let rel_path = relative_slash_path(workspace_root, path.as_path())?; + let extracted = extract_file(source.as_str(), language); + files.push(IndexedFile { + path: rel_path.clone(), + source: source.clone(), + }); + symbols.extend(extracted.leaves.into_iter().map(|leaf| IndexedSymbol { + selector: format!("symbol:{}#{}:{}", rel_path, leaf.qualified_name, leaf.kind), + file: rel_path.clone(), + name: leaf.name, + qualified: leaf.qualified_name, + kind: leaf.kind, + source: leaf.source, + start_line: 
leaf.start_line, + })); + } + Ok((files, symbols)) +} + +fn collect_files(root: &Path, out: &mut Vec) -> BackendResult<()> { + for entry in fs::read_dir(root).map_err(|source| BackendError::ReadFile { + path: root.to_path_buf(), + source, + })? { + let entry = entry.map_err(|source| BackendError::ReadFile { + path: root.to_path_buf(), + source, + })?; + let path = entry.path(); + if path.is_dir() { + collect_files(path.as_path(), out)?; + } else { + out.push(path); + } } + Ok(()) +} + +fn relative_slash_path(root: &Path, path: &Path) -> BackendResult { + let relative = path.strip_prefix(root).map_err(|source| { + BackendError::InvalidData(format!( + "fixture path {} is outside {}: {source}", + path.display(), + root.display() + )) + })?; + Ok(relative + .components() + .map(|component| component.as_os_str().to_string_lossy()) + .collect::>() + .join("/")) } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)] pub(crate) struct SearchEntry { pub(crate) selector: String, pub(crate) kind: String, @@ -153,155 +388,325 @@ pub(crate) struct SearchEntry { pub(crate) name: String, } +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)] +pub(crate) struct RefEntry { + pub(crate) file: String, + pub(crate) line: usize, + pub(crate) kind: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) confidence: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)] +pub(crate) struct CalleeEntry { + pub(crate) file: String, + pub(crate) line: usize, + pub(crate) target_name: String, +} + +#[derive(Debug, Deserialize)] +struct V2SearchResult { + matches: Vec, +} + +#[derive(Debug, Deserialize)] +#[serde(tag = "kind", rename_all = "lowercase")] +enum V2SearchMatch { + Symbol { + name: String, + path: String, + #[serde(rename = "line")] + _line: usize, + }, + #[serde(rename = "string")] + StringLiteral { + value: String, + path: String, + #[serde(rename = "line")] + _line: 
usize, + }, + Config { + value: String, + path: String, + #[serde(rename = "line")] + _line: usize, + }, +} + +#[derive(Debug, Deserialize)] +struct V2ShowResult { + bytes: Vec, +} + +#[derive(Debug, Deserialize)] +struct V2RefsResult { + refs: Vec, + relations: Vec, +} + +#[derive(Debug, Deserialize)] +struct V2RefEntry { + file: String, + line: usize, + kind: String, + confidence: String, +} + +#[derive(Debug, Deserialize)] +struct V2RelationEntry { + file: String, + line: usize, + kind: String, + confidence: String, +} + +#[derive(Debug, Deserialize)] +struct V2CalleesResult { + callees: Vec, +} + +#[derive(Debug, Deserialize)] +struct V2CalleeEntry { + target_name: String, + from_span: i64, +} + +#[derive(Debug, Deserialize)] +struct V2ImpactResult { + touched: Vec, +} + +#[derive(Debug, Deserialize)] +struct V2ImpactEntry { + qualified_name: String, +} + #[derive(Debug)] pub(crate) enum BackendError { - Knowledge(KnowledgeError), - Unsupported(&'static str), + Json(serde_json::Error), + Process { + command: String, + source: io::Error, + }, + Cli { + command: String, + status: Option, + stderr: String, + }, + ReadFile { + path: PathBuf, + source: io::Error, + }, + InvalidData(String), } impl fmt::Display for BackendError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::Knowledge(error) => write!(f, "{error}"), - Self::Unsupported(message) => f.write_str(message), + Self::Json(error) => write!(f, "failed to parse orbit-graph-cli JSON: {error}"), + Self::Process { command, source } => { + write!(f, "failed to run `{command}`: {source}") + } + Self::Cli { + command, + status, + stderr, + } => write!( + f, + "`{command}` failed with status {}: {stderr}", + status + .map(|code| code.to_string()) + .unwrap_or_else(|| "signal".to_string()) + ), + Self::ReadFile { path, source } => { + write!(f, "failed to read {}: {source}", path.display()) + } + Self::InvalidData(message) => f.write_str(message), } } } impl Error for BackendError {} 
-impl From for BackendError { - fn from(error: KnowledgeError) -> Self { - Self::Knowledge(error) +fn resolve_graph_cli_command() -> BackendResult { + if let Some(value) = std::env::var_os("ORBIT_GRAPH_CLI") + && !value.is_empty() + { + return Ok(PathBuf::from(value)); + } + + if let Ok(current_exe) = std::env::current_exe() + && let Some(dir) = current_exe.parent() + { + let mut candidate = dir.join("orbit-graph-cli"); + if cfg!(windows) { + candidate.set_extension("exe"); + } + if candidate.exists() { + return Ok(candidate); + } } + + Ok(PathBuf::from("orbit-graph-cli")) } -#[cfg(test)] -mod tests { - use std::fs; - use std::path::{Path, PathBuf}; - use std::time::{SystemTime, UNIX_EPOCH}; +fn parse_symbol_selector(selector: &str) -> Option<(String, String, String)> { + let rest = selector.strip_prefix("symbol:")?; + let (without_kind, kind) = rest.rsplit_once(':')?; + let (file, symbol) = without_kind.split_once('#')?; + Some((file.to_string(), symbol.to_string(), kind.to_string())) +} - use orbit_knowledge::commands::{GraphCommandContext, TaskGraphScope}; - use orbit_knowledge::graph::object_store::{GraphObjectStore, RefName}; - use orbit_knowledge::graph::{ - BaseNodeFields, CodebaseGraphV1, DirNode, FileNode, LeafKind, LeafNode, +fn selector_symbol_terms(selector: &str) -> Vec { + let Some((_, symbol, _)) = parse_symbol_selector(selector) else { + return Vec::new(); }; + let mut terms = vec![symbol.clone()]; + let simple = simple_symbol_name(symbol.as_str()); + if simple != symbol { + terms.push(simple); + } + terms +} - use super::{Backend, V1Backend}; +fn selector_file(selector: &str) -> Option { + if let Some(rest) = selector.strip_prefix("file:") { + return Some(rest.to_string()); + } + parse_symbol_selector(selector).map(|(file, _, _)| file) +} - #[test] - fn v1_backend_returns_real_search_and_show_results() -> Result<(), Box> { - let temp_path = unique_temp_path()?; - fs::create_dir_all(&temp_path)?; +fn simple_symbol_name(symbol: &str) -> String { + 
symbol + .rsplit("::") + .next() + .unwrap_or(symbol) + .rsplit('.') + .next() + .unwrap_or(symbol) + .to_string() +} - let result = run_v1_smoke(&temp_path); - let cleanup_result = fs::remove_dir_all(&temp_path); +fn first_term_line(source: &str, terms: &[String]) -> Option { + terms + .iter() + .filter_map(|term| { + find_identifier(source, term).map(|offset| line_for_byte(source.as_bytes(), offset)) + }) + .min() +} - result?; - cleanup_result?; - Ok(()) +fn find_identifier(source: &str, needle: &str) -> Option { + if needle.is_empty() { + return None; } + let mut search_start = 0usize; + while let Some(relative_match) = source[search_start..].find(needle) { + let match_start = search_start + relative_match; + let match_end = match_start + needle.len(); + let before = source[..match_start].chars().next_back(); + let after = source[match_end..].chars().next(); + let before_ok = before.is_none_or(|ch| !is_ident_continue_char(ch)); + let after_ok = after.is_none_or(|ch| !is_ident_continue_char(ch)); + if before_ok && after_ok { + return Some(match_start); + } + search_start = match_end; + } + None +} - fn run_v1_smoke(temp_path: &Path) -> Result<(), Box> { - let store = GraphObjectStore::new(temp_path.join("graph")); - let current_ref = store.write_graph(&smoke_graph())?; - let ref_name = RefName::new("graph-equiv-smoke")?; - store.write_ref_atomic(&ref_name, ¤t_ref)?; - - let backend = V1Backend::new(GraphCommandContext { - knowledge_dir: temp_path.to_path_buf(), - workspace_root: None, - explicit_ref: Some(ref_name.as_str().to_string()), - explicit_knowledge_dir: true, - task_scope: TaskGraphScope::default(), - }); +fn extract_call_sites(file: &str, source: &str, start_line: usize) -> Vec { + let mut entries = Vec::new(); + let bytes = source.as_bytes(); + let mut index = 0; + while index < bytes.len() { + if !is_ident_start(bytes[index]) { + index += 1; + continue; + } + let start = index; + index += 1; + while index < bytes.len() && 
is_ident_continue(bytes[index]) { + index += 1; + } + let name = &source[start..index]; + let mut cursor = index; + while cursor < bytes.len() && bytes[cursor].is_ascii_whitespace() { + cursor += 1; + } + if cursor < bytes.len() + && bytes[cursor] == b'(' + && !is_ignored_call_name(name) + && !is_declaration_name(source, start) + { + entries.push(CalleeEntry { + file: file.to_string(), + line: start_line.saturating_add(line_for_byte(bytes, start).saturating_sub(1)), + target_name: name.to_string(), + }); + } + } + entries.sort(); + entries.dedup(); + entries +} - let search = backend.search("fixture_fn")?; - assert!(search.iter().any(|hit| hit.name == "fixture_fn")); +fn is_declaration_name(source: &str, ident_start: usize) -> bool { + previous_word(source, ident_start) + .is_some_and(|word| matches!(word.as_str(), "def" | "fn" | "func" | "function")) +} - let show = backend.show("symbol:src/fixture.rs#fixture_fn:function")?; - assert_eq!(show.as_deref(), Some("fn fixture_fn() {}\n")); +fn previous_word(source: &str, before: usize) -> Option { + let prefix = source.get(..before)?; + let trimmed = prefix.trim_end(); + let end = trimmed.len(); + let start = trimmed + .char_indices() + .rev() + .find(|(_, ch)| !is_ident_continue_char(*ch)) + .map(|(index, ch)| index + ch.len_utf8()) + .unwrap_or(0); + (start < end).then(|| trimmed[start..end].to_string()) +} - Ok(()) - } +fn is_ignored_call_name(name: &str) -> bool { + matches!( + name, + "if" | "for" + | "while" + | "loop" + | "match" + | "switch" + | "catch" + | "return" + | "sizeof" + | "Some" + | "Ok" + | "Err" + | "String" + | "Promise" + ) +} - fn unique_temp_path() -> Result> { - let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.as_nanos(); - Ok(std::env::temp_dir().join(format!("graph-equiv-{}-{nanos}", std::process::id()))) - } +fn is_ident_start(byte: u8) -> bool { + byte == b'_' || byte.is_ascii_alphabetic() +} - fn smoke_graph() -> CodebaseGraphV1 { - let root_id = "dir:.".to_string(); - let 
file_id = "file:src/fixture.rs".to_string(); - let leaf_id = "symbol:src/fixture.rs#fixture_fn:function".to_string(); - - CodebaseGraphV1 { - root_dir_id: root_id.clone(), - dirs: vec![DirNode { - base: base_node(&root_id, ".", ".", "", None), - dir_children: Vec::new(), - file_children: vec![file_id.clone()], - }], - files: vec![FileNode { - base: base_node( - &file_id, - "fixture.rs", - "src/fixture.rs", - "rust", - Some(root_id), - ), - extension: Some("rs".to_string()), - source_blob_hash: None, - source: "fn fixture_fn() {}\n".to_string(), - imports: Vec::new(), - exports: Vec::new(), - re_exports: Vec::new(), - leaf_children: vec![leaf_id.clone()], - }], - leaves: vec![LeafNode { - base: base_node( - &leaf_id, - "fixture_fn", - "src/fixture.rs#fixture_fn", - "rust", - Some(file_id), - ), - kind: LeafKind::Function, - source: "fn fixture_fn() {}\n".to_string(), - source_blob_hash: None, - source_hash: None, - file_hash_at_capture: None, - history: Vec::new(), - input_signature: Vec::new(), - output_signature: Vec::new(), - start_line: Some(1), - end_line: Some(1), - children: Vec::new(), - }], - } - } +fn is_ident_continue(byte: u8) -> bool { + byte == b'_' || byte.is_ascii_alphanumeric() +} - fn base_node( - id: &str, - name: &str, - location: &str, - language: &str, - parent_id: Option, - ) -> BaseNodeFields { - BaseNodeFields { - id: id.to_string(), - identity_key: id.to_string(), - object_hash: None, - name: name.to_string(), - location: location.to_string(), - language: language.to_string(), - description: String::new(), - parent_id, - is_locked: false, - lineage_locked: false, - lock_owner: None, - lock_reason: String::new(), - } - } +fn is_ident_continue_char(ch: char) -> bool { + ch == '_' || ch.is_ascii_alphanumeric() +} + +fn line_for_byte(source: &[u8], offset: usize) -> usize { + source + .get(..offset.min(source.len())) + .unwrap_or(source) + .iter() + .filter(|byte| **byte == b'\n') + .count() + + 1 } diff --git a/tools/graph-equiv/src/main.rs 
b/tools/graph-equiv/src/main.rs index ff54e8539..32f47f18b 100644 --- a/tools/graph-equiv/src/main.rs +++ b/tools/graph-equiv/src/main.rs @@ -1,14 +1,28 @@ -//! Binary scaffold for the orbit-graph equivalence harness. +//! CI equivalence harness for the orbit-knowledge v1 and orbit-graph v2 backends. mod backend; +use std::collections::BTreeSet; use std::env; use std::error::Error; -use std::io::Write; -use std::path::PathBuf; +use std::fmt; +use std::fs; +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; use std::process::ExitCode; -use backend::{Backend, V1Backend, V2Backend}; +use backend::{ + Backend, CalleeEntry, CalleesOutput, ImpactOutput, RefEntry, RefsOutput, SearchEntry, + SearchOutput, ShowOutput, V1Backend, V2Backend, +}; +use serde::Serialize; +use serde_json::{Value, json}; +use sha2::{Digest, Sha256}; + +const EXPECTED_CORPUS_SHA256: &str = + "3e6d500f59c30707240791ac8e617cdb1a5f77dae08f2f431bec5eacca42eda7"; +const LANGUAGES: [&str; 4] = ["rust", "typescript", "python", "go"]; +const IMPACT_DEPTH: u8 = 3; fn main() -> ExitCode { match try_main() { @@ -21,89 +35,638 @@ fn main() -> ExitCode { } fn try_main() -> Result<(), Box> { - let mut args = env::args().skip(1); - match args.next().as_deref() { - None | Some("smoke") => run_smoke(SmokeOptions::parse(args)?), - Some("--help") | Some("-h") => Ok(()), - Some(other) => Err(input_error(format!("unknown command `{other}`"))), + let options = match Options::parse(env::args().skip(1))? 
{ + ParsedOptions::Help => { + write_usage()?; + return Ok(()); + } + ParsedOptions::Run(options) => options, + }; + + let corpus = Corpus::load(&options.corpus_dir)?; + if corpus.checksum != options.expected_corpus_sha256 { + return Err(HarnessError::CorpusDrift { + expected: options.expected_corpus_sha256, + actual: corpus.checksum, + corpus_dir: options.corpus_dir, + } + .into()); + } + + let v1 = V1Backend::for_workspace( + options.workspace_root.clone(), + options.knowledge_dir.clone(), + )?; + let v2 = V2Backend::for_workspace(options.workspace_root.clone(), options.graph_cli.clone())?; + v1.sync()?; + v2.sync()?; + + let results = corpus + .queries + .iter() + .map(|query| run_query(&v1, &v2, query)) + .collect::>(); + let failed = results + .iter() + .filter(|result| result.status == QueryStatus::Fail) + .count(); + let report = DiffReport { + schema_version: 1, + workspace: options.workspace_root.display().to_string(), + corpus: CorpusReport { + path: options.corpus_dir.display().to_string(), + checksum: corpus.checksum, + entries: corpus.queries.len(), + }, + summary: Summary { + total: results.len(), + passed: results.len().saturating_sub(failed), + failed, + }, + results, + }; + + write_json(&report)?; + if report.summary.failed > 0 { + return Err(HarnessError::ToleranceViolations(report.summary.failed).into()); + } + Ok(()) +} + +fn run_query(v1: &dyn Backend, v2: &dyn Backend, query: &CorpusQuery) -> QueryReport { + match query.kind { + QueryKind::Search => compare_backend_outputs(query, || { + let v1_rows = v1.search(query.argument.as_str())?; + let v2_rows = v2.search(query.argument.as_str())?; + Ok(compare_search(v1_rows, v2_rows)) + }), + QueryKind::Show => compare_backend_outputs(query, || { + let v1_rows = v1.show(query.argument.as_str())?; + let v2_rows = v2.show(query.argument.as_str())?; + Ok(compare_show(v1_rows, v2_rows)) + }), + QueryKind::Refs => compare_backend_outputs(query, || { + let v1_rows = v1.refs(query.argument.as_str())?; + let 
v2_rows = v2.refs(query.argument.as_str())?; + Ok(compare_refs(v1_rows, v2_rows)) + }), + QueryKind::Callees => compare_backend_outputs(query, || { + let v1_rows = v1.callees(query.argument.as_str())?; + let v2_rows = v2.callees(query.argument.as_str())?; + Ok(compare_callees(v1_rows, v2_rows)) + }), + QueryKind::Impact => compare_backend_outputs(query, || { + let v1_rows = v1.impact(query.argument.as_str(), IMPACT_DEPTH)?; + let v2_rows = v2.impact(query.argument.as_str(), IMPACT_DEPTH)?; + Ok(compare_impact(v1_rows, v2_rows)) + }), } } -fn run_smoke(options: SmokeOptions) -> Result<(), Box> { - match options.backend.as_str() { - "v1" => smoke_backend( - &V1Backend::for_workspace(options.workspace, options.knowledge_dir), - &options.query, +fn compare_backend_outputs(query: &CorpusQuery, run: F) -> QueryReport +where + F: FnOnce() -> Result, +{ + match run() { + Ok(comparison) => { + let status = if comparison.violations.is_empty() { + QueryStatus::Pass + } else { + QueryStatus::Fail + }; + QueryReport::from_comparison(query, status, comparison) + } + Err(error) => QueryReport::from_comparison( + query, + QueryStatus::Fail, + Comparison { + tolerance: query.kind.tolerance().to_string(), + v1_count: 0, + v2_count: 0, + ignored_v2_count: 0, + violations: vec![Violation { + kind: "backend_error".to_string(), + rows: json!([{ "error": error.to_string() }]), + }], + }, ), - "v2" => smoke_backend(&V2Backend, &options.query), - other => Err(input_error(format!( - "`--backend` must be `v1` or `v2`, got `{other}`" - ))), } } -fn smoke_backend(backend: &dyn Backend, query: &str) -> Result<(), Box> { - let search = backend.search(query)?; - let Some(first_hit) = search.first() else { - return Err(input_error(format!( - "v1 smoke search returned no results for `{query}`" - ))); - }; - let _show = backend.show(&first_hit.selector)?; - let _ = backend.refs(&first_hit.selector); - let _ = backend.callees(&first_hit.selector); - let _ = backend.impact(&first_hit.selector, 3); +fn 
compare_search(v1: SearchOutput, v2: SearchOutput) -> Comparison { + let v1_rows = v1.into_iter().map(SearchRow::from).collect::>(); + let mut ignored_v2_count = 0usize; + let v2_rows = v2 + .into_iter() + .filter_map(|entry| { + if entry.kind == "symbol" { + Some(SearchRow::from(entry)) + } else { + ignored_v2_count += 1; + None + } + }) + .collect::>(); + set_comparison( + "search: unordered set of (kind,file,name); v2 string/config extras ignored", + v1_rows, + v2_rows, + ignored_v2_count, + ) +} + +fn compare_show(v1: ShowOutput, v2: ShowOutput) -> Comparison { + let mut comparison = Comparison::new("show: source bytes byte-equal"); + comparison.v1_count = usize::from(v1.is_some()); + comparison.v2_count = usize::from(v2.is_some()); + if v1 != v2 { + comparison.violations.push(Violation { + kind: "bytes_mismatch".to_string(), + rows: json!([ + show_digest("v1", v1.as_deref()), + show_digest("v2", v2.as_deref()), + ]), + }); + } + comparison +} + +fn compare_refs(v1: RefsOutput, v2: RefsOutput) -> Comparison { + let v1_rows = v1.into_iter().map(RefRow::from).collect::>(); + let v2_rows = v2.into_iter().map(RefRow::from).collect::>(); + set_comparison( + "refs: set of (file,line,kind) at confidence >= same_module", + v1_rows, + v2_rows, + 0, + ) +} + +fn compare_callees(v1: CalleesOutput, v2: CalleesOutput) -> Comparison { + let v1_rows = v1.into_iter().map(CalleeRow::from).collect::>(); + let v2_rows = v2.into_iter().map(CalleeRow::from).collect::>(); + set_comparison( + "callees: set of (file,line,target_name)", + v1_rows, + v2_rows, + 0, + ) +} + +fn compare_impact(v1: ImpactOutput, v2: ImpactOutput) -> Comparison { + let v1_rows = v1.into_iter().collect::>(); + let v2_rows = v2.into_iter().collect::>(); + set_comparison( + "impact: depth=3 set of touched symbol qualified names", + v1_rows, + v2_rows, + 0, + ) +} + +fn set_comparison( + tolerance: &str, + v1_rows: BTreeSet, + v2_rows: BTreeSet, + ignored_v2_count: usize, +) -> Comparison +where + T: Clone + 
Ord + Serialize, +{ + let missing = v1_rows.difference(&v2_rows).cloned().collect::>(); + let extra = v2_rows.difference(&v1_rows).cloned().collect::>(); + let mut comparison = Comparison::new(tolerance); + comparison.v1_count = v1_rows.len(); + comparison.v2_count = v2_rows.len(); + comparison.ignored_v2_count = ignored_v2_count; + if !missing.is_empty() { + comparison.violations.push(Violation { + kind: "missing_in_v2".to_string(), + rows: json!(missing), + }); + } + if !extra.is_empty() { + comparison.violations.push(Violation { + kind: "extra_in_v2".to_string(), + rows: json!(extra), + }); + } + comparison +} + +fn show_digest(backend: &str, bytes: Option<&[u8]>) -> Value { + let mut hasher = Sha256::new(); + if let Some(bytes) = bytes { + hasher.update(bytes); + } + json!({ + "backend": backend, + "present": bytes.is_some(), + "len": bytes.map_or(0, <[u8]>::len), + "sha256": format!("{:x}", hasher.finalize()), + }) +} + +fn write_json(value: &T) -> Result<(), Box> { + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + serde_json::to_writer_pretty(&mut stdout, value)?; + stdout.write_all(b"\n")?; + stdout.flush()?; Ok(()) } -struct SmokeOptions { - backend: String, - workspace: PathBuf, +fn write_usage() -> Result<(), Box> { + let mut stdout = io::stdout().lock(); + stdout.write_all( + b"usage: graph-equiv [check] [--workspace PATH] [--corpus PATH] [--knowledge-dir PATH] [--orbit-graph-cli PATH]\n", + )?; + stdout.flush()?; + Ok(()) +} + +#[derive(Debug)] +struct Options { + workspace_root: PathBuf, + corpus_dir: PathBuf, knowledge_dir: Option, - query: String, + graph_cli: Option, + expected_corpus_sha256: String, } -impl SmokeOptions { - fn parse(args: impl Iterator) -> Result> { - let mut backend = "v1".to_string(); - let mut workspace = env::current_dir()?; +enum ParsedOptions { + Help, + Run(Options), +} + +impl Options { + fn parse(args: impl Iterator) -> Result> { + let mut workspace_root = env::current_dir()?; + let mut corpus_dir = 
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("corpus"); let mut knowledge_dir = None; - let mut query = "GraphCommandContext".to_string(); + let mut graph_cli = None; + let mut expected_corpus_sha256 = EXPECTED_CORPUS_SHA256.to_string(); let mut args = args.peekable(); + if args.peek().is_some_and(|arg| arg == "check") { + let _ = args.next(); + } + while let Some(arg) = args.next() { match arg.as_str() { - "--backend" => backend = required_value(&mut args, "--backend")?, - "--workspace" => { - workspace = PathBuf::from(required_value(&mut args, "--workspace")?) - } + "--workspace" => workspace_root = PathBuf::from(required_value(&mut args, &arg)?), + "--corpus" => corpus_dir = PathBuf::from(required_value(&mut args, &arg)?), "--knowledge-dir" => { - knowledge_dir = - Some(PathBuf::from(required_value(&mut args, "--knowledge-dir")?)); + knowledge_dir = Some(PathBuf::from(required_value(&mut args, &arg)?)); + } + "--orbit-graph-cli" => { + graph_cli = Some(PathBuf::from(required_value(&mut args, &arg)?)); + } + "--expected-corpus-sha256" => { + expected_corpus_sha256 = required_value(&mut args, &arg)?; + } + "--help" | "-h" => return Ok(ParsedOptions::Help), + other => { + return Err(HarnessError::Input(format!("unknown option `{other}`")).into()); } - "--query" => query = required_value(&mut args, "--query")?, - "--help" | "-h" => {} - other => return Err(input_error(format!("unknown smoke option `{other}`"))), } } - Ok(Self { - backend, - workspace, + Ok(ParsedOptions::Run(Options { + workspace_root, + corpus_dir, knowledge_dir, - query, - }) + graph_cli, + expected_corpus_sha256, + })) } } fn required_value( args: &mut impl Iterator, flag: &str, -) -> Result> { +) -> Result { args.next() - .ok_or_else(|| input_error(format!("missing value for `{flag}`"))) + .ok_or_else(|| HarnessError::Input(format!("missing value for `{flag}`"))) +} + +#[derive(Debug)] +struct Corpus { + checksum: String, + queries: Vec, +} + +impl Corpus { + fn load(corpus_dir: &Path) -> 
Result> { + let mut hasher = Sha256::new(); + let mut queries = Vec::new(); + for language in LANGUAGES { + let path = corpus_dir.join(format!("{language}.txt")); + let bytes = fs::read(path.as_path()).map_err(|source| HarnessError::ReadCorpus { + path: path.clone(), + source, + })?; + hasher.update(language.as_bytes()); + hasher.update([0]); + hasher.update(&bytes); + hasher.update([0xff]); + + let text = String::from_utf8(bytes).map_err(|source| HarnessError::CorpusUtf8 { + path: path.clone(), + source, + })?; + queries.extend(parse_corpus_file(language, path.as_path(), text.as_str())?); + } + + Ok(Self { + checksum: format!("{:x}", hasher.finalize()), + queries, + }) + } +} + +fn parse_corpus_file( + language: &str, + path: &Path, + text: &str, +) -> Result, HarnessError> { + let mut queries = Vec::new(); + for (index, raw_line) in text.lines().enumerate() { + let line = raw_line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + let mut parts = line.splitn(2, char::is_whitespace); + let kind = parts + .next() + .and_then(QueryKind::parse) + .ok_or_else(|| HarnessError::Input(format!("invalid query kind in {path:?}:{line}")))?; + let argument = parts + .next() + .map(str::trim) + .filter(|value| !value.is_empty()) + .ok_or_else(|| { + HarnessError::Input(format!( + "missing query argument in {}:{line}", + path.display() + )) + })?; + queries.push(CorpusQuery { + language: language.to_string(), + file: path.display().to_string(), + line_number: index + 1, + kind, + argument: argument.to_string(), + }); + } + Ok(queries) } -fn input_error(message: String) -> Box { - std::io::Error::new(std::io::ErrorKind::InvalidInput, message).into() +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "snake_case")] +enum QueryKind { + Search, + Show, + Refs, + Callees, + Impact, } + +impl QueryKind { + fn parse(value: &str) -> Option { + match value { + "search" => Some(Self::Search), + "show" => Some(Self::Show), + "refs" => 
Some(Self::Refs), + "callees" => Some(Self::Callees), + "impact" => Some(Self::Impact), + _ => None, + } + } + + fn as_str(self) -> &'static str { + match self { + Self::Search => "search", + Self::Show => "show", + Self::Refs => "refs", + Self::Callees => "callees", + Self::Impact => "impact", + } + } + + fn tolerance(self) -> &'static str { + match self { + Self::Search => "unordered set of (kind,file,name); v2 string/config extras ignored", + Self::Show => "source bytes byte-equal", + Self::Refs => "set of (file,line,kind) at confidence >= same_module", + Self::Callees => "set of (file,line,target_name)", + Self::Impact => "depth=3 set of touched symbol qualified names", + } + } +} + +#[derive(Debug)] +struct CorpusQuery { + language: String, + file: String, + line_number: usize, + kind: QueryKind, + argument: String, +} + +#[derive(Debug)] +struct Comparison { + tolerance: String, + v1_count: usize, + v2_count: usize, + ignored_v2_count: usize, + violations: Vec, +} + +impl Comparison { + fn new(tolerance: &str) -> Self { + Self { + tolerance: tolerance.to_string(), + v1_count: 0, + v2_count: 0, + ignored_v2_count: 0, + violations: Vec::new(), + } + } +} + +#[derive(Debug, Serialize)] +struct DiffReport { + #[serde(rename = "schemaVersion")] + schema_version: u8, + workspace: String, + corpus: CorpusReport, + summary: Summary, + results: Vec, +} + +#[derive(Debug, Serialize)] +struct CorpusReport { + path: String, + checksum: String, + entries: usize, +} + +#[derive(Debug, Serialize)] +struct Summary { + total: usize, + passed: usize, + failed: usize, +} + +#[derive(Debug, Serialize)] +struct QueryReport { + language: String, + source: String, + line: usize, + query: String, + selector: String, + tolerance: String, + status: QueryStatus, + v1_count: usize, + v2_count: usize, + #[serde(skip_serializing_if = "is_zero")] + ignored_v2_count: usize, + violations: Vec, +} + +impl QueryReport { + fn from_comparison(query: &CorpusQuery, status: QueryStatus, comparison: 
Comparison) -> Self { + Self { + language: query.language.clone(), + source: query.file.clone(), + line: query.line_number, + query: query.kind.as_str().to_string(), + selector: query.argument.clone(), + tolerance: comparison.tolerance, + status, + v1_count: comparison.v1_count, + v2_count: comparison.v2_count, + ignored_v2_count: comparison.ignored_v2_count, + violations: comparison.violations, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "snake_case")] +enum QueryStatus { + Pass, + Fail, +} + +#[derive(Debug, Serialize)] +struct Violation { + kind: String, + rows: Value, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)] +struct SearchRow { + kind: String, + file: Option, + name: String, +} + +impl From for SearchRow { + fn from(entry: SearchEntry) -> Self { + Self { + kind: entry.kind, + file: entry.file, + name: entry.name, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)] +struct RefRow { + file: String, + line: usize, + kind: String, +} + +impl From for RefRow { + fn from(entry: RefEntry) -> Self { + Self { + file: entry.file, + line: entry.line, + kind: entry.kind, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)] +struct CalleeRow { + file: String, + line: usize, + target_name: String, +} + +impl From for CalleeRow { + fn from(entry: CalleeEntry) -> Self { + Self { + file: entry.file, + line: entry.line, + target_name: entry.target_name, + } + } +} + +fn is_zero(value: &usize) -> bool { + *value == 0 +} + +#[derive(Debug)] +enum HarnessError { + Input(String), + ReadCorpus { + path: PathBuf, + source: io::Error, + }, + CorpusUtf8 { + path: PathBuf, + source: std::string::FromUtf8Error, + }, + CorpusDrift { + expected: String, + actual: String, + corpus_dir: PathBuf, + }, + ToleranceViolations(usize), +} + +impl fmt::Display for HarnessError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + 
Self::Input(message) => f.write_str(message), + Self::ReadCorpus { path, source } => { + write!(f, "failed to read corpus file {}: {source}", path.display()) + } + Self::CorpusUtf8 { path, source } => { + write!(f, "corpus file {} is not UTF-8: {source}", path.display()) + } + Self::CorpusDrift { + expected, + actual, + corpus_dir, + } => write!( + f, + "graph-equiv corpus checksum drifted for {}: expected {expected}, got {actual}; update the frozen corpus and EXPECTED_CORPUS_SHA256 together after review", + corpus_dir.display() + ), + Self::ToleranceViolations(count) => { + write!( + f, + "graph equivalence found {count} out-of-tolerance diff(s)" + ) + } + } + } +} + +impl Error for HarnessError {}