From f17071ff6abf67f4dac861e70c8e7708cf68da71 Mon Sep 17 00:00:00 2001 From: NaeemHaque Date: Thu, 18 Jun 2026 19:10:33 +0600 Subject: [PATCH] fix: resolve scoped/static call targets (Class::method) to graph nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scoped/static calls — PHP `Mailer::send()`, Rust `Notifier::notify()`, `Self::method()` — were stored as CALLS edges with the intermediate `Class::method` target form, which matches neither the qualified-name (`::Class.method`) nor the bare-name lookup path. The edges dangled, so callers_of / get_impact_radius / tests_for silently under-reported these callers (notably every Rust associated-fn/constructor call). Add a conservative post-build resolver (scoped_resolver.py) that rewrites resolvable `Class::method` targets to the canonical node qualified name, mirroring the existing Spring/Temporal/ReScript resolvers, wired into both full build and incremental update. It only rewrites unambiguous (class, method) matches or import-disambiguated ones; external/unknown targets like `Vec::new` are left untouched so no false edge is created. self/static/Self/ this resolve to the enclosing class; parent to its base. Resolved edges are tagged confidence_tier=INFERRED. Co-Authored-By: Claude Opus 4.8 (1M context) --- code_review_graph/incremental.py | 27 +++ code_review_graph/scoped_resolver.py | 170 +++++++++++++++ tests/test_scoped_resolver.py | 296 +++++++++++++++++++++++++++ 3 files changed, 493 insertions(+) create mode 100644 code_review_graph/scoped_resolver.py create mode 100644 tests/test_scoped_resolver.py diff --git a/code_review_graph/incremental.py b/code_review_graph/incremental.py index 81dc3026..6227be18 100644 --- a/code_review_graph/incremental.py +++ b/code_review_graph/incremental.py @@ -93,6 +93,27 @@ def _run_temporal_resolver(store: GraphStore) -> Optional[dict]: logger.warning("Temporal resolver failed: %s", exc) return None + +# File extensions whose grammars emit resolvable ``Class::method`` scoped-call +# targets. Only PHP and Rust produce a dangling Class::method CALLS edge that +# this pass can rewrite: C++/Ruby emit no CALLS edge for scoped calls (a +# separate extraction gap), and R's ``pkg::fn`` is external package access with +# no in-graph target. Adding those extensions would only trigger wasted +# full-graph scans, so they are intentionally excluded. +_SCOPED_CALL_EXTS = (".php", ".rs") + + +def _run_scoped_resolver(store: GraphStore) -> Optional[dict]: + """Run the scoped/static call (``Class::method``) resolver, swallowing any + failure so build never fails because of it. Returns stats or None on error. + """ + try: + from .scoped_resolver import resolve_scoped_calls + return resolve_scoped_calls(store) + except Exception as exc: # noqa: BLE001 - best-effort post-pass + logger.warning("Scoped call resolver failed: %s", exc) + return None + # Default ignore patterns (in addition to .gitignore). # # `/**` patterns are matched at any depth by _should_ignore, so @@ -904,6 +925,7 @@ def full_build( rescript_stats = _run_rescript_resolver(store) spring_stats = _run_spring_resolver(store) temporal_stats = _run_temporal_resolver(store) + scoped_stats = _run_scoped_resolver(store) return { "files_parsed": len(files), @@ -913,6 +935,7 @@ def full_build( "rescript_resolution": rescript_stats, "spring_resolution": spring_stats, "temporal_resolution": temporal_stats, + "scoped_resolution": scoped_stats, } @@ -1043,6 +1066,9 @@ def incremental_update( spring_stats = _run_spring_resolver(store) if spring_changed else None temporal_stats = _run_temporal_resolver(store) if spring_changed else None + scoped_changed = any(rp.endswith(_SCOPED_CALL_EXTS) for rp in all_files) + scoped_stats = _run_scoped_resolver(store) if scoped_changed else None + return { "files_updated": len(all_files), "total_nodes": total_nodes, @@ -1053,6 +1079,7 @@ def incremental_update( "rescript_resolution": rescript_stats, "spring_resolution": spring_stats, "temporal_resolution": temporal_stats, + "scoped_resolution": scoped_stats, } diff --git a/code_review_graph/scoped_resolver.py b/code_review_graph/scoped_resolver.py new file mode 100644 index 00000000..a4f33b88 --- /dev/null +++ b/code_review_graph/scoped_resolver.py @@ -0,0 +1,170 @@ +"""Post-build scoped/static call resolver (``Class::method`` targets). + +Grammars that emit a ``::`` scope separator in call expressions store a CALLS +edge whose target is the intermediate ``Class::method`` form rather than a +node qualified name: + + PHP ``Mailer::send($x)`` -> target ``Mailer::send`` + Rust ``Notifier::notify(x)`` -> target ``Notifier::notify`` + +Every node is keyed by its dotted qualified name (``::Class.method``), +so these ``Class::method`` targets match neither the qualified-name nor the +bare-name lookup paths and dangle forever — ``callers_of`` / +``get_impact_radius`` / ``tests_for`` silently under-report the caller. + +This module runs as a post-build pass (like the Spring/Temporal/ReScript +resolvers) and rewrites resolvable ``Class::method`` targets to the canonical +node qualified name. Resolution is conservative: an edge is only rewritten +when the ``(class, method)`` pair maps to exactly one node, or to exactly one +node whose file the source file imports. Unresolvable targets (external +types such as ``Vec::new``) are left untouched so no false edge is created. + +The intra-class receivers ``self`` / ``static`` / ``Self`` / ``this`` resolve +to the enclosing class of the call site. (Languages whose grammars strip +these keywords — e.g. PHP emits ``self::m()`` as the bare target ``m`` — are +already handled by the same-file resolver and never reach this pass.) + +Safe to call multiple times — already-resolved edges (whose target is a node +qualified name, i.e. has a file path before ``::``) are not re-selected. +""" + +from __future__ import annotations + +import json +import logging +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .graph import GraphStore + +logger = logging.getLogger(__name__) + +# Receivers that refer to the enclosing class rather than a named class. +# Rust uses ``Self``; ``self`` is kept as a defensive guard. (``static`` / +# ``this`` never reach this pass for the supported languages.) +_SELF_RECEIVERS = {"self", "Self"} + + +def _is_scoped_target(target: str) -> bool: + """True if *target* is a 2-part ``Class::method`` form (not a node name). + + Two filters: + + * **Exactly one** ``::`` — a real ``Class::method``. Multi-segment paths + such as Rust ``a::b::run`` (module ``a`` -> submodule ``b`` -> free fn + ``run``) have more than one ``::`` and are *not* resolved, since their + leading segments are module paths, not a class name — resolving them by + the last two segments would fabricate edges to unrelated classes. + * The segment before ``::`` is a class (or PHP namespaced class), not a + file path. Node qualified names carry a file path there + (``src/Mailer.php::Mailer.send``), which has a ``/`` or a file-extension + ``.``. ``\\`` is allowed so PHP namespaces (``App\\Mailer``) qualify; + Windows file paths are still excluded by their ``.`` extension. + """ + if target.count("::") != 1: + return False + prefix = target.split("::", 1)[0] + return not any(ch in prefix for ch in ("/", ".")) + + +def _enclosing(source_qualified: str) -> tuple[str | None, str | None]: + """Return ``(file, enclosing_class)`` for a ``::Class.method`` source.""" + if "::" not in source_qualified: + return None, None + file_part, after = source_qualified.split("::", 1) + enclosing_class = after.split(".")[0] if "." in after else None + return file_part, enclosing_class + + +def resolve_scoped_calls(store: GraphStore) -> dict: + """Resolve ``Class::method`` CALLS targets to canonical node names. + + Returns a dict with resolution counts for telemetry. + """ + conn = store._conn + + candidates = [ + row + for row in conn.execute( + "SELECT id, source_qualified, target_qualified, file_path, extra " + "FROM edges WHERE kind = 'CALLS' AND target_qualified LIKE '%::%'" + ).fetchall() + if _is_scoped_target(row["target_qualified"]) + ] + if not candidates: + return {"calls_resolved": 0} + + # (class_name, method_name) -> list of node qualified_names + method_to_qual: dict[tuple[str, str], list[str]] = {} + for row in conn.execute( + "SELECT name, qualified_name, parent_name FROM nodes " + "WHERE kind IN ('Function', 'Test') AND parent_name IS NOT NULL" + ).fetchall(): + method_to_qual.setdefault((row["parent_name"], row["name"]), []).append( + row["qualified_name"] + ) + + # source file -> set of imported files (for disambiguation) + import_targets: dict[str, set[str]] = {} + for row in conn.execute( + "SELECT DISTINCT file_path, target_qualified FROM edges " + "WHERE kind = 'IMPORTS_FROM'" + ).fetchall(): + target = row["target_qualified"] + target_file = target.split("::", 1)[0] if "::" in target else target + import_targets.setdefault(row["file_path"], set()).add(target_file) + + resolved = 0 + for edge in candidates: + try: + extra = json.loads(edge["extra"] or "{}") + except (json.JSONDecodeError, TypeError): + extra = {} + if extra.get("scoped_resolved"): + continue + + # _is_scoped_target guarantees exactly one "::", so this is 2 parts. + class_name, method = edge["target_qualified"].split("::", 1) + # Strip a PHP namespace prefix to the bare class name (App\Mailer -> Mailer). + if "\\" in class_name: + class_name = class_name.rsplit("\\", 1)[-1] + + src_file, enclosing_class = _enclosing(edge["source_qualified"]) + same_file_only = False + + if class_name in _SELF_RECEIVERS: + if not enclosing_class: + continue + class_name = enclosing_class + same_file_only = True + + cands = method_to_qual.get((class_name, method), []) + if same_file_only and src_file is not None: + cands = [c for c in cands if c.split("::", 1)[0] == src_file] + + if not cands: + continue + if len(cands) == 1: + new_target = cands[0] + else: + imported_files = import_targets.get(edge["file_path"], set()) + imported = [c for c in cands if c.split("::", 1)[0] in imported_files] + if len(imported) == 1: + new_target = imported[0] + else: + continue + + extra["scoped_resolved"] = True + conn.execute( + "UPDATE edges SET target_qualified = ?, extra = ?, " + "confidence = ?, confidence_tier = ? WHERE id = ?", + (new_target, json.dumps(extra), 0.9, "INFERRED", edge["id"]), + ) + resolved += 1 + logger.debug("Scoped resolved: %s -> %s", edge["target_qualified"], new_target) + + if resolved: + conn.commit() + + logger.info("Scoped call resolver: resolved %d Class::method CALLS edges", resolved) + return {"calls_resolved": resolved} diff --git a/tests/test_scoped_resolver.py b/tests/test_scoped_resolver.py new file mode 100644 index 00000000..4d7169af --- /dev/null +++ b/tests/test_scoped_resolver.py @@ -0,0 +1,296 @@ +"""Tests for the post-build scoped/static call resolver (``Class::method``). + +Languages whose grammars emit a ``::`` scope separator in call expressions +(PHP ``scoped_call_expression``, Rust ``scoped_identifier``) store a CALLS +edge whose target is the intermediate ``Class::method`` form (e.g. +``Mailer::send``). Nodes are keyed by their dotted qualified name +(``::Class.method``), so those edges dangle and ``callers_of`` / +``get_impact_radius`` silently miss the caller. + +The scoped resolver rewrites resolvable ``Class::method`` targets to the +canonical node qualified name as a post-build pass, mirroring the existing +Spring/Temporal/ReScript resolvers. +""" + +from __future__ import annotations + +from code_review_graph.graph import GraphStore +from code_review_graph.incremental import full_build, incremental_update +from code_review_graph.parser import EdgeInfo, NodeInfo +from code_review_graph.scoped_resolver import resolve_scoped_calls +from code_review_graph.tools.query import query_graph + + +class TestScopedCallResolver: + def _build(self, tmp_path): + (tmp_path / ".git").mkdir() + src = tmp_path / "src" + src.mkdir() + + # --- PHP: cross-file static call + self:: call --- + (src / "Mailer.php").write_text( + " bool { true }\n" + "}\n" + ) + (src / "account.rs").write_text( + "struct Account;\n" + "impl Account {\n" + " fn signup(email: &str) -> bool {\n" + " Notifier::notify(email)\n" + " }\n" + " fn resignup(email: &str) -> bool {\n" + " Self::signup(email)\n" + " }\n" + " fn external(email: &str) -> bool {\n" + " Missing::gone(email)\n" + " }\n" + " fn deep(email: &str) -> bool {\n" + " a::b::run(email)\n" + " }\n" + "}\n" + ) + # A struct `b` with method `run` exists elsewhere — the multi-segment + # path a::b::run must NOT be misattributed to it. + (src / "widgets.rs").write_text( + "struct b;\n" + "impl b {\n" + " fn run(x: &str) -> bool { true }\n" + "}\n" + ) + + graph_dir = tmp_path / ".code-review-graph" + graph_dir.mkdir() + store = GraphStore(graph_dir / "graph.db") + result = full_build(tmp_path, store) + return store, result + + def _targets_from(self, store, source_suffix): + rows = store._conn.execute( + "SELECT target_qualified FROM edges " + "WHERE kind='CALLS' AND source_qualified LIKE ?", + (f"%{source_suffix}",), + ).fetchall() + return {r["target_qualified"] for r in rows} + + def _qn(self, store, suffix): + row = store._conn.execute( + "SELECT qualified_name FROM nodes WHERE qualified_name LIKE ?", + (f"%{suffix}",), + ).fetchone() + return row["qualified_name"] if row else None + + # ------------------------------------------------------------------ + # PHP + # ------------------------------------------------------------------ + def test_php_cross_file_static_call_resolves_to_node(self, tmp_path): + store, _ = self._build(tmp_path) + targets = self._targets_from(store, "SignupController.register") + assert any(t.endswith("Mailer.php::Mailer.send") for t in targets), ( + f"static call Mailer::send not resolved to node, got {targets}" + ) + assert "Mailer::send" not in targets + + def test_php_callers_of_finds_static_caller(self, tmp_path): + store, _ = self._build(tmp_path) + send_qn = self._qn(store, "Mailer.php::Mailer.send") + try: + result = query_graph( + pattern="callers_of", + target=send_qn, + repo_root=str(tmp_path), + detail_level="standard", + ) + finally: + store.close() + assert result["status"] == "ok", result + names = {r["name"] for r in result["results"]} + assert "register" in names, f"register not found as caller: {result}" + + def test_php_self_call_resolved_endstate(self, tmp_path): + # PHP strips `self`, so `self::register` arrives bare and is already + # qualified by the same-file resolver before this pass. This is a + # regression guard on the end state, not on the scoped resolver itself. + store, _ = self._build(tmp_path) + targets = self._targets_from(store, "SignupController.reuse") + assert any( + t.endswith("SignupController.php::SignupController.register") + for t in targets + ), f"self::register not resolved to enclosing class, got {targets}" + + def test_php_namespaced_static_call_resolves(self, tmp_path): + store, _ = self._build(tmp_path) + targets = self._targets_from(store, "NsCaller.go") + assert any(t.endswith("Sender.php::Sender.dispatch") for t in targets), ( + f"namespaced App\\Sender::dispatch not resolved, got {targets}" + ) + assert not any("\\" in t for t in targets) + + # ------------------------------------------------------------------ + # Rust + # ------------------------------------------------------------------ + def test_rust_cross_file_associated_call_resolves(self, tmp_path): + store, _ = self._build(tmp_path) + targets = self._targets_from(store, "account.rs::Account.signup") + assert any(t.endswith("notifier.rs::Notifier.notify") for t in targets), ( + f"Notifier::notify not resolved to node, got {targets}" + ) + assert "Notifier::notify" not in targets + + def test_rust_self_call_resolves_to_enclosing_type(self, tmp_path): + store, _ = self._build(tmp_path) + targets = self._targets_from(store, "account.rs::Account.resignup") + assert any(t.endswith("account.rs::Account.signup") for t in targets), ( + f"Self::signup not resolved to enclosing type, got {targets}" + ) + + # ------------------------------------------------------------------ + # Safety: never invent edges for unresolvable / multi-segment targets + # ------------------------------------------------------------------ + def test_external_scoped_target_left_untouched(self, tmp_path): + store, _ = self._build(tmp_path) + targets = self._targets_from(store, "account.rs::Account.external") + assert targets == {"Missing::gone"}, ( + f"external target should be untouched, got {targets}" + ) + + def test_multisegment_target_not_falsely_resolved(self, tmp_path): + # a::b::run is a module path, NOT Class::method. It must never be + # misattributed to the unrelated struct `b` with method `run`. + store, _ = self._build(tmp_path) + targets = self._targets_from(store, "account.rs::Account.deep") + assert not any(t.endswith("widgets.rs::b.run") for t in targets), ( + f"multi-segment path falsely resolved to unrelated node: {targets}" + ) + + # ------------------------------------------------------------------ + # Telemetry + idempotency + # ------------------------------------------------------------------ + def test_stats_reported_in_build_result(self, tmp_path): + _, result = self._build(tmp_path) + stats = result["scoped_resolution"] + # Mailer::send, App\Sender::dispatch, Notifier::notify, Self::signup. + # (PHP self::register arrives bare; a::b::run and Missing::gone are + # intentionally not resolved.) + assert stats["calls_resolved"] >= 4, stats + + def test_resolver_is_idempotent(self, tmp_path): + store, _ = self._build(tmp_path) + second = resolve_scoped_calls(store) + assert second["calls_resolved"] == 0, second + + def test_resolved_edges_marked_inferred(self, tmp_path): + store, _ = self._build(tmp_path) + row = store._conn.execute( + "SELECT confidence_tier FROM edges " + "WHERE kind='CALLS' AND target_qualified LIKE '%Mailer.php::Mailer.send'" + ).fetchone() + assert row["confidence_tier"] == "INFERRED" + + def test_incremental_reparse_reresolves_scoped_call(self, tmp_path): + # On incremental re-parse a file's edges are deleted and re-created in + # the raw `Class::method` form; the resolver must re-run and re-resolve. + store, _ = self._build(tmp_path) + caller = tmp_path / "src" / "SignupController.php" + caller.write_text(caller.read_text().replace( + "return Mailer::send($email);", + "$x = 1;\n return Mailer::send($email);", + )) + result = incremental_update( + tmp_path, store, changed_files=["src/SignupController.php"], + ) + assert result["scoped_resolution"] is not None + targets = self._targets_from(store, "SignupController.register") + assert any(t.endswith("Mailer.php::Mailer.send") for t in targets), ( + f"scoped call not re-resolved after incremental update: {targets}" + ) + assert "Mailer::send" not in targets + + +class TestScopedResolverDisambiguation: + """Seeded tests for same-name class collisions (two ``Box.open`` defs).""" + + def _seed(self, tmp_path, with_import: bool): + store = GraphStore(tmp_path / "graph.db") + for box_file in ("boxa.php", "boxb.php"): + store.upsert_node(NodeInfo( + kind="Function", name="open", file_path=box_file, + line_start=1, line_end=1, language="php", parent_name="Box", + )) + store.upsert_node(NodeInfo( + kind="Function", name="use_box", file_path="user.php", + line_start=1, line_end=1, language="php", parent_name="User", + )) + store.upsert_edge(EdgeInfo( + kind="CALLS", source="user.php::User.use_box", + target="Box::open", file_path="user.php", line=1, + )) + if with_import: + store.upsert_edge(EdgeInfo( + kind="IMPORTS_FROM", source="user.php", + target="boxa.php", file_path="user.php", line=1, + )) + store.commit() + return store + + def _call_target(self, store): + return store._conn.execute( + "SELECT target_qualified FROM edges " + "WHERE kind='CALLS' AND source_qualified='user.php::User.use_box'" + ).fetchone()["target_qualified"] + + def test_collision_disambiguated_by_import(self, tmp_path): + store = self._seed(tmp_path, with_import=True) + try: + stats = resolve_scoped_calls(store) + assert stats["calls_resolved"] == 1 + assert self._call_target(store) == "boxa.php::Box.open" + finally: + store.close() + + def test_collision_without_import_left_unresolved(self, tmp_path): + store = self._seed(tmp_path, with_import=False) + try: + stats = resolve_scoped_calls(store) + assert stats["calls_resolved"] == 0 + # ambiguous (two Box.open, no import) -> left untouched, no false edge + assert self._call_target(store) == "Box::open" + finally: + store.close()