diff --git a/code_review_graph/parser.py b/code_review_graph/parser.py index c55b2e8f..85989891 100644 --- a/code_review_graph/parser.py +++ b/code_review_graph/parser.py @@ -3093,11 +3093,11 @@ def _julia_short_func_name(self, call_expr) -> Optional[str]: for child in call_expr.children: if child.type == "identifier": return child.text.decode("utf-8", errors="replace") + if child.type == "operator": + # bare operator def: ``+(a, b) = ...`` + return child.text.decode("utf-8", errors="replace") if child.type == "field_expression": - for ident in reversed(child.children): - if ident.type == "identifier": - return ident.text.decode("utf-8", errors="replace") - return None + return self._julia_field_method_name(child) if child.type == "parametrized_type_expression": for ident in child.children: if ident.type == "identifier": @@ -3128,6 +3128,89 @@ def _julia_call_first_identifier(self, call_expr) -> Optional[str]: return child.text.decode("utf-8", errors="replace") return None + def _julia_field_parts(self, field_expr) -> list[str]: + """Flatten a (possibly nested) ``field_expression`` to its dotted + identifier parts in source order. ``A.B.f`` nests as + ``field_expression(field_expression(A, B), f)``, so recurse. + """ + parts: list[str] = [] + for child in field_expr.children: + if child.type == "field_expression": + parts.extend(self._julia_field_parts(child)) + elif child.type == "identifier": + parts.append(child.text.decode("utf-8", errors="replace")) + return parts + + @staticmethod + def _julia_component_name(node) -> Optional[str]: + """Name of a field_expression's final component: a plain + ``identifier`` or a quoted operator (``:+`` / ``:(==)``). + """ + if node.type == "identifier": + return node.text.decode("utf-8", errors="replace") + if node.type == "quote_expression": + for c in node.children: + if c.type == "operator": + return c.text.decode("utf-8", errors="replace") + if c.type == "parenthesized_expression": + for cc in c.children: + if cc.type == "operator": + return cc.text.decode("utf-8", errors="replace") + return None + + def _julia_field_qualifier(self, field_expr) -> Optional[str]: + """Module qualifier of a ``field_expression`` (``A.B.f`` -> ``A.B``, + ``Base.:+`` -> ``Base``): the dotted prefix before the final + component. None when there is no prefix. + """ + kids = field_expr.children + if len(kids) < 2: + return None + prefix = kids[0] + if prefix.type == "field_expression": + parts = self._julia_field_parts(prefix) + return ".".join(parts) if parts else None + if prefix.type == "identifier": + return prefix.text.decode("utf-8", errors="replace") + return None + + def _julia_field_method_name(self, field_expr) -> Optional[str]: + """Final method name of a field_expression (``A.B.f`` -> ``f``, + ``Base.:+`` -> ``+``). + """ + kids = field_expr.children + return self._julia_component_name(kids[-1]) if kids else None + + def _julia_def_qualifier(self, func_def) -> Optional[str]: + """Module qualifier of a ``function Mod.f(...)`` definition. + + Peels ``where`` / typed-return wrappers around the signature to reach + the inner ``call_expression`` (or a bare ``field_expression`` for a + stub ``function Mod.f end``), then returns its module prefix. + """ + for sub in func_def.children: + if sub.type != "signature": + continue + scope = sub + for _ in range(2): + wrapper = next( + (c for c in scope.children + if c.type in ("where_expression", "typed_expression")), + None, + ) + if wrapper is None: + break + scope = wrapper + for inner in scope.children: + if inner.type == "field_expression": + return self._julia_field_qualifier(inner) + if inner.type == "call_expression": + if inner.children and inner.children[0].type == "field_expression": + return self._julia_field_qualifier(inner.children[0]) + return None + return None + return None + def _extract_julia_constructs( self, child, @@ -3148,6 +3231,48 @@ def _extract_julia_constructs( Returns True if the child was fully handled and should be skipped by the main dispatch loop. """ + # --- const type alias: ``const Name = T{...}`` -> Type node --- + # Only parametrized / curly RHS forms are treated as aliases; value + # bindings (``const MAX = 42``) stay on the generic path. + if node_type == "const_statement": + assign = next( + (c for c in child.children if c.type == "assignment"), None, + ) + if assign is not None and assign.children: + name_node = assign.children[0] + rhs = assign.children[-1] if len(assign.children) >= 3 else None + if ( + name_node.type == "identifier" + and rhs is not None + and rhs.type in ( + "parametrized_type_expression", "curly_expression", + ) + ): + name = name_node.text.decode("utf-8", errors="replace") + qualified = self._qualify(name, file_path, enclosing_class) + nodes.append(NodeInfo( + kind="Type", + name=name, + file_path=file_path, + line_start=child.start_point[0] + 1, + line_end=child.end_point[0] + 1, + language=language, + parent_name=enclosing_class, + )) + container = ( + self._qualify(enclosing_class, file_path, None) + if enclosing_class + else file_path + ) + edges.append(EdgeInfo( + kind="CONTAINS", + source=container, + target=qualified, + file_path=file_path, + line=child.start_point[0] + 1, + )) + return True + # --- Short-form function: assignment with call_expression LHS --- # ``f(x) = expr`` or ``Base.f(x) = expr``. Anything else with an # ``=`` (plain variable, const) is left to the generic path. @@ -3168,6 +3293,13 @@ def _extract_julia_constructs( qualified = self._qualify( name, file_path, enclosing_class, ) + # ``Base.f(x) = ...`` short-form qualified method: keep the + # module qualifier the same way as the long form. + short_extra: dict = {} + if lhs.children and lhs.children[0].type == "field_expression": + qmod = self._julia_field_qualifier(lhs.children[0]) + if qmod: + short_extra["julia_module_qualifier"] = qmod nodes.append(NodeInfo( kind=kind, name=name, @@ -3177,6 +3309,7 @@ def _extract_julia_constructs( language=language, parent_name=enclosing_class, is_test=is_test, + extra=short_extra, )) container = ( self._qualify(enclosing_class, file_path, None) @@ -3190,17 +3323,36 @@ def _extract_julia_constructs( file_path=file_path, line=child.start_point[0] + 1, )) + if short_extra.get("julia_module_qualifier"): + edges.append(EdgeInfo( + kind="REFERENCES", + source=qualified, + target=short_extra["julia_module_qualifier"], + file_path=file_path, + line=child.start_point[0] + 1, + extra={"julia_qualified_def": True}, + )) # Recurse into the RHS only (children after the ``=`` # operator) with this function as the enclosing scope # so internal calls wire up correctly. Visiting the # whole assignment would re-treat the LHS # ``call_expression`` as a self-call. + call_types_jl = set(_CALL_TYPES.get(language, [])) seen_op = False for sub in child.children: if not seen_op: if sub.type == "operator": seen_op = True continue + # A one-liner whose RHS *is* the call (``f(x) = g(x)``) + # needs the call node itself dispatched; + # _extract_from_tree only visits a node's children. + if sub.type in call_types_jl: + self._extract_calls( + sub, source, language, file_path, nodes, edges, + enclosing_class, name, + import_map, defined_names, _depth + 1, + ) self._extract_from_tree( sub, source, language, file_path, nodes, edges, enclosing_class=enclosing_class, @@ -4445,6 +4597,13 @@ def _extract_functions( child, name, enclosing_class, file_path, edges, ) + # Julia: ``function Mod.f(...)`` carries its module qualifier on the + # node so the definition is distinguishable from a local ``f``. + if language == "julia" and child.type == "function_definition": + qmod = self._julia_def_qualifier(child) + if qmod: + method_extra["julia_module_qualifier"] = qmod + node = NodeInfo( kind=kind, name=name, @@ -4475,56 +4634,21 @@ def _extract_functions( )) # Julia: ``function Base.show(io, x)`` extends a foreign module's - # method. Record a REFERENCES edge from the function to the - # qualifier module so cross-module links stay visible even though - # the function's local name is just the method name. - if language == "julia" and child.type == "function_definition": - for sub in child.children: - if sub.type != "signature": - continue - call_expr = None - scope = sub - # Peel where_expression / typed_expression wrappers so we - # land on the inner call_expression regardless of - # ``func(x) where T`` or ``func(x)::T`` sugar. - for _ in range(2): - found_wrapper = False - for inner in scope.children: - if inner.type in ( - "where_expression", "typed_expression", - ): - scope = inner - found_wrapper = True - break - if not found_wrapper: - break - for inner in scope.children: - if inner.type == "call_expression": - call_expr = inner - break - if call_expr is None: - break - if call_expr.children and call_expr.children[0].type == "field_expression": - field_expr = call_expr.children[0] - parts: list[str] = [] - for ident in field_expr.children: - if ident.type == "identifier": - parts.append( - ident.text.decode("utf-8", errors="replace"), - ) - # Module qualifier = everything except the final method - # name. - if len(parts) >= 2: - qualifier = ".".join(parts[:-1]) - edges.append(EdgeInfo( - kind="REFERENCES", - source=qualified, - target=qualifier, - file_path=file_path, - line=child.start_point[0] + 1, - extra={"julia_qualified_def": True}, - )) - break + # method. Record a REFERENCES edge to the qualifier module so the + # cross-module link stays visible (the local name is just the method). + if ( + language == "julia" + and child.type == "function_definition" + and method_extra.get("julia_module_qualifier") + ): + edges.append(EdgeInfo( + kind="REFERENCES", + source=qualified, + target=method_extra["julia_module_qualifier"], + file_path=file_path, + line=child.start_point[0] + 1, + extra={"julia_qualified_def": True}, + )) # Solidity: modifier invocations on functions -> CALLS edges if language == "solidity": @@ -4696,6 +4820,17 @@ def _extract_calls( if receiver: call_extra["receiver"] = receiver + # Julia: ``Mod.f(...)`` keeps its module qualifier on the edge + # instead of collapsing to a bare ``f``. + if ( + language == "julia" + and child.children + and child.children[0].type == "field_expression" + ): + qmod = self._julia_field_qualifier(child.children[0]) + if qmod: + call_extra["julia_call_module"] = qmod + # When a receiver is present, skip scope-based resolution: the method # lives on the receiver's type, not in the current file's scope. # The spring_resolver post-pass will do the correct cross-type lookup. @@ -5922,13 +6057,15 @@ def _leaf_name(qi): return target.text.decode( "utf-8", errors="replace", ) + if target.type == "operator": + # bare operator def: ``function +(a, b) end`` + return target.text.decode( + "utf-8", errors="replace", + ) if target.type == "field_expression": - # Qualified: last identifier is method name - for ident in reversed(target.children): - if ident.type == "identifier": - return ident.text.decode( - "utf-8", errors="replace", - ) + return self._julia_field_method_name( + target, + ) if target.type == "parametrized_type_expression": # Parametric constructor: Foo{T}(x) = ... for p in target.children: @@ -5937,6 +6074,15 @@ def _leaf_name(qi): "utf-8", errors="replace", ) return None + # Stub / generic-function declaration: ``function foo + # end`` (and qualified ``function Mod.foo end``) parse + # as a signature with a direct identifier / + # field_expression child, no call_expression. + for sub in call.children: + if sub.type == "identifier": + return sub.text.decode("utf-8", errors="replace") + if sub.type == "field_expression": + return self._julia_field_method_name(sub) return None if node.type in ("struct_definition", "abstract_definition"): for child in node.children: @@ -6344,6 +6490,18 @@ def _import_path_text(n) -> str: parts.append(sub.text.decode("utf-8", errors="replace")) return ".".join(parts) + def _alias_real_name(alias_node) -> Optional[str]: + # ``X as Y`` / ``A.B as Y``: the dependency is on the real + # name (first child), not the local alias. + for sub in alias_node.children: + if sub.type == "as": + break + if sub.type == "identifier": + return sub.text.decode("utf-8", errors="replace") + if sub.type == "import_path": + return _import_path_text(sub) + return None + for child in node.children: if child.type == "identifier": imports.append( @@ -6353,6 +6511,10 @@ def _import_path_text(n) -> str: path = _import_path_text(child) if path: imports.append(path) + elif child.type == "import_alias": + real = _alias_real_name(child) + if real: + imports.append(real) elif child.type == "selected_import": module_name: Optional[str] = None seen_colon = False @@ -6375,6 +6537,10 @@ def _import_path_text(n) -> str: "utf-8", errors="replace", ) imports.append(f"{module_name}.{imported}") + elif sub.type == "import_alias" and module_name: + real = _alias_real_name(sub) + if real: + imports.append(f"{module_name}.{real}") elif language == "gdscript": # ``extends Node`` → type > identifier("Node") # ``extends "res://path.gd"`` → string literal diff --git a/tests/fixtures/sample.jl b/tests/fixtures/sample.jl index f9c6e43e..12a7b589 100644 --- a/tests/fixtures/sample.jl +++ b/tests/fixtures/sample.jl @@ -4,6 +4,7 @@ using LinearAlgebra using Statistics: mean, std import Base: show, print import JSON +import DataFrames as DF export greet, Dog, process public square, add @@ -34,6 +35,24 @@ add(a, b) = a + b square(x) = x^2 +delegate(x) = greet(x) + +function bar end + +Base.length(d::Dog) = d.age + +Base.:+(a::Dog, b::Dog) = Dog(a.name, a.age + b.age) + +function analyze(v::Vector{Float64}) + return LinearAlgebra.norm(v) +end + +function spawn_work(v) + return LinearAlgebra.BLAS.gemv('N', 1.0, v, v) +end + +const FloatVec = Vector{Float64} + const MY_CONST = 42 macro sayhello(name) diff --git a/tests/test_multilang.py b/tests/test_multilang.py index afda355e..265bfd79 100644 --- a/tests/test_multilang.py +++ b/tests/test_multilang.py @@ -1654,6 +1654,79 @@ def test_finds_public_symbols(self): assert "square" in trailing assert "add" in trailing + def test_finds_function_stub(self): + # ``function bar end`` (generic-function declaration, no signature + # call_expression) must still register a node. + funcs = {n.name for n in self.nodes if n.kind == "Function"} + assert "bar" in funcs + + def test_qualified_long_def_records_module(self): + # ``function Base.show(...)`` -> node name "show", module qualifier + # "Base" preserved on the node so it is distinguishable from a local + # ``show``. + by_name = {n.name: n for n in self.nodes if n.kind == "Function"} + assert by_name["show"].extra.get("julia_module_qualifier") == "Base" + + def test_qualified_short_def_records_module(self): + # ``Base.length(d::Dog) = ...`` short-form qualified method. + by_name = {n.name: n for n in self.nodes if n.kind == "Function"} + assert "length" in by_name + assert by_name["length"].extra.get("julia_module_qualifier") == "Base" + + def test_qualified_call_records_module(self): + # ``LinearAlgebra.norm(v)`` -> CALLS edge keeps the module qualifier + # instead of collapsing to a bare ``norm``. + calls = [ + e for e in self.edges + if e.kind == "CALLS" + and e.extra + and e.extra.get("julia_call_module") == "LinearAlgebra" + ] + assert any(e.target.split("::")[-1] == "norm" for e in calls) + + def test_multi_segment_call_module(self): + # ``LinearAlgebra.BLAS.gemv(...)`` keeps the full dotted module path, + # not just the innermost segment. + calls = [ + e for e in self.edges + if e.kind == "CALLS" + and e.extra.get("julia_call_module") == "LinearAlgebra.BLAS" + ] + assert any(e.target.split("::")[-1] == "gemv" for e in calls) + + def test_short_form_body_call(self): + # ``delegate(x) = greet(x)`` -> the RHS call must be captured (the + # one-liner body is the call itself, not a block). + def tail(q): + return q.split("::")[-1].split(".")[-1] + calls = [e for e in self.edges if e.kind == "CALLS"] + srcs_targets = {(tail(e.source), tail(e.target)) for e in calls} + assert ("delegate", "greet") in srcs_targets + + def test_qualified_operator_def(self): + # ``Base.:+(a, b) = ...`` -> method name is the operator, module is + # ``Base``; the module must not leak in as a function node. + by_name = {n.name: n for n in self.nodes if n.kind == "Function"} + assert "+" in by_name + assert by_name["+"].extra.get("julia_module_qualifier") == "Base" + assert "Base" not in by_name + + def test_finds_type_alias(self): + # ``const FloatVec = Vector{Float64}`` -> Type node. + types = {n.name for n in self.nodes if n.kind == "Type"} + assert "FloatVec" in types + + def test_value_const_not_a_type(self): + # ``const MY_CONST = 42`` is a value binding, not a type alias. + types = {n.name for n in self.nodes if n.kind == "Type"} + assert "MY_CONST" not in types + + def test_finds_aliased_import(self): + # ``import DataFrames as DF`` records a dependency on the real module. + imports = [e for e in self.edges if e.kind == "IMPORTS_FROM"] + targets = {e.target for e in imports} + assert "DataFrames" in targets + def test_qualified_function_references_base(self): refs = [e for e in self.edges if e.kind == "REFERENCES"] # function Base.show(...) should emit a REFERENCES edge to Base