From 9817f4d859eb9bca47666f8265b33ff327705949 Mon Sep 17 00:00:00 2001 From: mallyskies Date: Fri, 12 Jun 2026 16:53:56 -0600 Subject: [PATCH 1/3] feat: add Haxe language support via tree-sitter-haxe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - extract_haxe(): extracts classes, interfaces, enums, enum abstracts, typedefs, and functions from .hx files using tree-sitter-haxe grammar - _haxe_recover_scattered(): fallback parser for files where the grammar produces scattered tokens instead of proper declaration nodes - CR/CRLF normalization before parsing (handles old Mac \r-only files) - detect.py: register .hx extension → Haxe language - pyproject.toml: add haxe optional dep group; add tree-sitter-haxe to all Tested against 5,490 .hx files; 2 empty files (both legitimately all-commented-out). Produces 82,867 nodes and 98,717 edges. --- graphify/detect.py | 2 +- graphify/extract.py | 343 ++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 5 + 3 files changed, 349 insertions(+), 1 deletion(-) diff --git a/graphify/detect.py b/graphify/detect.py index 92b773f7b..a9d3b4226 100644 --- a/graphify/detect.py +++ b/graphify/detect.py @@ -27,7 +27,7 @@ class FileType(str, Enum): _MANIFEST_PATH = str(out_path("manifest.json")) -CODE_EXTENSIONS = {'.py', '.ts', '.tsx', '.js', '.jsx', '.mjs', '.ejs', '.ets', '.go', '.rs', '.java', '.groovy', '.gradle', '.cpp', '.cc', '.cxx', '.c', '.h', '.hpp', '.cu', '.cuh', '.metal', '.rb', '.swift', '.kt', '.kts', '.cs', '.scala', '.php', '.lua', '.luau', '.toc', '.zig', '.ps1', '.psm1', '.psd1', '.ex', '.exs', '.m', '.mm', '.jl', '.vue', '.svelte', '.astro', '.dart', '.v', '.sv', '.svh', '.sql', '.r', '.f', '.F', '.f90', '.F90', '.f95', '.F95', '.f03', '.F03', '.f08', '.F08', '.pas', '.pp', '.dpr', '.dpk', '.lpr', '.inc', '.dfm', '.lfm', '.lpk', '.sh', '.bash', '.json', '.tf', '.tfvars', '.hcl', '.dm', '.dme', '.dmi', '.dmm', '.dmf', '.sln', '.slnx', '.csproj', '.fsproj', '.vbproj', 
'.xaml', '.razor', '.cshtml', '.cls', '.trigger'} +CODE_EXTENSIONS = {'.py', '.ts', '.tsx', '.js', '.jsx', '.mjs', '.ejs', '.ets', '.go', '.rs', '.java', '.groovy', '.gradle', '.cpp', '.cc', '.cxx', '.c', '.h', '.hpp', '.cu', '.cuh', '.metal', '.rb', '.swift', '.kt', '.kts', '.cs', '.scala', '.php', '.lua', '.luau', '.toc', '.zig', '.ps1', '.psm1', '.psd1', '.ex', '.exs', '.m', '.mm', '.jl', '.vue', '.svelte', '.astro', '.dart', '.v', '.sv', '.svh', '.sql', '.r', '.f', '.F', '.f90', '.F90', '.f95', '.F95', '.f03', '.F03', '.f08', '.F08', '.pas', '.pp', '.dpr', '.dpk', '.lpr', '.inc', '.dfm', '.lfm', '.lpk', '.sh', '.bash', '.json', '.tf', '.tfvars', '.hcl', '.dm', '.dme', '.dmi', '.dmm', '.dmf', '.sln', '.slnx', '.csproj', '.fsproj', '.vbproj', '.xaml', '.razor', '.cshtml', '.cls', '.trigger', '.hx'} DOC_EXTENSIONS = {'.md', '.mdx', '.qmd', '.txt', '.rst', '.html', '.yaml', '.yml'} PAPER_EXTENSIONS = {'.pdf'} IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.svg'} diff --git a/graphify/extract.py b/graphify/extract.py index 698e192fe..d8e418653 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -15040,6 +15040,348 @@ def _body_of(block): return {"nodes": nodes, "edges": edges} +def extract_haxe(path: Path) -> dict: + """Extract classes, interfaces, typedefs, functions, imports, and inheritance from a .hx file.""" + try: + import tree_sitter_haxe as _tshaxe + from tree_sitter import Language, Parser + except ImportError: + return {"nodes": [], "edges": [], "error": "tree-sitter-haxe not installed"} + + try: + language = Language(_tshaxe.language()) + parser = Parser(language) + source = path.read_bytes() + # Normalize CR-only and CRLF line endings to LF so that the tree-sitter + # comment rule `seq('//', /.*/)` doesn't consume the rest of the file + # on old-Mac \r-only files (where .* matches \r and runs to EOF). 
+ if b"\r" in source: + source = source.replace(b"\r\n", b"\n").replace(b"\r", b"\n") + tree = parser.parse(source) + root = tree.root_node + except Exception as e: + return {"nodes": [], "edges": [], "error": str(e)} + + stem = _file_stem(path) + str_path = str(path) + nodes: list[dict] = [] + edges: list[dict] = [] + seen_ids: set[str] = set() + function_bodies: list[tuple[str, Any]] = [] + + def add_node(nid: str, label: str, line: int) -> None: + if nid and nid not in seen_ids: + seen_ids.add(nid) + nodes.append({ + "id": nid, + "label": label, + "file_type": "code", + "source_file": str_path, + "source_location": f"L{line}", + }) + + def add_edge(src: str, tgt: str, relation: str, line: int, + confidence: str = "EXTRACTED") -> None: + if src and tgt and src != tgt: + edges.append({ + "source": src, + "target": tgt, + "relation": relation, + "confidence": confidence, + "source_file": str_path, + "source_location": f"L{line}", + "weight": 1.0, + }) + + def ensure_type_node(name: str, line: int) -> str: + nid = _make_id(stem, name) + if nid in seen_ids: + return nid + nid = _make_id(name) + if nid not in seen_ids: + nodes.append({ + "id": nid, + "label": name, + "file_type": "code", + "source_file": "", + "source_location": "", + }) + seen_ids.add(nid) + return nid + + file_nid = _make_id(str(path)) + add_node(file_nid, path.name, 1) + + def _haxe_call_name(call_node) -> str: + """Return the bare function/method name from a call_expression node.""" + obj = call_node.child_by_field_name("object") + ctor = call_node.child_by_field_name("constructor") + if ctor is not None: + return _read_text(ctor, source) + if obj is None: + return "" + if obj.type == "identifier": + return _read_text(obj, source) + if obj.type == "member_expression": + # Last entry in the `member` field list is the method name + members = obj.children_by_field_name("member") + if members: + last = members[-1] + if last.type == "identifier": + return _read_text(last, source) + # nested 
member_expression — recurse one level + if last.type == "member_expression": + sub = last.children_by_field_name("member") + if sub: + return _read_text(sub[-1], source) + return "" + + def walk_calls(node, owner_nid: str) -> None: + """Walk a function body collecting call edges; stops at nested function boundaries.""" + if node.type == "function_declaration": + return + if node.type == "call_expression": + call_name = _haxe_call_name(node) + if call_name and call_name not in _LANGUAGE_BUILTIN_GLOBALS: + tgt_nid = _make_id(stem, call_name) + if tgt_nid not in seen_ids: + tgt_nid = _make_id(call_name) + line = node.start_point[0] + 1 + add_edge(owner_nid, tgt_nid, "calls", line) + for child in node.children: + walk_calls(child, owner_nid) + + def _haxe_dotted_path(node) -> str: + """Reconstruct dotted package path from an import/using statement.""" + parts = [ + _read_text(c, source) + for c in node.children + if c.type in ("package_name", "type_name") + ] + return ".".join(parts) + + def walk(node, parent_class_nid: "str | None" = None, + parent_class_name: "str | None" = None) -> None: + t = node.type + + if t in ("import_statement", "using_statement"): + dotted = _haxe_dotted_path(node) + if dotted: + tgt_nid = _make_id(dotted.replace(".", "_")) + add_edge(file_nid, tgt_nid, "imports", node.start_point[0] + 1) + return + + if t in ("class_declaration", "interface_declaration"): + name_node = node.child_by_field_name("name") + if name_node is None: + for child in node.children: + walk(child, parent_class_nid, parent_class_name) + return + class_name = _read_text(name_node, source) + line = node.start_point[0] + 1 + class_nid = _make_id(stem, class_name) + add_node(class_nid, class_name, line) + add_edge(file_nid, class_nid, "contains", line) + + # extends + for super_node in node.children_by_field_name("super_class_name"): + base = _read_text(super_node, source).strip() + if base: + add_edge(class_nid, ensure_type_node(base, line), "inherits", line) + + # 
implements / interface extends + for iface_node in node.children_by_field_name("interface_name"): + iface = _read_text(iface_node, source).strip() + if iface: + rel = "inherits" if t == "interface_declaration" else "implements" + add_edge(class_nid, ensure_type_node(iface, line), rel, line) + + body = node.child_by_field_name("body") + if body is not None: + for child in body.children: + walk(child, class_nid, class_name) + return + + if t in ("enum_declaration", "enum_abstract_declaration"): + name_node = node.child_by_field_name("name") + if name_node is None: + return + enum_name = _read_text(name_node, source) + line = node.start_point[0] + 1 + enum_nid = _make_id(stem, enum_name) + add_node(enum_nid, enum_name, line) + add_edge(file_nid, enum_nid, "contains", line) + # Walk body for nested function declarations (uncommon but possible) + body = node.child_by_field_name("body") + if body is not None: + for child in body.children: + walk(child, enum_nid, enum_name) + return + + if t == "typedef_declaration": + name_node = node.child_by_field_name("name") + if name_node is None: + return + typedef_name = _read_text(name_node, source) + line = node.start_point[0] + 1 + typedef_nid = _make_id(stem, typedef_name) + add_node(typedef_nid, typedef_name, line) + add_edge(file_nid, typedef_nid, "contains", line) + return + + if t == "function_declaration": + name_node = node.child_by_field_name("name") + if name_node is None: + return + func_name = _read_text(name_node, source) + line = node.start_point[0] + 1 + if parent_class_nid is not None and parent_class_name is not None: + func_nid = _make_id(stem, parent_class_name, func_name) + add_node(func_nid, f"{func_name}()", line) + add_edge(parent_class_nid, func_nid, "method", line) + else: + func_nid = _make_id(stem, func_name) + add_node(func_nid, f"{func_name}()", line) + add_edge(file_nid, func_nid, "contains", line) + fn_body = node.child_by_field_name("body") + if fn_body is not None: + 
function_bodies.append((func_nid, fn_body)) + return + + for child in node.children: + walk(child, parent_class_nid, parent_class_name) + + walk(root) + + # Fallback: recover class/enum names from scattered module-level tokens. + # When the grammar can't form a proper declaration node (e.g. minified files + # where everything is on one line, or unsupported preprocessor patterns), the + # parser emits bare 'class'/'enum' keyword tokens followed by an identifier. + # Walk the top-level children looking for that pattern and create nodes for + # any names that weren't already extracted. + if len(nodes) <= 1: + _haxe_recover_scattered(root, source, stem, file_nid, + add_node, add_edge, seen_ids, function_bodies) + + for func_nid, body in function_bodies: + walk_calls(body, func_nid) + + return {"nodes": nodes, "edges": edges} + + +def _haxe_recover_scattered( + root: Any, + source: bytes, + stem: str, + file_nid: str, + add_node: Any, + add_edge: Any, + seen_ids: set, + function_bodies: list, +) -> None: + """Extract class/enum names from module-level scattered tokens. + + When the grammar fails to form a declaration node (minified code, unsupported + preprocessor patterns), the parser emits 'class'/'enum' as bare keyword tokens + followed by an identifier. This pass recovers at least the type name so the + file has a meaningful node rather than just a file-level stub. 
+ """ + children = list(root.children) + i = 0 + while i < len(children): + node = children[i] + t = node.type + raw = source[node.start_byte:node.end_byte].decode("utf-8", "replace").strip() + + # Pattern: 'class' token followed by identifier token + if raw == "class" and i + 1 < len(children): + next_node = children[i + 1] + if next_node.type == "identifier": + class_name = source[next_node.start_byte:next_node.end_byte].decode("utf-8", "replace").strip() + line = node.start_point[0] + 1 + class_nid = _make_id(stem, class_name) + add_node(class_nid, class_name, line) + add_edge(file_nid, class_nid, "contains", line) + # Collect any function_declaration siblings that follow before + # we hit another keyword or end of file + j = i + 2 + while j < len(children): + sib = children[j] + if sib.type == "function_declaration": + fn_name_node = sib.child_by_field_name("name") + if fn_name_node is not None: + fn_name = source[fn_name_node.start_byte:fn_name_node.end_byte].decode("utf-8", "replace") + fn_line = sib.start_point[0] + 1 + fn_nid = _make_id(stem, class_name, fn_name) + add_node(fn_nid, f"{fn_name}()", fn_line) + add_edge(class_nid, fn_nid, "method", fn_line) + fn_body = sib.child_by_field_name("body") + if fn_body is not None: + function_bodies.append((fn_nid, fn_body)) + elif sib.type in ("class_declaration", "interface_declaration", + "enum_declaration", "enum_abstract_declaration"): + break + elif source[sib.start_byte:sib.end_byte].decode("utf-8", "replace").strip() == "class": + break + j += 1 + i = j + continue + + # Pattern: 'enum' keyword token (ERROR node contains the rest) + if raw == "enum" and i + 1 < len(children): + # Try to pull the name out of the following ERROR node's text + next_node = children[i + 1] + err_text = source[next_node.start_byte:next_node.end_byte].decode("utf-8", "replace") + import re as _re + # Matches: [abstract] Name[(...)][from...][to...] 
— grab Name + m = _re.match(r"\s*(?:abstract\s+)?([A-Za-z_][A-Za-z0-9_]*)", err_text) + if m: + enum_name = m.group(1) + line = node.start_point[0] + 1 + enum_nid = _make_id(stem, enum_name) + add_node(enum_nid, enum_name, line) + add_edge(file_nid, enum_nid, "contains", line) + + # Pattern: bare 'typedef' token followed by identifier — handles struct + # typedefs with optional fields (?field:T) that the grammar can't parse. + if raw == "typedef" and i + 1 < len(children): + next_node = children[i + 1] + if next_node.type == "identifier": + td_name = source[next_node.start_byte:next_node.end_byte].decode("utf-8", "replace").strip() + line = node.start_point[0] + 1 + td_nid = _make_id(stem, td_name) + add_node(td_nid, td_name, line) + add_edge(file_nid, td_nid, "contains", line) + + # Pattern: ERROR node whose text contains a class/interface/enum declaration. + # Use re.search (not match) to skip leading metadata like @deprecated that + # precede the actual keyword and would otherwise block recognition. + if node.type == "ERROR": + import re as _re + err_text = source[node.start_byte:node.end_byte].decode("utf-8", "replace") + m = _re.search( + r"\b(class|interface|enum)\s+" + r"(?:abstract\s+)?" 
+ r"([A-Za-z_][A-Za-z0-9_]*)", + err_text, + ) + if m: + decl_name = m.group(2) + line = node.start_point[0] + 1 + decl_nid = _make_id(stem, decl_name) + add_node(decl_nid, decl_name, line) + add_edge(file_nid, decl_nid, "contains", line) + # Extract function names from the ERROR text with a simple regex + for fn_m in _re.finditer(r"\bfunction\s+([A-Za-z_][A-Za-z0-9_]*)\s*[\(<]", err_text): + fn_name = fn_m.group(1) + fn_line = line + err_text[:fn_m.start()].count("\n") + fn_nid = _make_id(stem, decl_name, fn_name) + add_node(fn_nid, f"{fn_name}()", fn_line) + add_edge(decl_nid, fn_nid, "method", fn_line) + + i += 1 + + _DISPATCH: dict[str, Any] = { ".py": extract_python, ".js": extract_js, @@ -15131,6 +15473,7 @@ def _body_of(block): ".cshtml": extract_razor, ".cls": extract_apex, ".trigger": extract_apex, + ".hx": extract_haxe, } diff --git a/pyproject.toml b/pyproject.toml index 876602bb9..787287155 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,11 @@ sql = ["tree-sitter-sql"] # avoids breaking the default `uv tool install graphifyy` for everyone (#1104). dm = ["tree-sitter-dm"] terraform = ["tree-sitter-hcl"] +# tree-sitter-haxe is not yet published on PyPI (upstream vantreeseba/tree-sitter-haxe +# hasn't cut a release); pull our patched fork straight from git in the meantime. +# Excluded from `all` — an unresolvable PyPI name would break `uv sync`/CI for +# everyone, not just fail to build (#1307). 
+haxe = ["tree-sitter-haxe @ git+https://github.com/masquepublishing/tree-sitter-haxe.git"] all = ["mcp", "starlette>=1.3.1", "neo4j", "falkordb", "pypdf>=6.12.0", "markdownify", "watchdog", "graspologic; python_version < '3.13'", "python-docx", "openpyxl", "faster-whisper; python_version >= '3.11'", "yt-dlp>=2026.6.9", "matplotlib", "numpy>=2.0; python_version >= '3.13'", "openai", "tiktoken", "boto3", "anthropic", "tree-sitter-sql", "jieba", "tree-sitter-dm", "tree-sitter-hcl"] [project.scripts] From 473afb813b4026a534541342a728be44d8926fdb Mon Sep 17 00:00:00 2001 From: mallyskies Date: Sat, 13 Jun 2026 13:54:14 -0600 Subject: [PATCH 2/3] docs+tests: add Haxe documentation, fixture, and language tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - README.md: add .hx to supported languages table (36 → 37 grammars) - CHANGELOG.md: add Unreleased entry for Haxe support - tests/fixtures/sample.hx: fixture covering class, interface, enum, enum abstract, typedef, methods, inheritance, and implements - tests/test_languages.py: 9 tests for extract_haxe(); skipped when tree-sitter-haxe is not installed (mirrors [dm] skip pattern) --- CHANGELOG.md | 4 +++ README.md | 3 +- tests/fixtures/sample.hx | 54 +++++++++++++++++++++++++++++ tests/test_languages.py | 75 ++++++++++++++++++++++++++++++++++++++-- 4 files changed, 133 insertions(+), 3 deletions(-) create mode 100644 tests/fixtures/sample.hx diff --git a/CHANGELOG.md b/CHANGELOG.md index 0eb835436..2262e2948 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ Full release notes with details on each version: [GitHub Releases](https://github.com/safishamsi/graphify/releases) +## Unreleased + +- Feat: Haxe (`.hx`) language support via `tree-sitter-haxe`. Extracts classes, interfaces, enums, enum abstracts, typedefs, and functions. Includes a fallback pass for files where the grammar emits scattered tokens instead of declaration nodes. 
Install with `pip install "graphifyy[haxe]"`. + ## 0.9.5 (2026-07-02) - Feat: the MCP server can serve many projects from one process via an optional `project_path` on every tool (#1594, thanks @joanfgarcia). Omit it and nothing changes — the server answers against the graph it was started with. Pass an absolute `project_path` and that call is routed to `//graph.json` instead, with its own mtime+size hot-reload, so one stdio/HTTP server backs a whole workspace of repos. Graphs load lazily and cache per resolved path; a missing/corrupt project graph is a tool error, not a process exit, and the server starts even when its default graph is absent. Backward-compatible and additive. diff --git a/README.md b/README.md index dbd8489a5..06954c0c4 100644 --- a/README.md +++ b/README.md @@ -245,7 +245,8 @@ To remove graphify from all platforms at once: `graphify uninstall` (add `--purg | Type | Extensions | |------|-----------| -| Code (36 tree-sitter grammars) | `.py .ts .js .jsx .tsx .mjs .go .rs .java .c .cpp .h .hpp .cu .cuh .metal .rb .cs .kt .scala .php .swift .lua .luau .zig .ps1 .psm1 .ex .exs .m .mm .jl .vue .svelte .astro .groovy .gradle .dart .v .sv .svh .sql .f .f90 .f95 .f03 .f08 .pas .pp .dpr .dpk .lpr .inc .dfm .lfm .lpk .sh .bash .json .dm .dme .dmi .dmm .dmf .sln .slnx .csproj .fsproj .vbproj .xaml .razor .cshtml` (`.dm`/`.dme` requires `uv tool install graphifyy[dm]`; CUDA `.cu`/`.cuh` and Metal `.metal` reuse the C++ grammar) | +| Code (37 tree-sitter grammars) | `.py .ts .js .jsx .tsx .mjs .go .rs .java .c .cpp .h .hpp .cu .cuh .metal .rb .cs .kt .scala .php .swift .lua .luau .zig .ps1 .psm1 .ex .exs .m .mm .jl .vue .svelte .astro .groovy .gradle .dart .v .sv .svh .sql .f .f90 .f95 .f03 .f08 .pas .pp .dpr .dpk .lpr .inc .dfm .lfm .lpk .sh .bash .json .dm .dme .dmi .dmm .dmf .sln .slnx .csproj .fsproj .vbproj .xaml .razor .cshtml` (`.dm`/`.dme` requires `uv tool install graphifyy[dm]`; CUDA `.cu`/`.cuh` and Metal `.metal` reuse the C++ grammar) | +| Haxe | 
`.hx` (requires `uv tool install graphifyy[haxe]`; classes, interfaces, enums, enum abstracts, typedefs, functions) | | Salesforce Apex | `.cls .trigger` (regex-based; classes, interfaces, enums, methods, triggers, SOQL/DML edges) | | Terraform / HCL | `.tf .tfvars .hcl` (requires `uv tool install graphifyy[terraform]`) | | MCP configs | `.mcp.json` `mcp.json` `mcp_servers.json` `claude_desktop_config.json` — extracts server nodes, package refs, env var requirements | diff --git a/tests/fixtures/sample.hx b/tests/fixtures/sample.hx new file mode 100644 index 000000000..07dc9e8d1 --- /dev/null +++ b/tests/fixtures/sample.hx @@ -0,0 +1,54 @@ +import com.masque.core.MApp; +import com.masque.net.NetMsg; + +interface ILoggable { + function log():Void; +} + +class BaseClient { + public function new() {} +} + +class HttpClient extends BaseClient implements ILoggable { + private var mBaseUrl:String; + + public function new(baseUrl:String) { + super(); + mBaseUrl = baseUrl; + } + + public function get(path:String):String { + return buildRequest("GET", path); + } + + public function post(path:String, body:String):String { + return buildRequest("POST", path); + } + + private function buildRequest(method:String, path:String):String { + return method + " " + mBaseUrl + path; + } + + public function log():Void {} +} + +enum CardSuit { + SPADES; + HEARTS; + DIAMONDS; + CLUBS; +} + +enum abstract Rank(Int) { + var ACE = 1; + var KING = 13; +} + +typedef Config = { + var baseUrl:String; + var timeout:Int; +} + +function createClient(cfg:Config):HttpClient { + return new HttpClient(cfg.baseUrl); +} diff --git a/tests/test_languages.py b/tests/test_languages.py index ca5169d54..797d2f35d 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -1,4 +1,4 @@ -"""Tests for language extractors: Java, C, C++, Ruby, C#, Kotlin, Scala, PHP, Swift, Go, Julia, Fortran, JS/TS, .NET project files, XAML.""" +"""Tests for language extractors: Java, C, C++, Ruby, C#, Kotlin, Scala, 
PHP, Swift, Go, Julia, Fortran, JS/TS, .NET project files, XAML, Haxe.""" from __future__ import annotations from pathlib import Path import pytest @@ -9,7 +9,7 @@ extract_groovy, extract_sln, extract_csproj, extract_xaml, extract_razor, extract_dm, extract_dmi, extract_dmm, extract_dmf, extract_powershell, extract_apex, extract_verilog, - extract_powershell_manifest, + extract_powershell_manifest, extract_haxe, ) FIXTURES = Path(__file__).parent / "fixtures" @@ -2909,3 +2909,74 @@ def test_decldef_merge_does_not_merge_same_name_same_dir_distinct_files(): r = _corpus("cpp_samedir/Alpha.h", "cpp_samedir/Beta.h") dups = _nodes_with_label(r, "Dup") assert len(dups) == 2, f"same-dir distinct Dups must stay distinct, got {[n['id'] for n in dups]}" + + +# ── Haxe ────────────────────────────────────────────────────────────────────── + +_needs_haxe = pytest.mark.skipif( + _ilu.find_spec("tree_sitter_haxe") is None, + reason="tree-sitter-haxe not installed (optional [haxe] extra)", +) + +@_needs_haxe +def test_haxe_no_error(): + r = extract_haxe(FIXTURES / "sample.hx") + assert "error" not in r + +@_needs_haxe +def test_haxe_finds_class(): + r = extract_haxe(FIXTURES / "sample.hx") + assert any("HttpClient" in l for l in _labels(r)) + +@_needs_haxe +def test_haxe_finds_interface(): + r = extract_haxe(FIXTURES / "sample.hx") + assert any("ILoggable" in l for l in _labels(r)) + +@_needs_haxe +def test_haxe_finds_enum(): + r = extract_haxe(FIXTURES / "sample.hx") + assert any("CardSuit" in l for l in _labels(r)) + +@_needs_haxe +def test_haxe_finds_enum_abstract(): + r = extract_haxe(FIXTURES / "sample.hx") + assert any("Rank" in l for l in _labels(r)) + +@_needs_haxe +def test_haxe_finds_typedef(): + r = extract_haxe(FIXTURES / "sample.hx") + assert any("Config" in l for l in _labels(r)) + +@_needs_haxe +def test_haxe_finds_methods(): + r = extract_haxe(FIXTURES / "sample.hx") + labels = _labels(r) + assert any("get()" in l for l in labels) + assert any("post()" in l for l 
in labels) + +@_needs_haxe +def test_haxe_finds_top_level_function(): + r = extract_haxe(FIXTURES / "sample.hx") + assert any("createClient()" in l for l in _labels(r)) + +@_needs_haxe +def test_haxe_splits_inherits_and_implements(): + r = extract_haxe(FIXTURES / "sample.hx") + assert ("HttpClient", "BaseClient") in _edge_labels(r, "inherits") + assert ("HttpClient", "ILoggable") in _edge_labels(r, "implements") + +@_needs_haxe +def test_haxe_finds_imports(): + r = extract_haxe(FIXTURES / "sample.hx") + imports = _edge_labels(r, "imports") + assert ("sample.hx", "com_masque_core_mapp") in imports + assert ("sample.hx", "com_masque_net_netmsg") in imports + +@_needs_haxe +def test_haxe_finds_calls(): + r = extract_haxe(FIXTURES / "sample.hx") + calls = _edge_labels(r, "calls") + assert ("get", "buildrequest") in calls + assert ("post", "buildrequest") in calls + assert ("createClient", "HttpClient") in calls From ef2140549d51503790c015b87c65d9a5e8d72982 Mon Sep 17 00:00:00 2001 From: mallyskies Date: Wed, 1 Jul 2026 18:29:27 -0600 Subject: [PATCH 3/3] fix: remove PyPI-blocking git-URL dependency for tree-sitter-haxe PyPI/Warehouse rejects any package upload whose metadata contains a direct URL/VCS dependency. graphifyy is actively published to PyPI, so the haxe extra's git+https dependency would block every future release of the package, not just fail to build for haxe users. Drop the extra entirely and document a manual pip install git+https://github.com/masquepublishing/tree-sitter-haxe.git step in the README instead, matching how the project treats every other grammar with install friction (real PyPI name, or nothing) - there is no existing precedent for a non-PyPI dependency in pyproject.toml. 
--- CHANGELOG.md | 2 +- README.md | 12 +++++++++++- pyproject.toml | 7 +++---- tests/test_languages.py | 2 +- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2262e2948..3e7a8cca8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ Full release notes with details on each version: [GitHub Releases](https://githu ## Unreleased -- Feat: Haxe (`.hx`) language support via `tree-sitter-haxe`. Extracts classes, interfaces, enums, enum abstracts, typedefs, and functions. Includes a fallback pass for files where the grammar emits scattered tokens instead of declaration nodes. Install with `pip install "graphifyy[haxe]"`. +- Feat: Haxe (`.hx`) language support via `tree-sitter-haxe`. Extracts classes, interfaces, enums, enum abstracts, typedefs, and functions. Includes a fallback pass for files where the grammar emits scattered tokens instead of declaration nodes. `tree-sitter-haxe` has no PyPI release, so there is no `graphifyy` extra for it — install with `pip install git+https://github.com/masquepublishing/tree-sitter-haxe.git`. ## 0.9.5 (2026-07-02) diff --git a/README.md b/README.md index 06954c0c4..8560868f3 100644 --- a/README.md +++ b/README.md @@ -186,6 +186,16 @@ Install only what you need: | `chinese` | Chinese query segmentation (jieba) | `uv tool install "graphifyy[chinese]"` | | `all` | Everything above | `uv tool install "graphifyy[all]"` | +Haxe is not in this table: `tree-sitter-haxe` has no PyPI release (upstream +`vantreeseba/tree-sitter-haxe` hasn't cut one), and PyPI rejects any package +upload whose metadata contains a direct URL/VCS dependency — so it can't be +declared as a `graphifyy` extra without blocking every future release of this +package. 
Install the patched fork manually to enable `.hx` support: + +```bash +pip install git+https://github.com/masquepublishing/tree-sitter-haxe.git +``` + --- ## Make your assistant always use the graph @@ -246,7 +256,7 @@ To remove graphify from all platforms at once: `graphify uninstall` (add `--purg | Type | Extensions | |------|-----------| | Code (37 tree-sitter grammars) | `.py .ts .js .jsx .tsx .mjs .go .rs .java .c .cpp .h .hpp .cu .cuh .metal .rb .cs .kt .scala .php .swift .lua .luau .zig .ps1 .psm1 .ex .exs .m .mm .jl .vue .svelte .astro .groovy .gradle .dart .v .sv .svh .sql .f .f90 .f95 .f03 .f08 .pas .pp .dpr .dpk .lpr .inc .dfm .lfm .lpk .sh .bash .json .dm .dme .dmi .dmm .dmf .sln .slnx .csproj .fsproj .vbproj .xaml .razor .cshtml` (`.dm`/`.dme` requires `uv tool install graphifyy[dm]`; CUDA `.cu`/`.cuh` and Metal `.metal` reuse the C++ grammar) | -| Haxe | `.hx` (requires `uv tool install graphifyy[haxe]`; classes, interfaces, enums, enum abstracts, typedefs, functions) | +| Haxe | `.hx` (requires `pip install git+https://github.com/masquepublishing/tree-sitter-haxe.git`; not a PyPI package, so no `graphifyy` extra exists for it — see the install note above; classes, interfaces, enums, enum abstracts, typedefs, functions) | | Salesforce Apex | `.cls .trigger` (regex-based; classes, interfaces, enums, methods, triggers, SOQL/DML edges) | | Terraform / HCL | `.tf .tfvars .hcl` (requires `uv tool install graphifyy[terraform]`) | | MCP configs | `.mcp.json` `mcp.json` `mcp_servers.json` `claude_desktop_config.json` — extracts server nodes, package refs, env var requirements | diff --git a/pyproject.toml b/pyproject.toml index 787287155..7a6da3229 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,10 +76,9 @@ sql = ["tree-sitter-sql"] dm = ["tree-sitter-dm"] terraform = ["tree-sitter-hcl"] # tree-sitter-haxe is not yet published on PyPI (upstream vantreeseba/tree-sitter-haxe -# hasn't cut a release); pull our patched fork straight from git in the meantime. 
-# Excluded from `all` — an unresolvable PyPI name would break `uv sync`/CI for -# everyone, not just fail to build (#1307). -haxe = ["tree-sitter-haxe @ git+https://github.com/masquepublishing/tree-sitter-haxe.git"] +# hasn't cut a release). PyPI rejects uploads containing a direct URL/VCS dependency +# in Requires-Dist, so it cannot be declared as an extra here without blocking every +# future `graphifyy` release — install it manually, see README (#1307). all = ["mcp", "starlette>=1.3.1", "neo4j", "falkordb", "pypdf>=6.12.0", "markdownify", "watchdog", "graspologic; python_version < '3.13'", "python-docx", "openpyxl", "faster-whisper; python_version >= '3.11'", "yt-dlp>=2026.6.9", "matplotlib", "numpy>=2.0; python_version >= '3.13'", "openai", "tiktoken", "boto3", "anthropic", "tree-sitter-sql", "jieba", "tree-sitter-dm", "tree-sitter-hcl"] [project.scripts] diff --git a/tests/test_languages.py b/tests/test_languages.py index 797d2f35d..b6c59e910 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -2915,7 +2915,7 @@ def test_decldef_merge_does_not_merge_same_name_same_dir_distinct_files(): _needs_haxe = pytest.mark.skipif( _ilu.find_spec("tree_sitter_haxe") is None, - reason="tree-sitter-haxe not installed (optional [haxe] extra)", + reason="tree-sitter-haxe not installed (no PyPI release; see README)", ) @_needs_haxe