tirth8205 · indrajeet0510 · Jun 7, 2026 · Jun 11, 2026
diff --git a/README.md b/README.md
@@ -452,6 +452,13 @@ Your AI assistant uses these automatically once the graph is built.
 | `list_repos_tool` | List registered repositories |
 | `cross_repo_search_tool` | Search across all registered repositories |
 
+The three highest-payload tools — `get_impact_radius_tool`,
+`query_graph_tool`, and `get_review_context_tool` — accept an optional
+`format="dsl"` parameter that returns the graph payload as one-line
+strings instead of JSON dicts (~2× smaller responses, ~8K tokens saved
+per call on typical blast-radius queries). Default stays `format="dict"`
+for backwards compatibility.
+
 **MCP Prompts** (5 workflow templates):
 `review_changes`, `architecture_map`, `debug_issue`, `onboard_developer`, `pre_merge_check`
 

diff --git a/code_review_graph/graph.py b/code_review_graph/graph.py
@@ -1371,3 +1371,147 @@ def edge_to_dict(e: GraphEdge) -> dict:
         "file_path": e.file_path, "line": e.line,
         "confidence": e.confidence, "confidence_tier": e.confidence_tier,
     }
+
+
+# ---------------------------------------------------------------------------
+# DSL output mode — token-efficient one-line encoding for LLM consumption
+#
+# When a tool returns hundreds of nodes/edges, the JSON-dict form costs ~60
+# tokens per node and ~45 tokens per edge. The DSL form below is ~18 and ~12
+# respectively (3–4× compression), preserving every field that matters for
+# downstream reasoning. Internal database IDs and the redundant
+# confidence_tier (derivable from the float `confidence`) are dropped.
+# ---------------------------------------------------------------------------
+
+_NODE_KIND_CODE: dict[str, str] = {  # node kind -> short code; node_to_dsl lower-cases unlisted kinds
+    "Function": "fn", "Class": "cl", "File": "fi",
+    "Test": "tst", "Community": "com",
+}
+
+_EDGE_KIND_CODE: dict[str, str] = {  # edge kind -> short code; edge_to_dsl lower-cases unlisted kinds
+    "CALLS": "c", "IMPORTS_FROM": "i", "INHERITS": "h",
+    "CONTAINS": "n", "TESTED_BY": "t", "REFERENCES": "r",
+    "CROSS_COMMUNITY": "x",
+}
+
+_LANG_CODE: dict[str, str] = {  # language name -> short code, looked up by node_to_dsl
+    "python": "py", "javascript": "js", "typescript": "ts", "tsx": "tsx",
+    "go": "go", "rust": "rs", "java": "jv", "csharp": "cs", "ruby": "rb",
+    "cpp": "cp", "c": "c", "kotlin": "kt", "swift": "sw", "php": "ph",
+    "scala": "sc", "solidity": "sl", "vue": "vu", "dart": "dr", "r": "r",
+    "perl": "pl", "lua": "lu", "objc": "m", "bash": "sh",
+    # Note: 'luau' intentionally not abbreviated to avoid collision with 'lua'.
+    # Languages absent from this table fall through verbose (e.g. 'powershell',
+    # 'elixir', 'svelte') — self-documenting names need no legend entry.
+}
+
+DSL_LEGEND: str = (  # key to the codes above; the test flag is emitted literally as "[T]"
+    "# legend: nodes 'kind name@file:start-end lang [parent=...] [T=test]'  "
+    "edges 'src→tgt kind @file:line conf'  "
+    "kinds: fn=Function cl=Class fi=File tst=Test com=Community  "
+    "edges: c=CALLS i=IMPORTS_FROM h=INHERITS n=CONTAINS t=TESTED_BY "
+    "r=REFERENCES x=CROSS_COMMUNITY  "
+    "langs: py=python js=javascript ts=typescript tsx=tsx go=go rs=rust "
+    "jv=java cs=csharp rb=ruby cp=cpp c=c kt=kotlin sw=swift ph=php "
+    "sc=scala sl=solidity vu=vue dr=dart r=r pl=perl lu=lua m=objc sh=bash"
+)
+
+
+def _dsl_sanitize(s: "str | None") -> str:
+    """Return ``s`` as a single-line token that is safe inside a DSL record.
+
+    ``None`` becomes ``""``. Otherwise the value goes through
+    ``_sanitize_name`` and every whitespace run left over (tabs, newlines,
+    repeated spaces) is collapsed to one space, with leading/trailing
+    whitespace dropped, so ``"weird\\nname"`` cannot split a record in two.
+    """
+    if s is None:
+        return ""
+    # str.split() with no separator splits on any whitespace run and discards
+    # empty edge pieces, so the re-join yields exactly one space per gap.
+    return " ".join(_sanitize_name(s).split())
+
+
+def node_to_dsl(n: GraphNode) -> str:
+    """Render one graph node as a compact single-line DSL record.
+
+    Layout: ``<kind> <name>@<file>:<start>-<end> <lang> [parent=<parent>] [T]``
+
+    Kind and language use their short codes when one is defined; an unlisted
+    kind is lower-cased and an unlisted language is emitted as-is. A missing
+    language leaves no trailing space. ``parent=`` carries only the last
+    ``::`` segment of ``parent_name``; ``[T]`` appears only for test nodes.
+
+    Example:
+        ``fn validateOrder@src/orders/validator.py:142-178 py parent=OrderValidator``
+    """
+    kind_code = _NODE_KIND_CODE.get(n.kind, n.kind.lower())
+    label = _dsl_sanitize(n.name)
+    # Falsy language (None or "") renders as nothing rather than "None".
+    lang_code = _LANG_CODE.get(n.language, n.language) if n.language else ""
+    location = f"{n.file_path}:{n.line_start}-{n.line_end}"
+    head = f"{kind_code} {label}@{location} {lang_code}".rstrip()
+    fields = [head]
+    if n.parent_name:
+        # The file path is already in the head, so keep just the bare parent
+        # (text after the final `::`).
+        fields.append("parent=" + _dsl_sanitize(n.parent_name).rsplit("::", 1)[-1])
+    if n.is_test:
+        fields.append("[T]")
+    return " ".join(fields)
+
+
+def edge_to_dsl(e: GraphEdge) -> str:
+    """Render one graph edge as a compact single-line DSL record.
+
+    Layout: ``<source>→<target> <kind> @<file>:<line> <confidence>``
+
+    Three redundancies are eliminated relative to the dict form:
+
+    1. The internal database ``id`` is dropped.
+    2. ``confidence_tier`` is dropped; the float ``confidence`` (printed with
+       two decimals) is meant to imply it: ≥0.9 EXTRACTED, ≥0.5 INFERRED,
+       else AMBIGUOUS.
+    3. A leading ``<file_path>::`` is removed from the source when it equals
+       the edge's own ``file_path``; the target is never shortened.
+
+    Example (wrapped here for width; the real record is one line)::
+
+        OrderValidator::validateOrder→src/orders/db.py::Database::persist
+            c @src/orders/validator.py:160 0.95
+    """
+    kind_code = _EDGE_KIND_CODE.get(e.kind, e.kind.lower())
+    source = _dsl_sanitize(e.source_qualified)
+    target = _dsl_sanitize(e.target_qualified)
+    own_prefix = e.file_path + "::" if e.file_path else ""
+    if own_prefix and source.startswith(own_prefix):
+        source = source[len(own_prefix):]
+    return f"{source}\u2192{target} {kind_code} @{e.file_path}:{e.line} {e.confidence:.2f}"
+
+
+def encode_nodes(
+    nodes: "list[GraphNode]", fmt: str = "dict",
+) -> "list[dict] | list[str]":
+    """Serialize nodes with ``node_to_dsl`` or, by default, ``node_to_dict``.
+
+    Args:
+        nodes: GraphNode instances to encode, in output order.
+        fmt: ``"dsl"`` selects one-line strings; any other value yields dicts.
+    """
+    # Only the exact string "dsl" switches encoders; everything else is dict.
+    encoder = node_to_dsl if fmt == "dsl" else node_to_dict
+    return [encoder(node) for node in nodes]
+
+
+def encode_edges(
+    edges: "list[GraphEdge]", fmt: str = "dict",
+) -> "list[dict] | list[str]":
+    """Serialize edges with ``edge_to_dsl`` or, by default, ``edge_to_dict``.
+
+    Args:
+        edges: GraphEdge instances to encode, in output order.
+        fmt: ``"dsl"`` selects one-line strings; any other value yields dicts.
+    """
+    # Only the exact string "dsl" switches encoders; everything else is dict.
+    encoder = edge_to_dsl if fmt == "dsl" else edge_to_dict
+    return [encoder(edge) for edge in edges]
diff --git a/code_review_graph/main.py b/code_review_graph/main.py
@@ -193,6 +193,7 @@ def get_impact_radius_tool(
     repo_root: Optional[str] = None,
     base: str = "HEAD~1",
     detail_level: str = "standard",
+    format: str = "dict",
 ) -> dict:
     """Analyze the blast radius of changed files in the codebase.
 
@@ -205,10 +206,12 @@ def get_impact_radius_tool(
         repo_root: Repository root path. Auto-detected if omitted.
         base: Git ref for auto-detecting changes. Default: HEAD~1.
         detail_level: "standard" for full output, "minimal" for compact summary. Default: standard.
+        format: "dict" (default) or "dsl" for compact line-based encoding (~3× fewer tokens).
     """
     return get_impact_radius(
         changed_files=changed_files, max_depth=max_depth,
         repo_root=_resolve_repo_root(repo_root), base=base, detail_level=detail_level,
+        format=format,
     )
 
 
@@ -218,6 +221,7 @@ def query_graph_tool(
     target: str,
     repo_root: Optional[str] = None,
     detail_level: str = "standard",
+    format: str = "dict",
 ) -> dict:
     """Run a predefined graph query to explore code relationships.
 
@@ -236,10 +240,11 @@ def query_graph_tool(
         target: Node name, qualified name, or file path to query.
         repo_root: Repository root path. Auto-detected if omitted.
         detail_level: "standard" for full output, "minimal" for compact summary. Default: standard.
+        format: "dict" (default) or "dsl" for compact line-based encoding (~3× fewer tokens).
     """
     return query_graph(
         pattern=pattern, target=target, repo_root=_resolve_repo_root(repo_root),
-        detail_level=detail_level,
+        detail_level=detail_level, format=format,
     )
 
 
@@ -252,6 +257,7 @@ def get_review_context_tool(
     repo_root: Optional[str] = None,
     base: str = "HEAD~1",
     detail_level: str = "standard",
+    format: str = "dict",
 ) -> dict:
     """Generate a focused, token-efficient review context for code changes.
 
@@ -267,11 +273,13 @@ def get_review_context_tool(
         base: Git ref for change detection. Default: HEAD~1.
         detail_level: "standard" for full output, "minimal" for
             token-efficient summary. Default: standard.
+        format: "dict" (default) or "dsl" for compact line-based encoding (~3× fewer tokens).
     """
     return get_review_context(
         changed_files=changed_files, max_depth=max_depth,
         include_source=include_source, max_lines_per_file=max_lines_per_file,
         repo_root=_resolve_repo_root(repo_root), base=base, detail_level=detail_level,
+        format=format,
     )