diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index da10c424..c6f595e6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,6 +23,23 @@ jobs:
       - name: Lint with ruff
         run: ruff check code_review_graph/
 
+  script-lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Validate install.sh (POSIX sh syntax)
+        run: sh -n install.sh
+      - name: Validate install.ps1 (PowerShell syntax)
+        shell: pwsh
+        run: |
+          $errors = $null
+          [void][System.Management.Automation.PSParser]::Tokenize(
+            (Get-Content -Raw ./install.ps1), [ref]$errors)
+          if ($errors.Count -gt 0) {
+            $errors | ForEach-Object { $_.Message }
+            exit 1
+          }
+
   type-check:
     runs-on: ubuntu-latest
     steps:
diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
index e60eedfb..fff576e3 100644
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@@ -51,7 +51,11 @@ jobs:
         if: always()
         run: |
           python - <<'PY' >> "$GITHUB_STEP_SUMMARY"
-          from code_review_graph.eval.reporter import generate_full_report
+          from code_review_graph.eval.reporter import (
+              generate_full_report,
+              median_token_reduction,
+              median_token_reduction_table,
+          )
 
           print("# Weekly eval (report-only)")
           print()
@@ -61,5 +65,28 @@ jobs:
               "not fail CI."
           )
           print()
+
+          # Headline: median per-question token reduction from this run.
+          print("## Headline: median per-question token reduction")
+          print()
+          stats = median_token_reduction("evaluate/results")
+          if stats["median_percent"] is not None:
+              print(
+                  f"**{stats['median_percent']}%** median reduction over "
+                  f"{stats['n']} questions "
+                  f"(source: `{stats['source']}`). This is the median of "
+                  "per-question `(1 - graph_tokens / baseline_tokens) * 100`."
+              )
+          else:
+              print(
+                  "No usable rows this run (clone or measurement failures) — "
+                  "see the CSV artifact for details."
+              )
+          print()
+          print(median_token_reduction_table("evaluate/results"))
+          print()
+
+          print("## Full report")
+          print()
           print(generate_full_report("evaluate/results"))
           PY
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5632f8a7..e6a92a79 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,71 @@
 
 ## [Unreleased]
 
+## [2.4.0] - 2026-06-14
+
+**The token moat, sharpened.** This release makes the graph spend fewer of your
+tokens by default, lets you *prove* the savings, and removes the Python-install
+friction — without changing how anything works (still a local SQLite graph,
+Tree-sitter parsed, served over MCP). Several defaults changed in your favor;
+see "Behavior changes" below.
+
+### Added
+
+- **`code-review-graph doctor`** — a read-only health checklist (✓/✗) that
+  verifies the graph exists and is fresh, MCP config is present, the server
+  boots, hooks are installed, and embeddings status — then prints your latest
+  **Token Savings** number as proof it's working. Answers the most common
+  question ("is this actually running?") in one command.
+- **One-line install that hides Python**: `install.sh` (macOS/Linux) and
+  `install.ps1` (Windows) bootstrap `uv` if missing, then install the CLI.
+  `curl -fsSL .../install.sh | sh`. `pip`/`pipx`/`uvx` remain alternatives.
+- **`serve --tools all|lean|<csv>`** and **`serve --detail minimal|standard|verbose`**
+  (plus `CRG_TOOLS` / `CRG_DETAIL_LEVEL` env vars) to control the exposed tool
+  set and default verbosity per server.
+- **Token budget for review output**: `get_review_context` and `detect_changes`
+  take a `max_tokens` (default 6000), keeping the highest-risk findings and
+  honestly reporting what was omitted — so a review can never blow the context
+  window.
+- Weekly eval CI now surfaces the **median per-question token reduction** in its
+  job summary; README gains a "benchmarks: reproducible" badge.
+
+### Changed (defaults that now save more tokens)
+
+- **Lean tool set by default**: `serve` exposes 7 curated tools
+  (`get_minimal_context`, `query_graph`, `semantic_search_nodes`,
+  `detect_changes`, `get_review_context`, `get_impact_radius`,
+  `get_affected_flows`) instead of all 30 — cutting ~8k tokens of tool
+  descriptions per session and reducing agent mis-picks. Restore everything with
+  `serve --tools all` or `CRG_TOOLS=all`.
+- **Minimal detail by default** for `query_graph`, `semantic_search_nodes`,
+  `get_impact_radius`, and `detect_changes`. Pass `detail_level="standard"` (or
+  `CRG_DETAIL_LEVEL=standard`) for full payloads.
+- `query_graph` gains a `max_results` cap (default 100) so `callers_of` on a hot
+  symbol can't return an unbounded payload.
+
+### Fixed
+
+- **TESTED_BY edge direction** (#515, integrates community PR #527 by
+  @Devilthelegend with an added producer-side regression test): `tests_for`,
+  `get_transitive_tests`, test-gap detection, flow criticality, symbol
+  enrichment, and dead-code detection now read test-coverage edges in the
+  canonical direction. Changed functions with tests are no longer reported as
+  untested, and the GitHub Action's "Tested" column is now correct. No DB
+  migration needed (read-side fix; stored edges were always canonical).
+- **First-run guard**: read MCP tools on a never-built repo now return a clear
+  `not_built` status pointing at `build_or_update_graph_tool`, instead of
+  silently creating an empty `graph.db` and returning "0 results".
+- GitHub Action PR comments now show **repo-relative paths** instead of absolute
+  CI-runner paths.
+
+### Behavior changes (read before upgrading)
+
+- `serve` exposes 7 tools by default; opt back into all 30 with `--tools all` /
+  `CRG_TOOLS=all`.
+- Four tools default to `minimal` detail; pass `detail_level="standard"` or set
+  `CRG_DETAIL_LEVEL=standard` for the old output.
+- Read tools return `not_built` on a fresh repo instead of an empty result.
+
 ## [2.3.6] - 2026-06-10
 
 **Community-response release.** Built from a full audit of every open PR,
diff --git a/CLAUDE.md b/CLAUDE.md
index 5c1c960b..7ef14a22 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -23,7 +23,8 @@ When using code-review-graph MCP tools, follow these rules:
   - `incremental.py` — Git-based change detection, file watching
   - `embeddings.py` — Optional vector embeddings (local sentence-transformers, OpenAI-compatible endpoints, Google Gemini, MiniMax)
   - `visualization.py` — D3.js interactive HTML graph generator
-  - `cli.py` — CLI entry point (install/init, build, update, postprocess, embed, watch, status, visualize, serve/mcp, wiki, detect-changes, register, unregister, repos, eval, daemon)
+  - `cli.py` — CLI entry point (install/init, build, update, postprocess, embed, watch, status, doctor, visualize, serve/mcp, wiki, detect-changes, register, unregister, repos, eval, daemon)
+  - `doctor.py` — Health checklist (`doctor` command): graph presence, freshness vs git HEAD, MCP config, server boot smoke, hooks, embeddings; surfaces the latest Token Savings number
   - `flows.py` — Execution flow detection and criticality scoring
   - `communities.py` — Community detection (Leiden algorithm or file-based grouping) and architecture overview
   - `search.py` — FTS5 hybrid search (keyword + vector)
@@ -55,6 +56,7 @@ uv run mypy code_review_graph/ --ignore-missing-imports --no-strict-optional
 uv run code-review-graph build              # Full graph build
 uv run code-review-graph update             # Incremental update
 uv run code-review-graph status             # Show stats
+uv run code-review-graph doctor             # Health checklist (verify the install)
 uv run code-review-graph serve              # Start MCP server
 uv run code-review-graph wiki               # Generate markdown wiki
 uv run code-review-graph detect-changes     # Risk-scored change analysis
@@ -102,6 +104,7 @@ uv run code-review-graph eval               # Run evaluation benchmarks
 - `tests/test_prompts.py` — MCP prompt template tests
 - `tests/test_wiki.py` — Wiki generation
 - `tests/test_context_savings.py` — Estimated context-savings metadata
+- `tests/test_doctor.py` — `doctor` health checklist (unbuilt flags, built healthy, exit codes)
 - `tests/test_skills.py` — Install/config generation and shipped skill metadata
 - `tests/test_registry.py` — Multi-repo registry
 - `tests/test_migrations.py` — Database migrations
diff --git a/README.md b/README.md
index 15847b36..0acfd4a0 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,7 @@
   <a href="https://github.com/tirth8205/code-review-graph/stargazers"><img src="https://img.shields.io/github/stars/tirth8205/code-review-graph?style=flat-square" alt="Stars"></a>
   <a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square" alt="MIT Licence"></a>
   <a href="https://github.com/tirth8205/code-review-graph/actions/workflows/ci.yml"><img src="https://github.com/tirth8205/code-review-graph/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
+  <a href="https://github.com/tirth8205/code-review-graph/actions/workflows/eval.yml"><img src="https://img.shields.io/badge/benchmarks-reproducible-success?style=flat-square" alt="Benchmarks: reproducible"></a>
   <a href="https://www.python.org/"><img src="https://img.shields.io/badge/python-3.10%2B-blue.svg?style=flat-square" alt="Python 3.10+"></a>
   <a href="https://modelcontextprotocol.io/"><img src="https://img.shields.io/badge/MCP-compatible-green.svg?style=flat-square" alt="MCP"></a>
   <a href="https://code-review-graph.com"><img src="https://img.shields.io/badge/website-code--review--graph.com-blue?style=flat-square" alt="Website"></a>
@@ -38,6 +39,8 @@
 
 AI coding tools can end up re-reading large parts of your codebase on review tasks. `code-review-graph` fixes that. It builds a structural map of your code with [Tree-sitter](https://tree-sitter.github.io/tree-sitter/), tracks changes incrementally, and gives your AI assistant precise context via [MCP](https://modelcontextprotocol.io/) so it reads only what matters.
 
+Where retrieval tools focus on navigation, CRG is review-native — risk-scored change analysis, impact radius, and a PR-review GitHub Action — token-efficient, local, and free, with no waitlist.
+
 <p align="center">
   <img src="diagrams/diagram1_before_vs_after.png" alt="The Token Problem: 38x to 528x token reduction across 6 real repositories" width="85%" />
 </p>
@@ -46,10 +49,30 @@ AI coding tools can end up re-reading large parts of your codebase on review tas
 
 ## Quick Start
 
+### Quick install (no Python setup)
+
+macOS / Linux:
+
+```bash
+curl -LsSf https://raw.githubusercontent.com/tirth8205/code-review-graph/main/install.sh | sh
+```
+
+Windows (PowerShell):
+
+```powershell
+irm https://raw.githubusercontent.com/tirth8205/code-review-graph/main/install.ps1 | iex
+```
+
+This installs [uv](https://docs.astral.sh/uv/) (a single static binary that manages Python for you) if it is missing, then installs the `code-review-graph` CLI. It does **not** ship a bundled runtime — uv handles Python under the hood, so you don't have to. The script is idempotent; re-run it any time. Prefer to do it yourself? Use the alternatives below.
+
+### Alternatives (already have Python / uv)
+
 ```bash
 pip install code-review-graph                     # or: pipx install code-review-graph
+uvx code-review-graph install                     # or run without installing, via uv
 code-review-graph install          # auto-detects and configures all supported platforms
 code-review-graph build            # parse your codebase
+code-review-graph doctor           # verify the install is healthy
 ```
 
 One command sets up everything. `install` detects which AI coding tools you have, writes the correct MCP configuration for each one, installs platform-native hooks/skills where supported, and injects graph-aware instructions into your platform rules. It auto-detects whether you installed via `uvx` or `pip`/`pipx` and generates the right config. Restart your editor/tool after installing.
@@ -176,7 +199,7 @@ See [docs/GITHUB_ACTION.md](docs/GITHUB_ACTION.md) for inputs, risk levels, and
 
 **Headline number: the median per-question token reduction across the 6 repos is ~82x** (whole-corpus baseline vs graph query). The frequently quoted **528x is the maximum** — a single best-case repo (fastapi) — not the typical result.
 
-All numbers come from the automated evaluation runner against 6 real open-source repositories (13 commits total). Every config pins an upstream SHA, the Leiden community detector runs with a fixed seed, and embeddings are deterministic on CPU — so two runs on different machines produce identical numbers. The full reproduction recipe with expected outputs is in [`docs/REPRODUCING.md`](docs/REPRODUCING.md). A weekly report-only run on the two smallest configs lives in [`.github/workflows/eval.yml`](.github/workflows/eval.yml).
+All numbers come from the automated evaluation runner against 6 real open-source repositories (13 commits total). Every config pins an upstream SHA, the Leiden community detector runs with a fixed seed, and embeddings are deterministic on CPU — so two runs on different machines produce identical numbers. The full reproduction recipe with expected outputs is in [`docs/REPRODUCING.md`](docs/REPRODUCING.md). A weekly report-only run on the two smallest configs lives in [`.github/workflows/eval.yml`](.github/workflows/eval.yml); it publishes the **median per-question token reduction** for that run as a table in the job summary and uploads the raw CSVs as an artifact. The [`benchmarks: reproducible`](https://github.com/tirth8205/code-review-graph/actions/workflows/eval.yml) badge links to those runs — the badge asserts the pipeline is reproducible; the canonical numbers live here and in [`docs/REPRODUCING.md`](docs/REPRODUCING.md), never auto-committed from CI.
 
 <details>
 <summary><strong>Token efficiency: ~82x median per-question reduction (range 38x – 528x; whole-corpus vs graph query)</strong></summary>
@@ -543,22 +566,39 @@ The cloud-egress warning is auto-skipped when the base URL points to localhost
 > much narrower quality gap against smaller models at this input length.
 > Body/docstring embedding is tracked as a follow-up enhancement.
 
-#### Tool Filtering
+#### Tool Filtering (lean by default)
+
+CRG registers 30 MCP tools, but loading every description costs ~8k tokens
+per LLM turn before any work happens. To protect that budget the server ships
+a **curated lean set of 7 tools by default**:
+
+`get_minimal_context_tool`, `query_graph_tool`, `semantic_search_nodes_tool`,
+`detect_changes_tool`, `get_review_context_tool`, `get_impact_radius_tool`,
+`get_affected_flows_tool`.
+
+These cover every documented workflow (explore, review, impact, flows). When
+the server trims tools it prints a one-line notice to **stderr** so a reduced
+list is never silent.
 
-CRG exposes 30 MCP tools by default. In token-constrained environments, you can
-limit the server to a subset of tools using `--tools` or the `CRG_TOOLS`
-environment variable:
+Restore the full set, pick the curated set explicitly, or pass a custom list
+via `--tools` or the `CRG_TOOLS` environment variable:
 
 ```bash
-# Via CLI flag
-code-review-graph serve --tools query_graph_tool,semantic_search_nodes_tool,detect_changes_tool
+# All 30 tools
+code-review-graph serve --tools all
+CRG_TOOLS=all code-review-graph serve
+
+# The curated lean set (the default), spelled out
+code-review-graph serve --tools lean
 
-# Via environment variable
+# A custom subset
+code-review-graph serve --tools query_graph_tool,semantic_search_nodes_tool,detect_changes_tool
 CRG_TOOLS=query_graph_tool,semantic_search_nodes_tool code-review-graph serve
 ```
 
-The CLI flag takes precedence over the environment variable. When neither is set,
-all tools are available. This is especially useful for MCP client configurations:
+The CLI flag takes precedence over the environment variable, which takes
+precedence over the lean default. Unknown tool names are ignored gracefully.
+This is especially useful for MCP client configurations:
 
 ```json
 {
@@ -571,6 +611,10 @@ all tools are available. This is especially useful for MCP client configurations
 }
 ```
 
+You can also force a server-wide response verbosity with `--detail`
+(`minimal`/`standard`/`verbose`) or the `CRG_DETAIL_LEVEL` env var; it
+overrides each tool's per-call `detail_level`.
+
 </details>
 
 ---
diff --git a/code_review_graph/__init__.py b/code_review_graph/__init__.py
index 304f003a..921aa8c9 100644
--- a/code_review_graph/__init__.py
+++ b/code_review_graph/__init__.py
@@ -8,7 +8,7 @@
     format_context_savings,
 )
 
-__version__ = "2.3.6"
+__version__ = "2.4.0"
 
 __all__ = [
     "__version__",
diff --git a/code_review_graph/changes.py b/code_review_graph/changes.py
index 77dc25ad..bea8cd23 100644
--- a/code_review_graph/changes.py
+++ b/code_review_graph/changes.py
@@ -352,7 +352,10 @@ def analyze_changes(
     for node in changed_funcs:
         if node.is_test:
             continue
-        tested = store.get_edges_by_target(node.qualified_name)
+        # TESTED_BY edges are stored as source=production, target=test by the
+        # parser, so a changed production function finds its tests by source.
+        # See: #515
+        tested = store.get_edges_by_source(node.qualified_name)
         if not any(e.kind == "TESTED_BY" for e in tested):
             test_gaps.append({
                 "name": _sanitize_name(node.name),
diff --git a/code_review_graph/cli.py b/code_review_graph/cli.py
index 102ad8a8..af0f10f3 100644
--- a/code_review_graph/cli.py
+++ b/code_review_graph/cli.py
@@ -7,6 +7,7 @@
     code-review-graph update [--base BASE]
     code-review-graph watch
     code-review-graph status
+    code-review-graph doctor [--repo PATH]
     code-review-graph serve [--auto-watch] [--http] [--host ADDR] [--port PORT]
     code-review-graph mcp [--auto-watch]
     code-review-graph visualize
@@ -115,6 +116,7 @@ def _print_banner() -> None:
     {g}update{r}      Incremental update {d}(changed files only){r}
     {g}watch{r}       Auto-update on file changes
     {g}status{r}      Show graph statistics
+    {g}doctor{r}      Health checklist {d}(verify your install){r}
     {g}visualize{r}   Generate interactive HTML graph
     {g}wiki{r}        Generate markdown wiki from communities
     {g}detect-changes{r} Analyze change impact {d}(risk-scored review){r}
@@ -331,7 +333,8 @@ def _handle_init(args: argparse.Namespace) -> None:
     print()
     print("Next steps:")
     print("  1. code-review-graph build    # build the knowledge graph")
-    print("  2. Restart your AI coding tool to pick up the new config")
+    print("  2. code-review-graph doctor   # verify the install is healthy")
+    print("  3. Restart your AI coding tool to pick up the new config")
 
 
 def _handle_data_dir_option(args, repo_root: Path) -> None:
@@ -348,6 +351,17 @@ def _handle_data_dir_option(args, repo_root: Path) -> None:
             sys.exit(1)
 
 
+def _handle_doctor(args: argparse.Namespace) -> None:
+    """Run the health checklist and exit non-zero on any critical failure."""
+    from .doctor import print_report, run_doctor
+    from .incremental import find_project_root
+
+    repo_root = Path(args.repo) if args.repo else find_project_root()
+    results, exit_code = run_doctor(repo_root)
+    print_report(results, repo_root=repo_root)
+    sys.exit(exit_code)
+
+
 def main() -> None:
     """Main CLI entry point."""
     ap = argparse.ArgumentParser(
@@ -607,6 +621,13 @@ def main() -> None:
     # repos
     sub.add_parser("repos", help="List registered repositories")
 
+    # doctor
+    doctor_cmd = sub.add_parser(
+        "doctor",
+        help="Run a health checklist and print next-step hints",
+    )
+    doctor_cmd.add_argument("--repo", default=None, help="Repository root (auto-detected)")
+
     # eval
     eval_cmd = sub.add_parser("eval", help="Run evaluation benchmarks")
     eval_cmd.add_argument(
@@ -657,11 +678,21 @@ def main() -> None:
     )
     serve_cmd.add_argument(
         "--tools", default=None,
+        metavar="all|lean|<csv>",
         help=(
-            "Comma-separated list of tool names to expose "
-            "(e.g. query_graph_tool,semantic_search_nodes_tool). "
-            "Unlisted tools are removed. Falls back to CRG_TOOLS env var. "
-            "When unset, all tools are available."
+            "Which MCP tools to expose. 'all' = every tool (~30), 'lean' = the "
+            "curated low-token set (default), or a comma-separated list "
+            "(e.g. query_graph_tool,semantic_search_nodes_tool). Unlisted tools "
+            "are removed. Falls back to CRG_TOOLS env var, then 'lean'."
+        ),
+    )
+    serve_cmd.add_argument(
+        "--detail", default=None,
+        choices=["minimal", "standard", "verbose"],
+        help=(
+            "Server-wide detail_level override for all tools. Forces every "
+            "tool's response to this verbosity, overriding per-call defaults. "
+            "Falls back to the CRG_DETAIL_LEVEL env var."
         ),
     )
     serve_cmd.add_argument(
@@ -795,9 +826,13 @@ def main() -> None:
                     host=host,
                     port=port,
                     tools=args.tools,
+                    detail_level=args.detail,
                 )
             else:
-                serve_main(repo_root=args.repo, auto_watch=auto_watch, tools=args.tools)
+                serve_main(
+                    repo_root=args.repo, auto_watch=auto_watch,
+                    tools=args.tools, detail_level=args.detail,
+                )
         else:
             serve_main(repo_root=args.repo, auto_watch=auto_watch)
         return
@@ -872,6 +907,10 @@ def main() -> None:
         _handle_init(args)
         return
 
+    if args.command == "doctor":
+        _handle_doctor(args)
+        return
+
     if args.command in ("register", "unregister", "repos"):
         logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
         from .registry import Registry
diff --git a/code_review_graph/doctor.py b/code_review_graph/doctor.py
new file mode 100644
index 00000000..385e9a6e
--- /dev/null
+++ b/code_review_graph/doctor.py
@@ -0,0 +1,532 @@
+"""Health checklist for a code-review-graph install (`doctor` command).
+
+Runs a series of fast, read-only checks and prints a clean ``✓``/``✗`` line
+per check with an actionable next-step hint.  The orchestrator returns a
+non-zero exit code when a *critical* check fails (no graph, no git, server
+won't import) so the command can gate CI and install scripts; warnings such
+as a stale graph or missing embeddings never fail the exit code.
+
+This generalizes the single-purpose ``scripts/diagnose_pypi_connectivity.py``
+pattern (clear PASS/FAIL lines, exit-code-driven) into a reusable checklist.
+The connectivity diagnostic is intentionally left untouched.
+
+Checks performed:
+
+1. graph.db exists and has nodes (critical; hint: run ``build``)
+2. freshness — stored ``git_head_sha`` vs current HEAD (warning; hint ``update``)
+3. MCP config present for the repo (warning; hint: run ``install``)
+4. MCP server import/boot smoke — tool count > 0 (critical)
+5. platform-native hooks installed (warning; hint: run ``install``)
+6. embeddings present or note keyword-search fallback (informational)
+
+If a graph exists, the closing summary surfaces the latest ``detect-changes``
+Token Savings number as the "see your savings" proof.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import sqlite3
+import subprocess
+from dataclasses import dataclass, field
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Short, hard cap so a wedged git never hangs `doctor`.  Honors the same
+# env override the rest of the codebase uses for git timeouts.
+_GIT_TIMEOUT = int(os.environ.get("CRG_GIT_TIMEOUT", "10"))
+
+
+@dataclass
+class CheckResult:
+    """The outcome of a single health check.
+
+    Attributes:
+        name: Short, stable identifier for the check (used in the printed line).
+        ok: True when the check passed.
+        message: One-line human-readable status.
+        critical: When True, a failure (``ok is False``) makes ``doctor`` exit
+            non-zero.  Warnings (``critical is False``) are surfaced but never
+            fail the exit code.
+        hint: Optional actionable next-step shown when the check is not ok.
+    """
+
+    name: str
+    ok: bool
+    message: str = ""
+    critical: bool = False
+    hint: str | None = None
+    # Optional structured extras a caller may want (e.g. node counts).
+    extra: dict = field(default_factory=dict)
+
+    @property
+    def glyph(self) -> str:
+        return "✓" if self.ok else "✗"
+
+
+# ---------------------------------------------------------------------------
+# Safe git helper (list args, no shell, stdin closed, bounded timeout)
+# ---------------------------------------------------------------------------
+
+
+def _current_head_sha(repo_root: Path) -> str:
+    """Return the current ``HEAD`` sha via a safe git subprocess, or ``""``.
+
+    Mirrors :func:`code_review_graph.incremental._git_branch_info` semantics —
+    list-form args, ``shell=False`` (default), ``stdin=subprocess.DEVNULL`` so
+    git can never block on a prompt, and a bounded timeout.
+    """
+    try:
+        result = subprocess.run(
+            ["git", "rev-parse", "HEAD"],
+            capture_output=True,
+            text=True,
+            encoding="utf-8",
+            cwd=str(repo_root),
+            timeout=_GIT_TIMEOUT,
+            stdin=subprocess.DEVNULL,
+        )
+    except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as exc:
+        logger.debug("git rev-parse HEAD failed in %s: %s", repo_root, exc)
+        return ""
+    if result.returncode == 0:
+        return result.stdout.strip()
+    return ""
+
+
+# ---------------------------------------------------------------------------
+# Individual checks
+# ---------------------------------------------------------------------------
+
+
+def check_graph_db(repo_root: Path) -> CheckResult:
+    """Check that a graph database exists and contains nodes (critical)."""
+    from .incremental import get_db_path
+
+    db_path = get_db_path(repo_root)
+    if not db_path.exists():
+        return CheckResult(
+            name="graph",
+            ok=False,
+            critical=True,
+            message=f"No graph database at {db_path}",
+            hint="Run `code-review-graph build` to create it.",
+        )
+
+    from .graph import GraphStore
+
+    store = GraphStore(db_path)
+    try:
+        stats = store.get_stats()
+    finally:
+        store.close()
+
+    if stats.total_nodes <= 0:
+        return CheckResult(
+            name="graph",
+            ok=False,
+            critical=True,
+            message="Graph database exists but contains 0 nodes",
+            hint="Run `code-review-graph build` to (re)parse your codebase.",
+        )
+
+    langs = ", ".join(stats.languages[:5]) if stats.languages else "—"
+    return CheckResult(
+        name="graph",
+        ok=True,
+        message=(
+            f"{stats.total_nodes:,} nodes, {stats.total_edges:,} edges, "
+            f"{stats.files_count:,} files ({langs})"
+        ),
+        extra={"total_nodes": stats.total_nodes},
+    )
+
+
+def check_freshness(repo_root: Path) -> CheckResult:
+    """Compare stored ``git_head_sha`` to current HEAD (warning only)."""
+    from .incremental import detect_vcs, get_db_path
+
+    db_path = get_db_path(repo_root)
+    if not db_path.exists():
+        # The graph check already flags this as critical; here we just skip.
+        return CheckResult(
+            name="freshness",
+            ok=False,
+            critical=False,
+            message="No graph to compare — build first",
+            hint="Run `code-review-graph build`.",
+        )
+
+    if detect_vcs(repo_root) != "git":
+        return CheckResult(
+            name="freshness",
+            ok=True,
+            message="Not a git repo — freshness check skipped",
+        )
+
+    from .graph import GraphStore
+
+    store = GraphStore(db_path)
+    try:
+        stored_sha = store.get_metadata("git_head_sha") or ""
+    finally:
+        store.close()
+
+    current_sha = _current_head_sha(repo_root)
+
+    if not stored_sha:
+        return CheckResult(
+            name="freshness",
+            ok=False,
+            critical=False,
+            message="Graph has no recorded git HEAD",
+            hint="Run `code-review-graph build` to record the current commit.",
+        )
+    if not current_sha:
+        # git is unavailable or detached in a way we can't read — don't punish.
+        return CheckResult(
+            name="freshness",
+            ok=True,
+            message="Could not read current HEAD — assuming fresh",
+        )
+
+    if stored_sha == current_sha:
+        return CheckResult(
+            name="freshness",
+            ok=True,
+            message=f"Graph matches current HEAD ({current_sha[:12]})",
+        )
+    return CheckResult(
+        name="freshness",
+        ok=False,
+        critical=False,
+        message=(
+            f"Graph built at {stored_sha[:12]}, HEAD is now {current_sha[:12]}"
+        ),
+        hint="Run `code-review-graph update` to refresh the graph.",
+    )
+
+
+def check_mcp_config(repo_root: Path) -> CheckResult:
+    """Check that at least one MCP config file is present (warning only).
+
+    Reuses :data:`code_review_graph.skills.PLATFORMS` so the set of config
+    paths stays in lock-step with what ``install`` writes.  Only repo-local
+    config paths are considered authoritative here (``.mcp.json``,
+    ``.cursor/mcp.json``, ``.kiro/...``, etc.) — user-level config under
+    ``$HOME`` is not specific to this checkout and would produce false
+    positives.
+    """
+    from .skills import PLATFORMS
+
+    repo_root = repo_root.resolve()
+    found: list[str] = []
+    for plat in PLATFORMS.values():
+        try:
+            config_path = Path(plat["config_path"](repo_root)).resolve()
+        except (OSError, TypeError):
+            continue
+        # Only count config files that live inside the repo tree.
+        try:
+            config_path.relative_to(repo_root)
+        except ValueError:
+            continue
+        if config_path.exists():
+            found.append(config_path.name)
+
+    if not found:
+        return CheckResult(
+            name="mcp-config",
+            ok=False,
+            critical=False,
+            message="No repo-local MCP config found",
+            hint="Run `code-review-graph install` to configure your AI tools.",
+        )
+
+    # De-dup while preserving order (several platforms can share .mcp.json).
+    seen: dict[str, None] = {}
+    for name in found:
+        seen.setdefault(name, None)
+    return CheckResult(
+        name="mcp-config",
+        ok=True,
+        message=f"MCP config present: {', '.join(seen)}",
+    )
+
+
+def check_serve_command() -> CheckResult:
+    """Check that the detected serve command resolves to a runnable launcher."""
+    from .skills import _detect_serve_command
+
+    command, args = _detect_serve_command()
+    return CheckResult(
+        name="serve-cmd",
+        ok=True,
+        message=f"Serve command: {command} {' '.join(args)}",
+    )
+
+
+def check_server_boot(repo_root: Path) -> CheckResult:
+    """Import the MCP server module and confirm tool count > 0 (critical).
+
+    Kept fast: importing ``code_review_graph.main`` registers the FastMCP
+    tools as a side effect, so we can count them without booting a transport.
+    """
+    try:
+        from . import main as server_main
+    except Exception as exc:  # noqa: BLE001 — import can fail many ways
+        return CheckResult(
+            name="server",
+            ok=False,
+            critical=True,
+            message=f"MCP server failed to import: {exc}",
+            hint="Reinstall: `pip install --force-reinstall code-review-graph`.",
+        )
+
+    count = _count_registered_tools(server_main)
+    if count <= 0:
+        return CheckResult(
+            name="server",
+            ok=False,
+            critical=True,
+            message="MCP server imported but registered 0 tools",
+            hint="Reinstall: `pip install --force-reinstall code-review-graph`.",
+        )
+    return CheckResult(
+        name="server",
+        ok=True,
+        message=f"MCP server imports cleanly with {count} tools",
+        extra={"tool_count": count},
+    )
+
+
+def _count_registered_tools(server_main) -> int:  # type: ignore[no-untyped-def]
+    """Count registered FastMCP tools without booting the event loop.
+
+    FastMCP >=3 exposes an async ``list_tools``.  We run it on a short-lived
+    loop; on any failure we fall back to 0 so the caller flags the problem.
+    """
+    import asyncio
+
+    mcp = getattr(server_main, "mcp", None)
+    if mcp is None:
+        return 0
+    list_tools = getattr(mcp, "list_tools", None)
+    if list_tools is None:
+        return 0
+
+    def _runner() -> int:
+        return len(asyncio.run(list_tools()))
+
+    try:
+        asyncio.get_running_loop()
+    except RuntimeError:
+        # No loop running — safe to use asyncio.run directly.
+        try:
+            return _runner()
+        except Exception as exc:  # noqa: BLE001
+            logger.debug("list_tools failed: %s", exc)
+            return 0
+
+    # Already inside a loop (e.g. called from async test): use a worker thread.
+    import concurrent.futures
+
+    try:
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+            return pool.submit(_runner).result()
+    except Exception as exc:  # noqa: BLE001
+        logger.debug("list_tools (threaded) failed: %s", exc)
+        return 0
+
+
+def check_hooks(repo_root: Path) -> CheckResult:
+    """Check whether platform-native or git hooks are installed (warning)."""
+    found: list[str] = []
+
+    # Git pre-commit hook installed by install_git_hook (marker string).
+    pre_commit = repo_root / ".git" / "hooks" / "pre-commit"
+    try:
+        if pre_commit.exists() and "code-review-graph detect-changes" in (
+            pre_commit.read_text(encoding="utf-8", errors="replace")
+        ):
+            found.append("git pre-commit")
+    except OSError:
+        pass
+
+    # Claude / Qoder settings.json hooks.
+    for plat in ("claude", "qoder"):
+        settings = repo_root / f".{plat}" / "settings.json"
+        try:
+            if settings.exists() and "code-review-graph" in (
+                settings.read_text(encoding="utf-8", errors="replace")
+            ):
+                found.append(f"{plat} settings")
+        except OSError:
+            pass
+
+    if not found:
+        return CheckResult(
+            name="hooks",
+            ok=False,
+            critical=False,
+            message="No code-review-graph hooks detected",
+            hint="Run `code-review-graph install` to auto-update on changes.",
+        )
+    return CheckResult(
+        name="hooks",
+        ok=True,
+        message=f"Hooks installed: {', '.join(found)}",
+    )
+
+
+def check_embeddings(repo_root: Path) -> CheckResult:
+    """Check whether vector embeddings are present (informational only).
+
+    Reads the ``embeddings`` table directly with a read-only sqlite query so
+    we never load a model provider (which would be slow and could require a
+    network call).  Absence is never an error — search degrades gracefully to
+    keyword (FTS5) matching.
+    """
+    from .incremental import get_db_path
+
+    db_path = get_db_path(repo_root)
+    count = 0
+    if db_path.exists():
+        try:
+            conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, timeout=5)
+            try:
+                row = conn.execute(
+                    "SELECT COUNT(*) FROM embeddings"
+                ).fetchone()
+                count = int(row[0]) if row else 0
+            finally:
+                conn.close()
+        except sqlite3.Error as exc:
+            # No embeddings table yet — that's the common, fine case.
+            logger.debug("embeddings count query failed: %s", exc)
+            count = 0
+
+    if count > 0:
+        return CheckResult(
+            name="embeddings",
+            ok=True,
+            message=f"{count:,} embeddings present — semantic search active",
+        )
+    return CheckResult(
+        name="embeddings",
+        ok=True,  # not having embeddings is a fine, fully-supported state
+        critical=False,
+        message=(
+            "No embeddings — semantic search falls back to keyword (FTS5) "
+            "matching"
+        ),
+        hint="Optional: `code-review-graph embed --provider local`.",
+    )
+
+
+# ---------------------------------------------------------------------------
+# Token-savings proof (closing line)
+# ---------------------------------------------------------------------------
+
+
+def latest_token_savings(repo_root: Path) -> str | None:
+    """Return a one-line "see your savings" proof, or ``None``.
+
+    Runs the same read-only ``detect-changes`` analysis the CLI uses and
+    reports the estimated saved tokens/percent for the current change set.
+    Best-effort: any failure (no graph, no changes, git error) returns
+    ``None`` so the closing line is simply omitted.
+    """
+    try:
+        from .changes import analyze_changes
+        from .context_savings import (
+            attach_context_savings,
+            estimate_file_tokens,
+            format_context_savings,
+        )
+        from .graph import GraphStore
+        from .incremental import (
+            get_changed_files,
+            get_db_path,
+            get_staged_and_unstaged,
+        )
+
+        db_path = get_db_path(repo_root)
+        if not db_path.exists():
+            return None
+
+        changed = get_changed_files(repo_root, "HEAD~1")
+        if not changed:
+            changed = get_staged_and_unstaged(repo_root)
+        if not changed:
+            return None
+
+        store = GraphStore(db_path)
+        try:
+            result = analyze_changes(
+                store, changed, repo_root=str(repo_root), base="HEAD~1"
+            )
+        finally:
+            store.close()
+
+        original_tokens = estimate_file_tokens(repo_root, changed)
+        attach_context_savings(result, original_tokens=original_tokens)
+        return format_context_savings(result.get("context_savings"))
+    except Exception as exc:  # noqa: BLE001 — proof line is purely best-effort
+        logger.debug("latest_token_savings failed: %s", exc)
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Orchestrator + report
+# ---------------------------------------------------------------------------
+
+
+def run_doctor(repo_root: Path) -> tuple[list[CheckResult], int]:
+    """Run all checks and return ``(results, exit_code)``.
+
+    Exit code is ``1`` when any *critical* check failed, else ``0``.
+    """
+    results: list[CheckResult] = [
+        check_graph_db(repo_root),
+        check_freshness(repo_root),
+        check_mcp_config(repo_root),
+        check_serve_command(),
+        check_server_boot(repo_root),
+        check_hooks(repo_root),
+        check_embeddings(repo_root),
+    ]
+    exit_code = 1 if any(r.critical and not r.ok for r in results) else 0
+    return results, exit_code
+
+
+def print_report(results: list[CheckResult], *, repo_root: Path) -> None:
+    """Print the ``✓``/``✗`` checklist, hints, and a one-line summary."""
+    print(f"code-review-graph doctor — {repo_root}")
+    print()
+    for r in results:
+        print(f"  {r.glyph} {r.name}: {r.message}")
+        if not r.ok and r.hint:
+            print(f"      → {r.hint}")
+
+    critical_failures = [r for r in results if r.critical and not r.ok]
+    warnings = [r for r in results if not r.critical and not r.ok]
+
+    print()
+    if critical_failures:
+        names = ", ".join(r.name for r in critical_failures)
+        print(f"Result: NOT healthy — {len(critical_failures)} critical "
+              f"issue(s): {names}")
+    elif warnings:
+        print(f"Result: healthy with {len(warnings)} warning(s) — "
+              f"see hints above")
+    else:
+        print("Result: healthy — all checks passed")
+
+    # See-your-savings proof when a graph exists.
+    proof = latest_token_savings(repo_root)
+    if proof:
+        print()
+        print(f"See your savings: {proof}")
diff --git a/code_review_graph/enrich.py b/code_review_graph/enrich.py
index f95c334a..9b381e41 100644
--- a/code_review_graph/enrich.py
+++ b/code_review_graph/enrich.py
@@ -159,10 +159,12 @@ def _format_node_context(
         lines.append(f"  Flows: {', '.join(flow_names)}")
 
     # Tests
+    # TESTED_BY edges are stored as source=production, target=test by the
+    # parser, so look them up by source. See: #515
     tests: list[str] = []
-    for e in store.get_edges_by_target(qn):
+    for e in store.get_edges_by_source(qn):
         if e.kind == "TESTED_BY" and len(tests) < 3:
-            t = store.get_node(e.source_qualified)
+            t = store.get_node(e.target_qualified)
             if t:
                 tests.append(t.name)
     if tests:
diff --git a/code_review_graph/eval/reporter.py b/code_review_graph/eval/reporter.py
index 3a21985e..9bbff907 100644
--- a/code_review_graph/eval/reporter.py
+++ b/code_review_graph/eval/reporter.py
@@ -7,6 +7,7 @@
 from __future__ import annotations
 
 import csv
+import statistics
 from pathlib import Path
 from typing import Any
 
@@ -91,6 +92,103 @@ def _md_table(headers: list[str], rows: list[list[str]]) -> str:
     return "\n".join(lines)
 
 
+def _row_reduction_percent(naive: str, graph: str) -> float | None:
+    """Per-row token reduction = (1 - graph/naive) * 100, or None if unusable."""
+    try:
+        n = float(naive)
+        g = float(graph)
+    except (TypeError, ValueError):
+        return None
+    if n <= 0 or g < 0:
+        return None
+    return (1.0 - g / n) * 100.0
+
+
+def median_token_reduction(results_dir: str | Path) -> dict[str, Any]:
+    """Compute the median per-question token-reduction from a results dir.
+
+    The ``agent_baseline`` benchmark is the genuinely per-question source —
+    each row is one agent question, with ``baseline_tokens`` (grep top-k file
+    content) versus ``graph_tokens`` (graph query cost). The reduction for a
+    row is ``(1 - graph/baseline) * 100``. Rows with ``status != "ok"`` are
+    excluded (they are kept in the CSV for forensics only).
+
+    Falls back to the ``token_efficiency`` benchmark (naive full-file content
+    vs ``get_review_context`` JSON) when no agent-baseline rows are present.
+
+    Returns a dict with ``source``, ``n`` (sample count), ``median_percent``,
+    ``min_percent``, ``max_percent``. ``median_percent`` is ``None`` when no
+    usable rows exist (empty/failed run), so callers can render a clean
+    "no data" cell instead of a misleading number.
+    """
+    results_dir = Path(results_dir)
+
+    # Prefer agent_baseline: it is per-question by construction.
+    source = "agent_baseline"
+    pairs = [
+        (r.get("baseline_tokens", ""), r.get("graph_tokens", ""))
+        for r in _read_csvs(results_dir, "agent_baseline")
+        if (r.get("status", "ok") or "ok") == "ok"
+    ]
+    if not pairs:
+        source = "token_efficiency"
+        pairs = [
+            (r.get("naive_tokens", ""), r.get("graph_tokens", ""))
+            for r in _read_csvs(results_dir, "token_efficiency")
+            if (r.get("status", "ok") or "ok") == "ok"
+        ]
+
+    reductions = [
+        pct
+        for naive, graph in pairs
+        if (pct := _row_reduction_percent(naive, graph)) is not None
+    ]
+
+    if not reductions:
+        return {
+            "source": source,
+            "n": 0,
+            "median_percent": None,
+            "min_percent": None,
+            "max_percent": None,
+        }
+
+    return {
+        "source": source,
+        "n": len(reductions),
+        "median_percent": round(statistics.median(reductions), 1),
+        "min_percent": round(min(reductions), 1),
+        "max_percent": round(max(reductions), 1),
+    }
+
+
+def median_token_reduction_table(results_dir: str | Path) -> str:
+    """Render :func:`median_token_reduction` as a one-row markdown table.
+
+    Designed for the GitHub Actions job summary in ``.github/workflows/eval.yml``.
+    Honest by construction: when a run produced no usable rows the cells read
+    ``n/a`` rather than fabricating a number.
+    """
+    stats = median_token_reduction(results_dir)
+    headers = [
+        "Metric", "Source", "Questions (n)", "Median reduction",
+        "Min", "Max",
+    ]
+
+    def _fmt(value: Any) -> str:
+        return "n/a" if value is None else f"{value}%"
+
+    row = [
+        "Per-question token reduction",
+        f"`{stats['source']}`",
+        str(stats["n"]),
+        _fmt(stats["median_percent"]),
+        _fmt(stats["min_percent"]),
+        _fmt(stats["max_percent"]),
+    ]
+    return _md_table(headers, [row])
+
+
 def generate_full_report(results_dir: str | Path) -> str:
     """Generate a full markdown evaluation report from CSV result files.
 
diff --git a/code_review_graph/graph.py b/code_review_graph/graph.py
index 4ed3c7fc..dc6c33e4 100644
--- a/code_review_graph/graph.py
+++ b/code_review_graph/graph.py
@@ -377,7 +377,10 @@ def get_transitive_tests(
     ) -> list[dict]:
         """Find tests covering a node, including indirect (transitive) coverage.
 
-        1. Direct: TESTED_BY edges targeting this node (+ bare-name fallback).
+        TESTED_BY edges are stored as source=production, target=test by
+        the parser, so look them up by source_qualified. See: #515
+
+        1. Direct: TESTED_BY edges originating at this node (+ bare-name fallback).
         2. Indirect: follow outgoing CALLS edges up to *max_depth* hops,
            then collect TESTED_BY edges on each callee.
 
@@ -421,31 +424,31 @@ def _node_dict(qn: str, indirect: bool) -> dict | None:
                 "indirect": indirect,
             }
 
-        # Direct TESTED_BY
+        # Direct TESTED_BY (source=production, target=test). See: #515
         for qn in input_qns:
             for row in conn.execute(
-                "SELECT source_qualified FROM edges "
-                "WHERE target_qualified = ? AND kind = 'TESTED_BY'",
+                "SELECT target_qualified FROM edges "
+                "WHERE source_qualified = ? AND kind = 'TESTED_BY'",
                 (qn,),
             ).fetchall():
-                src = row["source_qualified"]
-                if src not in seen:
-                    seen.add(src)
-                    d = _node_dict(src, indirect=False)
+                tgt = row["target_qualified"]
+                if tgt not in seen:
+                    seen.add(tgt)
+                    d = _node_dict(tgt, indirect=False)
                     if d:
                         results.append(d)
 
         # Bare-name fallback for direct
         bare = qualified_name.rsplit("::", 1)[-1] if "::" in qualified_name else qualified_name
         for row in conn.execute(
-            "SELECT source_qualified FROM edges "
-            "WHERE target_qualified = ? AND kind = 'TESTED_BY'",
+            "SELECT target_qualified FROM edges "
+            "WHERE source_qualified = ? AND kind = 'TESTED_BY'",
             (bare,),
         ).fetchall():
-            src = row["source_qualified"]
-            if src not in seen:
-                seen.add(src)
-                d = _node_dict(src, indirect=False)
+            tgt = row["target_qualified"]
+            if tgt not in seen:
+                seen.add(tgt)
+                d = _node_dict(tgt, indirect=False)
                 if d:
                     results.append(d)
 
@@ -464,14 +467,14 @@ def _node_dict(qn: str, indirect: bool) -> dict | None:
                 next_frontier = set(list(next_frontier)[:max_frontier])
             for callee in next_frontier:
                 for row in conn.execute(
-                    "SELECT source_qualified FROM edges "
-                    "WHERE target_qualified = ? AND kind = 'TESTED_BY'",
+                    "SELECT target_qualified FROM edges "
+                    "WHERE source_qualified = ? AND kind = 'TESTED_BY'",
                     (callee,),
                 ).fetchall():
-                    src = row["source_qualified"]
-                    if src not in seen:
-                        seen.add(src)
-                        d = _node_dict(src, indirect=True)
+                    tgt = row["target_qualified"]
+                    if tgt not in seen:
+                        seen.add(tgt)
+                        d = _node_dict(tgt, indirect=True)
                         if d:
                             results.append(d)
             frontier = next_frontier
@@ -1269,8 +1272,8 @@ def load_flow_adjacency(self) -> "FlowAdjacency":
             kind, src, tgt = row["kind"], row["source_qualified"], row["target_qualified"]
             if kind == "CALLS":
                 calls_out.setdefault(src, []).append(tgt)
-            else:  # TESTED_BY
-                has_tested_by.add(tgt)
+            else:  # TESTED_BY: source is the production node being tested. See: #515
+                has_tested_by.add(src)
 
         return FlowAdjacency(
             calls_out=calls_out,
diff --git a/code_review_graph/main.py b/code_review_graph/main.py
index 629b1674..399ad740 100644
--- a/code_review_graph/main.py
+++ b/code_review_graph/main.py
@@ -82,6 +82,65 @@ def _resolve_repo_root(repo_root: Optional[str]) -> Optional[str]:
     return repo_root if repo_root else _default_repo_root
 
 
+# The curated "lean" allow-list loaded by default.  CRG registers ~30 MCP
+# tools; shipping every description costs ~8k tokens per LLM turn before any
+# work happens.  This set is the smallest one that still covers every
+# documented workflow (see CLAUDE.md "When to use graph tools FIRST" and the
+# prompt templates in prompts.py):
+#
+#   - get_minimal_context_tool   : the mandated entry point (~100 tokens).
+#   - query_graph_tool           : callers/callees/imports/tests tracing.
+#   - semantic_search_nodes_tool : find entities by name / keyword.
+#   - detect_changes_tool        : primary risk-scored review tool.
+#   - get_review_context_tool    : source snippets for review.
+#   - get_impact_radius_tool     : blast-radius analysis.
+#   - get_affected_flows_tool    : which execution paths a change touches.
+#
+# The remaining ~23 tools (wiki, communities, refactor, viz, embeddings,
+# build/postprocess, cross-repo, analysis explorers, docs) stay one
+# ``--tools all`` / ``CRG_TOOLS=all`` away.
+LEAN_TOOLS: tuple[str, ...] = (
+    "get_minimal_context_tool",
+    "query_graph_tool",
+    "semantic_search_nodes_tool",
+    "detect_changes_tool",
+    "get_review_context_tool",
+    "get_impact_radius_tool",
+    "get_affected_flows_tool",
+)
+
+# Valid detail levels accepted by the per-tool ``detail_level`` parameter and
+# the server-wide ``CRG_DETAIL_LEVEL`` / ``serve --detail`` override.
+_VALID_DETAIL_LEVELS = ("minimal", "standard", "verbose")
+
+# Server-wide detail-level override.  ``None`` means "use whatever the tool /
+# caller asked for".  Set from ``serve --detail`` or the ``CRG_DETAIL_LEVEL``
+# env var.  Tool wrappers route their ``detail_level`` argument through
+# ``_resolve_detail_level`` so a single flag can force every tool minimal.
+_detail_level_override: str | None = None
+
+
+def _resolve_detail_level(detail_level: str) -> str:
+    """Resolve the effective detail level for a tool call.
+
+    Precedence:
+    1. Server-wide override (``serve --detail`` / ``CRG_DETAIL_LEVEL``) when set.
+    2. The per-call ``detail_level`` argument (default ``"minimal"``).
+
+    Unknown values fall through unchanged — the underlying tool treats any
+    non-``"minimal"`` value as full output, so an invalid override never
+    silently expands the response.
+    """
+    override = _detail_level_override
+    if override is None:
+        override = os.environ.get("CRG_DETAIL_LEVEL") or None
+    if override:
+        override = override.strip().lower()
+        if override in _VALID_DETAIL_LEVELS:
+            return override
+    return detail_level
+
+
 mcp = FastMCP(
     "code-review-graph",
     instructions=(
@@ -192,7 +251,7 @@ def get_impact_radius_tool(
     max_depth: int = 2,
     repo_root: Optional[str] = None,
     base: str = "HEAD~1",
-    detail_level: str = "standard",
+    detail_level: str = "minimal",
 ) -> dict:
     """Analyze the blast radius of changed files in the codebase.
 
@@ -204,11 +263,12 @@ def get_impact_radius_tool(
         max_depth: Number of hops to traverse in the dependency graph. Default: 2.
         repo_root: Repository root path. Auto-detected if omitted.
         base: Git ref for auto-detecting changes. Default: HEAD~1.
-        detail_level: "standard" for full output, "minimal" for compact summary. Default: standard.
+        detail_level: "minimal" (default) for a compact summary; "standard" for full output.
     """
     return get_impact_radius(
         changed_files=changed_files, max_depth=max_depth,
-        repo_root=_resolve_repo_root(repo_root), base=base, detail_level=detail_level,
+        repo_root=_resolve_repo_root(repo_root), base=base,
+        detail_level=_resolve_detail_level(detail_level),
     )
 
 
@@ -217,7 +277,8 @@ def query_graph_tool(
     pattern: str,
     target: str,
     repo_root: Optional[str] = None,
-    detail_level: str = "standard",
+    detail_level: str = "minimal",
+    max_results: int = 100,
 ) -> dict:
     """Run a predefined graph query to explore code relationships.
 
@@ -235,11 +296,13 @@ def query_graph_tool(
         pattern: Query pattern name (see above).
         target: Node name, qualified name, or file path to query.
         repo_root: Repository root path. Auto-detected if omitted.
-        detail_level: "standard" for full output, "minimal" for compact summary. Default: standard.
+        detail_level: "minimal" (default) for a compact summary; "standard" for full output.
+        max_results: Cap on results returned so callers_of/callees_of on a hot
+            symbol cannot return an unbounded payload. Default: 100.
     """
     return query_graph(
         pattern=pattern, target=target, repo_root=_resolve_repo_root(repo_root),
-        detail_level=detail_level,
+        detail_level=_resolve_detail_level(detail_level), max_results=max_results,
     )
 
 
@@ -252,6 +315,7 @@ def get_review_context_tool(
     repo_root: Optional[str] = None,
     base: str = "HEAD~1",
     detail_level: str = "standard",
+    max_tokens: int = 6000,
 ) -> dict:
     """Generate a focused, token-efficient review context for code changes.
 
@@ -267,11 +331,15 @@ def get_review_context_tool(
         base: Git ref for change detection. Default: HEAD~1.
         detail_level: "standard" for full output, "minimal" for
             token-efficient summary. Default: standard.
+        max_tokens: Token budget for the response. When the full context would
+            exceed this, the lowest-risk source snippets are dropped first and
+            an honest ``omitted`` note is added. Default: 6000. Set 0 to disable.
     """
     return get_review_context(
         changed_files=changed_files, max_depth=max_depth,
         include_source=include_source, max_lines_per_file=max_lines_per_file,
-        repo_root=_resolve_repo_root(repo_root), base=base, detail_level=detail_level,
+        repo_root=_resolve_repo_root(repo_root), base=base,
+        detail_level=_resolve_detail_level(detail_level), max_tokens=max_tokens,
     )
 
 
@@ -283,7 +351,7 @@ def semantic_search_nodes_tool(
     repo_root: Optional[str] = None,
     model: Optional[str] = None,
     provider: Optional[str] = None,
-    detail_level: str = "standard",
+    detail_level: str = "minimal",
 ) -> dict:
     """Search for code entities by name, keyword, or semantic similarity.
 
@@ -303,11 +371,11 @@ def semantic_search_nodes_tool(
                (local) or CRG_OPENAI_MODEL (openai).
         provider: Embedding provider: "local" (default), "openai", "google",
                   or "minimax". Must match the provider used during embed_graph.
-        detail_level: "standard" for full output, "minimal" for compact summary. Default: standard.
+        detail_level: "minimal" (default) for a compact summary; "standard" for full output.
     """
     return semantic_search_nodes(
         query=query, kind=kind, limit=limit, repo_root=_resolve_repo_root(repo_root),
-        model=model, provider=provider, detail_level=detail_level,
+        model=model, provider=provider, detail_level=_resolve_detail_level(detail_level),
     )
 
 
@@ -578,7 +646,8 @@ async def detect_changes_tool(
     include_source: bool = False,
     max_depth: int = 2,
     repo_root: Optional[str] = None,
-    detail_level: str = "standard",
+    detail_level: str = "minimal",
+    max_tokens: int = 6000,
 ) -> dict:
     """Detect changes and produce risk-scored, priority-ordered review guidance.
 
@@ -596,14 +665,19 @@ async def detect_changes_tool(
         include_source: Include source code snippets for changed functions. Default: False.
         max_depth: Impact radius depth for BFS traversal. Default: 2.
         repo_root: Repository root path. Auto-detected if omitted.
-        detail_level: "standard" for full output, "minimal" for
-            token-efficient summary. Default: standard.
+        detail_level: "minimal" (default) for a token-efficient summary;
+            "standard" for full output.
+        max_tokens: Token budget for the standard response. When the full
+            analysis would exceed this, the lowest-risk changed functions and
+            flows are dropped first and an honest ``omitted`` note is added.
+            Default: 6000. Set 0 to disable.
     """
     coro = asyncio.to_thread(
         detect_changes_func,
         base=base, changed_files=changed_files,
         include_source=include_source, max_depth=max_depth,
-        repo_root=_resolve_repo_root(repo_root), detail_level=detail_level,
+        repo_root=_resolve_repo_root(repo_root),
+        detail_level=_resolve_detail_level(detail_level), max_tokens=max_tokens,
     )
     tool_timeout = int(os.environ.get("CRG_TOOL_TIMEOUT", "0"))
     if tool_timeout > 0:
@@ -942,41 +1016,73 @@ def pre_merge_check(base: str = "HEAD~1") -> list[dict]:
     return pre_merge_check_prompt(base=base)
 
 
+def _resolve_tool_allowlist(tools: str | None) -> set[str] | None:
+    """Resolve the requested tool spec into a concrete allow-list of names.
+
+    Returns ``None`` when **all** tools should remain (no trim).  Returns a
+    set of tool names to keep otherwise.
+
+    Accepts (first match wins): the ``tools`` argument (``serve --tools``),
+    then the ``CRG_TOOLS`` env var, then defaults to the lean set.
+
+    Recognised keywords (case-insensitive):
+    - ``"all"``   -> keep every registered tool (returns ``None``).
+    - ``"lean"``  -> the curated :data:`LEAN_TOOLS` set.
+    - anything else is parsed as a comma-separated list of tool names.
+
+    An empty / whitespace-only spec also returns ``None`` (keep all): an
+    explicit empty value must never silently remove every tool.
+    """
+    raw = tools if tools is not None else os.environ.get("CRG_TOOLS")
+    if raw is None:
+        # Nothing specified anywhere -> lean is the default.
+        return set(LEAN_TOOLS)
+    raw = raw.strip()
+    if not raw:
+        return None
+    if raw.lower() == "all":
+        return None
+    if raw.lower() == "lean":
+        return set(LEAN_TOOLS)
+    allowed = {t.strip() for t in raw.split(",") if t.strip()}
+    if not allowed:
+        return None
+    return allowed
+
+
 def _apply_tool_filter(tools: str | None = None) -> None:
-    """Remove tools not listed in the allow-list.
+    """Trim registered MCP tools down to an allow-list.
 
-    Accepts a comma-separated string of tool names to keep.  When set,
-    every registered MCP tool whose name is **not** in the list is
-    removed via ``FastMCP.remove_tool()``.
+    CRG registers ~30 MCP tools; shipping every description costs ~8k tokens
+    per LLM turn before any work happens.  To keep the token moat intact the
+    server loads only the curated :data:`LEAN_TOOLS` set **by default**.
 
-    The allow-list can be supplied in two ways (first match wins):
+    The allow-list is resolved by :func:`_resolve_tool_allowlist` from, in
+    order: the ``tools`` argument (``serve --tools ...``), the ``CRG_TOOLS``
+    env var, then the lean default.  Pass ``"all"`` (CLI or env) to restore
+    every tool, ``"lean"`` for the explicit curated set, or a comma-separated
+    list for a custom set.  Unknown names are ignored gracefully.
 
-    1. ``tools`` argument (from ``serve --tools ...``).
-    2. ``CRG_TOOLS`` environment variable.
+    A one-line notice is written to **stderr** whenever tools are trimmed, so
+    a reduced tool list is never silent or confusing.  (stdout stays clean for
+    the JSON-RPC stdio transport.)
 
-    When neither is set, all tools remain available.
+    Examples::
 
-    This is useful for token-constrained environments: CRG exposes 28+
-    tools by default (~8k description tokens per LLM turn).  Filtering
-    to a working set of 5-10 tools can reduce overhead by 70-85%.
+        # default — lean set (7 tools)
+        code-review-graph serve
 
-    Example::
+        # restore everything
+        code-review-graph serve --tools all
+        CRG_TOOLS=all
 
-        # via CLI
+        # custom set
         code-review-graph serve --tools query_graph_tool,semantic_search_nodes_tool
-
-        # via env var
-        CRG_TOOLS=query_graph_tool,semantic_search_nodes_tool
     """
-    import asyncio
-    import os
-
-    raw = tools or os.environ.get("CRG_TOOLS")
-    if not raw:
-        return
-    allowed = {t.strip() for t in raw.split(",") if t.strip()}
-    if not allowed:
+    allowed = _resolve_tool_allowlist(tools)
+    if allowed is None:
         return
+
     # FastMCP >=3 exposes tool enumeration via the async ``list_tools``
     # method.  ``_apply_tool_filter`` is typically called from
     # ``main()`` before the MCP event loop starts, but tests may invoke
@@ -999,9 +1105,24 @@ def _runner() -> list[str]:
         with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
             return pool.submit(_runner).result()
 
-    for name in _list_tool_names():
+    registered = _list_tool_names()
+    removed = 0
+    for name in registered:
         if name not in allowed:
             mcp.local_provider.remove_tool(name)
+            removed += 1
+
+    if removed:
+        kept = len(registered) - removed
+        # Note: never write to stdout — the stdio transport speaks JSON-RPC
+        # there.  stderr is safe and visible to the user launching the server.
+        print(
+            f"[code-review-graph] lean tool mode: exposing {kept} of "
+            f"{len(registered)} tools "
+            f"(removed {removed}). Use --tools all or CRG_TOOLS=all "
+            f"for the full set.",
+            file=sys.stderr,
+        )
 
 
 
@@ -1013,6 +1134,7 @@ def main(
     transport: str = "stdio",
     host: str | None = None,
     port: int | None = None,
+    detail_level: str | None = None,
 ) -> None:
     """Run the MCP server (stdio or HTTP).
 
@@ -1026,18 +1148,31 @@ def main(
 
     Args:
         repo_root: Default repository root for all tool calls.
-        tools: Comma-separated list of tool names to expose.
-            Falls back to ``CRG_TOOLS`` env var.  When unset, all
-            tools are available.
+        tools: Tool allow-list spec.  ``"all"`` exposes every tool,
+            ``"lean"`` the curated set, or a comma-separated list of names.
+            Falls back to ``CRG_TOOLS`` env var, then the lean default.
         auto_watch: Start filesystem watcher in a background daemon thread
             while the MCP server runs.
         transport: ``"stdio"`` (default) or ``"streamable-http"`` for local HTTP.
         host: Bind address when using HTTP (required for HTTP; set by CLI).
         port: Port when using HTTP (required for HTTP; set by CLI).
+        detail_level: Server-wide ``detail_level`` override
+            ("minimal"/"standard"/"verbose").  Falls back to the
+            ``CRG_DETAIL_LEVEL`` env var.  When set, it overrides each tool's
+            per-call ``detail_level`` argument.
     """
-    global _default_repo_root
+    global _default_repo_root, _detail_level_override
     root = Path(repo_root) if repo_root else find_project_root()
     _default_repo_root = str(root)
+    if detail_level:
+        normalized = detail_level.strip().lower()
+        if normalized in _VALID_DETAIL_LEVELS:
+            _detail_level_override = normalized
+        else:
+            logger.warning(
+                "Ignoring unknown --detail %r (expected one of %s)",
+                detail_level, ", ".join(_VALID_DETAIL_LEVELS),
+            )
     _apply_tool_filter(tools)
 
     watch_store: GraphStore | None = None
diff --git a/code_review_graph/refactor.py b/code_review_graph/refactor.py
index 938762f3..490d12ac 100644
--- a/code_review_graph/refactor.py
+++ b/code_review_graph/refactor.py
@@ -492,19 +492,38 @@ def _is_plausible_caller(
                 if _is_plausible_caller(e.file_path, node.file_path, node.name)
             ]
             incoming = incoming + all_bare
-        if not any(e.kind == "TESTED_BY" for e in incoming):
-            bare_tb = store.search_edges_by_target_name(node.name, kind="TESTED_BY")
-            bare_tb = [
-                e for e in bare_tb
+        # TESTED_BY edges are stored as source=production, target=test by the
+        # parser, so a tested production node is the *source* of its TESTED_BY
+        # edge -- look outgoing, not incoming. See: #515
+        outgoing_tb = [
+            e for e in store.get_edges_by_source(node.qualified_name)
+            if e.kind == "TESTED_BY"
+        ]
+        if not outgoing_tb and node.parent_name:
+            # Class-qualified source (e.g. "ClassName::method") which lacks the
+            # file-path prefix used in node.qualified_name.
+            class_qn = f"{node.parent_name}::{node.name}"
+            outgoing_tb += [
+                e for e in store.get_edges_by_source(class_qn)
+                if e.kind == "TESTED_BY"
+            ]
+        if not outgoing_tb:
+            # Bare-name source fallback: unresolved TESTED_BY edges may store the
+            # production function by its plain name (e.g. "authenticate").
+            bare_rows = conn.execute(
+                "SELECT * FROM edges WHERE kind = 'TESTED_BY' AND source_qualified = ?",
+                (node.name,),
+            ).fetchall()
+            outgoing_tb += [
+                e for e in (store._row_to_edge(r) for r in bare_rows)
                 if _is_plausible_caller(e.file_path, node.file_path, node.name)
             ]
-            incoming = incoming + bare_tb
         # Check INHERITS -- classes with subclasses are not dead.
         if node.kind == "Class" and not any(e.kind == "INHERITS" for e in incoming):
             bare_inh = store.search_edges_by_target_name(node.name, kind="INHERITS")
             incoming = incoming + bare_inh
         has_callers = any(e.kind == "CALLS" for e in incoming)
-        has_test_refs = any(e.kind == "TESTED_BY" for e in incoming)
+        has_test_refs = bool(outgoing_tb)
         has_importers = any(e.kind == "IMPORTS_FROM" for e in incoming)
         has_references = any(e.kind == "REFERENCES" for e in incoming)
         has_subclasses = any(e.kind == "INHERITS" for e in incoming)
diff --git a/code_review_graph/tools/_common.py b/code_review_graph/tools/_common.py
index a44fb0ca..93256590 100644
--- a/code_review_graph/tools/_common.py
+++ b/code_review_graph/tools/_common.py
@@ -2,12 +2,16 @@
 
 from __future__ import annotations
 
+import logging
+import sqlite3
 from pathlib import Path
 from typing import Any
 
 from ..graph import GraphStore
 from ..incremental import find_project_root, get_db_path
 
+logger = logging.getLogger(__name__)
+
 
 def _error_response(
     message: str, status: str = "error", **extra: Any,
@@ -15,6 +19,62 @@ def _error_response(
     """Build a standardised error response dict."""
     return {"status": status, "error": message, "summary": message, **extra}
 
+
+# Metadata keys written by a build/update run. Their presence proves the graph
+# was built at least once, even if the resulting graph legitimately has 0 nodes
+# (e.g. an empty repo). Used to distinguish "never built" from "built, empty".
+_BUILD_MARKER_KEYS: tuple[str, ...] = ("last_updated", "last_build_type")
+
+
+def _graph_is_built(store: GraphStore) -> bool:
+    """Return True if the graph has been built at least once.
+
+    A graph counts as built when it contains any node, or when a build marker
+    metadata row is present. The marker check ensures a legitimately empty repo
+    that was actually built is not mistaken for a never-built repo.
+    """
+    try:
+        if store.get_stats().total_nodes > 0:
+            return True
+        return any(store.get_metadata(key) for key in _BUILD_MARKER_KEYS)
+    except sqlite3.Error:
+        logger.warning("Failed to read graph build state", exc_info=True)
+        # Be conservative: if we cannot tell, do not block the read tool.
+        return True
+
+
+def _not_built_response() -> dict[str, Any]:
+    """Standard response returned by read tools when the graph is not built."""
+    return {
+        "status": "not_built",
+        "message": "Graph not built — run build_or_update_graph_tool first.",
+        "summary": "Graph not built — run build_or_update_graph_tool first.",
+        "next_tool_suggestions": ["build_or_update_graph_tool"],
+    }
+
+
+def _get_store_for_read(
+    repo_root: str | None = None,
+) -> tuple[GraphStore | None, Path | None, dict[str, Any] | None]:
+    """Open the store for a read tool, guarding against an unbuilt graph.
+
+    Returns ``(store, root, None)`` when the graph has been built; the caller
+    owns ``store`` and must close it. Returns ``(None, None, not_built)`` when
+    the graph is missing or never built, in which case the caller should return
+    the ``not_built`` dict immediately (no store to close).
+    """
+    root = _resolve_root(repo_root)
+    db_path = get_db_path(root)
+    # A missing db file means the graph was never built. Opening GraphStore
+    # would silently create an empty db, so short-circuit before that happens.
+    if not Path(db_path).exists():
+        return None, None, _not_built_response()
+    store = GraphStore(db_path)
+    if not _graph_is_built(store):
+        store.close()
+        return None, None, _not_built_response()
+    return store, root, None
+
 # Common JS/TS builtin method names filtered from callers_of results.
 # "Who calls .map()?" returns hundreds of hits and is never useful.
 # These are kept in the graph (callees_of still shows them) but excluded
diff --git a/code_review_graph/tools/analysis_tools.py b/code_review_graph/tools/analysis_tools.py
index b67e6340..d7d9c603 100644
--- a/code_review_graph/tools/analysis_tools.py
+++ b/code_review_graph/tools/analysis_tools.py
@@ -11,7 +11,7 @@
     find_surprising_connections,
     generate_suggested_questions,
 )
-from ._common import _get_store
+from ._common import _get_store_for_read, _not_built_response
 
 
 def get_hub_nodes_func(
@@ -28,7 +28,9 @@ def get_hub_nodes_func(
         repo_root: Repository root (auto-detected if omitted).
         top_n: Number of top hubs to return (default 10).
     """
-    store, _root = _get_store(repo_root or None)
+    store, _root, not_built = _get_store_for_read(repo_root or None)
+    if store is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         hubs = find_hub_nodes(store, top_n=top_n)
         return {
@@ -58,7 +60,9 @@ def get_bridge_nodes_func(
         repo_root: Repository root (auto-detected if omitted).
         top_n: Number of top bridges to return (default 10).
     """
-    store, _root = _get_store(repo_root or None)
+    store, _root, not_built = _get_store_for_read(repo_root or None)
+    if store is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         bridges = find_bridge_nodes(store, top_n=top_n)
         return {
@@ -86,7 +90,9 @@ def get_knowledge_gaps_func(
     Args:
         repo_root: Repository root (auto-detected if omitted).
     """
-    store, _root = _get_store(repo_root or None)
+    store, _root, not_built = _get_store_for_read(repo_root or None)
+    if store is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         gaps = find_knowledge_gaps(store)
         total = sum(len(v) for v in gaps.values())
@@ -128,7 +134,9 @@ def get_surprising_connections_func(
         repo_root: Repository root (auto-detected if omitted).
         top_n: Number of top surprises to return (default 15).
     """
-    store, _root = _get_store(repo_root or None)
+    store, _root, not_built = _get_store_for_read(repo_root or None)
+    if store is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         surprises = find_surprising_connections(
             store, top_n=top_n
@@ -158,7 +166,9 @@ def get_suggested_questions_func(
     Args:
         repo_root: Repository root (auto-detected if omitted).
     """
-    store, _root = _get_store(repo_root or None)
+    store, _root, not_built = _get_store_for_read(repo_root or None)
+    if store is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         questions = generate_suggested_questions(store)
         by_priority: dict[str, list[dict[str, Any]]] = {
diff --git a/code_review_graph/tools/community_tools.py b/code_review_graph/tools/community_tools.py
index 4c6647c6..9ab50bf2 100644
--- a/code_review_graph/tools/community_tools.py
+++ b/code_review_graph/tools/community_tools.py
@@ -9,7 +9,7 @@
 from ..context_savings import attach_context_savings
 from ..graph import node_to_dict
 from ..hints import generate_hints, get_session
-from ._common import _get_store
+from ._common import _get_store_for_read, _not_built_response
 
 # ---------------------------------------------------------------------------
 # Tool 13: list_communities  [EXPLORE]
@@ -40,7 +40,9 @@ def list_communities_func(
     Returns:
         List of communities with size and cohesion scores.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         communities = get_communities(
             store, sort_by=sort_by, min_size=min_size
@@ -91,7 +93,9 @@ def get_community_func(
     Returns:
         Community details, or not_found status.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         community: dict | None = None
         all_communities = get_communities(store)
@@ -211,7 +215,9 @@ def get_architecture_overview_func(
         Architecture overview with communities, cross-community edges,
         and warnings.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         full_overview = get_architecture_overview(store)
         overview = full_overview
diff --git a/code_review_graph/tools/context.py b/code_review_graph/tools/context.py
index 5abb8f3f..87137f68 100644
--- a/code_review_graph/tools/context.py
+++ b/code_review_graph/tools/context.py
@@ -8,7 +8,7 @@
 from pathlib import Path
 from typing import Any
 
-from ._common import _get_store, compact_response
+from ._common import _get_store_for_read, _not_built_response, compact_response
 
 logger = logging.getLogger(__name__)
 
@@ -52,7 +52,9 @@ def get_minimal_context(
         repo_root: Repository root path. Auto-detected if None.
         base: Git ref for diff comparison.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         # 1. Quick stats
         stats = store.get_stats()
diff --git a/code_review_graph/tools/docs.py b/code_review_graph/tools/docs.py
index 19f02fa3..cc346b8b 100644
--- a/code_review_graph/tools/docs.py
+++ b/code_review_graph/tools/docs.py
@@ -8,7 +8,13 @@
 
 from ..embeddings import EmbeddingStore, embed_all_nodes
 from ..incremental import find_project_root, get_db_path
-from ._common import _get_store, _resolve_root, _validate_repo_root
+from ._common import (
+    _get_store,
+    _get_store_for_read,
+    _not_built_response,
+    _resolve_root,
+    _validate_repo_root,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -207,7 +213,9 @@ def generate_wiki_func(
     from ..incremental import get_data_dir
     from ..wiki import generate_wiki
 
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         wiki_dir = get_data_dir(root) / "wiki"
         result = generate_wiki(store, wiki_dir, force=force)
diff --git a/code_review_graph/tools/flows_tools.py b/code_review_graph/tools/flows_tools.py
index 4df1f713..64887533 100644
--- a/code_review_graph/tools/flows_tools.py
+++ b/code_review_graph/tools/flows_tools.py
@@ -7,7 +7,7 @@
 
 from ..flows import get_flow_by_id, get_flows
 from ..hints import generate_hints, get_session
-from ._common import _get_store
+from ._common import _get_store_for_read, _not_built_response
 
 # ---------------------------------------------------------------------------
 # Tool 10: list_flows  [EXPLORE]
@@ -40,7 +40,9 @@ def list_flows(
     Returns:
         List of flows with criticality scores.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         fetch_limit = (
             limit if not kind else limit * 10
@@ -109,7 +111,9 @@ def get_flow(
     Returns:
         Flow details with steps, or not_found status.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         flow: dict | None = None
 
diff --git a/code_review_graph/tools/query.py b/code_review_graph/tools/query.py
index 3b442f8a..47e400dc 100644
--- a/code_review_graph/tools/query.py
+++ b/code_review_graph/tools/query.py
@@ -12,7 +12,12 @@
 from ..hints import generate_hints, get_session
 from ..incremental import get_changed_files, get_db_path, get_staged_and_unstaged
 from ..search import hybrid_search
-from ._common import _BUILTIN_CALL_NAMES, _get_store, _resolve_graph_file_paths
+from ._common import (
+    _BUILTIN_CALL_NAMES,
+    _get_store_for_read,
+    _not_built_response,
+    _resolve_graph_file_paths,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -55,7 +60,9 @@ def get_impact_radius(
         Changed nodes, impacted nodes, impacted files, connecting edges,
         plus ``truncated`` flag and ``total_impacted`` count.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         if changed_files is None:
             changed_files = get_changed_files(root, base)
@@ -147,6 +154,7 @@ def query_graph(
     target: str,
     repo_root: str | None = None,
     detail_level: str = "standard",
+    max_results: int = 0,
 ) -> dict[str, Any]:
     """Run a predefined graph query.
 
@@ -156,11 +164,17 @@ def query_graph(
         target: The node name, qualified name, or file path to query about.
         repo_root: Repository root path. Auto-detected if omitted.
         detail_level: "standard" (full output) or "minimal" (summary only).
+        max_results: Cap on returned ``results`` (and matching ``edges``) so a
+            hot symbol cannot return an unbounded payload.  ``0`` (default)
+            means no cap.  When the cap trims results the response carries
+            ``truncated=True`` and ``total_results`` with the full count.
 
     Returns:
         Matching nodes and edges for the query.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         if pattern not in _QUERY_PATTERNS:
             return {
@@ -293,9 +307,11 @@ def query_graph(
                         results.append(node_to_dict(child))
 
         elif pattern == "tests_for":
-            for e in store.get_edges_by_target(qn):
+            # TESTED_BY edges are stored as source=production, target=test
+            # by the parser, so look them up by source. See: #515
+            for e in store.get_edges_by_source(qn):
                 if e.kind == "TESTED_BY":
-                    test = store.get_node(e.source_qualified)
+                    test = store.get_node(e.target_qualified)
                     if test:
                         results.append(node_to_dict(test))
             # Also search by naming convention
@@ -331,10 +347,32 @@ def query_graph(
                 for n in store.get_nodes_by_file(graph_path):
                     results.append(node_to_dict(n))
 
+        total_results = len(results)
+
+        # Cap the payload so callers_of/callees_of on a hot symbol cannot
+        # return unbounded results.  Keep the head (queries append in a stable
+        # graph order) and trim edges to the surviving result set when we can.
+        truncated = False
+        if max_results and total_results > max_results:
+            truncated = True
+            results = results[:max_results]
+            kept_qns = {
+                r.get("qualified_name") for r in results if "qualified_name" in r
+            }
+            if kept_qns:
+                edges_out = [
+                    e for e in edges_out
+                    if e.get("source") in kept_qns or e.get("target") in kept_qns
+                ][:max_results]
+            else:
+                edges_out = edges_out[:max_results]
+
         summary = (
-            f"Found {len(results)} result(s) "
+            f"Found {total_results} result(s) "
             f"for {pattern}('{target}')"
         )
+        if truncated:
+            summary += f" (showing first {len(results)})"
 
         if detail_level == "minimal":
             minimal_results = [
@@ -345,17 +383,21 @@ def query_graph(
                 }
                 for r in results[:5]
             ]
-            return {
+            response: dict[str, Any] = {
                 "status": "ok",
                 "pattern": pattern,
                 "target": target,
                 "description": _QUERY_PATTERNS[pattern],
                 "summary": summary,
-                "result_count": len(results),
+                "result_count": total_results,
                 "results": minimal_results,
             }
+            if truncated:
+                response["truncated"] = True
+                response["total_results"] = total_results
+            return response
 
-        return {
+        response = {
             "status": "ok",
             "pattern": pattern,
             "target": target,
@@ -364,6 +406,10 @@ def query_graph(
             "results": results,
             "edges": edges_out,
         }
+        if truncated:
+            response["truncated"] = True
+            response["total_results"] = total_results
+        return response
     finally:
         store.close()
 
@@ -401,7 +447,9 @@ def semantic_search_nodes(
     Returns:
         Ranked list of matching nodes.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         results = hybrid_search(
             store, query, kind=kind, limit=limit, context_files=context_files,
@@ -462,7 +510,9 @@ def list_graph_stats(repo_root: str | None = None) -> dict[str, Any]:
     Returns:
         Total nodes, edges, breakdown by kind, languages, and last update time.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         stats = store.get_stats()
 
@@ -539,7 +589,9 @@ def find_large_functions(
     Returns:
         Oversized nodes with line counts, ordered largest first.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         nodes = store.get_nodes_by_size(
             min_lines=min_lines,
@@ -609,7 +661,9 @@ def traverse_graph_func(
         token_budget: Approximate token limit for results.
         repo_root: Repository root path.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         results = hybrid_search(store, query, limit=1)
         if not results:
diff --git a/code_review_graph/tools/refactor_tools.py b/code_review_graph/tools/refactor_tools.py
index c92891b3..f0561466 100644
--- a/code_review_graph/tools/refactor_tools.py
+++ b/code_review_graph/tools/refactor_tools.py
@@ -13,7 +13,7 @@
     rename_preview,
     suggest_refactorings,
 )
-from ._common import _get_store, _validate_repo_root
+from ._common import _get_store_for_read, _not_built_response, _validate_repo_root
 
 # ---------------------------------------------------------------------------
 # Tool 17: refactor_tool  [REFACTOR]
@@ -57,7 +57,9 @@ def refactor_func(
             ),
         }
 
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         if mode == "rename":
             if not old_name or not new_name:
diff --git a/code_review_graph/tools/review.py b/code_review_graph/tools/review.py
index a896983e..33831a90 100644
--- a/code_review_graph/tools/review.py
+++ b/code_review_graph/tools/review.py
@@ -7,12 +7,12 @@
 from typing import Any
 
 from ..changes import analyze_changes, parse_diff_ranges, parse_git_diff_ranges  # noqa: F401
-from ..context_savings import attach_context_savings, estimate_file_tokens
+from ..context_savings import attach_context_savings, estimate_file_tokens, estimate_tokens
 from ..flows import get_affected_flows as _get_affected_flows
 from ..graph import edge_to_dict, node_to_dict
 from ..hints import generate_hints, get_session
 from ..incremental import get_changed_files, get_staged_and_unstaged
-from ._common import _get_store, _resolve_graph_file_paths
+from ._common import _get_store_for_read, _not_built_response, _resolve_graph_file_paths
 
 logger = logging.getLogger(__name__)
 
@@ -30,6 +30,7 @@ def get_review_context(
     repo_root: str | None = None,
     base: str = "HEAD~1",
     detail_level: str = "standard",
+    max_tokens: int = 0,
 ) -> dict[str, Any]:
     """Generate a focused review context from changed files.
 
@@ -46,12 +47,18 @@ def get_review_context(
             "minimal" returns summary, risk level, changed/impacted file counts,
             top 5 key entity names, test gap count, and next tool suggestions.
             Default: "standard".
+        max_tokens: Token budget for the standard response (estimated via the
+            chars/4 heuristic).  When the source snippets would exceed it, the
+            lowest-risk files are dropped first and an honest ``omitted`` note
+            is reported.  ``0`` (default) disables budgeting.
 
     Returns:
         Structured review context with subgraph, source snippets, and
         review guidance.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         # Get impact radius first
         if changed_files is None:
@@ -167,6 +174,16 @@ def get_review_context(
         guidance = _generate_review_guidance(impact, changed_files)
         context["review_guidance"] = guidance
 
+        # Enforce the token budget by dropping the lowest-risk source snippets
+        # first.  Without this guard a large review can return 85k-400k tokens
+        # and break the token promise.  The snippets dominate the payload, so
+        # we budget them; the structural subgraph + guidance stay intact.
+        budget_note = None
+        if max_tokens and include_source and context.get("source_snippets"):
+            budget_note = _budget_source_snippets(
+                context, impact["changed_nodes"], max_tokens
+            )
+
         summary_parts = [
             f"Review context for {len(changed_files)} changed file(s):",
             f"  - {len(impact['changed_nodes'])} directly changed nodes",
@@ -176,18 +193,95 @@ def get_review_context(
             "Review guidance:",
             guidance,
         ]
+        if budget_note:
+            summary_parts.append("")
+            summary_parts.append(budget_note["note"])
 
         result = {
             "status": "ok",
             "summary": "\n".join(summary_parts),
             "context": context,
         }
+        if budget_note:
+            result["omitted"] = budget_note
         attach_context_savings(result, original_tokens=original_tokens)
         return result
     finally:
         store.close()
 
 
+def _file_risk_rank(changed_nodes: list, snippet_files: list[str]) -> list[str]:
+    """Order snippet files highest-risk first.
+
+    Risk proxy: the number of directly-changed nodes in each file (more
+    changed entities -> more to review).  Files with no node mapping sort
+    last but keep their original order for stability.
+    """
+    counts: dict[str, int] = {}
+    for n in changed_nodes:
+        fp = getattr(n, "file_path", None)
+        if fp:
+            counts[fp] = counts.get(fp, 0) + 1
+
+    def _key(rel_path: str) -> int:
+        # ``snippet`` keys are repo-relative; node file_paths are absolute.
+        # Match on suffix so either representation ranks correctly.
+        for fp, c in counts.items():
+            if fp.endswith(rel_path) or rel_path.endswith(fp):
+                return c
+        return 0
+
+    return sorted(snippet_files, key=_key, reverse=True)
+
+
+def _budget_source_snippets(
+    context: dict[str, Any], changed_nodes: list, max_tokens: int
+) -> dict[str, Any] | None:
+    """Trim ``context['source_snippets']`` to fit ``max_tokens``.
+
+    Keeps the highest-risk files (most changed nodes) and drops the rest.
+    Returns an ``omitted`` summary dict, or ``None`` if nothing was dropped.
+    Mutates ``context`` in place.
+    """
+    snippets: dict[str, str] = context["source_snippets"]
+    # Budget against everything except the snippets, so structural context is
+    # never sacrificed for snippets.
+    scaffold = dict(context)
+    scaffold.pop("source_snippets", None)
+    overhead = estimate_tokens(scaffold)
+    remaining = max_tokens - overhead
+
+    ordered = _file_risk_rank(changed_nodes, list(snippets))
+    kept: dict[str, str] = {}
+    used = 0
+    omitted_files: list[str] = []
+    for rel_path in ordered:
+        snippet = snippets[rel_path]
+        # +1 token slack for the JSON key/quoting overhead per entry.
+        cost = estimate_tokens(snippet) + estimate_tokens(rel_path) + 1
+        if remaining > 0 and used + cost <= remaining:
+            kept[rel_path] = snippet
+            used += cost
+        else:
+            omitted_files.append(rel_path)
+
+    if not omitted_files:
+        return None
+
+    context["source_snippets"] = kept
+    note = (
+        f"Token budget ({max_tokens}) reached: omitted source for "
+        f"{len(omitted_files)} lower-risk file(s). Re-run with a higher "
+        f"max_tokens or pass changed_files explicitly to see them."
+    )
+    return {
+        "source_files": len(omitted_files),
+        "omitted_file_names": omitted_files[:20],
+        "max_tokens": max_tokens,
+        "note": note,
+    }
+
+
 def _extract_relevant_lines(
     lines: list[str], nodes: list, file_path: str
 ) -> str:
@@ -308,7 +402,9 @@ def get_affected_flows_func(
     Returns:
         Affected flows sorted by criticality, with step details.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         if changed_files is None:
             changed_files = get_changed_files(root, base)
@@ -360,6 +456,7 @@ def detect_changes_func(
     max_depth: int = 2,
     repo_root: str | None = None,
     detail_level: str = "standard",
+    max_tokens: int = 0,
 ) -> dict[str, Any]:
     """Detect changes and produce risk-scored review guidance.
 
@@ -379,12 +476,18 @@ def detect_changes_func(
             "minimal" returns only summary, risk_score, changed_file_count,
             test_gap_count, and top 3 review priorities (text only).
             Default: "standard".
+        max_tokens: Token budget for the standard response (estimated via the
+            chars/4 heuristic).  When the analysis would exceed it, the
+            lowest-risk changed functions and flows are dropped first and an
+            honest ``omitted`` note is reported.  ``0`` (default) disables it.
 
     Returns:
         Risk-scored analysis with changed functions, affected flows,
         test gaps, and review priorities.
     """
-    store, root = _get_store(repo_root)
+    store, root, not_built = _get_store_for_read(repo_root)
+    if store is None or root is None:
+        return not_built if not_built is not None else _not_built_response()
     try:
         # Detect changed files if not provided.
         if changed_files is None:
@@ -466,6 +569,15 @@ def detect_changes_func(
                 "changed_files": changed_files,
                 **analysis,
             }
+            if max_tokens:
+                budget_note = _budget_detect_changes(result, max_tokens)
+                if budget_note:
+                    result["omitted"] = budget_note
+                    summary = result.get("summary", "")
+                    result["summary"] = (
+                        f"{summary}\n{budget_note['note']}" if summary
+                        else budget_note["note"]
+                    )
         result["_hints"] = generate_hints(
             "detect_changes", result, get_session()
         )
@@ -475,3 +587,88 @@ def detect_changes_func(
         return {"status": "error", "error": str(exc)}
     finally:
         store.close()
+
+
+def _budget_detect_changes(
+    result: dict[str, Any], max_tokens: int
+) -> dict[str, Any] | None:
+    """Trim a standard detect_changes result to fit ``max_tokens``.
+
+    The big-list fields are trimmed lowest-signal first, in this order:
+
+    1. ``affected_flows``   (kept in upstream criticality order)
+    2. ``changed_functions`` (re-ranked highest-risk first)
+    3. ``test_gaps``        (a flat list that balloons on large PRs)
+    4. ``review_priorities`` (the curated top-N; trimmed last, kept longest)
+
+    The summary scaffold and risk score always survive.  Returns an
+    ``omitted`` summary dict, or ``None`` when nothing was dropped.  Mutates
+    ``result`` in place.
+    """
+    if estimate_tokens(result) <= max_tokens:
+        return None
+
+    funcs = result.get("changed_functions") or []
+    flows = result.get("affected_flows") or []
+    gaps = result.get("test_gaps") or []
+    prios = result.get("review_priorities") or []
+    orig = {
+        "changed_functions": len(funcs),
+        "affected_flows": len(flows),
+        "test_gaps": len(gaps),
+        "review_priorities": len(prios),
+    }
+
+    # Highest-risk functions first; the other lists keep their upstream order.
+    ranked_funcs = sorted(
+        funcs, key=lambda f: f.get("risk_score", 0.0), reverse=True
+    )
+
+    # Counts kept per field; start with everything, shrink as needed.
+    keep = dict(orig)
+    sources = {
+        "changed_functions": ranked_funcs,
+        "affected_flows": flows,
+        "test_gaps": gaps,
+        "review_priorities": prios,
+    }
+
+    def _apply() -> None:
+        for f, src in sources.items():
+            result[f] = src[: keep[f]]
+
+    def _fits() -> bool:
+        _apply()
+        return estimate_tokens(result) <= max_tokens
+
+    # Trim each field down to zero in priority order until the payload fits.
+    # Binary-search the largest count that still fits to keep this O(log N)
+    # per field rather than O(N) decrements on large PRs.
+    for field in ("affected_flows", "changed_functions", "test_gaps",
+                  "review_priorities"):
+        if _fits():
+            break
+        lo, hi = 0, keep[field]  # hi currently overflows
+        while lo < hi:
+            mid = (lo + hi + 1) // 2
+            keep[field] = mid
+            if _fits():
+                lo = mid
+            else:
+                hi = mid - 1
+        keep[field] = lo
+    _apply()
+
+    dropped = {f: orig[f] - keep[f] for f in orig}
+    if not any(dropped.values()):
+        return None
+
+    note = (
+        f"Token budget ({max_tokens}) reached: omitted "
+        f"{dropped['changed_functions']} function(s), "
+        f"{dropped['affected_flows']} flow(s), "
+        f"{dropped['test_gaps']} test gap(s), and "
+        f"{dropped['review_priorities']} review priority(ies). "
+        f"Re-run with a higher max_tokens to see them."
+    )
+    return {**dropped, "max_tokens": max_tokens, "note": note}
diff --git a/docs/COMMANDS.md b/docs/COMMANDS.md
index 893ed352..753b7bec 100644
--- a/docs/COMMANDS.md
+++ b/docs/COMMANDS.md
@@ -56,7 +56,7 @@ changed_files: list[str] | None  # Auto-detected from VCS
 max_depth: int = 2               # Hops in graph
 repo_root: str | None
 base: str = "HEAD~1"
-detail_level: str = "standard"   # "standard" or "minimal"
+detail_level: str = "minimal"    # "minimal" (default) or "standard"
 ```
 Relevant responses may include compact estimated `context_savings` metadata.
 
@@ -66,7 +66,8 @@ pattern: str    # callers_of, callees_of, imports_of, importers_of,
                 # children_of, tests_for, inheritors_of, file_summary
 target: str     # Node name, qualified name, or file path
 repo_root: str | None
-detail_level: str = "standard"   # "standard" or "minimal"
+detail_level: str = "minimal"    # "minimal" (default) or "standard"
+max_results: int = 100           # Cap so a hot symbol can't return unbounded results
 ```
 
 #### `get_review_context_tool`
@@ -78,6 +79,7 @@ max_lines_per_file: int = 200
 repo_root: str | None
 base: str = "HEAD~1"
 detail_level: str = "standard"   # "standard" or "minimal"
+max_tokens: int = 6000           # Token budget; drops lowest-risk snippets, reports omissions
 ```
 Relevant responses may include compact estimated `context_savings` metadata.
 
@@ -98,7 +100,7 @@ limit: int = 20
 repo_root: str | None
 model: str | None    # Embedding model (falls back to CRG_EMBEDDING_MODEL env var)
 provider: str | None # local, openai, google, minimax
-detail_level: str = "standard"
+detail_level: str = "minimal"    # "minimal" (default) or "standard"
 ```
 
 #### `embed_graph_tool`
@@ -218,7 +220,8 @@ changed_files: list[str] | None
 include_source: bool = False
 max_depth: int = 2
 repo_root: str | None
-detail_level: str = "standard"
+detail_level: str = "minimal"    # "minimal" (default) or "standard"
+max_tokens: int = 6000           # Token budget; drops lowest-risk items, reports omissions
 ```
 Primary tool for code review. Maps changed files to affected functions, flows, communities, and test coverage gaps. Returns risk scores and prioritized review items.
 Relevant responses may include compact estimated `context_savings` metadata.
@@ -315,6 +318,7 @@ code-review-graph embed --provider local       # Compute vector embeddings for s
 
 # Monitor and inspect
 code-review-graph status                       # Graph statistics
+code-review-graph doctor                       # Health checklist (✓/✗ + next-step hints)
 code-review-graph watch                        # Auto-update on file changes
 code-review-graph visualize                    # Generate interactive HTML graph
 code-review-graph visualize --format graphml   # Export GraphML
@@ -356,12 +360,40 @@ code-review-graph daemon remove <path_or_alias>     # Remove a repo from daemon
 code-review-graph eval                         # Run evaluation benchmarks
 
 # Server
-code-review-graph serve                        # Start MCP server (stdio)
+code-review-graph serve                        # Start MCP server (stdio); lean 7-tool set by default
 code-review-graph serve --http                 # Streamable HTTP on localhost:5555
-code-review-graph serve --tools query_graph_tool,detect_changes_tool  # Tool allowlist
+code-review-graph serve --tools all            # Expose all 30 tools
+code-review-graph serve --tools query_graph_tool,detect_changes_tool  # Custom allowlist
+code-review-graph serve --detail minimal       # Force minimal detail_level server-wide
 code-review-graph mcp                          # Alias for serve
 ```
 
+### `doctor` — verify your install
+
+`code-review-graph doctor` runs a fast, read-only health checklist and prints a
+`✓`/`✗` line per check with an actionable next-step hint. It exits non-zero when a
+**critical** check fails (no graph, MCP server won't import) so it can gate CI and
+install scripts; warnings such as a stale graph or missing embeddings are surfaced
+but never fail the exit code.
+
+Checks:
+
+1. **graph** — `graph.db` exists and has nodes (critical → run `build`)
+2. **freshness** — stored `git_head_sha` vs current `HEAD` (warning → run `update`)
+3. **mcp-config** — at least one repo-local MCP config file present (warning → `install`)
+4. **serve-cmd** — the detected `serve` launcher resolves (`uvx`/`uv run`/python)
+5. **server** — `code_review_graph.main` imports and registers > 0 tools (critical)
+6. **hooks** — platform-native or git pre-commit hooks installed (warning → `install`)
+7. **embeddings** — present, or a note that semantic search falls back to keyword (FTS5)
+
+When a graph exists, the closing line surfaces the latest `detect-changes`
+Token Savings number as a "see your savings" proof.
+
+```bash
+code-review-graph doctor                       # check the current repo
+code-review-graph doctor --repo /path/to/repo  # check a specific repo
+```
+
 ## Standalone Daemon CLI (`crg-daemon`)
 
 The `crg-daemon` command is included with every `code-review-graph` installation — no
diff --git a/docs/FAQ.md b/docs/FAQ.md
index c04ebc3c..88b26cfb 100644
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -112,18 +112,30 @@ each project's public documentation (check upstream docs for current behavior):
 
 | Tool | Approach | Persistence | External deps | Review focus |
 |---|---|---|---|---|
-| **code-review-graph** | Tree-sitter AST → structural graph (calls, imports, inheritance, tests) over MCP + CLI | SQLite in `.code-review-graph/`, incremental updates | None for the core; embeddings optional | Yes — blast radius, risk-scored change analysis, test-gap detection |
+| **code-review-graph** | Tree-sitter AST → structural graph (calls, imports, inheritance, tests) over MCP + CLI | SQLite in `.code-review-graph/`, incremental updates | None for the core; embeddings optional | Yes — risk-scored change review, execution-flow criticality, impact radius, a PR-review GitHub Action, dead-code, wiki, and hybrid keyword+vector search — locally and free |
 | **Serena** | LSP-backed symbol retrieval and editing tools over MCP | Language-server state plus per-project memories | A language server per language | General coding-agent toolkit, not review-specific |
-| **codegraph** | AST/call-graph indexing over MCP (several projects share this name; details vary by implementation) | Varies by implementation | Varies by implementation | Generally retrieval-focused |
+| **codegraph** | AST/call-graph indexing over MCP (several projects share this name; details vary by implementation) | Varies by implementation | Varies by implementation | Navigation/retrieval-focused — some popular graph tools in this space are keyword/FTS-only today and position deeper PR-review intelligence as a forthcoming hosted/commercial product |
 | **claude-context** | Chunk + embed semantic code search over MCP | Vector index in a vector database | Embedding provider + vector DB (cloud or self-hosted) | Search-focused, not review-specific |
 | **repomix** | Packs the whole repo into one AI-friendly file | None — regenerated per run | Node.js | One-shot context packing; no structural queries |
 
-Rough guidance: if you want symbol-precise *editing* tools, Serena's LSP approach is
-a better fit. If you want semantic *search* and are happy running a vector store,
-claude-context covers that. If your repo is small enough to paste wholesale into a
-large context window, repomix is the simplest thing that works. CRG's niche is the
-persistent structural graph for **review**: impact analysis, risk scoring, and
-test-coverage tracing with no external services.
+**The wedge, in one line:** CRG focuses on *review*; retrieval tools focus on
+*navigation*. Both are useful, often together.
+
+Rough guidance — pick by the job in front of you:
+
+- **You want symbol-precise *editing* tools.** Serena's LSP approach is a better fit.
+- **You want semantic *search* and are happy running a vector store.** claude-context
+  covers that.
+- **Your repo fits wholesale into a large context window.** repomix is the simplest
+  thing that works.
+- **You want navigation/retrieval over a graph.** Several adjacent graph tools do this
+  well; note that some popular ones are keyword/FTS-only today and gate richer
+  PR-review intelligence behind a forthcoming hosted/commercial product.
+- **You want review now, locally, for free.** That is CRG's niche: a persistent
+  structural graph that ships risk-scored change review, execution-flow criticality,
+  impact radius, a PR-review GitHub Action, dead-code detection, a generated wiki, and
+  hybrid keyword+vector search — with no external services, no waitlist, and no paid
+  tier to unlock the review features.
 
 ## When should I not use it?
 
diff --git a/docs/REPRODUCING.md b/docs/REPRODUCING.md
index 92143b7d..298f3028 100644
--- a/docs/REPRODUCING.md
+++ b/docs/REPRODUCING.md
@@ -374,6 +374,27 @@ they are not quoted before being measured.
 a job-summary table. It is deliberately **report-only**: regressions do not
 fail the default branch yet.
 
+The job summary leads with the **median per-question token reduction** for
+that run — the median of per-question `(1 - graph_tokens / baseline_tokens)
+* 100` over the `agent_baseline` rows (falling back to `token_efficiency`
+when no agent-baseline rows exist). It is computed by
+`code_review_graph.eval.reporter.median_token_reduction`, so you can
+reproduce the exact headline locally:
+
+```bash
+code-review-graph eval --repo httpx,flask \
+  --benchmark agent_baseline --output-dir evaluate/results
+python -c "from code_review_graph.eval.reporter import median_token_reduction_table; \
+print(median_token_reduction_table('evaluate/results'))"
+```
+
+When a run produces no usable rows (clone or measurement failures) the table
+honestly reports `n/a` rather than fabricating a number. The
+`benchmarks: reproducible` badge in the README links to these runs: it
+asserts the pipeline is reproducible, not a frozen headline number — the
+canonical numbers stay in this file and the README, never auto-committed
+from CI.
+
 ## Which benchmark measures what
 
 There are four different "token" benchmarks in the repo. They are all valid
diff --git a/install.ps1 b/install.ps1
new file mode 100644
index 00000000..7c1515a4
--- /dev/null
+++ b/install.ps1
@@ -0,0 +1,134 @@
+<#
+.SYNOPSIS
+    code-review-graph one-line installer (Windows / PowerShell).
+
+.DESCRIPTION
+    What this does, in order:
+      1. Ensures `uv` (https://docs.astral.sh/uv/) is available, installing it
+         via the official Astral installer if missing.
+      2. Installs the `code-review-graph` CLI as a uv tool (falling back to
+         pipx, then `pip --user`).
+      3. Prints the next steps.
+
+    NOTE: This installs `uv`, a single static Python toolchain manager. It is
+    NOT a bundled/standalone runtime -- uv manages Python for you, so you do
+    not have to set Python up yourself, but a Python interpreter is still
+    downloaded/used under the hood by uv.
+
+    Idempotent: safe to re-run.
+
+.EXAMPLE
+    irm https://raw.githubusercontent.com/tirth8205/code-review-graph/main/install.ps1 | iex
+
+.EXAMPLE
+    powershell -ExecutionPolicy Bypass -File install.ps1
+#>
+
+$ErrorActionPreference = 'Stop'
+
+# Pinned to the official Astral uv installer. We echo this before running it so
+# the user can see exactly what is being executed.
+$UvInstallerUrl = 'https://astral.sh/uv/install.ps1'
+
+function Write-Info { param([string]$Message) Write-Host $Message }
+function Write-Warn { param([string]$Message) Write-Warning $Message }
+
+function Test-Command {
+    param([Parameter(Mandatory = $true)][string]$Name)
+    return [bool](Get-Command $Name -ErrorAction SilentlyContinue)
+}
+
+# --- 1. ensure uv ----------------------------------------------------------
+
+function Install-Uv {
+    if (Test-Command 'uv') {
+        Write-Info "uv already installed: $(uv --version)"
+        return
+    }
+
+    Write-Info 'uv not found. Installing uv via the official Astral installer.'
+    Write-Info "  This runs: irm $UvInstallerUrl | iex"
+    Write-Info '  (uv is a single static binary that manages Python for you;'
+    Write-Info '   this is not a bundled runtime.)'
+
+    Invoke-RestMethod -Uri $UvInstallerUrl | Invoke-Expression
+
+    # uv installs to %USERPROFILE%\.local\bin. Make it visible for the rest of
+    # this session even if the user has not restarted their shell.
+    if (-not (Test-Command 'uv')) {
+        $candidate = Join-Path $env:USERPROFILE '.local\bin'
+        if (Test-Path (Join-Path $candidate 'uv.exe')) {
+            $env:Path = "$candidate;$env:Path"
+        }
+    }
+
+    if (-not (Test-Command 'uv')) {
+        throw "uv was installed but is not on PATH. Open a new terminal and re-run this script."
+    }
+    Write-Info "uv installed: $(uv --version)"
+}
+
+# --- 2. install the CLI ----------------------------------------------------
+
+function Install-Crg {
+    Write-Info 'Installing code-review-graph with: uv tool install code-review-graph'
+    try {
+        uv tool install code-review-graph
+        if ($LASTEXITCODE -eq 0) { return }
+    } catch {
+        Write-Warn "uv tool install failed: $_"
+    }
+    Write-Warn 'uv tool install failed; trying pipx.'
+
+    if (Test-Command 'pipx') {
+        Write-Info 'Installing with: pipx install code-review-graph'
+        try {
+            pipx install code-review-graph
+            if ($LASTEXITCODE -eq 0) { return }
+        } catch {
+            Write-Warn "pipx install failed: $_"
+        }
+    }
+
+    $pip = $null
+    if (Test-Command 'pip') { $pip = 'pip' }
+    elseif (Test-Command 'pip3') { $pip = 'pip3' }
+    if ($pip) {
+        Write-Info "Installing with: $pip install --user code-review-graph"
+        try {
+            & $pip install --user code-review-graph
+            if ($LASTEXITCODE -eq 0) { return }
+        } catch {
+            Write-Warn "$pip install failed: $_"
+        }
+    }
+
+    throw "All install methods failed (uv tool / pipx / pip --user). See https://github.com/tirth8205/code-review-graph for manual instructions."
+}
+
+# --- 3. next steps ---------------------------------------------------------
+
+function Write-NextSteps {
+    Write-Info ''
+    Write-Info 'code-review-graph installed. Next steps:'
+    Write-Info ''
+    Write-Info '  1. Configure your AI coding tools:'
+    Write-Info '       code-review-graph install'
+    Write-Info ''
+    Write-Info '  2. Build the graph for your project (run inside a repo):'
+    Write-Info '       code-review-graph build'
+    Write-Info ''
+    Write-Info '  3. Verify the graph (health / stats check):'
+    Write-Info '       code-review-graph status'
+    Write-Info ''
+    if (-not (Test-Command 'code-review-graph')) {
+        Write-Warn "'code-review-graph' is not on your PATH yet. Open a new terminal, or add your uv / Python user scripts directory to PATH."
+    }
+}
+
+# --- main ------------------------------------------------------------------
+
+Write-Info 'code-review-graph installer'
+Install-Uv
+Install-Crg
+Write-NextSteps
diff --git a/install.sh b/install.sh
new file mode 100755
index 00000000..2285afce
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,150 @@
+#!/bin/sh
+# code-review-graph one-line installer (macOS / Linux).
+#
+# What this does, in order:
+#   1. Ensures `uv` (https://docs.astral.sh/uv/) is available, installing it via
+#      the official Astral installer if missing.
+#   2. Installs the `code-review-graph` CLI as a uv tool (falling back to pipx,
+#      then `pip --user`).
+#   3. Prints the next steps.
+#
+# NOTE: This installs `uv`, a single static Python toolchain manager. It is NOT
+# a bundled/standalone runtime — uv manages Python for you, so you do not have
+# to set Python up yourself, but a Python interpreter is still downloaded/used
+# under the hood by uv.
+#
+# Idempotent: safe to re-run. POSIX sh, no bashisms.
+#
+# Usage:
+#   curl -LsSf https://raw.githubusercontent.com/tirth8205/code-review-graph/main/install.sh | sh
+#   # or, from a checkout:
+#   sh install.sh
+
+set -eu
+
+# --- helpers ---------------------------------------------------------------
+
+# Pinned to the official Astral uv installer. We echo this before running it so
+# the user can see exactly what is being executed.
+UV_INSTALLER_URL="https://astral.sh/uv/install.sh"
+
+info() { printf '%s\n' "$*"; }
+warn() { printf 'WARNING: %s\n' "$*" >&2; }
+err()  { printf 'ERROR: %s\n' "$*" >&2; }
+
+die() {
+    err "$*"
+    exit 1
+}
+
+have() {
+    command -v "$1" >/dev/null 2>&1
+}
+
+# --- 1. ensure uv ----------------------------------------------------------
+
+ensure_uv() {
+    if have uv; then
+        info "uv already installed: $(uv --version 2>/dev/null || echo 'uv')"
+        return 0
+    fi
+
+    info "uv not found. Installing uv via the official Astral installer."
+    info "  This runs: curl -LsSf ${UV_INSTALLER_URL} | sh"
+    info "  (uv is a single static binary that manages Python for you;"
+    info "   this is not a bundled runtime.)"
+
+    if have curl; then
+        curl -LsSf "$UV_INSTALLER_URL" | sh || die "uv installation failed (curl)."
+    elif have wget; then
+        wget -qO- "$UV_INSTALLER_URL" | sh || die "uv installation failed (wget)."
+    else
+        die "Neither curl nor wget is available. Install one, or install uv \
+manually: https://docs.astral.sh/uv/getting-started/installation/"
+    fi
+
+    # uv installs to ~/.local/bin (or $XDG_BIN_HOME). Make it visible for the
+    # rest of this script even if the user has not restarted their shell.
+    if ! have uv; then
+        for dir in "$HOME/.local/bin" "$HOME/.cargo/bin" "${XDG_BIN_HOME:-}"; do
+            if [ -n "$dir" ] && [ -x "$dir/uv" ]; then
+                PATH="$dir:$PATH"
+                export PATH
+                break
+            fi
+        done
+    fi
+
+    have uv || die "uv was installed but is not on PATH. Open a new shell (or \
+add ~/.local/bin to PATH) and re-run this script."
+    info "uv installed: $(uv --version 2>/dev/null || echo 'uv')"
+}
+
+# --- 2. install the CLI ----------------------------------------------------
+
+install_crg() {
+    # Preferred path: uv tool install (isolated, fast, no venv juggling).
+    info "Installing code-review-graph with: uv tool install code-review-graph"
+    if uv tool install code-review-graph; then
+        return 0
+    fi
+    warn "uv tool install failed; trying pipx."
+
+    if have pipx; then
+        info "Installing with: pipx install code-review-graph"
+        if pipx install code-review-graph; then
+            return 0
+        fi
+        warn "pipx install failed; trying pip --user."
+    fi
+
+    # Last-resort fallback: pip --user.
+    PIP=""
+    if have pip3; then
+        PIP="pip3"
+    elif have pip; then
+        PIP="pip"
+    fi
+    if [ -n "$PIP" ]; then
+        info "Installing with: $PIP install --user code-review-graph"
+        if "$PIP" install --user code-review-graph; then
+            return 0
+        fi
+    fi
+
+    die "All install methods failed (uv tool / pipx / pip --user). \
+See https://github.com/tirth8205/code-review-graph for manual instructions."
+}
+
+# --- 3. next steps ---------------------------------------------------------
+
+print_next_steps() {
+    info ""
+    info "code-review-graph installed. Next steps:"
+    info ""
+    info "  1. Configure your AI coding tools:"
+    info "       code-review-graph install"
+    info ""
+    info "  2. Build the graph for your project (run inside a repo):"
+    info "       code-review-graph build"
+    info ""
+    info "  3. Verify the graph (health / stats check):"
+    info "       code-review-graph status"
+    info ""
+    if ! have code-review-graph; then
+        warn "'code-review-graph' is not on your PATH yet. If you installed via \
+uv, run 'uv tool update-shell' or open a new terminal. If you used 'pip \
+--user', add your user scripts dir to PATH."
+    fi
+}
+
+# --- main ------------------------------------------------------------------
+
+main() {
+    info "code-review-graph installer"
+    ensure_uv
+    install_crg
+    print_next_steps
+}
+
+main "$@"
diff --git a/pyproject.toml b/pyproject.toml
index f6471eb1..9d469964 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "code-review-graph"
-version = "2.3.6"
+version = "2.4.0"
 description = "Local-first knowledge graph for token-efficient code review through MCP and CLI"
 readme = {file = "README.md", content-type = "text/markdown"}
 license = "MIT"
diff --git a/scripts/render_pr_comment.py b/scripts/render_pr_comment.py
index 78b9a8c6..29423037 100644
--- a/scripts/render_pr_comment.py
+++ b/scripts/render_pr_comment.py
@@ -20,6 +20,7 @@
 import argparse
 import json
 import logging
+import os
 import re
 import sys
 from pathlib import Path
@@ -54,6 +55,57 @@ def risk_level(score: float) -> str:
     return "low"
 
 
+def relativize_path(value: Any) -> str:
+    """Strip CI-runner absolute prefixes so paths render repo-relative.
+
+    detect-changes emits paths exactly as the graph stored them; in CI the
+    repo is checked out under an absolute prefix
+    (``/home/runner/work/<repo>/<repo>/...``), which is ugly in a PR comment
+    and leaks the runner layout. We strip that prefix so the reader sees
+    ``code_review_graph/embeddings.py`` instead.
+
+    Handles both bare paths and ``path::symbol`` qualified names (the
+    ``::symbol`` suffix is preserved). Already-relative paths and non-path
+    tokens (``?``) are returned unchanged.
+
+    Strategy, in order:
+      1. ``GITHUB_WORKSPACE`` prefix (set by Actions ``checkout``).
+      2. The ``<repo>/<repo>/`` doubled-segment that Actions uses under
+         ``/work/`` (derived from ``GITHUB_REPOSITORY`` when present).
+      3. Otherwise return the input untouched — never guess-mangle a path.
+    """
+    text = str(value)
+    path_part, sep, symbol = text.partition("::")
+    # Only touch absolute, POSIX-style runner paths; leave everything else.
+    if not path_part.startswith("/"):
+        return text
+
+    rel = _strip_workspace_prefix(path_part)
+    if rel is None:
+        return text
+    return f"{rel}{sep}{symbol}" if sep else rel
+
+
+def _strip_workspace_prefix(path_part: str) -> str | None:
+    """Return ``path_part`` made repo-relative, or None when nothing matches."""
+    workspace = os.environ.get("GITHUB_WORKSPACE", "").strip()
+    if workspace:
+        prefix = workspace.rstrip("/") + "/"
+        if path_part.startswith(prefix):
+            return path_part[len(prefix):]
+
+    # Fallback: Actions checks repos out at /work/<name>/<name>/<files...>.
+    repo = os.environ.get("GITHUB_REPOSITORY", "")
+    name = repo.split("/")[-1] if "/" in repo else ""
+    if name:
+        marker = f"/{name}/{name}/"
+        idx = path_part.find(marker)
+        if idx != -1:
+            return path_part[idx + len(marker):]
+
+    return None
+
+
 def md_escape(value: Any, limit: int = _MAX_CELL) -> str:
     """Escape a value for safe inclusion in markdown tables and lists.
 
@@ -74,8 +126,11 @@ def md_escape(value: Any, limit: int = _MAX_CELL) -> str:
 
 
 def _location(entry: dict[str, Any]) -> str:
-    """Format ``file:line`` for a node-ish dict (file_path/file + line_start)."""
-    file_path = entry.get("file_path") or entry.get("file") or "?"
+    """Format ``file:line`` for a node-ish dict (file_path/file + line_start).
+
+    Paths are relativized first so CI-runner absolute prefixes don't leak.
+    """
+    file_path = relativize_path(entry.get("file_path") or entry.get("file") or "?")
     line_start = entry.get("line_start")
     if line_start:
         return f"{md_escape(file_path)}:{line_start}"
@@ -103,7 +158,7 @@ def _functions_table(
         else:
             tested = "yes"
         lines.append(
-            f"| {score:.2f} | {risk_level(score)} | {md_escape(name)} "
+            f"| {score:.2f} | {risk_level(score)} | {md_escape(relativize_path(name))} "
             f"| {_location(entry)} | {tested} |"
         )
     if len(priorities) > max_functions:
@@ -146,7 +201,7 @@ def _gaps_section(gaps: list[dict[str, Any]], max_gaps: int = 5) -> list[str]:
             break
     for gap in shown:
         name = gap.get("qualified_name") or gap.get("name") or "?"
-        lines.append(f"- {md_escape(name)} ({_location(gap)})")
+        lines.append(f"- {md_escape(relativize_path(name))} ({_location(gap)})")
     remaining = len(gaps) - len(shown)
     if remaining > 0:
         lines.append(f"- ...and {remaining} more without direct tests")
diff --git a/tests/test_action_render.py b/tests/test_action_render.py
index dd091b7c..1f12cd0c 100644
--- a/tests/test_action_render.py
+++ b/tests/test_action_render.py
@@ -66,6 +66,91 @@ def test_md_escape_caps_length():
     assert len(escaped) <= render._MAX_CELL
 
 
+# ---------------------------------------------------------------------------
+# relativize_path (strip CI-runner absolute prefixes)
+# ---------------------------------------------------------------------------
+
+
+def test_relativize_strips_github_workspace(monkeypatch):
+    monkeypatch.setenv("GITHUB_WORKSPACE", "/home/runner/work/repo/repo")
+    out = render.relativize_path(
+        "/home/runner/work/repo/repo/code_review_graph/embeddings.py"
+    )
+    assert out == "code_review_graph/embeddings.py"
+
+
+def test_relativize_keeps_symbol_suffix(monkeypatch):
+    monkeypatch.setenv("GITHUB_WORKSPACE", "/home/runner/work/repo/repo")
+    out = render.relativize_path(
+        "/home/runner/work/repo/repo/code_review_graph/embeddings.py::get_provider"
+    )
+    assert out == "code_review_graph/embeddings.py::get_provider"
+
+
+def test_relativize_handles_workspace_with_trailing_slash(monkeypatch):
+    monkeypatch.setenv("GITHUB_WORKSPACE", "/home/runner/work/repo/repo/")
+    out = render.relativize_path(
+        "/home/runner/work/repo/repo/pkg/mod.py::fn"
+    )
+    assert out == "pkg/mod.py::fn"
+
+
+def test_relativize_leaves_already_relative_paths(monkeypatch):
+    monkeypatch.setenv("GITHUB_WORKSPACE", "/home/runner/work/repo/repo")
+    assert render.relativize_path("auth/session.py::rotate_token") == (
+        "auth/session.py::rotate_token"
+    )
+    assert render.relativize_path("auth/session.py") == "auth/session.py"
+
+
+def test_relativize_falls_back_to_repo_segment_without_env(monkeypatch):
+    monkeypatch.delenv("GITHUB_WORKSPACE", raising=False)
+    monkeypatch.setenv("GITHUB_REPOSITORY", "owner/code-review-graph")
+    out = render.relativize_path(
+        "/home/runner/work/code-review-graph/code-review-graph/"
+        "scripts/render_pr_comment.py::main"
+    )
+    assert out == "scripts/render_pr_comment.py::main"
+
+
+def test_relativize_no_env_no_match_returns_input(monkeypatch):
+    monkeypatch.delenv("GITHUB_WORKSPACE", raising=False)
+    monkeypatch.delenv("GITHUB_REPOSITORY", raising=False)
+    # Nothing to strip against; render the path as-is rather than mangling it.
+    weird = "/opt/build/some/place/file.py::fn"
+    assert render.relativize_path(weird) == weird
+
+
+def test_relativize_handles_none_and_question_mark(monkeypatch):
+    monkeypatch.setenv("GITHUB_WORKSPACE", "/home/runner/work/repo/repo")
+    assert render.relativize_path("?") == "?"
+
+
+def test_render_markdown_relativizes_absolute_paths(monkeypatch):
+    monkeypatch.setenv("GITHUB_WORKSPACE", "/home/runner/work/repo/repo")
+    ws = "/home/runner/work/repo/repo"
+    abs_report = {
+        "risk_score": 0.72,
+        "review_priorities": [
+            {
+                "qualified_name": f"{ws}/code_review_graph/embeddings.py::get_provider",
+                "file_path": f"{ws}/code_review_graph/embeddings.py",
+                "line_start": 42,
+                "risk_score": 0.72,
+                "is_test": False,
+            }
+        ],
+        "affected_flows": [],
+        "test_gaps": [],
+    }
+    body = render.render_markdown(abs_report)
+    # Absolute CI-runner prefix must not leak into the rendered comment.
+    assert "/home/runner/work" not in body
+    assert render.md_escape("code_review_graph/embeddings.py::get_provider") in body
+    # Location column path is markdown-escaped (underscores) like every cell.
+    assert f"{render.md_escape('code_review_graph/embeddings.py')}:42" in body
+
+
 # ---------------------------------------------------------------------------
 # render_markdown
 # ---------------------------------------------------------------------------
diff --git a/tests/test_changes.py b/tests/test_changes.py
index 34dfdbb3..f6aec109 100644
--- a/tests/test_changes.py
+++ b/tests/test_changes.py
@@ -63,11 +63,13 @@ def _add_call(self, source_qn: str, target_qn: str, path: str = "app.py") -> Non
         self.store.upsert_edge(edge)
         self.store.commit()
 
-    def _add_tested_by(self, test_qn: str, target_qn: str, path: str = "app.py") -> None:
+    def _add_tested_by(self, production_qn: str, test_qn: str, path: str = "app.py") -> None:
+        # TESTED_BY edges are stored as source=production, target=test
+        # by the parser. See: #515
         edge = EdgeInfo(
             kind="TESTED_BY",
-            source=test_qn,
-            target=target_qn,
+            source=production_qn,
+            target=test_qn,
             file_path=path,
             line=1,
         )
@@ -225,7 +227,7 @@ def test_risk_score_untested_is_higher(self):
         self._add_func("untested_func", path="a.py", line_start=1, line_end=10)
         self._add_func("tested_func", path="b.py", line_start=1, line_end=10)
         self._add_func("test_tested_func", path="test_b.py", is_test=True)
-        self._add_tested_by("test_b.py::test_tested_func", "b.py::tested_func", "test_b.py")
+        self._add_tested_by("b.py::tested_func", "test_b.py::test_tested_func", "test_b.py")
 
         untested = self.store.get_node("a.py::untested_func")
         tested = self.store.get_node("b.py::tested_func")
@@ -367,7 +369,7 @@ def test_analyze_detects_test_gaps(self):
 
         # Only tested_c has a test.
         self._add_func("test_c", path="test_app.py", is_test=True)
-        self._add_tested_by("test_app.py::test_c", "app.py::tested_c", "test_app.py")
+        self._add_tested_by("app.py::tested_c", "test_app.py::test_c", "test_app.py")
 
         result = analyze_changes(
             self.store,
@@ -435,11 +437,11 @@ def test_detect_changes_tool_no_changes(self):
         # Patch _get_store to use our test store,
         # and get_changed_files/get_staged_and_unstaged to return empty.
         with (
-            patch("code_review_graph.tools.review._get_store") as mock_get_store,
+            patch("code_review_graph.tools.review._get_store_for_read") as mock_get_store,
             patch("code_review_graph.tools.review.get_changed_files", return_value=[]),
             patch("code_review_graph.tools.review.get_staged_and_unstaged", return_value=[]),
         ):
-            mock_get_store.return_value = (self.store, Path("/fake/repo"))
+            mock_get_store.return_value = (self.store, Path("/fake/repo"), None)
             # Prevent the store from being closed by the tool
             # (our teardown handles it).
             self.store.close = lambda: None
@@ -457,14 +459,14 @@ def test_detect_changes_tool_with_changes(self):
         self._add_func("my_func", path="/fake/repo/app.py", line_start=1, line_end=10)
 
         with (
-            patch("code_review_graph.tools.review._get_store") as mock_get_store,
+            patch("code_review_graph.tools.review._get_store_for_read") as mock_get_store,
             patch("code_review_graph.tools.review.get_changed_files", return_value=["app.py"]),
             patch(
                 "code_review_graph.tools.review.parse_git_diff_ranges",
                 return_value={"app.py": [(1, 10)]},
             ),
         ):
-            mock_get_store.return_value = (self.store, Path("/fake/repo"))
+            mock_get_store.return_value = (self.store, Path("/fake/repo"), None)
             self.store.close = lambda: None
 
             result = detect_changes_func(base="HEAD~1", repo_root="/fake/repo")
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 8f582e52..ae2a5e98 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -66,6 +66,7 @@ def test_serve_passes_auto_watch_flag(self):
             repo_root="repo-root",
             auto_watch=True,
             tools=None,
+            detail_level=None,
         )
 
     def test_mcp_alias_maps_to_serve(self):
diff --git a/tests/test_doctor.py b/tests/test_doctor.py
new file mode 100644
index 00000000..eb7e8241
--- /dev/null
+++ b/tests/test_doctor.py
@@ -0,0 +1,290 @@
+"""Tests for the `code-review-graph doctor` health checklist."""
+
+from __future__ import annotations
+
+import subprocess
+import sys
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from code_review_graph import doctor
+
+
+def _git(repo: Path, *args: str) -> None:
+    subprocess.run(
+        ["git", "-C", str(repo), "-c", "user.email=t@example.com",
+         "-c", "user.name=Test", "-c", "commit.gpgsign=false", *args],
+        check=True,
+        capture_output=True,
+        stdin=subprocess.DEVNULL,
+        timeout=30,
+    )
+
+
+@pytest.fixture
+def isolated_env(monkeypatch):
+    """Strip env that would otherwise leak the dev machine into checks."""
+    monkeypatch.delenv("CRG_DATA_DIR", raising=False)
+    monkeypatch.delenv("CRG_REPO_ROOT", raising=False)
+    return monkeypatch
+
+
+class TestCheckResult:
+    def test_check_result_glyphs(self):
+        ok = doctor.CheckResult(name="x", ok=True, message="m")
+        bad = doctor.CheckResult(name="y", ok=False, message="m")
+        assert ok.glyph == "✓"
+        assert bad.glyph == "✗"
+
+
+class TestGraphCheck:
+    def test_unbuilt_repo_flags_missing_graph(self, tmp_path, isolated_env):
+        repo = tmp_path / "repo"
+        repo.mkdir()
+        (repo / ".git").mkdir()
+
+        result = doctor.check_graph_db(repo)
+        assert result.ok is False
+        assert result.critical is True
+        # The hint must point the user at the build command.
+        assert "build" in (result.hint or "").lower()
+
+    def test_built_repo_passes_graph_check(self, tmp_path, isolated_env):
+        repo = tmp_path / "repo"
+        (repo / "src").mkdir(parents=True)
+        (repo / "src" / "app.py").write_text(
+            "def greet(name):\n    return 'hi ' + name\n", encoding="utf-8"
+        )
+        _git(repo, "init", "-q")
+        _git(repo, "add", ".")
+        _git(repo, "commit", "-q", "-m", "initial")
+
+        from code_review_graph.graph import GraphStore
+        from code_review_graph.incremental import full_build, get_db_path
+
+        store = GraphStore(get_db_path(repo))
+        try:
+            full_build(repo, store)
+        finally:
+            store.close()
+
+        result = doctor.check_graph_db(repo)
+        assert result.ok is True
+        assert "node" in result.message.lower()
+
+
+class TestFreshnessCheck:
+    def test_stale_graph_warns_but_is_not_critical(self, tmp_path, isolated_env):
+        repo = tmp_path / "repo"
+        (repo / "src").mkdir(parents=True)
+        (repo / "src" / "app.py").write_text(
+            "def greet(name):\n    return 'hi ' + name\n", encoding="utf-8"
+        )
+        _git(repo, "init", "-q")
+        _git(repo, "add", ".")
+        _git(repo, "commit", "-q", "-m", "initial")
+
+        from code_review_graph.graph import GraphStore
+        from code_review_graph.incremental import full_build, get_db_path
+
+        store = GraphStore(get_db_path(repo))
+        try:
+            full_build(repo, store)
+        finally:
+            store.close()
+
+        # Advance HEAD so the stored sha is now stale.
+        (repo / "src" / "app.py").write_text(
+            "def greet(name):\n    return 'hello ' + name\n", encoding="utf-8"
+        )
+        _git(repo, "add", ".")
+        _git(repo, "commit", "-q", "-m", "second")
+
+        result = doctor.check_freshness(repo)
+        assert result.ok is False
+        # Staleness is a warning, never a hard failure.
+        assert result.critical is False
+        assert "update" in (result.hint or "").lower()
+
+    def test_fresh_graph_passes_freshness(self, tmp_path, isolated_env):
+        repo = tmp_path / "repo"
+        (repo / "src").mkdir(parents=True)
+        (repo / "src" / "app.py").write_text(
+            "def greet(name):\n    return 'hi ' + name\n", encoding="utf-8"
+        )
+        _git(repo, "init", "-q")
+        _git(repo, "add", ".")
+        _git(repo, "commit", "-q", "-m", "initial")
+
+        from code_review_graph.graph import GraphStore
+        from code_review_graph.incremental import full_build, get_db_path
+
+        store = GraphStore(get_db_path(repo))
+        try:
+            full_build(repo, store)
+        finally:
+            store.close()
+
+        result = doctor.check_freshness(repo)
+        assert result.ok is True
+
+
+class TestServerSmoke:
+    def test_server_import_reports_tool_count(self, tmp_path):
+        result = doctor.check_server_boot(tmp_path)
+        assert result.ok is True
+        # The message should surface a positive tool count.
+        assert "tool" in result.message.lower()
+
+
+class TestMcpConfigCheck:
+    def test_missing_config_is_non_critical_with_install_hint(self, tmp_path, isolated_env):
+        repo = tmp_path / "repo"
+        repo.mkdir()
+        (repo / ".git").mkdir()
+        result = doctor.check_mcp_config(repo)
+        # No config files at all -> not OK but never blocks (install is the fix).
+        assert result.critical is False
+        assert "install" in (result.hint or "").lower()
+
+    def test_present_config_passes(self, tmp_path, isolated_env):
+        repo = tmp_path / "repo"
+        repo.mkdir()
+        (repo / ".git").mkdir()
+        (repo / ".mcp.json").write_text('{"mcpServers": {}}', encoding="utf-8")
+        result = doctor.check_mcp_config(repo)
+        assert result.ok is True
+        assert ".mcp.json" in result.message
+
+
+class TestEmbeddingsCheck:
+    def test_no_embeddings_notes_keyword_fallback(self, tmp_path, isolated_env):
+        repo = tmp_path / "repo"
+        (repo / "src").mkdir(parents=True)
+        (repo / "src" / "app.py").write_text(
+            "def greet(name):\n    return 'hi ' + name\n", encoding="utf-8"
+        )
+        _git(repo, "init", "-q")
+        _git(repo, "add", ".")
+        _git(repo, "commit", "-q", "-m", "initial")
+
+        from code_review_graph.graph import GraphStore
+        from code_review_graph.incremental import full_build, get_db_path
+
+        store = GraphStore(get_db_path(repo))
+        try:
+            full_build(repo, store)
+        finally:
+            store.close()
+
+        result = doctor.check_embeddings(repo)
+        # Embeddings are optional; absence is never critical.
+        assert result.critical is False
+        assert "keyword" in result.message.lower()
+
+
+class TestRunDoctor:
+    def test_unbuilt_repo_returns_nonzero_exit(self, tmp_path, isolated_env):
+        repo = tmp_path / "repo"
+        repo.mkdir()
+        (repo / ".git").mkdir()
+        results, exit_code = doctor.run_doctor(repo)
+        assert exit_code != 0
+        # At least one critical check must have failed.
+        assert any(r.critical and not r.ok for r in results)
+
+    def test_built_repo_returns_zero_exit(self, tmp_path, isolated_env):
+        repo = tmp_path / "repo"
+        (repo / "src").mkdir(parents=True)
+        (repo / "src" / "app.py").write_text(
+            "def greet(name):\n    return 'hi ' + name\n", encoding="utf-8"
+        )
+        _git(repo, "init", "-q")
+        _git(repo, "add", ".")
+        _git(repo, "commit", "-q", "-m", "initial")
+        (repo / ".mcp.json").write_text('{"mcpServers": {}}', encoding="utf-8")
+
+        from code_review_graph.graph import GraphStore
+        from code_review_graph.incremental import full_build, get_db_path
+
+        store = GraphStore(get_db_path(repo))
+        try:
+            full_build(repo, store)
+        finally:
+            store.close()
+
+        results, exit_code = doctor.run_doctor(repo)
+        assert exit_code == 0
+        # No critical check failed.
+        assert not any(r.critical and not r.ok for r in results)
+
+
+class TestDoctorCliWiring:
+    def test_cli_doctor_subcommand_invokes_handler(self, tmp_path):
+        repo = tmp_path / "repo"
+        repo.mkdir()
+        (repo / ".git").mkdir()
+        argv = ["code-review-graph", "doctor", "--repo", str(repo)]
+        with patch.object(sys, "argv", argv):
+            with patch("code_review_graph.doctor.run_doctor") as mock_run:
+                mock_run.return_value = (
+                    [doctor.CheckResult(name="graph", ok=True, message="ok")],
+                    0,
+                )
+                from code_review_graph import cli
+                with pytest.raises(SystemExit) as exc:
+                    cli.main()
+                assert exc.value.code == 0
+                mock_run.assert_called_once()
+
+    def test_cli_doctor_exits_nonzero_on_critical_failure(self, tmp_path):
+        repo = tmp_path / "repo"
+        repo.mkdir()
+        argv = ["code-review-graph", "doctor", "--repo", str(repo)]
+        with patch.object(sys, "argv", argv):
+            with patch("code_review_graph.doctor.run_doctor") as mock_run:
+                mock_run.return_value = (
+                    [doctor.CheckResult(
+                        name="graph", ok=False, critical=True,
+                        message="missing", hint="run build",
+                    )],
+                    1,
+                )
+                from code_review_graph import cli
+                with pytest.raises(SystemExit) as exc:
+                    cli.main()
+                assert exc.value.code == 1
+
+
+class TestPrintReport:
+    def test_print_report_emits_glyphs_and_summary(self, capsys):
+        results = [
+            doctor.CheckResult(name="graph", ok=True, message="42 nodes"),
+            doctor.CheckResult(
+                name="freshness", ok=False, critical=False,
+                message="stale", hint="run update",
+            ),
+        ]
+        doctor.print_report(results, repo_root=Path("/tmp/x"))
+        out = capsys.readouterr().out
+        assert "✓" in out
+        assert "✗" in out
+        assert "run update" in out
+        # One-line summary at the end.
+        assert "passed" in out.lower() or "healthy" in out.lower() or "issue" in out.lower()
+
+
+class TestGitHeadSha:
+    def test_current_head_sha_uses_safe_subprocess(self, tmp_path):
+        """The freshness git call must not use shell and must pass stdin=DEVNULL."""
+        repo = tmp_path / "repo"
+        repo.mkdir()
+        with patch("subprocess.run") as mock_run:
+            mock_run.return_value = MagicMock(returncode=0, stdout="abc123\n")
+            sha = doctor._current_head_sha(repo)
+        assert sha == "abc123"
+        _, kwargs = mock_run.call_args
+        assert kwargs.get("stdin") is subprocess.DEVNULL
+        assert "shell" not in kwargs or kwargs["shell"] is False
diff --git a/tests/test_enrich.py b/tests/test_enrich.py
index 862f20c6..307ab57d 100644
--- a/tests/test_enrich.py
+++ b/tests/test_enrich.py
@@ -99,9 +99,11 @@ def _seed_data(self):
                 file_path=f"{self.tmpdir}/build.py", line=15,
             ),
             EdgeInfo(
+                # TESTED_BY edges are stored as source=production, target=test
+                # by the parser. See: #515
                 kind="TESTED_BY",
-                source=f"{self.tmpdir}/test_parser.py::test_parse_file",
-                target=f"{self.tmpdir}/parser.py::parse_file",
+                source=f"{self.tmpdir}/parser.py::parse_file",
+                target=f"{self.tmpdir}/test_parser.py::test_parse_file",
                 file_path=f"{self.tmpdir}/test_parser.py", line=1,
             ),
         ]
diff --git a/tests/test_eval.py b/tests/test_eval.py
index 612dcecd..93e932d1 100644
--- a/tests/test_eval.py
+++ b/tests/test_eval.py
@@ -12,6 +12,8 @@
     generate_full_report,
     generate_markdown_report,
     generate_readme_tables,
+    median_token_reduction,
+    median_token_reduction_table,
 )
 
 try:
@@ -215,6 +217,106 @@ def test_generate_readme_tables():
         assert "1000" in tables
 
 
+def _write_agent_baseline_csv(results_dir: Path, rows: list[dict]) -> None:
+    path = results_dir / "test_agent_baseline_2026-01-01.csv"
+    fieldnames = [
+        "repo", "question", "terms", "files_matched", "top_files",
+        "baseline_tokens", "graph_tokens", "baseline_to_graph_ratio",
+        "status", "error",
+    ]
+    with open(path, "w", newline="") as f:
+        w = csv.DictWriter(f, fieldnames=fieldnames)
+        w.writeheader()
+        for r in rows:
+            w.writerow({k: r.get(k, "") for k in fieldnames})
+
+
+def test_median_token_reduction_from_agent_baseline():
+    """Median reduction is computed per-question from agent_baseline rows."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        results_dir = Path(tmpdir)
+        # Three OK rows: reductions 90%, 80%, 50% -> median 80%.
+        _write_agent_baseline_csv(results_dir, [
+            {"repo": "r", "question": "q1", "baseline_tokens": "1000",
+             "graph_tokens": "100", "status": "ok"},
+            {"repo": "r", "question": "q2", "baseline_tokens": "1000",
+             "graph_tokens": "200", "status": "ok"},
+            {"repo": "r", "question": "q3", "baseline_tokens": "1000",
+             "graph_tokens": "500", "status": "ok"},
+        ])
+        stats = median_token_reduction(results_dir)
+        assert stats["source"] == "agent_baseline"
+        assert stats["n"] == 3
+        assert stats["median_percent"] == 80.0
+        assert stats["max_percent"] == 90.0
+        assert stats["min_percent"] == 50.0
+
+
+def test_median_token_reduction_excludes_failed_rows():
+    """Rows with status != ok must not contribute to the median."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        results_dir = Path(tmpdir)
+        _write_agent_baseline_csv(results_dir, [
+            {"repo": "r", "question": "q1", "baseline_tokens": "1000",
+             "graph_tokens": "100", "status": "ok"},
+            {"repo": "r", "question": "q2", "baseline_tokens": "1000",
+             "graph_tokens": "0", "status": "no_graph_results"},
+            {"repo": "r", "question": "q3", "baseline_tokens": "0",
+             "graph_tokens": "0", "status": "error"},
+        ])
+        stats = median_token_reduction(results_dir)
+        assert stats["n"] == 1
+        assert stats["median_percent"] == 90.0
+
+
+def test_median_token_reduction_falls_back_to_token_efficiency():
+    """With no agent_baseline rows, fall back to token_efficiency."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        results_dir = Path(tmpdir)
+        te_path = results_dir / "test_token_efficiency_2026-01-01.csv"
+        with open(te_path, "w", newline="") as f:
+            w = csv.DictWriter(
+                f,
+                fieldnames=[
+                    "repo", "commit", "description", "changed_files",
+                    "naive_tokens", "standard_tokens", "graph_tokens",
+                    "naive_to_graph_ratio", "standard_to_graph_ratio",
+                    "status", "error",
+                ],
+            )
+            w.writeheader()
+            w.writerow({
+                "repo": "r", "commit": "abc", "naive_tokens": "1000",
+                "graph_tokens": "250", "status": "ok",
+            })
+        stats = median_token_reduction(results_dir)
+        assert stats["source"] == "token_efficiency"
+        assert stats["median_percent"] == 75.0
+
+
+def test_median_token_reduction_empty_dir_is_honest():
+    """Empty/failed runs report n/a rather than fabricating a number."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        stats = median_token_reduction(Path(tmpdir))
+        assert stats["n"] == 0
+        assert stats["median_percent"] is None
+        table = median_token_reduction_table(Path(tmpdir))
+        assert "n/a" in table
+        assert "Per-question token reduction" in table
+
+
+def test_median_token_reduction_table_renders_percent():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        results_dir = Path(tmpdir)
+        _write_agent_baseline_csv(results_dir, [
+            {"repo": "r", "question": "q1", "baseline_tokens": "1000",
+             "graph_tokens": "100", "status": "ok"},
+        ])
+        table = median_token_reduction_table(results_dir)
+        assert "90.0%" in table
+        assert "`agent_baseline`" in table
+
+
 def test_generate_full_report():
     """Feed sample CSV data and verify report sections."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/test_graph.py b/tests/test_graph.py
index fb407810..06282845 100644
--- a/tests/test_graph.py
+++ b/tests/test_graph.py
@@ -248,6 +248,119 @@ def test_metadata(self):
         assert self.store.get_metadata("test_key") == "test_value"
         assert self.store.get_metadata("nonexistent") is None
 
+    def test_get_transitive_tests_follows_direct_tested_by_edge(self):
+        """Regression test for #515: get_transitive_tests must follow
+        TESTED_BY edges by source_qualified (production) since the parser
+        stores source=production, target=test. The test function uses an
+        unconventional name so the bare-name fallback cannot mask the bug.
+        """
+        self.store.upsert_node(self._make_file_node("/src/calc.py"))
+        self.store.upsert_node(self._make_func_node("add", "/src/calc.py"))
+        self.store.upsert_node(self._make_file_node("/tests/check.py"))
+        self.store.upsert_node(self._make_func_node(
+            "verify_addition", "/tests/check.py", is_test=True,
+        ))
+        self.store.upsert_edge(EdgeInfo(
+            kind="TESTED_BY",
+            source="/src/calc.py::add",
+            target="/tests/check.py::verify_addition",
+            file_path="/tests/check.py", line=1,
+        ))
+        self.store.commit()
+
+        results = self.store.get_transitive_tests("/src/calc.py::add")
+        qns = {r["qualified_name"] for r in results}
+        assert "/tests/check.py::verify_addition" in qns
+        assert all(not r["indirect"] for r in results)
+
+    def test_get_transitive_tests_follows_calls_then_tested_by(self):
+        """Transitive coverage: caller -> CALLS -> callee -> TESTED_BY -> test.
+        Uses an unconventional test name so the bare-name fallback cannot
+        match. See: #515.
+        """
+        self.store.upsert_node(self._make_file_node("/src/svc.py"))
+        self.store.upsert_node(self._make_func_node("orchestrate", "/src/svc.py"))
+        self.store.upsert_node(self._make_func_node("compute", "/src/svc.py"))
+        self.store.upsert_node(self._make_file_node("/tests/check.py"))
+        self.store.upsert_node(self._make_func_node(
+            "verify_compute", "/tests/check.py", is_test=True,
+        ))
+        self.store.upsert_edge(EdgeInfo(
+            kind="CALLS", source="/src/svc.py::orchestrate",
+            target="/src/svc.py::compute", file_path="/src/svc.py", line=2,
+        ))
+        self.store.upsert_edge(EdgeInfo(
+            kind="TESTED_BY",
+            source="/src/svc.py::compute",
+            target="/tests/check.py::verify_compute",
+            file_path="/tests/check.py", line=1,
+        ))
+        self.store.commit()
+
+        results = self.store.get_transitive_tests(
+            "/src/svc.py::orchestrate", max_depth=2,
+        )
+        qns = {r["qualified_name"] for r in results}
+        assert "/tests/check.py::verify_compute" in qns
+        match = next(
+            r for r in results
+            if r["qualified_name"] == "/tests/check.py::verify_compute"
+        )
+        assert match["indirect"] is True
+
+    def test_parse_store_get_transitive_tests_end_to_end(self):
+        """End-to-end producer->store->consumer guard for #515.
+
+        Parse a real fixture pair (production + test) through the parser,
+        persist the emitted nodes/edges, and confirm get_transitive_tests
+        surfaces the test as covering the production code. This couples the
+        parser's canonical TESTED_BY direction (source=production,
+        target=test) to the consumer query, so a future parser flip would
+        break this test even if every hand-seeded fixture test still passed.
+        """
+        from code_review_graph.parser import CodeParser
+
+        fixtures = Path(__file__).parent / "fixtures"
+        parser = CodeParser()
+        all_nodes: list[NodeInfo] = []
+        all_edges: list[EdgeInfo] = []
+        for fixture in ("sample_python.py", "test_sample.py"):
+            nodes, edges = parser.parse_file(fixtures / fixture)
+            all_nodes.extend(nodes)
+            all_edges.extend(edges)
+
+        for n in all_nodes:
+            self.store.upsert_node(n)
+        for e in all_edges:
+            self.store.upsert_edge(e)
+        self.store.commit()
+
+        tested_by = [e for e in all_edges if e.kind == "TESTED_BY"]
+        assert tested_by, "fixture pair should yield at least one TESTED_BY edge"
+
+        # Producer direction guard: every TESTED_BY target must be a stored
+        # Test node, and querying the consumer (get_transitive_tests) by the
+        # edge's *source* (production) must surface that test target. If a
+        # future parser flip swapped the direction, the target would point at
+        # production code and this end-to-end assertion would fail.
+        checked = 0
+        for edge in tested_by:
+            target = self.store.get_node(edge.target)
+            assert target is not None, f"missing test node {edge.target}"
+            assert target.is_test, (
+                f"TESTED_BY target {edge.target!r} should be a test node; "
+                f"a flipped parser would put production code here"
+            )
+
+            results = self.store.get_transitive_tests(edge.source)
+            qns = {r["qualified_name"] for r in results}
+            assert edge.target in qns, (
+                f"get_transitive_tests({edge.source!r}) should surface test "
+                f"{edge.target!r}; got {sorted(qns)}"
+            )
+            checked += 1
+        assert checked >= 1
+
     def test_get_all_community_ids_logs_when_column_missing(self, caplog):
         conn = sqlite3.connect(":memory:")
         conn.row_factory = sqlite3.Row
@@ -401,7 +514,7 @@ def test_uncapped_small_frontier_unchanged(self):
             # Only callee_2 has a test
             if i == 2:
                 self.store.upsert_edge(EdgeInfo(
-                    kind="TESTED_BY", source=test_qn, target=callee_qn,
+                    kind="TESTED_BY", source=callee_qn, target=test_qn,
                     file_path="/t/test_hub.py", line=1,
                 ))
         self.store.commit()
diff --git a/tests/test_install_scripts.py b/tests/test_install_scripts.py
new file mode 100644
index 00000000..73751602
--- /dev/null
+++ b/tests/test_install_scripts.py
@@ -0,0 +1,92 @@
+"""Syntax and content lint for the one-line installer scripts.
+
+These tests guard the frictionless-install entry point: ``install.sh``
+(POSIX sh) and ``install.ps1`` (PowerShell). They run on every platform.
+The sh syntax check uses ``sh -n``; the PowerShell check is skipped when no
+``pwsh`` / ``powershell`` binary is available.
+"""
+
+from __future__ import annotations
+
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+INSTALL_SH = REPO_ROOT / "install.sh"
+INSTALL_PS1 = REPO_ROOT / "install.ps1"
+
+# Official Astral installer URLs the scripts must pin to.
+ASTRAL_SH_URL = "https://astral.sh/uv/install.sh"
+ASTRAL_PS1_URL = "https://astral.sh/uv/install.ps1"
+
+
+def test_install_scripts_exist():
+    assert INSTALL_SH.is_file(), "install.sh missing at repo root"
+    assert INSTALL_PS1.is_file(), "install.ps1 missing at repo root"
+
+
+def test_install_sh_is_posix_syntax_valid():
+    """`sh -n install.sh` must parse without errors (no bashisms allowed)."""
+    sh = shutil.which("sh")
+    if not sh:
+        pytest.skip("no POSIX sh available")
+    result = subprocess.run(
+        [sh, "-n", str(INSTALL_SH)],
+        capture_output=True,
+        text=True,
+        stdin=subprocess.DEVNULL,
+        timeout=30,
+    )
+    assert result.returncode == 0, f"sh -n failed: {result.stderr}"
+
+
+def test_install_sh_has_no_obvious_bashisms():
+    """Lightweight guard against the most common bash-only constructs."""
+    text = INSTALL_SH.read_text()
+    assert text.startswith("#!/bin/sh"), "install.sh must use the /bin/sh shebang"
+    # `[[ ... ]]`, arrays, and `function name {` are bash-only.
+    assert "[[" not in text, "double-bracket test is a bashism"
+    assert "function " not in text, "the `function` keyword is a bashism"
+
+
+def test_install_sh_pins_official_uv_installer():
+    text = INSTALL_SH.read_text()
+    assert ASTRAL_SH_URL in text, "install.sh must pin the official Astral uv installer"
+    # The CLI install path and fallbacks must be present.
+    assert "uv tool install code-review-graph" in text
+    assert "pipx install code-review-graph" in text
+    assert "pip" in text and "--user" in text
+
+
+def test_install_ps1_pins_official_uv_installer():
+    text = INSTALL_PS1.read_text()
+    assert ASTRAL_PS1_URL in text, "install.ps1 must pin the official Astral uv installer"
+    assert "uv tool install code-review-graph" in text
+    assert "pipx install code-review-graph" in text
+    assert "--user code-review-graph" in text
+
+
+def test_install_ps1_syntax_valid_if_powershell_available():
+    """Parse install.ps1 with PowerShell when present; skip otherwise."""
+    pwsh = shutil.which("pwsh") or shutil.which("powershell")
+    if not pwsh:
+        pytest.skip("no PowerShell (pwsh/powershell) available")
+    # Tokenize the script; a parse error returns a non-zero exit code.
+    cmd = (
+        "$ErrorActionPreference='Stop';"
+        "$errors=$null;"
+        "[void][System.Management.Automation.PSParser]::Tokenize("
+        f"(Get-Content -Raw '{INSTALL_PS1}'), [ref]$errors);"
+        "if ($errors.Count -gt 0) { $errors | ForEach-Object { $_.Message }; exit 1 }"
+    )
+    result = subprocess.run(
+        [pwsh, "-NoProfile", "-NonInteractive", "-Command", cmd],
+        capture_output=True,
+        text=True,
+        stdin=subprocess.DEVNULL,
+        timeout=60,
+    )
+    assert result.returncode == 0, f"PowerShell parse failed: {result.stdout}{result.stderr}"
diff --git a/tests/test_main.py b/tests/test_main.py
index 3b87f316..38170a4a 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -18,13 +18,35 @@
 
 @pytest.fixture(autouse=True)
 def _isolate_crg_tools_env(monkeypatch):
-    """Always strip CRG_TOOLS so that any test invoking ``crg_main.main``
-    does not accidentally permanently shrink the global tool registry
-    when the suite runs under a developer environment that exports
+    """Always strip CRG_TOOLS / CRG_DETAIL_LEVEL so that any test invoking
+    ``crg_main.main`` does not accidentally permanently shrink the global tool
+    registry when the suite runs under a developer environment that exports
     ``CRG_TOOLS``.  Without this the snapshot/restore in
     ``TestApplyToolFilter._restore_tools`` only sees the already-filtered
     set and cannot restore the dropped tools."""
     monkeypatch.delenv("CRG_TOOLS", raising=False)
+    monkeypatch.delenv("CRG_DETAIL_LEVEL", raising=False)
+
+
+@pytest.fixture(autouse=True)
+def _restore_tool_registry():
+    """Snapshot the full tool registry before each test and restore after.
+
+    ``main()`` now trims to the lean set by default (and ``_apply_tool_filter``
+    removes tools permanently via ``remove_tool``), so any test that starts the
+    server would otherwise leave the global registry shrunk for every later
+    test.  Re-register anything that was dropped, and reset the detail-level
+    override."""
+    import asyncio
+
+    original = asyncio.run(crg_main.mcp.list_tools())
+    saved_override = crg_main._detail_level_override
+    yield
+    crg_main._detail_level_override = saved_override
+    current_names = {t.name for t in asyncio.run(crg_main.mcp.list_tools())}
+    for tool in original:
+        if tool.name not in current_names:
+            crg_main.mcp.add_tool(tool)
 
 
 class TestResolveRepoRoot:
@@ -282,12 +304,37 @@ async def _tool_names() -> set[str]:
         return {t.name for t in await crg_main.mcp.list_tools()}
 
     @pytest.mark.asyncio
-    async def test_no_filter_keeps_all_tools(self):
-        """When neither --tools nor CRG_TOOLS is set, all tools remain."""
+    async def test_default_is_lean_set(self):
+        """With nothing specified, the server trims to the curated lean set."""
         before = await self._tool_names()
+        assert before == set(await self._tool_names())  # sanity
         crg_main._apply_tool_filter(None)
         after = await self._tool_names()
-        assert before == after
+        assert after == set(crg_main.LEAN_TOOLS)
+        # Lean is strictly a subset and never larger than the full registry.
+        assert after < before
+        assert len(crg_main.LEAN_TOOLS) == 7
+
+    @pytest.mark.asyncio
+    async def test_all_keyword_restores_full_set(self):
+        """``--tools all`` (or CRG_TOOLS=all) keeps every registered tool."""
+        before = await self._tool_names()
+        assert len(before) == 30
+        crg_main._apply_tool_filter("all")
+        after = await self._tool_names()
+        assert after == before
+        assert len(after) == 30
+
+    @pytest.mark.asyncio
+    async def test_all_keyword_case_insensitive(self):
+        before = await self._tool_names()
+        crg_main._apply_tool_filter("ALL")
+        assert await self._tool_names() == before
+
+    @pytest.mark.asyncio
+    async def test_lean_keyword_uses_curated_set(self):
+        crg_main._apply_tool_filter("lean")
+        assert await self._tool_names() == set(crg_main.LEAN_TOOLS)
 
     @pytest.mark.asyncio
     async def test_filter_via_argument(self):
@@ -305,6 +352,13 @@ async def test_filter_via_env_var(self, monkeypatch):
         remaining = await self._tool_names()
         assert remaining == {"query_graph_tool"}
 
+    @pytest.mark.asyncio
+    async def test_env_var_all_restores_full_set(self, monkeypatch):
+        before = await self._tool_names()
+        monkeypatch.setenv("CRG_TOOLS", "all")
+        crg_main._apply_tool_filter(None)
+        assert await self._tool_names() == before
+
     @pytest.mark.asyncio
     async def test_argument_takes_precedence_over_env(self, monkeypatch):
         """CLI --tools wins over CRG_TOOLS env var."""
@@ -314,8 +368,27 @@ async def test_argument_takes_precedence_over_env(self, monkeypatch):
         assert remaining == {"query_graph_tool"}
 
     @pytest.mark.asyncio
-    async def test_empty_string_is_noop(self):
-        """An empty string should not remove all tools."""
+    async def test_unknown_names_ignored_gracefully(self):
+        """Unknown tool names don't error; only the valid ones survive."""
+        crg_main._apply_tool_filter(
+            "query_graph_tool,this_tool_does_not_exist,another_bogus_tool"
+        )
+        remaining = await self._tool_names()
+        assert remaining == {"query_graph_tool"}
+
+    @pytest.mark.asyncio
+    async def test_all_unknown_names_removes_everything(self):
+        """A spec of only-unknown names is honoured (removes all real tools).
+
+        This is intentional: the names were explicit, just wrong. It is the
+        caller's responsibility to pass real names; we never *expand* output.
+        """
+        crg_main._apply_tool_filter("nonexistent_a,nonexistent_b")
+        assert await self._tool_names() == set()
+
+    @pytest.mark.asyncio
+    async def test_empty_string_keeps_all(self):
+        """An explicit empty string should not remove all tools."""
         before = await self._tool_names()
         crg_main._apply_tool_filter("")
         after = await self._tool_names()
@@ -327,3 +400,121 @@ async def test_whitespace_handling(self):
         crg_main._apply_tool_filter(" query_graph_tool , semantic_search_nodes_tool ")
         remaining = await self._tool_names()
         assert remaining == {"query_graph_tool", "semantic_search_nodes_tool"}
+
+    def test_stderr_notice_printed_when_trimming(self, capsys):
+        """Trimming to lean prints a one-line stderr notice; stdout stays clean."""
+        crg_main._apply_tool_filter("query_graph_tool")
+        captured = capsys.readouterr()
+        assert captured.out == ""  # stdout must stay clean for JSON-RPC
+        assert "lean tool mode" in captured.err
+        assert "--tools all" in captured.err
+
+    def test_no_stderr_notice_when_keeping_all(self, capsys):
+        """``all`` keeps everything and prints nothing."""
+        crg_main._apply_tool_filter("all")
+        captured = capsys.readouterr()
+        assert captured.out == ""
+        assert captured.err == ""
+
+    def test_lean_tools_are_all_registered(self):
+        """Every name in LEAN_TOOLS must be a real registered tool."""
+        import asyncio
+
+        registered = {t.name for t in asyncio.run(crg_main.mcp.list_tools())}
+        for name in crg_main.LEAN_TOOLS:
+            assert name in registered, f"LEAN_TOOLS lists unknown tool {name!r}"
+
+
+class TestResolveDetailLevel:
+    """Server-wide detail-level override (``CRG_DETAIL_LEVEL`` / --detail)."""
+
+    @pytest.fixture(autouse=True)
+    def _reset_override(self, monkeypatch):
+        original = crg_main._detail_level_override
+        monkeypatch.delenv("CRG_DETAIL_LEVEL", raising=False)
+        crg_main._detail_level_override = None
+        yield
+        crg_main._detail_level_override = original
+
+    def test_no_override_returns_argument(self):
+        assert crg_main._resolve_detail_level("standard") == "standard"
+        assert crg_main._resolve_detail_level("minimal") == "minimal"
+
+    def test_module_override_wins(self):
+        crg_main._detail_level_override = "minimal"
+        assert crg_main._resolve_detail_level("standard") == "minimal"
+
+    def test_env_override_wins(self, monkeypatch):
+        monkeypatch.setenv("CRG_DETAIL_LEVEL", "standard")
+        assert crg_main._resolve_detail_level("minimal") == "standard"
+
+    def test_module_override_beats_env(self, monkeypatch):
+        monkeypatch.setenv("CRG_DETAIL_LEVEL", "standard")
+        crg_main._detail_level_override = "minimal"
+        assert crg_main._resolve_detail_level("standard") == "minimal"
+
+    def test_unknown_override_ignored(self, monkeypatch):
+        """An invalid override never silently expands output."""
+        monkeypatch.setenv("CRG_DETAIL_LEVEL", "bogus")
+        assert crg_main._resolve_detail_level("minimal") == "minimal"
+
+    def test_override_is_case_insensitive(self, monkeypatch):
+        monkeypatch.setenv("CRG_DETAIL_LEVEL", "  MINIMAL  ")
+        assert crg_main._resolve_detail_level("standard") == "minimal"
+
+
+class TestMcpWrapperDefaults:
+    """The MCP tool wrappers default to ``minimal`` so the token moat holds."""
+
+    @staticmethod
+    def _wrapper(name):
+        fn = getattr(crg_main, name)
+        return getattr(fn, "fn", None) or fn
+
+    def test_query_graph_tool_defaults_minimal(self):
+        sig = inspect.signature(self._wrapper("query_graph_tool"))
+        assert sig.parameters["detail_level"].default == "minimal"
+
+    def test_semantic_search_defaults_minimal(self):
+        sig = inspect.signature(self._wrapper("semantic_search_nodes_tool"))
+        assert sig.parameters["detail_level"].default == "minimal"
+
+    def test_impact_radius_defaults_minimal(self):
+        sig = inspect.signature(self._wrapper("get_impact_radius_tool"))
+        assert sig.parameters["detail_level"].default == "minimal"
+
+    def test_detect_changes_defaults_minimal(self):
+        sig = inspect.signature(self._wrapper("detect_changes_tool"))
+        assert sig.parameters["detail_level"].default == "minimal"
+
+    def test_query_graph_tool_has_max_results_cap(self):
+        sig = inspect.signature(self._wrapper("query_graph_tool"))
+        assert "max_results" in sig.parameters
+        assert sig.parameters["max_results"].default == 100
+
+    def test_review_tools_have_max_tokens(self):
+        for name in ("get_review_context_tool", "detect_changes_tool"):
+            sig = inspect.signature(self._wrapper(name))
+            assert "max_tokens" in sig.parameters
+            assert sig.parameters["max_tokens"].default == 6000
+
+
+class TestServeDetailFlag:
+    """``serve --detail`` sets the module-level override before the loop."""
+
+    @pytest.fixture(autouse=True)
+    def _reset(self):
+        original = crg_main._detail_level_override
+        yield
+        crg_main._detail_level_override = original
+
+    def test_detail_flag_sets_override(self, monkeypatch):
+        monkeypatch.setattr(crg_main.mcp, "run", lambda **kw: None)
+        crg_main.main(repo_root=None, detail_level="minimal")
+        assert crg_main._detail_level_override == "minimal"
+
+    def test_invalid_detail_flag_ignored(self, monkeypatch):
+        monkeypatch.setattr(crg_main.mcp, "run", lambda **kw: None)
+        crg_main._detail_level_override = None
+        crg_main.main(repo_root=None, detail_level="loud")
+        assert crg_main._detail_level_override is None
diff --git a/tests/test_not_built_guard.py b/tests/test_not_built_guard.py
new file mode 100644
index 00000000..53a87dc5
--- /dev/null
+++ b/tests/test_not_built_guard.py
@@ -0,0 +1,204 @@
+"""Tests for the graph-not-built guard on read tools.
+
+A read tool run against a repo whose graph was never built must return a
+``not_built`` status instead of a misleading empty result. A built graph
+(even one with zero matches) must keep returning a normal response.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from code_review_graph.graph import GraphStore
+from code_review_graph.parser import NodeInfo
+from code_review_graph.tools import (
+    get_architecture_overview_func,
+    get_impact_radius,
+    list_communities_func,
+    list_flows,
+    list_graph_stats,
+    query_graph,
+    semantic_search_nodes,
+)
+from code_review_graph.tools._common import (
+    _get_store_for_read,
+    _graph_is_built,
+    _not_built_response,
+)
+
+
+def _make_unbuilt_repo(tmp_path: Path) -> Path:
+    """A repo that looks like a project root but has never been built."""
+    (tmp_path / ".git").mkdir()
+    (tmp_path / ".code-review-graph").mkdir()
+    return tmp_path
+
+
+def _make_built_repo(tmp_path: Path, *, with_nodes: bool = True) -> Path:
+    """A repo with a real graph.db that has been through a build."""
+    (tmp_path / ".git").mkdir()
+    db_dir = tmp_path / ".code-review-graph"
+    db_dir.mkdir()
+    db_path = db_dir / "graph.db"
+    with GraphStore(db_path) as store:
+        if with_nodes:
+            store.upsert_node(NodeInfo(
+                kind="File", name="a.py", file_path=str(tmp_path / "a.py"),
+                line_start=1, line_end=10, language="python",
+            ))
+            store.upsert_node(NodeInfo(
+                kind="Function", name="hello", file_path=str(tmp_path / "a.py"),
+                line_start=2, line_end=4, language="python",
+            ))
+        else:
+            # Simulate a build over an empty repo: a build marker is written
+            # even though no nodes were produced.
+            store.set_metadata("last_updated", "2026-01-01T00:00:00")
+            store.set_metadata("last_build_type", "full")
+        store.commit()
+    return tmp_path
+
+
+# ---------------------------------------------------------------------------
+# Helper-level unit tests
+# ---------------------------------------------------------------------------
+
+
+def test_not_built_response_shape():
+    resp = _not_built_response()
+    assert resp["status"] == "not_built"
+    assert "build_or_update_graph_tool" in resp["message"]
+    assert resp["next_tool_suggestions"] == ["build_or_update_graph_tool"]
+
+
+def test_graph_is_built_false_when_empty(tmp_path):
+    db_path = tmp_path / "graph.db"
+    with GraphStore(db_path) as store:
+        assert _graph_is_built(store) is False
+
+
+def test_graph_is_built_true_with_nodes(tmp_path):
+    db_path = tmp_path / "graph.db"
+    with GraphStore(db_path) as store:
+        store.upsert_node(NodeInfo(
+            kind="File", name="a.py", file_path="a.py",
+            line_start=1, line_end=1, language="python",
+        ))
+        store.commit()
+        assert _graph_is_built(store) is True
+
+
+def test_graph_is_built_true_with_build_marker_no_nodes(tmp_path):
+    db_path = tmp_path / "graph.db"
+    with GraphStore(db_path) as store:
+        store.set_metadata("last_updated", "2026-01-01T00:00:00")
+        store.commit()
+        assert _graph_is_built(store) is True
+
+
+def test_get_store_for_read_missing_db(tmp_path):
+    _make_unbuilt_repo(tmp_path)
+    store, root, not_built = _get_store_for_read(str(tmp_path))
+    assert store is None
+    assert root is None
+    assert not_built is not None
+    assert not_built["status"] == "not_built"
+    # No graph.db should have been created by the guard.
+    assert not (tmp_path / ".code-review-graph" / "graph.db").exists()
+
+
+def test_get_store_for_read_built(tmp_path):
+    _make_built_repo(tmp_path)
+    store, root, not_built = _get_store_for_read(str(tmp_path))
+    try:
+        assert not_built is None
+        assert store is not None
+        assert root is not None
+    finally:
+        if store is not None:
+            store.close()
+
+
+# ---------------------------------------------------------------------------
+# Read tools on an unbuilt repo -> not_built
+# ---------------------------------------------------------------------------
+
+
+def test_query_graph_unbuilt_returns_not_built(tmp_path):
+    _make_unbuilt_repo(tmp_path)
+    result = query_graph(pattern="callers_of", target="anything", repo_root=str(tmp_path))
+    assert result["status"] == "not_built"
+    assert "build_or_update_graph_tool" in result["next_tool_suggestions"]
+
+
+def test_semantic_search_unbuilt_returns_not_built(tmp_path):
+    _make_unbuilt_repo(tmp_path)
+    result = semantic_search_nodes("anything", repo_root=str(tmp_path))
+    assert result["status"] == "not_built"
+
+
+def test_impact_radius_unbuilt_returns_not_built(tmp_path):
+    _make_unbuilt_repo(tmp_path)
+    result = get_impact_radius(changed_files=["a.py"], repo_root=str(tmp_path))
+    assert result["status"] == "not_built"
+
+
+def test_list_graph_stats_unbuilt_returns_not_built(tmp_path):
+    _make_unbuilt_repo(tmp_path)
+    result = list_graph_stats(repo_root=str(tmp_path))
+    assert result["status"] == "not_built"
+
+
+def test_list_flows_unbuilt_returns_not_built(tmp_path):
+    _make_unbuilt_repo(tmp_path)
+    result = list_flows(repo_root=str(tmp_path))
+    assert result["status"] == "not_built"
+
+
+def test_list_communities_unbuilt_returns_not_built(tmp_path):
+    _make_unbuilt_repo(tmp_path)
+    result = list_communities_func(repo_root=str(tmp_path))
+    assert result["status"] == "not_built"
+
+
+def test_architecture_overview_unbuilt_returns_not_built(tmp_path):
+    _make_unbuilt_repo(tmp_path)
+    result = get_architecture_overview_func(repo_root=str(tmp_path))
+    assert result["status"] == "not_built"
+
+
+# ---------------------------------------------------------------------------
+# Built graph with no match -> normal empty result (NOT the guard)
+# ---------------------------------------------------------------------------
+
+
+def test_query_graph_built_no_match_is_not_guarded(tmp_path):
+    _make_built_repo(tmp_path)
+    result = query_graph(
+        pattern="callers_of", target="does_not_exist", repo_root=str(tmp_path),
+    )
+    assert result["status"] != "not_built"
+    # Real "no node" path, not the build guard.
+    assert result["status"] in ("not_found", "ok")
+
+
+def test_semantic_search_built_no_match_is_not_guarded(tmp_path):
+    _make_built_repo(tmp_path)
+    result = semantic_search_nodes("zzz_nonexistent_zzz", repo_root=str(tmp_path))
+    assert result["status"] == "ok"
+    assert result["results"] == []
+
+
+def test_list_graph_stats_built_returns_ok(tmp_path):
+    _make_built_repo(tmp_path)
+    result = list_graph_stats(repo_root=str(tmp_path))
+    assert result["status"] == "ok"
+    assert result["total_nodes"] >= 1
+
+
+def test_built_but_empty_graph_is_not_guarded(tmp_path):
+    """A build over an empty repo (0 nodes + build marker) is not 'not_built'."""
+    _make_built_repo(tmp_path, with_nodes=False)
+    result = list_graph_stats(repo_root=str(tmp_path))
+    assert result["status"] == "ok"
+    assert result["total_nodes"] == 0
diff --git a/tests/test_parser.py b/tests/test_parser.py
index d1d96411..2a77ee0d 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -357,6 +357,39 @@ def test_tested_by_edges_generated(self):
         tested_by = [e for e in edges if e.kind == "TESTED_BY"]
         assert len(tested_by) >= 1
 
+    def test_tested_by_edge_direction(self):
+        """Regression for #515: TESTED_BY must point production -> test.
+
+        Producer-side guard. Reads naturally as "X is tested by Y":
+        source = production code, target = the test that covers it.
+        Consumer-side queries (tests_for, get_transitive_tests,
+        test-gap detection, flow criticality, dead-code) were fixed in
+        #515 to match this canonical direction. Without this assertion the
+        parser could silently flip the direction and every consumer
+        test would still pass against the inverted edges.
+        """
+        nodes, edges = self.parser.parse_file(FIXTURES / "test_sample.py")
+        tested_by = [e for e in edges if e.kind == "TESTED_BY"]
+        assert len(tested_by) >= 1, "fixture should yield at least one TESTED_BY edge"
+
+        test_file = str(FIXTURES / "test_sample.py")
+        test_qualified = {
+            f"{test_file}::{n.name}" for n in nodes if n.kind == "Test"
+        }
+        assert test_qualified, "fixture should yield at least one Test node"
+
+        for edge in tested_by:
+            assert edge.target in test_qualified, (
+                f"TESTED_BY edge has wrong direction: target={edge.target!r} "
+                f"is not a Test node from {test_file}. "
+                f"Expected target in {sorted(test_qualified)}. "
+                f"Edge: kind={edge.kind} source={edge.source} target={edge.target}"
+            )
+            assert edge.source not in test_qualified, (
+                f"TESTED_BY edge points test -> test: "
+                f"{edge.source} -> {edge.target}"
+            )
+
     def test_recursion_depth_guard(self):
         """Parser should not crash on deeply nested code."""
         # Generate Python code with many nested functions (> _MAX_AST_DEPTH)
diff --git a/tests/test_refactor.py b/tests/test_refactor.py
index bc83bc7f..452b1f81 100644
--- a/tests/test_refactor.py
+++ b/tests/test_refactor.py
@@ -797,6 +797,75 @@ def test_only_references_edge_sufficient(self):
         assert "handleCreate" not in dead_names
 
 
+class TestFindDeadCodeWithTestedBy:
+    """Regression for #515: dead-code detection must read TESTED_BY edges
+    in the canonical direction (source=production, target=test).
+
+    A production function whose only reference is its test must NOT be
+    flagged as dead. Before the fix, find_dead_code looked for TESTED_BY
+    edges where the production node was the *target*, but the parser writes
+    the production node as the *source*, so tested-but-uncalled functions
+    were wrongly reported as dead.
+    """
+
+    def setup_method(self):
+        self.tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
+        self.store = GraphStore(self.tmp.name)
+        self._seed()
+
+    def teardown_method(self):
+        self.store.close()
+        Path(self.tmp.name).unlink(missing_ok=True)
+
+    def _seed(self):
+        # Production file with a function that is tested but never called.
+        self.store.upsert_node(NodeInfo(
+            kind="File", name="/repo/calc.py", file_path="/repo/calc.py",
+            line_start=1, line_end=100, language="python",
+        ))
+        # Unconventional name so no naming-convention heuristic rescues it.
+        self.store.upsert_node(NodeInfo(
+            kind="Function", name="combine", file_path="/repo/calc.py",
+            line_start=10, line_end=20, language="python",
+        ))
+        # A truly dead function (no edges at all).
+        self.store.upsert_node(NodeInfo(
+            kind="Function", name="orphan", file_path="/repo/calc.py",
+            line_start=30, line_end=40, language="python",
+        ))
+        # Test file + test.
+        self.store.upsert_node(NodeInfo(
+            kind="File", name="/repo/spec.py", file_path="/repo/spec.py",
+            line_start=1, line_end=50, language="python",
+        ))
+        self.store.upsert_node(NodeInfo(
+            kind="Test", name="verify_combine_behaviour",
+            file_path="/repo/spec.py", line_start=5, line_end=10,
+            language="python", is_test=True,
+        ))
+        # Canonical TESTED_BY: source=production, target=test.
+        self.store.upsert_edge(EdgeInfo(
+            kind="TESTED_BY",
+            source="/repo/calc.py::combine",
+            target="/repo/spec.py::verify_combine_behaviour",
+            file_path="/repo/spec.py", line=6,
+        ))
+        self.store.commit()
+
+    def test_tested_function_not_dead(self):
+        """A function whose only reference is a canonical TESTED_BY edge
+        (source=production) must not be flagged as dead."""
+        dead = find_dead_code(self.store)
+        dead_names = {d["name"] for d in dead}
+        assert "combine" not in dead_names
+
+    def test_orphan_function_still_dead(self):
+        """A function with no edges at all is still reported as dead."""
+        dead = find_dead_code(self.store)
+        dead_names = {d["name"] for d in dead}
+        assert "orphan" in dead_names
+
+
 class TestTransitiveImportResolution:
     """Tests for 2-hop transitive import resolution in plausible caller."""
 
diff --git a/tests/test_tools.py b/tests/test_tools.py
index 578536d4..ad0d9078 100644
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -11,7 +11,9 @@
 import code_review_graph.tools.docs as docs_module
 from code_review_graph.graph import GraphStore, _sanitize_name, node_to_dict
 from code_review_graph.parser import EdgeInfo, NodeInfo
+from code_review_graph.context_savings import estimate_tokens
 from code_review_graph.tools import (
+    detect_changes_func,
     get_affected_flows_func,
     get_architecture_overview_func,
     get_community_func,
@@ -369,6 +371,71 @@ def test_validate_repo_root_error_mentions_svn_marker(self, tmp_path):
             _validate_repo_root(tmp_path)
 
 
+class TestQueryGraphTestsFor:
+    """Regression tests for #515: query_graph(pattern='tests_for')
+    must follow direct TESTED_BY edges (source=production, target=test)
+    rather than relying on the naming-convention fallback.
+    """
+
+    def setup_method(self):
+        import tempfile as _tempfile
+        self._tmpdir = _tempfile.TemporaryDirectory()
+        self.repo_root = Path(self._tmpdir.name)
+        # _validate_repo_root requires .git or .code-review-graph.
+        (self.repo_root / ".code-review-graph").mkdir()
+        # find_project_root / get_db_path look here for the DB.
+        from code_review_graph.incremental import get_db_path
+        self.db_path = get_db_path(self.repo_root)
+        self.store = GraphStore(str(self.db_path))
+        self._seed_graph()
+
+    def teardown_method(self):
+        self.store.close()
+        self._tmpdir.cleanup()
+
+    def _seed_graph(self):
+        # Production function with an unconventional name so the
+        # naming-convention fallback (test_<name> / Test<name>) cannot match.
+        self.store.upsert_node(NodeInfo(
+            kind="File", name="/src/calc.py", file_path="/src/calc.py",
+            line_start=1, line_end=20, language="python",
+        ))
+        self.store.upsert_node(NodeInfo(
+            kind="Function", name="combine", file_path="/src/calc.py",
+            line_start=1, line_end=5, language="python",
+        ))
+        self.store.upsert_node(NodeInfo(
+            kind="File", name="/tests/spec.py", file_path="/tests/spec.py",
+            line_start=1, line_end=20, language="python",
+        ))
+        self.store.upsert_node(NodeInfo(
+            kind="Test", name="verify_combine_behaviour",
+            file_path="/tests/spec.py",
+            line_start=1, line_end=5, language="python", is_test=True,
+        ))
+        # Parser-canonical direction: source=production, target=test.
+        self.store.upsert_edge(EdgeInfo(
+            kind="TESTED_BY",
+            source="/src/calc.py::combine",
+            target="/tests/spec.py::verify_combine_behaviour",
+            file_path="/tests/spec.py", line=1,
+        ))
+        self.store.commit()
+        # Release the writer connection so query_graph can open its own.
+        self.store.close()
+
+    def test_query_graph_tests_for_finds_direct_edge(self):
+        from code_review_graph.tools import query_graph
+        result = query_graph(
+            pattern="tests_for",
+            target="/src/calc.py::combine",
+            repo_root=str(self.repo_root),
+        )
+        assert result["status"] == "ok"
+        qns = {r["qualified_name"] for r in result["results"]}
+        assert "/tests/spec.py::verify_combine_behaviour" in qns
+
+
 class TestGetDocsSection:
     """Tests for the get_docs_section tool."""
 
@@ -506,8 +573,8 @@ def test_store_closed_on_success(
     ):
         store = MagicMock()
         monkeypatch.setattr(
-            analysis_module, "_get_store",
-            lambda repo_root=None: (store, tmp_path),
+            analysis_module, "_get_store_for_read",
+            lambda repo_root=None: (store, tmp_path, None),
         )
         monkeypatch.setattr(
             analysis_module, analysis_name, lambda *a, **k: ret,
@@ -524,8 +591,8 @@ def test_store_closed_when_analysis_raises(
     ):
         store = MagicMock()
         monkeypatch.setattr(
-            analysis_module, "_get_store",
-            lambda repo_root=None: (store, tmp_path),
+            analysis_module, "_get_store_for_read",
+            lambda repo_root=None: (store, tmp_path, None),
         )
 
         def boom(*args, **kwargs):
@@ -1563,3 +1630,256 @@ def test_task_routing_refactor(self):
             task="refactor auth module", repo_root=str(self.root),
         )
         assert "refactor" in result["next_tool_suggestions"]
+
+
+def _seed_large_review_repo(repo: Path, n_files: int = 12, lines: int = 400) -> list[str]:
+    """Build a repo + graph with many large changed files, returning rel paths."""
+    (repo / ".git").mkdir(parents=True, exist_ok=True)
+    src = repo / "src"
+    src.mkdir(parents=True, exist_ok=True)
+    graph_dir = repo / ".code-review-graph"
+    graph_dir.mkdir(exist_ok=True)
+    store = GraphStore(graph_dir / "graph.db")
+    rel_paths: list[str] = []
+    try:
+        for i in range(n_files):
+            rel = f"src/mod_{i:02d}.py"
+            rel_paths.append(rel)
+            full = repo / rel
+            # Big file -> _extract_relevant_lines kicks in, but each changed
+            # function still contributes a sizable snippet.
+            body = "\n".join(
+                f"    x{j} = compute_{i}_{j}()  # {'pad ' * 8}"
+                for j in range(lines)
+            )
+            full.write_text(f"def handle_{i}():\n{body}\n", encoding="utf-8")
+            abs_path = str(full)
+            store.upsert_node(NodeInfo(
+                kind="File", name=abs_path, file_path=abs_path,
+                line_start=1, line_end=lines + 1, language="python",
+            ))
+            store.upsert_node(NodeInfo(
+                kind="Function", name=f"handle_{i}", file_path=abs_path,
+                line_start=1, line_end=lines + 1, language="python",
+            ))
+        store.commit()
+    finally:
+        store.close()
+    return rel_paths
+
+
+class TestReviewTokenBudget:
+    """get_review_context must honour a token budget (no 85k-400k blowups)."""
+
+    def test_large_review_is_capped_and_reports_omissions(self, tmp_path):
+        repo = tmp_path / "bigrepo"
+        # Few files (small scaffold) but big snippets (the budget pressure).
+        rel_paths = _seed_large_review_repo(repo, n_files=10, lines=600)
+
+        # First measure the unbudgeted size so the budget is provably tight.
+        full = get_review_context(
+            changed_files=rel_paths, repo_root=str(repo),
+            include_source=True, max_tokens=0,
+        )
+        full_tokens = estimate_tokens(full)
+        # Budget to roughly half: forces a partial drop without dropping all.
+        budget = full_tokens // 2
+        assert budget > 0
+
+        result = get_review_context(
+            changed_files=rel_paths,
+            repo_root=str(repo),
+            include_source=True,
+            max_tokens=budget,
+        )
+
+        assert result["status"] == "ok"
+        # The response must fit the budget (with a small slack for the note
+        # and the omitted metadata appended after budgeting).
+        assert estimate_tokens(result) <= budget * 1.15
+        # The budgeted response is strictly smaller than the unbudgeted one.
+        assert estimate_tokens(result) < full_tokens
+        # Omissions are reported honestly.
+        assert "omitted" in result
+        omitted = result["omitted"]
+        assert omitted["source_files"] >= 1
+        assert "Token budget" in omitted["note"]
+        # Some snippets survived; not all were dropped.
+        kept = result["context"]["source_snippets"]
+        assert 0 < len(kept) < len(rel_paths)
+
+    def test_unbudgeted_review_keeps_every_snippet(self, tmp_path):
+        repo = tmp_path / "bigrepo2"
+        rel_paths = _seed_large_review_repo(repo, n_files=6, lines=200)
+
+        result = get_review_context(
+            changed_files=rel_paths,
+            repo_root=str(repo),
+            include_source=True,
+            max_tokens=0,  # disabled
+        )
+        assert "omitted" not in result
+        assert len(result["context"]["source_snippets"]) == len(rel_paths)
+
+    def test_generous_budget_omits_nothing(self, tmp_path):
+        repo = tmp_path / "bigrepo3"
+        rel_paths = _seed_large_review_repo(repo, n_files=3, lines=20)
+
+        result = get_review_context(
+            changed_files=rel_paths,
+            repo_root=str(repo),
+            include_source=True,
+            max_tokens=1_000_000,
+        )
+        assert "omitted" not in result
+        assert len(result["context"]["source_snippets"]) == len(rel_paths)
+
+    def test_budget_never_drops_structural_context(self, tmp_path):
+        repo = tmp_path / "bigrepo4"
+        rel_paths = _seed_large_review_repo(repo, n_files=10, lines=300)
+
+        result = get_review_context(
+            changed_files=rel_paths,
+            repo_root=str(repo),
+            include_source=True,
+            max_tokens=4000,
+        )
+        # Subgraph + guidance always survive — only snippets are budgeted.
+        ctx = result["context"]
+        assert "graph" in ctx
+        assert "review_guidance" in ctx
+
+
+class TestDetectChangesTokenBudget:
+    """detect_changes_func can balloon too — guard it with max_tokens."""
+
+    def _seed(self, repo: Path, n: int = 60) -> None:
+        (repo / ".git").mkdir(parents=True, exist_ok=True)
+        graph_dir = repo / ".code-review-graph"
+        graph_dir.mkdir(parents=True, exist_ok=True)
+        store = GraphStore(graph_dir / "graph.db")
+        abs_file = str(repo / "src" / "big.py")
+        (repo / "src").mkdir(parents=True, exist_ok=True)
+        (repo / "src" / "big.py").write_text("# big\n", encoding="utf-8")
+        try:
+            store.upsert_node(NodeInfo(
+                kind="File", name=abs_file, file_path=abs_file,
+                line_start=1, line_end=2, language="python",
+            ))
+            for i in range(n):
+                store.upsert_node(NodeInfo(
+                    kind="Function",
+                    name=f"func_{i}_{'x' * 40}",
+                    file_path=abs_file,
+                    line_start=i + 1, line_end=i + 2, language="python",
+                ))
+            store.commit()
+        finally:
+            store.close()
+
+    def test_large_detect_changes_is_capped(self, tmp_path):
+        repo = tmp_path / "dc"
+        self._seed(repo, n=80)
+        abs_file = str(repo / "src" / "big.py")
+
+        budget = 3000
+        result = detect_changes_func(
+            changed_files=[abs_file],
+            repo_root=str(repo),
+            detail_level="standard",
+            max_tokens=budget,
+        )
+        assert result["status"] == "ok"
+        assert estimate_tokens(result) <= budget * 1.15
+        assert "omitted" in result
+        assert result["omitted"]["changed_functions"] >= 1
+        assert "Token budget" in result["omitted"]["note"]
+        # Highest-signal guidance preserved.
+        assert "review_priorities" in result
+
+    def test_detect_changes_unbudgeted_keeps_all(self, tmp_path):
+        repo = tmp_path / "dc2"
+        self._seed(repo, n=80)
+        abs_file = str(repo / "src" / "big.py")
+
+        result = detect_changes_func(
+            changed_files=[abs_file],
+            repo_root=str(repo),
+            detail_level="standard",
+            max_tokens=0,
+        )
+        assert "omitted" not in result
+        assert len(result["changed_functions"]) == 80
+
+
+class TestQueryGraphMaxResults:
+    """query_graph max_results caps unbounded callers_of/callees_of payloads."""
+
+    def setup_method(self):
+        self.tmp_dir = tempfile.mkdtemp()
+        self.root = Path(self.tmp_dir).resolve()
+        (self.root / ".git").mkdir()
+        (self.root / ".code-review-graph").mkdir()
+        db_path = str(self.root / ".code-review-graph" / "graph.db")
+        target_file = str(self.root / "hot.py")
+        self.hot_qn = f"{target_file}::hot"
+        with GraphStore(db_path) as store:
+            store.upsert_node(NodeInfo(
+                kind="Function", name="hot", file_path=target_file,
+                line_start=1, line_end=2, language="python",
+            ))
+            for i in range(50):
+                caller_file = str(self.root / f"caller_{i}.py")
+                caller_qn = f"{caller_file}::caller_{i}"
+                store.upsert_node(NodeInfo(
+                    kind="Function", name=f"caller_{i}", file_path=caller_file,
+                    line_start=1, line_end=2, language="python",
+                ))
+                store.upsert_edge(EdgeInfo(
+                    kind="CALLS", source=caller_qn,
+                    target=f"{target_file}::hot", file_path=caller_file, line=1,
+                ))
+            store.commit()
+
+    def teardown_method(self):
+        import shutil
+
+        shutil.rmtree(self.tmp_dir, ignore_errors=True)
+
+    def test_cap_enforced(self):
+        result = query_graph(
+            pattern="callers_of", target=self.hot_qn,
+            repo_root=str(self.root), max_results=10,
+        )
+        assert result["status"] == "ok"
+        assert len(result["results"]) == 10
+        assert result["truncated"] is True
+        assert result["total_results"] == 50
+        # Edges are also bounded.
+        assert len(result["edges"]) <= 10
+
+    def test_no_cap_returns_all(self):
+        result = query_graph(
+            pattern="callers_of", target=self.hot_qn,
+            repo_root=str(self.root), max_results=0,
+        )
+        assert len(result["results"]) == 50
+        assert "truncated" not in result
+
+    def test_cap_above_total_is_noop(self):
+        result = query_graph(
+            pattern="callers_of", target=self.hot_qn,
+            repo_root=str(self.root), max_results=1000,
+        )
+        assert len(result["results"]) == 50
+        assert "truncated" not in result
+
+    def test_minimal_detail_reports_total_when_capped(self):
+        result = query_graph(
+            pattern="callers_of", target=self.hot_qn,
+            repo_root=str(self.root), detail_level="minimal", max_results=10,
+        )
+        assert result["result_count"] == 50
+        assert result["truncated"] is True
+        assert result["total_results"] == 50
+        assert len(result["results"]) <= 5  # minimal caps display at 5
diff --git a/uv.lock b/uv.lock
index 3b2f09f9..8a3c9887 100644
--- a/uv.lock
+++ b/uv.lock
@@ -331,7 +331,7 @@ wheels = [
 
 [[package]]
 name = "code-review-graph"
-version = "2.3.6"
+version = "2.4.0"
 source = { editable = "." }
 dependencies = [
     { name = "fastmcp" },