From 06f557251d635049842855100ad6132d8cf28cac Mon Sep 17 00:00:00 2001
From: Josh Tuffy <jtuffy117@gmail.com>
Date: Sun, 15 Mar 2026 18:45:15 -0400
Subject: [PATCH 1/6] heatmap

---
 .agents/rules/amendments.md                   |  34 ++
 .agents/rules/project.md                      |  23 +-
 .agents/workflows/gen-figures.md              | 112 +++++++
 .agents/workflows/researcher.md               | 313 ++++++------------
 src/compute_permit_sim/schemas/batch.py       |  95 +++++-
 .../schemas/sweep_params.py                   |  40 +++
 src/compute_permit_sim/services/sweep.py      | 117 ++++++-
 .../vis/components/history.py                 |   9 +-
 .../vis/components/history_items.py           |  57 +++-
 src/compute_permit_sim/vis/export.py          | 142 +++++++-
 src/compute_permit_sim/vis/page.py            |  14 +-
 src/compute_permit_sim/vis/panels/batch.py    | 270 ++++++++++++++-
 .../vis/panels/batch_results.py               | 116 ++++++-
 src/compute_permit_sim/vis/plotting.py        | 148 ++++++++-
 src/compute_permit_sim/vis/state/history.py   |  12 +-
 src/compute_permit_sim/vis/state/run_state.py |  11 +-
 tests/factories.py                            |  36 +-
 tests/services/test_sweep.py                  | 100 ++++++
 tests/vis/test_export.py                      |  48 +++
 19 files changed, 1440 insertions(+), 257 deletions(-)
 create mode 100644 .agents/workflows/gen-figures.md

diff --git a/.agents/rules/amendments.md b/.agents/rules/amendments.md
index 6552644..72890f0 100644
--- a/.agents/rules/amendments.md
+++ b/.agents/rules/amendments.md
@@ -35,3 +35,37 @@ Sessionfriction identified during prune-repo + cleanup work:
   - **Logging config** (`vis/logging_config.py` is canonical; never configure in `page.py`)
   - **Schema field removal checklist** (grep callers, confirm never populated, no `list[dict]` placeholders)
 - `python.md` sync-guard test bullet was vague. Expanded with the concrete pattern: compare `model_fields` against the reactive registry at test time.
+
+## 2026-03-12 — Always use vis/plotting.py for paper figures
+
+`vis/plotting.py` is the single source of truth for all chart functions. When generating
+figures for papers, scripts, or exports, **always call functions from there** — never write
+custom matplotlib from scratch in agent_workspace scripts.
+
+Available functions to reach for first:
+- `plot_sweep_curve(SweepResult)` — 1D sweep line chart with tipping point annotation
+- `plot_mc_trajectory(MonteCarloResult)` — compliance over steps, mean ± SD
+- `plot_mc_violator_trajectory`, `plot_mc_audit_trajectory`, `plot_mc_payoff_comparison`
+
+If a needed figure type does not exist in `vis/plotting.py` (e.g. a 2D heatmap), **add it
+there** following the `create_figure()` style, then use it from both the UI and scripts.
+Do not create ad-hoc matplotlib code in agent_workspace when an equivalent function
+already exists or could be added once and shared.
+
+## 2026-03-14 — Reflect: plotting discipline and scripting infrastructure
+
+Three friction sources identified, all patched this session:
+
+1. **`project.md` Plots section was too sparse** — 2 lines with no function inventory.
+   Replaced with the full table of all 12 public functions and a mandatory "check before
+   writing any matplotlib" gate. The agent can no longer claim ignorance of what exists.
+
+2. **`researcher.md` step 4 had zero mention of `vis/plotting.py`** — meaning every
+   research visualisation session was allowed to invent ad-hoc matplotlib. Added an
+   `[!IMPORTANT]` callout before the visualise step enforcing the same gate.
+
+3. **No `/gen-figures` workflow existed** — figure generation for paper sections was
+   improvised each time. Created `.agents/workflows/gen-figures.md` with a step-by-step
+   thin-caller checklist, a copy-paste script template, and a `// turbo` run step.
+   Also created `scripts/README.md` as the cross-session script index so existing
+   scripts are discoverable rather than silently re-invented.
diff --git a/.agents/rules/project.md b/.agents/rules/project.md
index a646e01..9a403d2 100644
--- a/.agents/rules/project.md
+++ b/.agents/rules/project.md
@@ -100,7 +100,28 @@ All export functions return `bytes` for Solara's `FileDownload`. Key functions:
 
 ## Plots (`vis/plotting.py`)
 
-Accept typed result objects, return `matplotlib.Figure`, never import Solara. Use `fig_to_png(fig)` from `results.py` to convert to bytes for downloads. Standard figsize `(7, 4)`.
+**Before writing any matplotlib code**, check this inventory. If the function you need exists here, call it. If it doesn't exist, add it here following the `create_figure()` style — then use it from both scripts and the UI.
+
+All functions accept typed result objects, return `matplotlib.Figure`, never import Solara. Use `fig_to_png(fig)` from `results.py` to convert to bytes for downloads.
+
+| Function | Input | Use for |
+|---|---|---|
+| `plot_sweep_curve(result, metric, reference_lines)` | `SweepResult` | 1D sweep line chart with tipping point + optional scenario markers |
+| `plot_sweep_heatmap(grid, x_values, y_values, ...)` | 2D `list[list[float]]` | 2D compliance heatmap (joint sensitivity) |
+| `plot_mc_trajectory(result)` | `MonteCarloResult` | Compliance mean ± SD over steps |
+| `plot_mc_violator_trajectory(result)` | `MonteCarloResult` | Violator count mean ± SD over steps |
+| `plot_mc_audit_trajectory(result)` | `MonteCarloResult` | Audit rate band over steps |
+| `plot_mc_payoff_comparison(result)` | `MonteCarloResult` | Compliant vs. violating lab payoff bar chart |
+| `plot_compliance_distribution(df)` | agents DataFrame | Bar chart: Compliant / Uncaught / Caught-by-source |
+| `plot_audit_source_distribution(df)` | agents DataFrame | Bar chart: labs caught per AuditSource channel |
+| `plot_audit_targeting(rates, counts, ...)` | scalar rates | Compliant vs. non-compliant audit rate bar |
+| `plot_audit_coefficient_distribution(df)` | agents DataFrame | Histogram of per-lab audit coefficients |
+| `plot_time_series(data, label, color_key)` | `pd.Series` | Generic single-series step chart |
+| `plot_scatter(df, x_col, y_col, ...)` | DataFrame | Scatter with compliance coloring |
+
+All figures are created via `create_figure()` (standardized style, `Agg` backend). Never call `plt.figure()` or `plt.subplots()` in scripts.
+
+**Committed figure scripts** — see `scripts/README.md` for an index of existing scripts. Always check there before re-creating a script.
 
 ## Testing
 
diff --git a/.agents/workflows/gen-figures.md b/.agents/workflows/gen-figures.md
new file mode 100644
index 0000000..c52733d
--- /dev/null
+++ b/.agents/workflows/gen-figures.md
@@ -0,0 +1,112 @@
+---
+description: Generate one or more figures for the paper or a report — enforces the thin-caller pattern where all plot logic lives in vis/plotting.py.
+---
+
+# Gen-Figures Workflow
+
+Use this workflow whenever you need to produce `.png` figures for the paper, a report,
+or any committed output. Do **not** improvise — follow these steps in order.
+
+## Step 1 — Check `vis/plotting.py` first
+
+Open `project.md` and read the **Plots** section inventory table.  
+Find the function that matches the figure you need.
+
+- **Exists?** → go to Step 3.
+- **Doesn't exist?** → you must add it to `vis/plotting.py` first (Step 2), then proceed.
+
+**Never write raw `plt.figure()` or `plt.subplots()` in a script or agent_workspace file.**
+Use `create_figure()` from `vis/plotting.py` at minimum, and prefer a proper named function.
+
+## Step 2 — Add a missing function to `vis/plotting.py` (if needed)
+
+1. Follow the `create_figure()` style exactly — see existing functions for the pattern.
+2. Accept typed result objects (`SweepResult`, `MonteCarloResult`, `pd.DataFrame`) — no raw dicts.
+3. Return `matplotlib.Figure` (never call `plt.show()` or `plt.savefig()` inside the function).
+4. Add it to the inventory table in `project.md` → Plots section.
+5. Run `uv run ruff check . --fix && uv run mypy .` — fix any issues before proceeding.
+
+## Step 3 — Check `scripts/README.md` for an existing script
+
+Open `scripts/README.md`.  
+If a script already generates the figures you need (or close to it), **run that script** rather than writing a new one.
+
+```bash
+uv run python scripts/<existing_script>.py --out-dir agent_workspace/figures
+```
+
+If the existing script's parameters or scenarios need adjustment, edit it in place — don't create a duplicate.
+
+## Step 4 — Write a thin-caller script (if no existing script covers it)
+
+Create a new script in `scripts/` following the naming convention `gen_<section_or_topic>_figs.py`.
+
+The script must follow the **thin-caller pattern**:
+- All imports from `vis.plotting`, `services.*`, `schemas.*`
+- No matplotlib setup — no `plt.figure()`, `plt.subplots()`, `matplotlib.use()`
+- Each figure: call the `vis/plotting.py` function → `fig.savefig(out_dir / "name.png", dpi=150, bbox_inches="tight")`
+- Accept `--out-dir` as a CLI argument (default: `agent_workspace/figures`)
+- Print progress lines so it's easy to monitor
+
+Minimal template:
+```python
+"""Generate <topic> figures for the paper.
+
+Thin caller only — all plot logic lives in vis/plotting.py.
+Output: agent_workspace/figures/<fig_name>.png
+
+Usage:
+    uv run python scripts/gen_<topic>_figs.py [--out-dir PATH]
+"""
+from __future__ import annotations
+import argparse
+from pathlib import Path
+
+
+def main(out_dir: Path) -> None:
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    from compute_permit_sim.services.config_manager import load_scenario
+    from compute_permit_sim.services.sweep import run_sweep
+    from compute_permit_sim.vis.plotting import plot_sweep_curve  # add as needed
+
+    base = load_scenario("basic/<scenario>.json")
+    result = run_sweep(base, "audit.base_prob", [...], n_runs=50)
+    fig = plot_sweep_curve(result)
+    fig.savefig(out_dir / "fig_<name>.png", dpi=150, bbox_inches="tight")
+    print("Done.")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--out-dir", type=Path, default=Path("agent_workspace/figures"))
+    args = parser.parse_args()
+    main(args.out_dir)
+```
+
+## Step 5 — Update `scripts/README.md`
+
+After writing or modifying a script, update the index in `scripts/README.md`:
+
+```
+| gen_<topic>_figs.py | Generates <figures> for Section X. Scenarios: <...>. |
+```
+
+## Step 6 — Run and verify
+
+// turbo
+```bash
+uv run python scripts/<script_name>.py --out-dir agent_workspace/figures
+```
+
+Check that:
+- All expected `.png` files are created in `out_dir`
+- No matplotlib warnings or errors in output
+- Figures look correct (open them and inspect)
+
+## Step 7 — Commit the script
+
+```bash
+git add scripts/<script_name>.py scripts/README.md
+git commit -m "scripts: add <topic> figure generator"
+```
diff --git a/.agents/workflows/researcher.md b/.agents/workflows/researcher.md
index d8d6618..4595ce2 100644
--- a/.agents/workflows/researcher.md
+++ b/.agents/workflows/researcher.md
@@ -1,249 +1,146 @@
 ---
-description: Researcher workflow — run experiments against the simulation, analyze results, and iterate toward interesting insights. Mimics a domain researcher's scientific process.
+description: Researcher workflow — run parameter sweep experiments against the simulation, analyze results, and iterate toward interesting insights. Produces reproducible figures and a findings log.
 ---
 
 # Researcher Workflow
 
-Use this workflow when the goal is not to build software but to **generate knowledge**
-about the simulation: discover interesting parameter regimes, confirm or refute
-hypotheses, or produce results suitable for the paper.
+Use this workflow when the goal is to **generate knowledge** from the simulation through
+parameter sweeps — discover interesting compliance regimes, confirm or refute hypotheses,
+and produce figures suitable for the paper.
 
-The agent acts as a computational researcher. It forms a hypothesis, designs an
-experiment, runs it, analyzes the output, **generates and evaluates figures**, updates
-its understanding, and iterates.
+> [!IMPORTANT]
+> **Read `agent_workspace/research/synthesis.md` first** — this is the single source of
+> truth for what is already known. Do not repeat experiments already answered there.
 
-## Workspace
-
-All research artifacts live in `agent_workspace/` (gitignored, Docker-excluded):
+## Workspace Structure
 
 ```
 agent_workspace/
-├── scripts/          # reusable helpers — kept across sessions
-│   ├── analyze_scenarios.py   # run canonical scenarios, print metrics
-│   └── collect_code.py        # dump codebase to text for LLM context
-└── research/         # one subfolder per research session
-    └── YYYY-MM-DD_slug/       # e.g. 2026-03-07_audit-tipping-point
-        ├── exp1_<name>.py     # experiment scripts
-        ├── exp2_<name>.py
-        ├── findings.md        # this session's findings (canonical output)
-        ├── *.png              # figures saved by experiment scripts
-        └── scenarios/         # draft scenario JSONs for this session
-            └── *.json
+├── sections/
+│   └── <section_slug>/          # e.g. section_43
+│       ├── explore_sweep.py     # thin-caller sweep runner — the main experiment tool
+│       ├── findings.md          # running log of all experiments and verdicts
+│       └── figures/
+│           └── run_NNN/         # one folder per research session / batch of runs
+│               ├── *.png        # generated sweep figures
+│               └── params.json  # complete parameter + result record for every figure
+└── research/
+    └── synthesis.md             # cross-session synthesis (update with /synthesize-research)
 ```
 
-**Session folder contract** — every researcher workflow invocation produces:
-1. `findings.md` — hypothesis, result, interpretation, next steps
-2. At least one experiment script (`exp<n>_<name>.py`) that is re-runnable
-3. Any generated figures (`.png`) from matplotlib
-4. Draft scenario JSONs scoped under `scenarios/` if new configs were explored
-
-**Never write to `outputs/`** — that's the user's UI export directory.
-Research outputs go under `agent_workspace/research/<session>/`.
-
-## Setup
-
-Before running experiments, orient yourself:
-
-1. **Read `agent_workspace/research/synthesis.md`** first — this is the single source
-   of truth for what is already known. It tells you what has been confirmed, what the
-   interesting regime is, and which open questions remain. Do not repeat experiments
-   that are already answered there.
-
-2. **Check the previous session folder** (`agent_workspace/research/*/findings.md`)
-   for the most recent raw findings, in case the synthesis hasn't been updated yet.
-
-3. **Load the default scenario** as your baseline. All experiments should be expressed
-   as deltas from it so results are comparable.
-
-   > [!IMPORTANT]
-   > The **default `ScenarioConfig` is degenerate** (100% compliance). More broadly,
-   > **any config where `permit_cap ≥ n_agents` is degenerate** — agents can always
-   > obtain a permit, so compliance costs nothing regardless of enforcement parameters.
-   > The interesting regime is **`permit_cap < n_agents`** (forced scarcity). Compliance
-   > tracks the Q/N ratio approximately linearly: `avg_compliance ≈ cap / n_agents`.
-   > Always start from `research_margin_baseline.json` with a tightened cap (e.g. cap=10,
-   > n_agents=15) as the canonical interesting starting point.
-   >
-   > **`detection_rate = nan` is a diagnostic signal**, not missing data. It means zero
-   > violations occurred — which confirms a degenerate config. Treat it as a hard
-   > signal to rethink the parameter regime, not as an experiment result.
-
-4. **Check the service API** — experiments call services directly (no Solara dependency):
-   ```python
-   from compute_permit_sim.services.simulation_runner import run_single
-   from compute_permit_sim.services.monte_carlo import run_monte_carlo
-   from compute_permit_sim.services.sweep import run_sweep
-   from compute_permit_sim.schemas import ScenarioConfig
-   ```
-
-5. **Scenario file lifecycle:**
-   - During research: write to `agent_workspace/research/scenarios/`
-   - When finalized and validated: promote to `scenarios/basic/` with a clear, non-`research_` name
-   - Never write draft scenario files to `scenarios/basic/` — that directory is for user-facing configs
+**Run folder contract**: every invocation of `explore_sweep.py` either creates a new
+`run_NNN/` folder (auto-incremented) or adds to an existing one (using `--run N`).
+Each run folder contains the figures and a `params.json` that records every experiment
+in that folder for reproducibility.
 
+## Setup (first time in a repo)
 
-## Service API Reference
-
-All services are pure Python with no Solara dependency. Call them directly in experiment scripts.
-
-### Basic run — `run_single`
-```python
-from compute_permit_sim.services.simulation_runner import run_single
-from compute_permit_sim.schemas import ScenarioConfig
-
-result = run_single(config)  # returns SimulationRun
-print(result.metrics.avg_compliance)
-print(result.metrics.final_compliance)
-print(result.metrics.detection_rate)
-```
-
-### Monte Carlo — `run_monte_carlo`
-```python
-from compute_permit_sim.services.monte_carlo import run_monte_carlo
-
-result = run_monte_carlo(
-    config=cfg,
-    n_runs=30,          # replications
-    store_raw=True,     # include per-seed rows in result.raw_seeds
-    seeds=[0..n-1],     # optional: explicit seed list
-)
-# Key fields:
-# result.avg_compliance.mean / .std
-# result.final_compliance.mean / .std
-# result.p10_compliance, result.p90_compliance
-# result.pct_runs_full_compliance
-# result.detection_rate.mean
-# result.step_compliance   — list[MetricStats], one per step
-# result.raw_seeds         — list[SeedResult] if store_raw=True
-```
-
-### Parameter sweep — `run_sweep`
-```python
-from compute_permit_sim.services.sweep import run_sweep
-
-result = run_sweep(
-    base_config=cfg,
-    param_path="audit.base_prob",      # dot-path into ScenarioConfig
-    values=[0.02, 0.05, 0.10, 0.20],  # explicit value list
-    param_label="Base Audit Rate π₀", # optional, for display/export
-    n_runs=20,
-)
-# result.points — list[SweepPoint], each has .param_value + .result (MonteCarloResult)
-for pt in result.points:
-    print(pt.param_value, pt.result.avg_compliance.mean)
-```
-
-To generate a sweep value range from param registry defaults:
-```python
-from compute_permit_sim.schemas.sweep_params import get_param, generate_values
-param = get_param("audit.base_prob")
-values = generate_values(param, min_val=0.02, max_val=0.30, step=0.02)
-```
+1. Ensure the simulator installs cleanly:
+   ```bash
+   uv sync
+   ```
 
-### Loading scenario files
-```python
-from compute_permit_sim.services.config_manager import load_scenario
+2. Create the section folder and figures directory if they don't exist:
+   ```bash
+   mkdir -p agent_workspace/sections/<slug>/figures
+   ```
 
-# From scenarios/basic/ (committed):
-cfg = load_scenario("basic/scenario_2_strict.json")
+3. Create `findings.md` in the section folder with this header:
+   ```markdown
+   # Sweep Findings Log
 
-# From agent_workspace (use absolute path):
-from pathlib import Path
-cfg = load_scenario(str(Path("agent_workspace/research/2026-03-07_audit-tipping-point/scenarios/research_margin_baseline.json")))
-```
+   | # | Scenario | Param | Range | n | Verdict | Notes | Figure |
+   |---|---|---|---|---|---|---|---|
+   ```
 
-### Inline config construction + overrides
-```python
-from compute_permit_sim.schemas import ScenarioConfig
+4. The sweep runner script `explore_sweep.py` must exist (copy from another section or
+   create fresh — the pattern is documented below).
 
-# Inline — good for one-off experiments:
-cfg = ScenarioConfig(name="My Test", steps=60, n_agents=15)
+## Running an Experiment
 
-# Override one field on a loaded scenario (frozen model):
-from compute_permit_sim.schemas.sweep_params import override_config
-cfg2 = override_config(cfg, "audit.base_prob", 0.12)
+```bash
+uv run python agent_workspace/sections/<slug>/explore_sweep.py \
+    --scenario basic/scenario_2_strict.json \
+    --param market.fixed_price \
+    --min 5 --max 200 --step 10 \
+    --n-runs 30 \
+    --out crisis_fixed_price.png \
+    --ref "70|orange"          # optional: x-value|color for scenario default marker
 ```
 
+Key flags:
+| Flag | Required | Description |
+|---|---|---|
+| `--scenario` | ✓ | Path relative to `scenarios/`, e.g. `basic/scenario_2_strict.json` |
+| `--param` | ✓ | Dot-path into `ScenarioConfig`, e.g. `market.fixed_price` |
+| `--min/--max/--step` | ✓ | Value range for the sweep |
+| `--n-runs` | | Monte Carlo replications per point (default: 30) |
+| `--out` | ✓ | PNG filename, saved inside the current run folder |
+| `--run N` | | Add to existing `run_N` instead of auto-incrementing |
+| `--ref "X\|color"` | | Mark a scenario's default value on the curve (pipe-delimited; NO `$` in the arg — bash expands it) |
 
-Each iteration is one experiment. Aim for 3–5 iterations before synthesising conclusions.
-
-### 1 — Hypothesise
-
-State a falsifiable hypothesis in plain English, e.g.:
-> "Increasing `audit.base_prob` beyond 0.15 produces diminishing compliance returns
->  regardless of `collateral_amount`."
+> [!CAUTION]
+> Never put `$` in a `--ref` argument. Inside double quotes Bash expands `$70` as positional parameter `$7` followed by `0`, so Python never sees the intended text.
+> The plotting code appends the x-value and scenario name automatically: `"Strict Enforcement default: 70"`.
 
-A good hypothesis:
-- Names the mechanism it expects to activate
-- Predicts the direction and rough magnitude of an effect
-- Is refutable by the numbers you will produce
+## Interpreting Results
 
-### 2 — Design
+The script prints a `VERDICT` on exit:
+- **INTERESTING**: compliance range > 20 pp OR max SD > 7 pp across the sweep
+- **FLAT**: sweep produces no meaningful variation — move on
 
-Choose the right experiment type:
+**What makes a sweep interesting for the paper:**
+- Non-linear effect: knee, plateau, phase transition
+- Tipping point where ≥ 95% compliance is first achieved or lost
+- Wide compliance range (> 30 pp) with low SD (signal not noise)
+- Result that contradicts an intuitive expectation
 
-| Goal | Tool |
-|---|---|
-| Point-in-time result for one config | `run_single` |
-| Distribution of outcomes across seeds | `run_monte_carlo` (N ≥ 30) |
-| How one parameter shifts compliance | `run_sweep` |
+**What to skip:**
+- Flat lines — even if mechanistically correct, they don't tell a visual story
+- Sweeps where SD > mean × 0.5 — variance dominates, result is noise
 
-Write the script in `agent_workspace/research/exp_<n>_<short_name>.py`.
+## Tools Available
 
-### 3 — Run and observe
+All are in `vis/plotting.py` — check there before writing any matplotlib:
 
-```bash
-uv run python agent_workspace/research/exp_1_baseline.py
+```python
+from compute_permit_sim.vis.plotting import plot_sweep_curve
+from compute_permit_sim.services.sweep import run_sweep
+from compute_permit_sim.services.config_manager import load_scenario
+from compute_permit_sim.schemas.sweep_params import get_param, generate_values
 ```
 
-- Note anomalies: did anything behave unexpectedly? Runtime? NaN values?
-- Jot raw numbers as inline comments in the script before moving to analysis.
-
-### 4 — Visualise and evaluate
-
-After getting numeric results, generate figures. Ask yourself:
+`plot_sweep_curve(result, metric="avg_compliance", reference_lines=[(x, label, color)])`:
+- Plots mean ± 1 SD band
+- Annotates every 95% threshold crossing as "Tipping (95%) ≈ X"
+- Marks scenario default values as diamonds on the curve, labelled with scenario name
+- Reference line labels are ignored (label is auto-derived); only x-value and color matter
 
-**Is this graph interesting?**
-- Does it show a non-linear effect? (knee, saturation, phase transition)
-- Does it contradict the expected direction?
-- Does variance dominate? (if SD > mean × 0.5 the result is noise — don't graph it)
-- Would a policy-maker care about the magnitude?
+## Findings Log
 
-**If visually boring** (flat line, constant slope, trivial intercept), skip saving it
-and iterate the hypothesis. An uninteresting result is useful information — note it.
+After each experiment, add a row to `findings.md`:
 
-**If promising**, save the PNG to `agent_workspace/research/` and use `generate_image`
-to visualise what an ideal version of that graph would look like (different color scheme,
-better annotations, additional reference lines) — then iterate toward it in matplotlib.
-
-### 5 — Analyse
-
-- Compute the effect size relative to baseline (% change, not just absolute).
-- Check whether variance swamps the signal: if SD > mean × 0.5, the result is noise.
-- Look for non-linearities: does the curve have a knee? Is there a saturation point?
-- Cross-reference against `SweepResult.tipping_point()` if applicable.
-
-### 6 — Update understanding and iterate
+```
+| NN | Scenario | param.path | min–max | n_runs | FLAT/INTERESTING | one-line note | run_NNN/filename.png |
+```
 
-- Re-state whether the hypothesis was confirmed, refuted, or inconclusive.
-- Refine the next hypothesis based on what surprised you.
-- Stop when you have confident results across ≥ 2 related dimensions,
-  or when the last two iterations produce no new insight.
+Keep the log as the source of truth for what has been run. The `params.json` in each
+run folder is the reproducibility record; the findings log is the research narrative.
 
-## Synthesis
+## Iteration Loop
 
-After the loop, produce a markdown summary saved to `agent_workspace/research/findings_<date>.md`:
+1. **Hypothesise**: pick a scenario + parameter expected to show non-linear behaviour
+2. **Run**: `explore_sweep.py --scenario ... --param ... --min ... --max ...`
+3. **View**: read the PNG with `view_file` — check labels, tipping points, curve shape
+4. **Judge**: INTERESTING (save, log, continue) or FLAT (note why, pick different param)
+5. **Repeat** until you have 2–3 interesting curves per scenario for the paper
 
-- One paragraph per experiment: hypothesis → result → interpretation
-- A table of key numeric findings
-- Recommended parameter ranges for the paper's figures
-- Any caveats (e.g. sensitivity to `n_runs`, boundary effects)
-- Embed the most interesting figures as images
+**Typical interesting dimensions per scenario type:**
+- *High-price scenario* (e.g. Crisis): sweep `market.fixed_price` and `lab.economic_value_max`
+- *Enforcement scenario* (e.g. Maxwell): sweep `audit.base_prob` and `audit.penalty_amount`
+- *Supply-constrained* (e.g. Lawless): sweep `market.permit_cap`
+- *Dynamic* (e.g. Dynamic Escalation): sweep `audit.audit_escalation`
 
-## Notes on scale
+## After the Session
 
-- Monte Carlo with `n_runs=50` takes ~10–30 s for 100 steps.
-- Sweeps over 10+ points with `n_runs=20` each can take several minutes.
-- For quick orientation experiments, use `n_runs=10` and `steps=50`; scale up to
-  confirm final results.
-- All services are pure Python with no Solara dependency — safe to call headlessly.
+Run `/synthesize-research` to merge findings into `agent_workspace/research/synthesis.md`.
diff --git a/src/compute_permit_sim/schemas/batch.py b/src/compute_permit_sim/schemas/batch.py
index 97cf69e..11cd984 100644
--- a/src/compute_permit_sim/schemas/batch.py
+++ b/src/compute_permit_sim/schemas/batch.py
@@ -30,6 +30,12 @@ class BatchColumnNames:
     PARAM_VALUE = "param_value"
     N_RUNS = "n_runs"
 
+    # 2-D grid sweep — per-axis identifiers
+    PARAM_X_PATH = "param_x_path"
+    PARAM_X_VALUE = "param_x_value"
+    PARAM_Y_PATH = "param_y_path"
+    PARAM_Y_VALUE = "param_y_value"
+
     # Compliance
     COMPLIANCE_RATE = "compliance_rate"
     N_VIOLATORS = "n_violators"
@@ -201,15 +207,90 @@ def compliance_series(self) -> list[tuple[float, float, float]]:
         ]
 
     def tipping_point(self, threshold: float = 0.95) -> float | None:
-        """Return first param value where mean avg_compliance >= threshold.
+        """Return the boundary param value where mean avg_compliance crosses threshold.
+
+        Direction-aware: detects whether compliance rises or falls with the
+        parameter and returns the appropriate boundary.
+
+        - Upward sweep (compliance rises with param, e.g. audit rate):
+          returns first param_value where compliance >= threshold.
+        - Downward sweep (compliance falls with param, e.g. permit price):
+          returns last param_value where compliance >= threshold,
+          i.e. the ceiling before compliance drops below threshold.
 
         Args:
-            threshold: Compliance fraction to consider as 'achieved' (default 0.95).
+            threshold: Compliance fraction to consider as the boundary
+                (default 0.95).
 
         Returns:
-            First param_value meeting the threshold, or None if never reached.
+            Boundary param_value, or None if compliance never reaches threshold.
         """
-        for pt in self.points:
-            if pt.result.avg_compliance.mean >= threshold:
-                return pt.param_value
-        return None
+        if not self.points:
+            return None
+
+        means = [pt.result.avg_compliance.mean for pt in self.points]
+
+        # Detect direction: compare first and last point
+        # Use a simple heuristic: if the last mean < first mean, it's a downward sweep.
+        is_downward = means[-1] < means[0]
+
+        if is_downward:
+            # Last point where compliance is still at or above the threshold
+            result = None
+            for pt in self.points:
+                if pt.result.avg_compliance.mean >= threshold:
+                    result = pt.param_value
+                else:
+                    break  # First drop below threshold — stop here
+            return result
+        else:
+            # First point where compliance reaches or exceeds the threshold
+            for pt in self.points:
+                if pt.result.avg_compliance.mean >= threshold:
+                    return pt.param_value
+            return None
+
+
+@dataclass(frozen=True)
+class GridSweepResult:
+    """Results of a 2D joint-sensitivity parameter sweep over a scenario.
+
+    Stores mean compliance at every (x, y) grid cell.
+
+    Attributes:
+        grid: ``grid[y_idx][x_idx]`` = mean compliance fraction (0–1)
+              over ``n_runs`` seeds at parameter values
+              ``(x_values[x_idx], y_values[y_idx])``.
+    """
+
+    scenario_name: str
+    param_x_path: str  # e.g. "audit.base_prob"
+    param_x_label: str  # human-readable, e.g. "Base Audit Probability"
+    param_y_path: str  # e.g. "collateral_amount"
+    param_y_label: str  # human-readable, e.g. "Collateral K (M$)"
+    config: ScenarioConfig
+    x_values: list[float]  # ordered x-axis values
+    y_values: list[float]  # ordered y-axis values
+    grid: list[list[float]]  # [y_idx][x_idx] = mean compliance in [0, 1]
+    n_runs: int
+    # Short unique identifier matching SimulationRun.sim_id convention
+    id: str = field(default_factory=lambda: str(uuid4())[:8])
+
+    def compliance_at(self, x: float, y: float) -> float | None:
+        """Return mean compliance for an exact (x, y) cell, or None if not found."""
+        try:
+            x_idx = self.x_values.index(x)
+            y_idx = self.y_values.index(y)
+        except ValueError:
+            return None
+        return self.grid[y_idx][x_idx]
+
+    @property
+    def compliance_min(self) -> float:
+        """Minimum mean compliance across all grid cells."""
+        return min(v for row in self.grid for v in row)
+
+    @property
+    def compliance_max(self) -> float:
+        """Maximum mean compliance across all grid cells."""
+        return max(v for row in self.grid for v in row)
diff --git a/src/compute_permit_sim/schemas/sweep_params.py b/src/compute_permit_sim/schemas/sweep_params.py
index ec59138..d5fa652 100644
--- a/src/compute_permit_sim/schemas/sweep_params.py
+++ b/src/compute_permit_sim/schemas/sweep_params.py
@@ -170,6 +170,46 @@ class SweepParam:
         description="Upper bound of risk appetite multiplier (>1 = risk-seeking).",
         category="Agents",
     ),
+    SweepParam(
+        path="lab.capability_value",
+        label="Capability Race Premium V_b",
+        unit="M$",
+        default_min=0.0,
+        default_max=300.0,
+        default_step=20.0,
+        description="Strategic value of model capabilities from training (arms-race premium added to gain from cheating).",
+        category="Agents",
+    ),
+    SweepParam(
+        path="lab.racing_factor",
+        label="Racing Factor c_r",
+        unit="",
+        default_min=0.0,
+        default_max=5.0,
+        default_step=0.25,
+        description="Urgency multiplier on capability value; higher = stronger competitive pressure to cheat.",
+        category="Agents",
+    ),
+    SweepParam(
+        path="lab.reputation_escalation_factor",
+        label="Reputation Escalation Factor",
+        unit="",
+        default_min=0.0,
+        default_max=5.0,
+        default_step=0.25,
+        description="Per-violation multiplier on reputation cost: rep_t = base × (1+factor)^n_caught. 0 = no escalation.",
+        category="Agents",
+    ),
+    SweepParam(
+        path="lab.reputation_sensitivity",
+        label="Reputation Sensitivity R",
+        unit="M$",
+        default_min=0.0,
+        default_max=100.0,
+        default_step=5.0,
+        description="Base reputation cost per violation (M$). Compounds with reputation_escalation_factor.",
+        category="Agents",
+    ),
     # --- Dynamics ---
     SweepParam(
         path="audit.signal_exponent",
diff --git a/src/compute_permit_sim/services/sweep.py b/src/compute_permit_sim/services/sweep.py
index 5d558f3..7d4ef07 100644
--- a/src/compute_permit_sim/services/sweep.py
+++ b/src/compute_permit_sim/services/sweep.py
@@ -18,7 +18,7 @@
 
 from __future__ import annotations
 
-from compute_permit_sim.schemas.batch import SweepPoint, SweepResult
+from compute_permit_sim.schemas.batch import GridSweepResult, SweepPoint, SweepResult
 from compute_permit_sim.schemas.config import ScenarioConfig
 from compute_permit_sim.services.monte_carlo import run_monte_carlo
 
@@ -143,3 +143,118 @@ def run_sweep_from_registry(
         n_runs=n_runs,
         seeds=seeds,
     )
+
+
+def run_grid_sweep(
+    base_config: ScenarioConfig,
+    param_x_path: str,
+    param_y_path: str,
+    x_values: list[float],
+    y_values: list[float],
+    param_x_label: str | None = None,
+    param_y_label: str | None = None,
+    n_runs: int = 20,
+    seeds: list[int] | None = None,
+) -> GridSweepResult:
+    """Run a 2D joint-sensitivity sweep over two parameters.
+
+    Each (x, y) cell is evaluated with ``n_runs`` Monte Carlo replications.
+    Results are stored as ``grid[y_idx][x_idx] = mean_compliance``.
+
+    The same seed list is reused for every cell so that parameter variation,
+    not sampling noise, drives differences between cells.
+
+    Args:
+        base_config: Base scenario configuration.
+        param_x_path: Dot-path for the x-axis parameter, e.g. ``"audit.base_prob"``.
+        param_y_path: Dot-path for the y-axis parameter, e.g. ``"collateral_amount"``.
+        x_values: Ordered x-axis values.
+        y_values: Ordered y-axis values.
+        param_x_label: Human-readable x-axis label; defaults to ``param_x_path``.
+        param_y_label: Human-readable y-axis label; defaults to ``param_y_path``.
+        n_runs: MC replications per cell. Ignored if ``seeds`` is provided.
+        seeds: Explicit seeds; overrides ``n_runs`` if given.
+
+    Returns:
+        :class:`~compute_permit_sim.schemas.batch.GridSweepResult` with the 2D
+        compliance grid and axis metadata.
+    """
+    label_x = param_x_label or param_x_path
+    label_y = param_y_label or param_y_path
+    run_seeds = seeds if seeds is not None else list(range(n_runs))
+
+    # grid[y_idx][x_idx] = mean compliance
+    grid: list[list[float]] = []
+    for y in y_values:
+        row: list[float] = []
+        for x in x_values:
+            cfg = override_config(base_config, param_x_path, x)
+            cfg = override_config(cfg, param_y_path, y)
+            mc = run_monte_carlo(cfg, seeds=run_seeds)
+            row.append(mc.avg_compliance.mean)
+        grid.append(row)
+
+    return GridSweepResult(
+        scenario_name=base_config.name,
+        param_x_path=param_x_path,
+        param_x_label=label_x,
+        param_y_path=param_y_path,
+        param_y_label=label_y,
+        config=base_config,
+        x_values=list(x_values),
+        y_values=list(y_values),
+        grid=grid,
+        n_runs=len(run_seeds),
+    )
+
+
+def run_grid_sweep_from_registry(
+    base_config: ScenarioConfig,
+    param_x_path: str,
+    param_y_path: str,
+    x_min: float | None = None,
+    x_max: float | None = None,
+    x_step: float | None = None,
+    y_min: float | None = None,
+    y_max: float | None = None,
+    y_step: float | None = None,
+    n_runs: int = 20,
+    seeds: list[int] | None = None,
+) -> GridSweepResult:
+    """Run a 2D grid sweep using registry defaults for both axis ranges.
+
+    Looks up each path in ``SWEEPABLE_PARAMS`` to fill in default
+    min/max/step values.  Any supplied arguments override those defaults.
+
+    Args:
+        base_config: Base scenario configuration.
+        param_x_path: Dot-path registered in ``SWEEPABLE_PARAMS`` for x-axis.
+        param_y_path: Dot-path registered in ``SWEEPABLE_PARAMS`` for y-axis.
+        x_min/x_max/x_step: Override registry defaults for x-axis.
+        y_min/y_max/y_step: Override registry defaults for y-axis.
+        n_runs: MC replications per cell.
+        seeds: Explicit seeds (overrides n_runs if provided).
+
+    Returns:
+        :class:`~compute_permit_sim.schemas.batch.GridSweepResult`.
+
+    Raises:
+        KeyError: If either path is not in the registry.
+    """
+    from compute_permit_sim.schemas.sweep_params import generate_values, get_param
+
+    px = get_param(param_x_path)
+    py = get_param(param_y_path)
+    x_values = generate_values(px, min_val=x_min, max_val=x_max, step=x_step)
+    y_values = generate_values(py, min_val=y_min, max_val=y_max, step=y_step)
+    return run_grid_sweep(
+        base_config,
+        param_x_path=px.path,
+        param_y_path=py.path,
+        x_values=x_values,
+        y_values=y_values,
+        param_x_label=px.label,
+        param_y_label=py.label,
+        n_runs=n_runs,
+        seeds=seeds,
+    )
diff --git a/src/compute_permit_sim/vis/components/history.py b/src/compute_permit_sim/vis/components/history.py
index f8b81c6..30c2fcb 100644
--- a/src/compute_permit_sim/vis/components/history.py
+++ b/src/compute_permit_sim/vis/components/history.py
@@ -27,12 +27,13 @@ def UnifiedHistoryList() -> None:
     (BatchHistoryList + RunHistoryList) pattern which caused double-nested
     ``run-history-compact`` for batch items and mismatched styling.
     """
-    from compute_permit_sim.vis.state.run_state import mc_run, sweep_run
+    from compute_permit_sim.vis.state.run_state import grid_run, mc_run, sweep_run
 
     batch_results = session_history.batch_results.value
     run_history = session_history.run_history.value
     mc_current = mc_run.value.result
     sweep_current = sweep_run.value.result
+    grid_current = grid_run.value.result
 
     # Use Markdown for empty state — matches RunHistoryList convention and avoids
     # alternating root container types (Column A vs Column B) which reacton rejects.
@@ -42,7 +43,11 @@ def UnifiedHistoryList() -> None:
 
     with solara.Column(classes=["run-history-compact"]):
         for result in batch_results:
-            is_current = (result is mc_current) or (result is sweep_current)
+            is_current = (
+                (result is mc_current)
+                or (result is sweep_current)
+                or (result is grid_current)
+            )
             BatchHistoryItem(result, is_current)
         for run in run_history:
             is_selected = (session_history.selected_run.value is not None) and (
diff --git a/src/compute_permit_sim/vis/components/history_items.py b/src/compute_permit_sim/vis/components/history_items.py
index 2ff8eb4..b810677 100644
--- a/src/compute_permit_sim/vis/components/history_items.py
+++ b/src/compute_permit_sim/vis/components/history_items.py
@@ -11,7 +11,11 @@
 import solara.lab
 
 from compute_permit_sim.schemas import SimulationRun
-from compute_permit_sim.schemas.batch import MonteCarloResult, SweepResult
+from compute_permit_sim.schemas.batch import (
+    GridSweepResult,
+    MonteCarloResult,
+    SweepResult,
+)
 from compute_permit_sim.services.config_manager import save_scenario
 from compute_permit_sim.vis.components.dialogs import RunConfigDialog
 from compute_permit_sim.vis.components.results import DownloadCSV, DownloadExcel
@@ -143,12 +147,17 @@ def perform_save() -> None:
 
 @solara.component
 def BatchHistoryItem(result: BatchResult, is_current: bool) -> None:
-    """One-line history row for an MC or Sweep batch result.
+    """One-line history row for an MC, Sweep, or Grid Sweep batch result.
 
-    Mirrors ``RunHistoryItem`` exactly: type-icon | ⓘ | id-label | save | Excel | CSV | JSON.
-    The short ``result.id`` is displayed as the label; full details are in the ⓘ dialog.
+    Mirrors ``RunHistoryItem`` exactly: type-icon | \u24d8 | id-label | save | Excel | CSV | JSON.
+    The short ``result.id`` is displayed as the label; full details are in the \u24d8 dialog.
     """
-    from compute_permit_sim.vis.state.run_state import RunState, mc_run, sweep_run
+    from compute_permit_sim.vis.state.run_state import (
+        RunState,
+        grid_run,
+        mc_run,
+        sweep_run,
+    )
 
     if isinstance(result, MonteCarloResult):
         type_icon = "mdi-chart-bell-curve-cumulative"
@@ -181,7 +190,7 @@ def dl_excel() -> bytes | str:
 
             return export_monte_carlo_to_excel(result, output_path="")
 
-    else:  # SweepResult
+    elif isinstance(result, SweepResult):
         type_icon = "mdi-trending-up"
         dialog_title = f"Sweep Run: {result.id}"
         tp = result.tipping_point()
@@ -217,6 +226,42 @@ def dl_excel() -> bytes | str:
 
             return export_sweep_to_excel(result, output_path="")
 
+    else:  # GridSweepResult
+        type_icon = "mdi-view-grid"
+        dialog_title = f"Grid Sweep: {result.id}"
+        safe_s = result.scenario_name.lower().replace(" ", "_")
+        safe_x = result.param_x_path.replace(".", "_")
+        safe_y = result.param_y_path.replace(".", "_")
+        batch_summary = (
+            f"**{len(result.x_values)}×{len(result.y_values)} grid sweep**"
+            f" · {result.scenario_name}  \n"
+            f"X: **{result.param_x_label}**  \n"
+            f"Y: **{result.param_y_label}**  \n"
+            f"Compliance: {result.compliance_min:.1%}–{result.compliance_max:.1%}"
+        )
+        csv_fname = f"grid_{safe_s}_{safe_x}_x_{safe_y}_{result.id}.csv"
+        xlsx_fname = f"grid_{safe_s}_{safe_x}_x_{safe_y}_{result.id}.xlsx"
+
+        def view() -> None:
+            session_history.selected_run.value = None  # clear basic run highlight
+            mc_run.set(RunState[MonteCarloResult](phase="idle"))
+            sweep_run.set(RunState[SweepResult](phase="idle"))
+            grid_run.set(RunState[GridSweepResult](phase="ready", result=result))
+
+        def dl_csv() -> bytes | str:
+            from compute_permit_sim.vis.export import (
+                export_grid_sweep_to_csv,  # noqa: PLC0415
+            )
+
+            return export_grid_sweep_to_csv(result, output_path="")
+
+        def dl_excel() -> bytes | str:
+            from compute_permit_sim.vis.export import (
+                export_grid_sweep_to_excel,  # noqa: PLC0415
+            )
+
+            return export_grid_sweep_to_excel(result, output_path="")
+
     # use_state calls must be unconditional (Solara hook rules) — always before any return
     show_save, set_show_save = solara.use_state(False)
     save_name, set_save_name = solara.use_state(f"scenario_{result.id}")
diff --git a/src/compute_permit_sim/vis/export.py b/src/compute_permit_sim/vis/export.py
index 57213da..d78d3e5 100644
--- a/src/compute_permit_sim/vis/export.py
+++ b/src/compute_permit_sim/vis/export.py
@@ -21,6 +21,10 @@
 Sweep:
     export_sweep_to_excel  — Excel with Config/Sweep/Graphs
     export_sweep_to_csv    — one row per parameter value
+
+Grid Sweep:
+    export_grid_sweep_to_excel — Excel with Config/Grid pivot/Heatmap PNG
+    export_grid_sweep_to_csv   — long-format CSV, one row per (x, y) cell
 """
 
 import io
@@ -35,12 +39,13 @@
     BatchColumnNames as _BCN,
 )
 from compute_permit_sim.schemas.batch import (
-    MetricStats as _MetricStats,
-)
-from compute_permit_sim.schemas.batch import (
+    GridSweepResult,
     MonteCarloResult,
     SweepResult,
 )
+from compute_permit_sim.schemas.batch import (
+    MetricStats as _MetricStats,
+)
 from compute_permit_sim.schemas.columns import ColumnNames
 from compute_permit_sim.services.metrics import calculate_compliance
 from compute_permit_sim.vis.plotting import (
@@ -744,6 +749,137 @@ def export_sweep_to_csv(
     return output_path
 
 
+def export_grid_sweep_to_csv(
+    result: GridSweepResult,
+    output_path: str | None = None,
+) -> "str | bytes":
+    """Export a GridSweepResult to long-format CSV with one row per grid cell.
+
+    Columns: scenario, param_x_path, param_x_value, param_y_path,
+    param_y_value, n_runs, compliance.
+
+    Args:
+        result: A ``GridSweepResult`` instance.
+        output_path: ``None`` = auto-generate, ``""`` = return bytes.
+    """
+    rows = [
+        {
+            _BCN.SCENARIO: result.scenario_name,
+            _BCN.PARAM_X_PATH: result.param_x_path,
+            _BCN.PARAM_X_VALUE: x,
+            _BCN.PARAM_Y_PATH: result.param_y_path,
+            _BCN.PARAM_Y_VALUE: y,
+            _BCN.N_RUNS: result.n_runs,
+            _BCN.COMPLIANCE_RATE: result.grid[y_idx][x_idx],
+        }
+        for y_idx, y in enumerate(result.y_values)
+        for x_idx, x in enumerate(result.x_values)
+    ]
+
+    df = _pd.DataFrame(rows)
+    if output_path == "":
+        return df.to_csv(index=False).encode("utf-8")
+    if output_path is None:
+        _os.makedirs("outputs", exist_ok=True)
+        safe_s = result.scenario_name.lower().replace(" ", "_")
+        safe_x = result.param_x_path.replace(".", "_")
+        safe_y = result.param_y_path.replace(".", "_")
+        output_path = f"outputs/grid_{safe_s}_{safe_x}_x_{safe_y}.csv"
+    df.to_csv(output_path, index=False)
+    return output_path
+
+
+def export_grid_sweep_to_excel(
+    result: GridSweepResult,
+    output_path: str | None = None,
+) -> "str | bytes":
+    """Export a GridSweepResult to a formatted Excel workbook.
+
+    Sheets:
+      ``Config``   — base scenario configuration
+      ``Grid``     — pivot table: rows=y_values, cols=x_values, cells=compliance%
+      ``Heatmap``  — embedded PNG of the compliance heatmap
+
+    Args:
+        result: A ``GridSweepResult`` instance.
+        output_path: ``None`` = auto-generate, ``""`` = return bytes.
+    """
+    import io as _io
+
+    import xlsxwriter as _xlsxwriter
+
+    from compute_permit_sim.vis.plotting import plot_sweep_heatmap
+
+    return_bytes = output_path == ""
+    output: _io.BytesIO | str
+    if return_bytes:
+        output = _io.BytesIO()
+    elif output_path is None:
+        _os.makedirs("outputs", exist_ok=True)
+        safe_s = result.scenario_name.lower().replace(" ", "_")
+        safe_x = result.param_x_path.replace(".", "_")
+        safe_y = result.param_y_path.replace(".", "_")
+        output_path = f"outputs/grid_{safe_s}_{safe_x}_x_{safe_y}.xlsx"
+        output = output_path
+    else:
+        output = output_path
+
+    workbook = _xlsxwriter.Workbook(output)
+    header_fmt = workbook.add_format(
+        {"bold": True, "bg_color": "#2196F3", "font_color": "white", "border": 1}
+    )
+    data_fmt = workbook.add_format({"border": 1})
+    pct_fmt = workbook.add_format({"border": 1, "num_format": "0.0%"})
+
+    try:
+        # === Config sheet ===
+        if result.config is not None:
+            cfg_sheet = workbook.add_worksheet("Config")
+            _write_config_sheet(cfg_sheet, result.config, header_fmt, data_fmt)
+
+        # === Grid (pivot) sheet ===
+        grid_sheet = workbook.add_worksheet("Grid")
+        grid_sheet.set_column("A:A", 20)
+        # Header row: corner cell (x-axis label), then x-axis values
+        grid_sheet.write(0, 0, f"{result.param_x_label} →", header_fmt)
+        for x_idx, x in enumerate(result.x_values):
+            grid_sheet.write(0, x_idx + 1, x, header_fmt)
+        # Data rows: y-axis values then compliance cells
+        for y_idx, y in enumerate(result.y_values):
+            grid_sheet.write(y_idx + 1, 0, y, data_fmt)
+            for x_idx, compliance in enumerate(result.grid[y_idx]):
+                grid_sheet.write(y_idx + 1, x_idx + 1, compliance, pct_fmt)
+        # Rewrite the corner cell so it names both axes (x → / y ↓)
+        grid_sheet.write(
+            0, 0, f"{result.param_x_label} → / {result.param_y_label} ↓", header_fmt
+        )
+
+        # === Heatmap sheet ===
+        heatmap_sheet = workbook.add_worksheet("Heatmap")
+        fig = plot_sweep_heatmap(
+            compliance_grid=result.grid,
+            x_values=result.x_values,
+            y_values=result.y_values,
+            x_param_label=result.param_x_label,
+            y_param_label=result.param_y_label,
+            title=f"Compliance Heatmap — {result.scenario_name}",
+        )
+        heatmap_sheet.insert_image(
+            0, 0, "heatmap.png", {"image_data": _fig_to_bytes(fig)}
+        )
+
+    finally:
+        workbook.close()
+
+    if return_bytes:
+        assert isinstance(output, _io.BytesIO)
+        output.seek(0)
+        return output.read()
+
+    assert output_path is not None
+    return output_path
+
+
 def export_monte_carlo_to_excel(
     result: MonteCarloResult,
     output_path: str | None = None,
diff --git a/src/compute_permit_sim/vis/page.py b/src/compute_permit_sim/vis/page.py
index f01dc29..92a53e5 100644
--- a/src/compute_permit_sim/vis/page.py
+++ b/src/compute_permit_sim/vis/page.py
@@ -4,8 +4,10 @@
     basic_run.phase == "running"          → RunSpinner (basic sim)
     mc_run.phase == "running"             → RunSpinner (Monte Carlo)
     sweep_run.phase == "running"          → RunSpinner (Sweep)
+    grid_run.phase == "running"           → RunSpinner (Grid Sweep)
     mc_run.phase == "ready"               → BatchResultsPanel
     sweep_run.phase == "ready"            → BatchResultsPanel
+    grid_run.phase == "ready"             → BatchResultsPanel
     basic_run.phase == "ready" OR history → AnalysisPanel
     else                                  → EmptyState
 """
@@ -27,7 +29,12 @@
 from compute_permit_sim.vis.panels.batch_results import BatchResultsPanel
 from compute_permit_sim.vis.panels.config import ConfigPanel
 from compute_permit_sim.vis.state.history import session_history
-from compute_permit_sim.vis.state.run_state import basic_run, mc_run, sweep_run
+from compute_permit_sim.vis.state.run_state import (
+    basic_run,
+    grid_run,
+    mc_run,
+    sweep_run,
+)
 
 configure_logging()
 logger = logging.getLogger(__name__)
@@ -109,12 +116,13 @@ def toggle_theme():
         basic = basic_run.value
         mc = mc_run.value
         sw = sweep_run.value
+        gr = grid_run.value
 
         if basic.is_running:
             RunSpinner("Simulating\u2026")
-        elif mc.is_running or sw.is_running:
+        elif mc.is_running or sw.is_running or gr.is_running:
             RunSpinner("Running batch analysis\u2026")
-        elif mc.is_ready or sw.is_ready:
+        elif mc.is_ready or sw.is_ready or gr.is_ready:
             BatchResultsPanel()
         elif basic.is_ready or session_history.selected_run.value is not None:
             AnalysisPanel()
diff --git a/src/compute_permit_sim/vis/panels/batch.py b/src/compute_permit_sim/vis/panels/batch.py
index e906ed9..ad958e2 100644
--- a/src/compute_permit_sim/vis/panels/batch.py
+++ b/src/compute_permit_sim/vis/panels/batch.py
@@ -28,7 +28,7 @@
 )
 from compute_permit_sim.vis.components.history import UnifiedHistoryList
 from compute_permit_sim.vis.components.results import SidebarLabel
-from compute_permit_sim.vis.state.run_state import RunState, mc_run, sweep_run
+from compute_permit_sim.vis.state.run_state import RunState, grid_run, mc_run, sweep_run
 
 # Pre-built lookup map (module-level constant — registry never changes at runtime)
 _PARAM_MAP: dict[str, SweepParam] = {p.path: p for p in SWEEPABLE_PARAMS}
@@ -38,6 +38,7 @@
 # ---------------------------------------------------------------------------
 _mc_status = solara.reactive("")
 _sweep_status = solara.reactive("")
+_grid_status = solara.reactive("")
 
 # ---------------------------------------------------------------------------
 # Background workers
@@ -140,6 +141,59 @@ def _run_sweep_background(
         _sweep_status.set(f"Error: {e}")
 
 
+def _run_grid_background(
+    scenario_name: str,
+    param_x: SweepParam,
+    param_y: SweepParam,
+    x_values: list[float],
+    y_values: list[float],
+    n_runs: int,
+) -> None:
+    """Run 2D grid sweep off the event loop thread and update grid_run reactive."""
+    from compute_permit_sim.schemas.batch import GridSweepResult
+    from compute_permit_sim.services.sweep import run_grid_sweep
+
+    try:
+        config = _load_scenario_by_name(scenario_name)
+
+        if config is None:
+            _grid_status.set(f"Scenario '{scenario_name}' not found.")
+            grid_run.set(RunState[GridSweepResult](phase="idle"))
+            return
+
+        n_cells = len(x_values) * len(y_values)
+        _grid_status.set(
+            f"Grid {len(x_values)}×{len(y_values)} = {n_cells} cells × {n_runs} runs..."
+        )
+
+        result = run_grid_sweep(
+            config,
+            param_x_path=param_x.path,
+            param_y_path=param_y.path,
+            x_values=x_values,
+            y_values=y_values,
+            param_x_label=param_x.label,
+            param_y_label=param_y.label,
+            n_runs=n_runs,
+        )
+
+        from compute_permit_sim.vis.state.history import (
+            session_history,  # noqa: PLC0415
+        )
+
+        session_history.add_batch_result(result)
+        grid_run.set(RunState[GridSweepResult](phase="ready", result=result))
+        _grid_status.set(
+            f"Done: {len(x_values)}×{len(y_values)} grid — "
+            f"compliance {result.compliance_min:.1%}–{result.compliance_max:.1%}"
+        )
+    except Exception as e:  # noqa: BLE001
+        from compute_permit_sim.schemas.batch import GridSweepResult
+
+        grid_run.set(RunState[GridSweepResult](phase="idle"))
+        _grid_status.set(f"Error: {e}")
+
+
 # ---------------------------------------------------------------------------
 # Sub-components
 # ---------------------------------------------------------------------------
@@ -391,9 +445,220 @@ def on_param_label_change(label: str) -> None:
 # ---------------------------------------------------------------------------
 
 
+@solara.component
+def _GridSweepCard(scenario_names: list[str]) -> Any:
+    """Sidebar card for configuring and launching a 2D grid sweep."""
+    selected_scenario, set_selected_scenario = solara.use_state(
+        scenario_names[0] if scenario_names else ""
+    )
+
+    all_categories = categories()
+
+    # --- X-axis param ---
+    cat_x, set_cat_x = solara.use_state(all_categories[0] if all_categories else "")
+    params_x = params_for_category(cat_x)
+    path_x, set_path_x = solara.use_state(params_x[0].path if params_x else "")
+    param_x = _PARAM_MAP.get(path_x)
+    min_x, set_min_x = solara.use_state(param_x.default_min if param_x else 0.0)
+    max_x, set_max_x = solara.use_state(param_x.default_max if param_x else 1.0)
+    step_x, set_step_x = solara.use_state(param_x.default_step if param_x else 0.1)
+
+    # --- Y-axis param ---
+    cat_y, set_cat_y = solara.use_state(all_categories[0] if all_categories else "")
+    params_y = params_for_category(cat_y)
+    path_y, set_path_y = solara.use_state(params_y[0].path if params_y else "")
+    param_y = _PARAM_MAP.get(path_y)
+    min_y, set_min_y = solara.use_state(param_y.default_min if param_y else 0.0)
+    max_y, set_max_y = solara.use_state(param_y.default_max if param_y else 1.0)
+    step_y, set_step_y = solara.use_state(param_y.default_step if param_y else 0.1)
+
+    n_runs, set_n_runs = solara.use_state(20)
+
+    is_running = grid_run.value.is_running
+    status = _grid_status.value
+
+    # Compute preview — both axes must be valid
+    n_pts_x, n_pts_y = 0, 0
+    preview_error = ""
+    if param_x and step_x > 0 and min_x <= max_x:
+        try:
+            n_pts_x = len(generate_values(param_x, min_x, max_x, step_x))
+        except Exception:
+            preview_error = "Invalid X range"
+    if param_y and step_y > 0 and min_y <= max_y:
+        try:
+            n_pts_y = len(generate_values(param_y, min_y, max_y, step_y))
+        except Exception:
+            preview_error = "Invalid Y range"
+
+    def _on_cat_x(cat: str) -> None:
+        set_cat_x(cat)
+        ps = params_for_category(cat)
+        if ps:
+            set_path_x(ps[0].path)
+            set_min_x(ps[0].default_min)
+            set_max_x(ps[0].default_max)
+            set_step_x(ps[0].default_step)
+
+    def _on_path_x(label: str) -> None:
+        path = {p.label: p.path for p in params_for_category(cat_x)}.get(label, "")
+        set_path_x(path)
+        p = _PARAM_MAP.get(path)
+        if p:
+            set_min_x(p.default_min)
+            set_max_x(p.default_max)
+            set_step_x(p.default_step)
+
+    def _on_cat_y(cat: str) -> None:
+        set_cat_y(cat)
+        ps = params_for_category(cat)
+        if ps:
+            set_path_y(ps[0].path)
+            set_min_y(ps[0].default_min)
+            set_max_y(ps[0].default_max)
+            set_step_y(ps[0].default_step)
+
+    def _on_path_y(label: str) -> None:
+        path = {p.label: p.path for p in params_for_category(cat_y)}.get(label, "")
+        set_path_y(path)
+        p = _PARAM_MAP.get(path)
+        if p:
+            set_min_y(p.default_min)
+            set_max_y(p.default_max)
+            set_step_y(p.default_step)
+
+    def on_run() -> None:
+        if not param_x or not param_y:
+            return
+        try:
+            x_vals = generate_values(param_x, min_x, max_x, step_x)
+            y_vals = generate_values(param_y, min_y, max_y, step_y)
+        except ValueError:
+            _grid_status.set("Invalid range — check min/max/step for both axes.")
+            return
+        from compute_permit_sim.schemas.batch import (  # noqa: PLC0415
+            GridSweepResult,
+            MonteCarloResult,
+            SweepResult,
+        )
+
+        grid_run.set(RunState[GridSweepResult](phase="running"))
+        mc_run.set(RunState[MonteCarloResult](phase="idle"))
+        sweep_run.set(RunState[SweepResult](phase="idle"))
+        _grid_status.set("Starting...")
+        threading.Thread(
+            target=_run_grid_background,
+            args=(selected_scenario, param_x, param_y, x_vals, y_vals, n_runs),
+            daemon=True,
+        ).start()
+
+    with solara.Card(title="Grid Sweep"):
+        if not scenario_names:
+            with solara.Column(classes=["sidebar-empty-text"]):
+                solara.Text("No scenarios found.")
+            return
+
+        solara.Select(
+            label="Scenario",
+            values=scenario_names,
+            value=selected_scenario,
+            on_value=set_selected_scenario,
+            dense=True,
+        )
+
+        # ── X-axis ──────────────────────────────────────────────────────────
+        with solara.Column(classes=["sidebar-hint-text"]):
+            solara.Text("X-axis parameter")
+        with solara.Row(style="gap: 4px;"):
+            solara.Select(
+                label="Category",
+                values=all_categories,
+                value=cat_x,
+                on_value=_on_cat_x,
+                dense=True,
+            )
+            labels_x = [p.label for p in params_for_category(cat_x)]
+            solara.Select(
+                label="Parameter",
+                values=labels_x,
+                value=param_x.label if param_x else (labels_x[0] if labels_x else ""),
+                on_value=_on_path_x,
+                dense=True,
+            )
+        with solara.Row(style="gap: 4px;"):
+            unit_x = param_x.unit if param_x else ""
+            solara.InputFloat(label=f"Min ({unit_x})", value=min_x, on_value=set_min_x)
+            solara.InputFloat(label=f"Max ({unit_x})", value=max_x, on_value=set_max_x)
+            solara.InputFloat(label="Step", value=step_x, on_value=set_step_x)
+
+        # ── Y-axis ──────────────────────────────────────────────────────────
+        with solara.Column(classes=["sidebar-hint-text"]):
+            solara.Text("Y-axis parameter")
+        with solara.Row(style="gap: 4px;"):
+            solara.Select(
+                label="Category",
+                values=all_categories,
+                value=cat_y,
+                on_value=_on_cat_y,
+                dense=True,
+            )
+            labels_y = [p.label for p in params_for_category(cat_y)]
+            solara.Select(
+                label="Parameter",
+                values=labels_y,
+                value=param_y.label if param_y else (labels_y[0] if labels_y else ""),
+                on_value=_on_path_y,
+                dense=True,
+            )
+        with solara.Row(style="gap: 4px;"):
+            unit_y = param_y.unit if param_y else ""
+            solara.InputFloat(label=f"Min ({unit_y})", value=min_y, on_value=set_min_y)
+            solara.InputFloat(label=f"Max ({unit_y})", value=max_y, on_value=set_max_y)
+            solara.InputFloat(label="Step", value=step_y, on_value=set_step_y)
+
+        # ── Replications + simulation count preview ──────────────────────────
+        solara.SliderInt(
+            label=f"Runs per cell: {n_runs}",
+            value=n_runs,
+            on_value=set_n_runs,
+            min=5,
+            max=100,
+            step=5,
+        )
+        if preview_error:
+            with solara.Column(classes=["sidebar-error-text"]):
+                solara.Text(preview_error)
+        elif n_pts_x > 0 and n_pts_y > 0:
+            total = n_pts_x * n_pts_y * n_runs
+            with solara.Column(classes=["sidebar-hint-text"]):
+                solara.Text(
+                    f"{n_pts_x}\u00d7{n_pts_y} = {n_pts_x * n_pts_y} cells"
+                    f" \u00d7 {n_runs} = {total:,} total simulations"
+                )
+
+        solara.Button(
+            "Running..." if is_running else "Run Grid Sweep",
+            on_click=on_run,
+            color="primary",
+            block=True,
+            disabled=is_running
+            or not selected_scenario
+            or not param_x
+            or not param_y
+            or n_pts_x == 0
+            or n_pts_y == 0,
+            small=True,
+        )
+        if status and (
+            "Error" in status or "not found" in status or "Invalid" in status
+        ):
+            with solara.Column(classes=["sidebar-error-text"]):
+                solara.Text(status)
+
+
 @solara.component
 def BatchPanel() -> Any:
-    """Sidebar panel with Monte Carlo and Parameter Sweep configurators."""
+    """Sidebar panel with Monte Carlo, Parameter Sweep, and Grid Sweep configurators."""
     from compute_permit_sim.vis.state.history import session_history  # noqa: PLC0415
 
     # Use the same name map as LoadScenarioDialog for consistency
@@ -403,6 +668,7 @@ def BatchPanel() -> Any:
         SidebarLabel("**BATCH ANALYSIS**")
         _MonteCarloCard(scenario_names=scenario_names)
         _SweepCard(scenario_names=scenario_names)
+        _GridSweepCard(scenario_names=scenario_names)
 
         # ── History — batch results + individual runs in one stream ────────
         solara.Markdown("---")
diff --git a/src/compute_permit_sim/vis/panels/batch_results.py b/src/compute_permit_sim/vis/panels/batch_results.py
index 5116a6a..da01cff 100644
--- a/src/compute_permit_sim/vis/panels/batch_results.py
+++ b/src/compute_permit_sim/vis/panels/batch_results.py
@@ -20,7 +20,7 @@
     MetricChip,
     ResultsActions,
 )
-from compute_permit_sim.vis.state.run_state import mc_run, sweep_run
+from compute_permit_sim.vis.state.run_state import grid_run, mc_run, sweep_run
 
 # ---------------------------------------------------------------------------
 # Monte Carlo results
@@ -305,6 +305,115 @@ def _SweepResultsView() -> Any:
             solara.Markdown("\n".join([header, sep, body]))
 
 
+# ---------------------------------------------------------------------------
+# Grid sweep results
+# ---------------------------------------------------------------------------
+
+
+@solara.component
+def _GridSweepResultsView() -> Any:
+    """Render summary chips, downloads, heatmap, and table for the grid sweep."""
+    from compute_permit_sim.vis.export import (
+        export_grid_sweep_to_csv,
+        export_grid_sweep_to_excel,
+    )
+    from compute_permit_sim.vis.plotting import plot_sweep_heatmap
+
+    result = grid_run.value.result
+    if result is None:
+        solara.Text("No grid sweep result to display.")
+        return
+
+    safe_s = result.scenario_name.lower().replace(" ", "_")
+    safe_x = result.param_x_path.replace(".", "_")
+    safe_y = result.param_y_path.replace(".", "_")
+    grid_dims = f"{len(result.x_values)}\u00d7{len(result.y_values)}"
+    compliance_span = f"{result.compliance_min:.1%}\u2013{result.compliance_max:.1%}"
+    # Unit-neutral ticks: the heatmap's default %/$M formats only suit audit x K.
+    x_ticks = [f"{x:.4g}" for x in result.x_values]
+    y_ticks = [f"{y:.4g}" for y in result.y_values]
+
+    fig = plot_sweep_heatmap(
+        compliance_grid=result.grid,
+        x_values=result.x_values,
+        y_values=result.y_values,
+        x_param_label=result.param_x_label,
+        y_param_label=result.param_y_label,
+        x_tick_labels=x_ticks,
+        y_tick_labels=y_ticks,
+        title=f"Compliance Heatmap — {result.scenario_name}",
+    )
+
+    with solara.Column(classes=["analysis-panel"]):
+        with solara.Card("Summary", style="margin-bottom: 12px;"):
+            with solara.Row(
+                style="align-items: center; justify-content: space-between; flex-wrap: wrap;"
+            ):
+                with solara.Row(style="gap: 24px; flex-wrap: wrap; flex: 1;"):
+                    MetricChip("Scenario", result.scenario_name)
+                    MetricChip("X-axis", result.param_x_label)
+                    MetricChip("Y-axis", result.param_y_label)
+                    MetricChip("Grid size", grid_dims)
+                    MetricChip("Seeds per cell", str(result.n_runs))
+                    MetricChip("Compliance range", compliance_span)
+
+                with ResultsActions():
+                    RunConfigDialog(
+                        config=result.config,
+                        title=f"Grid Sweep: {result.id}",
+                        batch_summary=(
+                            f"**{grid_dims} grid sweep**"
+                            f" \u00b7 {result.scenario_name}  \n"
+                            f"X: **{result.param_x_label}**  \n"
+                            f"Y: **{result.param_y_label}**  \n"
+                            f"Compliance range: {compliance_span}"
+                        ),
+                    )
+                    DownloadCSV(
+                        "Download grid CSV",
+                        lambda r=result: export_grid_sweep_to_csv(  # type: ignore[misc]
+                            r, output_path=""
+                        ),
+                        f"grid_{safe_s}_{safe_x}_x_{safe_y}.csv",
+                    )
+                    DownloadExcel(
+                        "Download Excel workbook",
+                        lambda r=result: export_grid_sweep_to_excel(  # type: ignore[misc]
+                            r, output_path=""
+                        ),
+                        f"grid_{safe_s}_{safe_x}_x_{safe_y}.xlsx",
+                    )
+                    DownloadJSON(
+                        "Download config JSON (for reproducibility)",
+                        lambda r=result: r.config.model_dump_json(  # type: ignore[misc]
+                            indent=2
+                        ).encode("utf-8"),
+                        f"grid_config_{safe_s}.json",
+                    )
+
+        with solara.Card("Results", style="margin-top: 0;"):
+            ExpandableChart(
+                fig,
+                download_filename=f"grid_{safe_s}_{safe_x}_x_{safe_y}.png",
+            )
+
+        with solara.Card("Per-Cell Compliance", style="margin-top: 0;"):
+            # Header: x-parameter label in the corner cell, then the x values
+            x_hdrs = [result.param_x_label] + x_ticks
+            header = "| " + " | ".join(x_hdrs) + " |"
+            sep = "|" + "|".join(["---"] * len(x_hdrs)) + "|"
+            # One row per y value — compliance as percentage
+            data_rows = []
+            for y_idx, y_tick in enumerate(y_ticks):
+                cells = [y_tick] + [
+                    f"{result.grid[y_idx][x_idx]:.1%}"
+                    for x_idx in range(len(result.x_values))
+                ]
+                data_rows.append("| " + " | ".join(cells) + " |")
+            solara.Markdown(f"*Y: {result.param_y_label}*")
+            solara.Markdown("\n".join([header, sep] + data_rows))
+
+
 # ---------------------------------------------------------------------------
 # Top-level
 # ---------------------------------------------------------------------------
@@ -314,16 +423,19 @@ def _SweepResultsView() -> Any:
 def BatchResultsPanel() -> Any:
     """Right-pane panel for batch results.
 
-    Reads result directly from mc_run / sweep_run RunState singletons.
+    Reads result directly from mc_run / sweep_run / grid_run RunState singletons.
     Page-level state machine in page.py ensures this panel is only rendered
     when a result is ready — no spinner gate needed here.
     """
     mc = mc_run.value
     sw = sweep_run.value
+    gr = grid_run.value
 
     if mc.is_ready and mc.result is not None:
         _MCResultsView()
     elif sw.is_ready and sw.result is not None:
         _SweepResultsView()
+    elif gr.is_ready and gr.result is not None:
+        _GridSweepResultsView()
     else:
         solara.Text("No batch results to display.")
diff --git a/src/compute_permit_sim/vis/plotting.py b/src/compute_permit_sim/vis/plotting.py
index ba5d4cc..0084963 100644
--- a/src/compute_permit_sim/vis/plotting.py
+++ b/src/compute_permit_sim/vis/plotting.py
@@ -15,6 +15,7 @@
 import textwrap
 
 import matplotlib
+import numpy as np
 import pandas as pd
 from matplotlib.axes import Axes
 from matplotlib.figure import Figure
@@ -642,15 +643,22 @@ def plot_mc_payoff_comparison(result) -> "Figure":
     return fig
 
 
-def plot_sweep_curve(result, metric: str = "avg_compliance") -> "Figure":
+def plot_sweep_curve(
+    result,
+    metric: str = "avg_compliance",
+    reference_lines: list[tuple[float, str, str]] | None = None,
+) -> "Figure":
     """Plot a 1D parameter sweep curve: param value on X, metric on Y.
 
     Renders the mean as a line with ± 1 SD shading. Annotates the tipping
-    point (first value where compliance ≥ 95 %) if present.
+    point (first value where compliance ≥ 95 %) if present.
 
     Args:
         result: A ``SweepResult`` instance.
         metric: Attribute name on ``MonteCarloResult`` to plot (default: avg_compliance).
+        reference_lines: Optional list of ``(x_value, label, color)`` tuples
+            for annotating known calibration points (e.g. scenario pa values).
+            Each draws a vertical dotted line with a small text label.
 
     Returns:
         Matplotlib Figure.
@@ -670,19 +678,7 @@ def plot_sweep_curve(result, metric: str = "avg_compliance") -> "Figure":
 
     color = CHART_COLOR_MAP.get("compliant", "#42A5F5")
     ax.plot(xs, means, color=color, linewidth=2, marker="o", markersize=5, label="Mean")
-    ax.fill_between(xs, lows, highs, alpha=0.18, color=color, label="± 1 SD")
-
-    tp = result.tipping_point(threshold=0.95)
-    if tp is not None:
-        ax.axvline(tp, color="#FFA726", linewidth=1.5, linestyle="--")
-        ax.annotate(
-            f"Tipping ≈ {tp:.3f}",
-            xy=(tp, 0.95),
-            xytext=(tp, 0.70),
-            fontsize=8,
-            color="#FFA726",
-            arrowprops={"arrowstyle": "->", "color": "#FFA726"},
-        )
+    ax.fill_between(xs, lows, highs, alpha=0.18, color=color, label="\u00b1 1 SD")
 
     is_compliance = "compliance" in metric
     if is_compliance:
@@ -694,10 +690,130 @@ def plot_sweep_curve(result, metric: str = "avg_compliance") -> "Figure":
 
     ax.set_xlabel(_wrap(result.param_label, width=40))
     ax.set_title(
-        _wrap(f"Sensitivity: {result.param_label} — {result.scenario_name}"),
+        _wrap(f"Sensitivity: {result.param_label} \u2014 {result.scenario_name}"),
         fontsize=11,
         fontweight="600",
     )
     ax.legend(fontsize=9)
     fig.tight_layout()
     return fig
+
+
+def plot_sweep_heatmap(
+    compliance_grid: list[list[float]],
+    x_values: list[float],
+    y_values: list[float],
+    x_param_label: str = "Base Audit Rate \u03c0\u2080",
+    y_param_label: str = "Collateral K (M$)",
+    x_tick_labels: list[str] | None = None,
+    y_tick_labels: list[str] | None = None,
+    title: str | None = None,
+    highlight: tuple[float, float] | None = None,
+    highlight_label: str = "Calibration",
+) -> "Figure":
+    """Heatmap of average compliance over a 2D parameter grid.
+
+    Each cell is shaded by mean compliance on a fixed 0-100 % scale and
+    annotated with its rounded percentage; grid row 0 is drawn at the bottom.
+    Defaults suit a pa x K grid; pass tick labels to reuse it for other sweeps.
+
+    Args:
+        compliance_grid: 2D list ``[y_idx][x_idx]`` of mean compliance fractions.
+        x_values: Parameter values along the x-axis (e.g. audit rates).
+        y_values: Parameter values along the y-axis (e.g. collateral amounts).
+        x_param_label: Human-readable x-axis label.
+        y_param_label: Human-readable y-axis label.
+        x_tick_labels: Custom x tick labels; when omitted (or empty) the
+            ``x_values`` are formatted as whole percentages (``0.05`` -> ``5%``).
+        y_tick_labels: Custom y tick labels; when omitted (or empty) the
+            ``y_values`` are formatted as whole dollars-M (``10`` -> ``$10M``).
+        title: Optional chart title.
+        highlight: Optional ``(x_val, y_val)`` calibration point; the cell whose
+            axis values are nearest to it is outlined (no exact match needed).
+        highlight_label: Label drawn just above the highlighted cell.
+
+    Returns:
+        Matplotlib Figure.
+    """
+    import matplotlib.patches as mpatches
+
+    fig, ax = create_figure(figsize=(7, 5))
+    data = np.array(compliance_grid)  # shape: (n_y, n_x)
+
+    im = ax.imshow(
+        data,
+        aspect="auto",
+        origin="lower",
+        cmap="Blues",
+        vmin=0.0,
+        vmax=1.0,
+        interpolation="nearest",
+    )
+
+    # Colorbar ticks rendered as percentages (data are fractions, xmax=1)
+    cbar = fig.colorbar(
+        im, ax=ax, format=matplotlib.ticker.PercentFormatter(xmax=1), shrink=0.85
+    )
+    cbar.set_label("Mean Compliance Rate", fontsize=10)
+
+    # Tick labels — fall back to % for x (audit rate) and $M for y (collateral)
+    xt_labels = x_tick_labels or [f"{v:.0%}" for v in x_values]
+    yt_labels = y_tick_labels or [f"${v:.0f}M" for v in y_values]
+    ax.set_xticks(range(len(x_values)))
+    ax.set_xticklabels(xt_labels, fontsize=8, rotation=45, ha="right")
+    ax.set_yticks(range(len(y_values)))
+    ax.set_yticklabels(yt_labels, fontsize=8)
+
+    # Per-cell annotation; white text on dark cells (> 65 %) for contrast
+    for yi in range(len(y_values)):
+        for xi in range(len(x_values)):
+            val = float(data[yi, xi])
+            text_color = "white" if val > 0.65 else "#333333"
+            ax.text(
+                xi,
+                yi,
+                f"{val:.0%}",
+                ha="center",
+                va="center",
+                fontsize=7,
+                color=text_color,
+                fontweight="500",
+            )
+
+    # Optional highlight: snap to the nearest grid cell and outline it
+    if highlight is not None:
+        hx_val, hy_val = highlight
+        hx_idx = min(range(len(x_values)), key=lambda i: abs(x_values[i] - hx_val))
+        hy_idx = min(range(len(y_values)), key=lambda i: abs(y_values[i] - hy_val))
+        rect = mpatches.FancyBboxPatch(
+            (hx_idx - 0.45, hy_idx - 0.45),
+            0.9,
+            0.9,
+            boxstyle="square,pad=0",
+            linewidth=2.5,
+            edgecolor=CHART_COLOR_MAP.get("violator", "#EF5350"),
+            facecolor="none",
+            zorder=3,
+        )
+        ax.add_patch(rect)
+        ax.text(
+            hx_idx,
+            hy_idx + 0.52,
+            highlight_label,
+            ha="center",
+            va="bottom",
+            fontsize=7,
+            color=CHART_COLOR_MAP.get("violator", "#EF5350"),
+            fontweight="bold",
+            zorder=4,
+        )
+
+    ax.set_xlabel(_wrap(x_param_label, width=40), fontsize=11, fontweight="500")
+    ax.set_ylabel(_wrap(y_param_label, width=30), fontsize=11, fontweight="500")
+    if title:
+        ax.set_title(_wrap(title), fontsize=11, fontweight="600")
+
+    # Suppress grid — imshow cells provide visual separation
+    ax.grid(False)
+    fig.tight_layout()
+    return fig
diff --git a/src/compute_permit_sim/vis/state/history.py b/src/compute_permit_sim/vis/state/history.py
index 30e5c85..fd91502 100644
--- a/src/compute_permit_sim/vis/state/history.py
+++ b/src/compute_permit_sim/vis/state/history.py
@@ -5,10 +5,14 @@
 import solara
 
 from compute_permit_sim.schemas import SimulationRun
-from compute_permit_sim.schemas.batch import MonteCarloResult, SweepResult
-
-# Union type for batch results — MC aggregate or sweep aggregate.
-BatchResult = MonteCarloResult | SweepResult
+from compute_permit_sim.schemas.batch import (
+    GridSweepResult,
+    MonteCarloResult,
+    SweepResult,
+)
+
+# Union type for batch results — MC aggregate, 1D sweep, or 2D grid sweep.
+BatchResult = MonteCarloResult | SweepResult | GridSweepResult
 
 
 class SessionHistory:
diff --git a/src/compute_permit_sim/vis/state/run_state.py b/src/compute_permit_sim/vis/state/run_state.py
index aad1c45..126fe28 100644
--- a/src/compute_permit_sim/vis/state/run_state.py
+++ b/src/compute_permit_sim/vis/state/run_state.py
@@ -25,7 +25,11 @@
 from pydantic import BaseModel, ConfigDict
 
 from compute_permit_sim.schemas import SimulationRun
-from compute_permit_sim.schemas.batch import MonteCarloResult, SweepResult
+from compute_permit_sim.schemas.batch import (
+    GridSweepResult,
+    MonteCarloResult,
+    SweepResult,
+)
 
 T = TypeVar("T")
 
@@ -69,3 +73,8 @@ def is_ready(self) -> bool:
 sweep_run: solara.Reactive[RunState[SweepResult]] = solara.reactive(
     RunState[SweepResult]()
 )
+
+#: 2D grid sweep batch run state
+grid_run: solara.Reactive[RunState[GridSweepResult]] = solara.reactive(
+    RunState[GridSweepResult]()
+)
diff --git a/tests/factories.py b/tests/factories.py
index 6ba5739..804ff05 100644
--- a/tests/factories.py
+++ b/tests/factories.py
@@ -1,6 +1,11 @@
 """Test data factories for generating valid schema objects."""
 
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from compute_permit_sim.schemas.batch import GridSweepResult
 
 from compute_permit_sim.schemas import (
     AgentSnapshot,
@@ -60,3 +65,32 @@ def create_scenario_config(
     }
     data = {**defaults, **kwargs}
     return ScenarioConfig(name=name, **data)
+
+
+def create_grid_sweep_result(
+    n_x: int = 3,
+    n_y: int = 2,
+    scenario_name: str = "Test Scenario",
+) -> "GridSweepResult":
+    """Return a synthetic GridSweepResult fixture; no simulation is executed."""
+    from compute_permit_sim.schemas.batch import GridSweepResult
+
+    # Cell [y_idx][x_idx] = row-major index / cell count, so values lie in [0, 1).
+    n_cells = n_x * n_y
+    compliance_rows = []
+    for row in range(n_y):
+        offset = row * n_x
+        compliance_rows.append([float(offset + col) / n_cells for col in range(n_x)])
+
+    return GridSweepResult(
+        scenario_name=scenario_name,
+        param_x_path="audit.base_prob",
+        param_x_label="Base Audit Probability",
+        param_y_path="collateral_amount",
+        param_y_label="Collateral K (M$)",
+        config=create_scenario_config(name=scenario_name),
+        x_values=[float(i) * 0.1 for i in range(1, n_x + 1)],
+        y_values=[float(j) * 10.0 for j in range(1, n_y + 1)],
+        grid=compliance_rows,
+        n_runs=5,
+    )
diff --git a/tests/services/test_sweep.py b/tests/services/test_sweep.py
index 8597bfb..3f7dd26 100644
--- a/tests/services/test_sweep.py
+++ b/tests/services/test_sweep.py
@@ -86,3 +86,103 @@ def test_default_param_label(self) -> None:
         cfg = self._base()
         result = run_sweep(cfg, "audit.base_prob", [0.1], n_runs=2)
         assert result.param_label == "audit.base_prob"
+
+
+class TestRunGridSweep:
+    """Exercises ``run_grid_sweep`` on tiny scenarios (few agents, few steps)."""
+
+    def _base(self) -> ScenarioConfig:
+        return ScenarioConfig(n_agents=4, steps=3)
+
+    def test_grid_shape(self) -> None:
+        from compute_permit_sim.services.sweep import run_grid_sweep
+
+        xs = [0.05, 0.10, 0.15]
+        ys = [0.0, 10.0]
+        outcome = run_grid_sweep(
+            self._base(),
+            "audit.base_prob",
+            "collateral_amount",
+            xs,
+            ys,
+            n_runs=2,
+        )
+        assert len(outcome.grid) == len(ys)
+        assert {len(row) for row in outcome.grid} == {len(xs)}
+
+    def test_grid_values_in_range(self) -> None:
+        from compute_permit_sim.services.sweep import run_grid_sweep
+
+        outcome = run_grid_sweep(
+            self._base(),
+            "audit.base_prob",
+            "collateral_amount",
+            [0.05, 0.20],
+            [0.0, 5.0],
+            n_runs=2,
+        )
+        assert all(0.0 <= cell <= 1.0 for row in outcome.grid for cell in row)
+
+    def test_metadata(self) -> None:
+        from compute_permit_sim.services.sweep import run_grid_sweep
+
+        outcome = run_grid_sweep(
+            ScenarioConfig(name="GridTest", n_agents=2, steps=2),
+            "audit.base_prob",
+            "collateral_amount",
+            [0.1],
+            [0.0],
+            param_x_label="X Label",
+            param_y_label="Y Label",
+            n_runs=2,
+        )
+        # Labels and paths must round-trip unchanged onto the result.
+        assert outcome.scenario_name == "GridTest"
+        assert outcome.param_x_path == "audit.base_prob"
+        assert outcome.param_y_path == "collateral_amount"
+        assert outcome.param_x_label == "X Label"
+        assert outcome.param_y_label == "Y Label"
+        assert outcome.n_runs == 2
+
+    def test_compliance_at(self) -> None:
+        from compute_permit_sim.services.sweep import run_grid_sweep
+
+        xs = [0.05, 0.20]
+        ys = [0.0, 10.0]
+        outcome = run_grid_sweep(
+            self._base(),
+            "audit.base_prob",
+            "collateral_amount",
+            xs,
+            ys,
+            n_runs=2,
+        )
+        # Every sampled (x, y) pair resolves to a valid fraction
+        for x, y in [(x, y) for x in xs for y in ys]:
+            cell = outcome.compliance_at(x, y)
+            assert cell is not None
+            assert 0.0 <= cell <= 1.0
+        # A coordinate outside the grid has no cell
+        assert outcome.compliance_at(0.99, 99.0) is None
+
+    def test_reproducible(self) -> None:
+        """Same seeds and scenario must yield the same grid cell."""
+        from compute_permit_sim.services.sweep import run_grid_sweep
+
+        fixed_seeds = [0, 1, 2]
+
+        def sweep_once() -> float:
+            # Single-cell sweep; returns that cell's mean compliance
+            return run_grid_sweep(
+                self._base(),
+                "audit.base_prob",
+                "collateral_amount",
+                [0.05],
+                [0.0],
+                seeds=fixed_seeds,
+            ).grid[0][0]
+
+        # Identical seeds must reproduce the same cell value
+        first = sweep_once()
+        second = sweep_once()
+        assert abs(first - second) < 1e-10
diff --git a/tests/vis/test_export.py b/tests/vis/test_export.py
index 2870237..5395d12 100644
--- a/tests/vis/test_export.py
+++ b/tests/vis/test_export.py
@@ -109,3 +109,51 @@ def test_export_run_to_excel_creates_file(sample_run: SimulationRun) -> None:
         # Header row is parsed, we expect 2 agents
         assert len(df_agents) == 2
         assert "Agent's base economic value (v_i)" in df_agents.columns
+
+
+# ---------------------------------------------------------------------------
+# Grid sweep export tests
+# ---------------------------------------------------------------------------
+
+
+def test_export_grid_sweep_to_csv_bytes() -> None:
+    """CSV export yields bytes holding one row per grid cell plus key columns."""
+    import io
+
+    from compute_permit_sim.vis.export import export_grid_sweep_to_csv
+    from tests.factories import create_grid_sweep_result
+
+    n_x, n_y = 3, 2
+    fixture = create_grid_sweep_result(n_x=n_x, n_y=n_y)
+    payload = export_grid_sweep_to_csv(fixture, output_path="")
+    assert isinstance(payload, bytes)
+
+    frame = pd.read_csv(io.BytesIO(payload))
+    assert len(frame) == n_x * n_y
+    expected_columns = {
+        "param_x_path",
+        "param_x_value",
+        "param_y_path",
+        "param_y_value",
+        "n_runs",
+        "compliance_rate",
+    }
+    assert expected_columns <= set(frame.columns)
+
+
+def test_export_grid_sweep_to_excel_bytes() -> None:
+    """Excel export yields a non-empty workbook with Config, Grid, Heatmap sheets."""
+    import io
+
+    from compute_permit_sim.vis.export import export_grid_sweep_to_excel
+    from tests.factories import create_grid_sweep_result
+
+    fixture = create_grid_sweep_result(n_x=2, n_y=2)
+    payload = export_grid_sweep_to_excel(fixture, output_path="")
+    assert isinstance(payload, bytes)
+    assert payload
+
+    with pd.ExcelFile(io.BytesIO(payload)) as workbook:
+        sheets = workbook.sheet_names
+    for expected in ("Config", "Grid", "Heatmap"):
+        assert expected in sheets

From 53eccf6f05c2dcf47edf1fd07c25fec6121934e3 Mon Sep 17 00:00:00 2001
From: Josh Tuffy <jtuffy117@gmail.com>
Date: Sun, 15 Mar 2026 20:29:28 -0400
Subject: [PATCH 2/6] update

---
 src/compute_permit_sim/vis/plotting.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/compute_permit_sim/vis/plotting.py b/src/compute_permit_sim/vis/plotting.py
index 0084963..d0ca874 100644
--- a/src/compute_permit_sim/vis/plotting.py
+++ b/src/compute_permit_sim/vis/plotting.py
@@ -817,3 +817,18 @@ def plot_sweep_heatmap(
     ax.grid(False)
     fig.tight_layout()
     return fig
+
+
+def save_figure(fig: Figure, path: str, dpi: int = 150) -> None:
+    """Save a Figure to *path* using canonical export settings.
+
+    Single source of truth for dpi and bbox behaviour across all scripts and
+    agent_workspace callers.  Never call ``fig.savefig(...)`` directly in
+    workspace scripts — use this instead.  Always saves with a tight bbox.
+
+    Args:
+        fig:  A ``matplotlib.figure.Figure`` returned by any plotting function.
+        path: Destination file path (PNG recommended).
+        dpi:  Resolution in dots per inch; default 150 for paper-quality output.
+    """
+    fig.savefig(path, dpi=dpi, bbox_inches="tight")

From 728492e4f65e92263f7b807b5517eea2f2016eea Mon Sep 17 00:00:00 2001
From: Josh Tuffy <jtuffy117@gmail.com>
Date: Tue, 17 Mar 2026 18:35:58 -0400
Subject: [PATCH 3/6] fix scenarios

---
 scenarios/basic/scenario_2_strict.json | 5 +++--
 scenarios/basic/scenario_3_smart.json  | 5 ++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/scenarios/basic/scenario_2_strict.json b/scenarios/basic/scenario_2_strict.json
index 9375a1f..d2bb04d 100644
--- a/scenarios/basic/scenario_2_strict.json
+++ b/scenarios/basic/scenario_2_strict.json
@@ -12,9 +12,10 @@
     },
     "lab": {
         "capability_value": 40.0,
-        "racing_factor": 2.0
+        "racing_factor": 2.0,
+        "audit_coefficient": 0.1
     },
-    "collateral_amount": 100.0,
+    "collateral_amount": 15.75,
     "market": {
         "fixed_price": 70.0
     }
diff --git a/scenarios/basic/scenario_3_smart.json b/scenarios/basic/scenario_3_smart.json
index 12617d3..797c393 100644
--- a/scenarios/basic/scenario_3_smart.json
+++ b/scenarios/basic/scenario_3_smart.json
@@ -4,10 +4,13 @@
     "steps": 10,
     "n_agents": 20,
     "audit": {
-        "base_prob": 0.2,
+        "base_prob": 0.1,
         "monitoring_prob": 0.2,
         "signal_dependent": true
     },
+    "lab": {
+        "audit_coefficient": 0.5
+    },
     "collateral_amount": 15.75,
     "market": {
         "fixed_price": 2.0,

From 33e51739ed57a8e8480ac09176f02df4142252fe Mon Sep 17 00:00:00 2001
From: Josh Tuffy <jtuffy117@gmail.com>
Date: Tue, 17 Mar 2026 21:16:44 -0400
Subject: [PATCH 4/6] code review notes

---
 pyproject.toml                                |  1 +
 src/compute_permit_sim/core/market.py         | 19 +++++++++++--
 src/compute_permit_sim/schemas/batch.py       | 16 +++++------
 src/compute_permit_sim/services/mesa_model.py |  4 ++-
 .../services/monte_carlo.py                   | 22 +++++++--------
 src/compute_permit_sim/vis/export.py          | 28 +++++++++----------
 .../vis/panels/batch_results.py               | 20 ++++++-------
 7 files changed, 63 insertions(+), 47 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 01d9a80..742d309 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,3 +51,4 @@ python_files = ["test_*.py"]
 ignore_missing_imports = true
 check_untyped_defs = true
 plugins = ["pydantic.mypy"]
+exclude = ["agent_workspace"]
diff --git a/src/compute_permit_sim/core/market.py b/src/compute_permit_sim/core/market.py
index d3cd15f..62884e8 100644
--- a/src/compute_permit_sim/core/market.py
+++ b/src/compute_permit_sim/core/market.py
@@ -16,18 +16,28 @@ class SimpleClearingMarket:
     Attributes:
         max_supply: Total permits available (Q).
         current_price: The most recent clearing price.
+        _rng: Seeded RNG for deterministic permit lottery (fixed-price mode).
     """
 
-    def __init__(self, permit_cap: float, fixed_price: float | None = None) -> None:
+    def __init__(
+        self,
+        permit_cap: float,
+        fixed_price: float | None = None,
+        rng: random.Random | None = None,
+    ) -> None:
         """
         Args:
             permit_cap: Total permits available (Q).
             fixed_price: Optional fixed price; if set, all qualifying bidders pay
                 this price instead of the auction-cleared rate.
+            rng: Seeded RNG for reproducible permit lottery in fixed-price
+                over-subscription. Defaults to the global random module when
+                None — callers should always pass the model-level RNG.
         """
         self.max_supply: float = permit_cap
         self.current_price: float = 0.0
         self.fixed_price: float | None = fixed_price
+        self._rng: random.Random | None = rng
 
     def set_fixed_price(self, price: float) -> None:
         """Set a fixed price for the market.
@@ -125,8 +135,11 @@ def allocate(
                 for lab_id, qty in qualifying:
                     allocations[lab_id] = qty
             else:
-                # Over-subscribed: randomly sample up to permit_cap units
-                winners = random.sample(fp_units, available)
+                # Over-subscribed: randomly sample up to permit_cap units.
+                # Use the model-level RNG for reproducibility; fall back to
+                # global random only in non-Mesa (standalone test) contexts.
+                _rng = self._rng if self._rng is not None else random
+                winners = _rng.sample(fp_units, available)
                 for lab_id in winners:
                     allocations[lab_id] += 1
 
diff --git a/src/compute_permit_sim/schemas/batch.py b/src/compute_permit_sim/schemas/batch.py
index 11cd984..9e4f8b8 100644
--- a/src/compute_permit_sim/schemas/batch.py
+++ b/src/compute_permit_sim/schemas/batch.py
@@ -64,10 +64,10 @@ class BatchColumnNames:
     AUDIT_RATE = "audit_rate"
     AUDIT_RATE_MEAN = "audit_rate_mean"
     AUDIT_RATE_STD = "audit_rate_std"
-    FALSE_POSITIVE_RATE_MEAN = "false_positive_rate_mean"
-    FALSE_POSITIVE_RATE_STD = "false_positive_rate_std"
-    DETECTION_RATE_MEAN = "detection_rate_mean"
-    DETECTION_RATE_STD = "detection_rate_std"
+    COMPLIANT_AUDIT_FRACTION_MEAN = "compliant_audit_fraction_mean"
+    COMPLIANT_AUDIT_FRACTION_STD = "compliant_audit_fraction_std"
+    CATCH_RATE_MEAN = "catch_rate_mean"
+    CATCH_RATE_STD = "catch_rate_std"
 
 
 @dataclass(frozen=True)
@@ -87,8 +87,8 @@ class PerSeedResult:
     avg_payoff_compliant: float  # NaN if no compliant labs
     avg_payoff_violator: float  # NaN if no violators
     audit_rate: float
-    false_positive_rate: float
-    detection_rate: float  # NaN if no audited violators
+    compliant_audit_fraction: float
+    catch_rate: float  # NaN if no audited violators
 
 
 @dataclass(frozen=True)
@@ -167,8 +167,8 @@ class MonteCarloResult:
 
     # --- Audit burden ---
     audit_rate: MetricStats  # audits / total lab-steps
-    false_positive_rate: MetricStats  # audits on compliant / total audits
-    detection_rate: MetricStats  # caught / audits on violators
+    compliant_audit_fraction: MetricStats  # audits on compliant / total audits
+    catch_rate: MetricStats  # caught / audits on violators
 
     # --- Raw per-seed data (optional, set store_raw=True in run_monte_carlo) ---
     raw_seeds: list[PerSeedResult] = field(default_factory=list)
diff --git a/src/compute_permit_sim/services/mesa_model.py b/src/compute_permit_sim/services/mesa_model.py
index 4abe0ba..a96dc27 100644
--- a/src/compute_permit_sim/services/mesa_model.py
+++ b/src/compute_permit_sim/services/mesa_model.py
@@ -88,7 +88,9 @@ def __init__(self, config: ScenarioConfig | None = None, **kwargs) -> None:
         self.config = config
         self.running = True
 
-        self.market = SimpleClearingMarket(permit_cap=config.market.permit_cap)
+        self.market = SimpleClearingMarket(
+            permit_cap=config.market.permit_cap, rng=self.random
+        )
         if config.market.fixed_price is not None:
             self.market.set_fixed_price(config.market.fixed_price)
         self.auditor = Auditor(config.audit, rng=self.random)
diff --git a/src/compute_permit_sim/services/monte_carlo.py b/src/compute_permit_sim/services/monte_carlo.py
index 555994c..3a88851 100644
--- a/src/compute_permit_sim/services/monte_carlo.py
+++ b/src/compute_permit_sim/services/monte_carlo.py
@@ -48,8 +48,8 @@ class _RunResult(NamedTuple):
 
     # Audit burden
     audit_rate: float
-    false_positive_rate: float
-    detection_rate: float  # NaN if 0 audited violators
+    compliant_audit_fraction: float
+    catch_rate: float  # NaN if 0 audited violators
 
 
 def _run_once(config: ScenarioConfig, seed: int) -> _RunResult:
@@ -135,10 +135,10 @@ def _run_once(config: ScenarioConfig, seed: int) -> _RunResult:
         avg_payoff_compliant=avg_payoff_compliant,
         avg_payoff_violator=avg_payoff_violator,
         audit_rate=total_audits / total_lab_steps if total_lab_steps else 0.0,
-        false_positive_rate=(
+        compliant_audit_fraction=(
             audits_on_compliant / total_audits if total_audits else 0.0
         ),
-        detection_rate=(
+        catch_rate=(
             violations_caught / audits_on_violators
             if audits_on_violators
             else float("nan")
@@ -269,14 +269,14 @@ def run_monte_carlo(
             else MetricStats.nan()
         ),
         audit_rate=MetricStats.from_values([r.audit_rate for r in raw]),
-        false_positive_rate=MetricStats.from_values(
-            [r.false_positive_rate for r in raw]
+        compliant_audit_fraction=MetricStats.from_values(
+            [r.compliant_audit_fraction for r in raw]
         ),
-        detection_rate=(
+        catch_rate=(
             MetricStats.from_values(
-                [r.detection_rate for r in raw if not math.isnan(r.detection_rate)]
+                [r.catch_rate for r in raw if not math.isnan(r.catch_rate)]
             )
-            if any(not math.isnan(r.detection_rate) for r in raw)
+            if any(not math.isnan(r.catch_rate) for r in raw)
             else MetricStats.nan()
         ),
         raw_seeds=[
@@ -289,8 +289,8 @@ def run_monte_carlo(
                 avg_payoff_compliant=r.avg_payoff_compliant,
                 avg_payoff_violator=r.avg_payoff_violator,
                 audit_rate=r.audit_rate,
-                false_positive_rate=r.false_positive_rate,
-                detection_rate=r.detection_rate,
+                compliant_audit_fraction=r.compliant_audit_fraction,
+                catch_rate=r.catch_rate,
             )
             for s, r in zip(run_seeds, raw)
         ]
diff --git a/src/compute_permit_sim/vis/export.py b/src/compute_permit_sim/vis/export.py
index d78d3e5..ab18f3f 100644
--- a/src/compute_permit_sim/vis/export.py
+++ b/src/compute_permit_sim/vis/export.py
@@ -534,10 +534,10 @@ def export_monte_carlo_to_csv(
             _BCN.PAYOFF_VIOLATOR_STD: r.payoff_violator.std,
             _BCN.AUDIT_RATE_MEAN: r.audit_rate.mean,
             _BCN.AUDIT_RATE_STD: r.audit_rate.std,
-            _BCN.FALSE_POSITIVE_RATE_MEAN: r.false_positive_rate.mean,
-            _BCN.FALSE_POSITIVE_RATE_STD: r.false_positive_rate.std,
-            _BCN.DETECTION_RATE_MEAN: r.detection_rate.mean,
-            _BCN.DETECTION_RATE_STD: r.detection_rate.std,
+            _BCN.COMPLIANT_AUDIT_FRACTION_MEAN: r.compliant_audit_fraction.mean,
+            _BCN.COMPLIANT_AUDIT_FRACTION_STD: r.compliant_audit_fraction.std,
+            _BCN.CATCH_RATE_MEAN: r.catch_rate.mean,
+            _BCN.CATCH_RATE_STD: r.catch_rate.std,
         }
         for r in results
     ]
@@ -585,8 +585,8 @@ def export_mc_per_seed_to_csv(
             _BCN.PAYOFF_COMPLIANT_MEAN: s.avg_payoff_compliant,
             _BCN.PAYOFF_VIOLATOR_MEAN: s.avg_payoff_violator,
             _BCN.AUDIT_RATE_MEAN: s.audit_rate,
-            _BCN.FALSE_POSITIVE_RATE_MEAN: s.false_positive_rate,
-            _BCN.DETECTION_RATE_MEAN: s.detection_rate,
+            _BCN.COMPLIANT_AUDIT_FRACTION_MEAN: s.compliant_audit_fraction,
+            _BCN.CATCH_RATE_MEAN: s.catch_rate,
         }
         for s in result.raw_seeds
     ]
@@ -731,8 +731,8 @@ def export_sweep_to_csv(
             _BCN.PAYOFF_VIOLATOR_MEAN: pt.result.payoff_violator.mean,
             _BCN.AUDIT_RATE_MEAN: pt.result.audit_rate.mean,
             _BCN.AUDIT_RATE_STD: pt.result.audit_rate.std,
-            _BCN.FALSE_POSITIVE_RATE_MEAN: pt.result.false_positive_rate.mean,
-            _BCN.DETECTION_RATE_MEAN: pt.result.detection_rate.mean,
+            _BCN.COMPLIANT_AUDIT_FRACTION_MEAN: pt.result.compliant_audit_fraction.mean,
+            _BCN.CATCH_RATE_MEAN: pt.result.catch_rate.mean,
         }
         for pt in result.points
     ]
@@ -964,10 +964,10 @@ def export_monte_carlo_to_excel(
             ("Audit Rate", result.audit_rate.mean, result.audit_rate.std),
             (
                 "False Positive Rate",
-                result.false_positive_rate.mean,
-                result.false_positive_rate.std,
+                result.compliant_audit_fraction.mean,
+                result.compliant_audit_fraction.std,
             ),
-            ("Detection Rate", result.detection_rate.mean, result.detection_rate.std),
+            ("Detection Rate", result.catch_rate.mean, result.catch_rate.std),
         ]
         for label, mean_val, std_val in _mc_summary_rows:
             is_pct = (
@@ -1023,8 +1023,8 @@ def export_monte_carlo_to_excel(
                     s.avg_payoff_compliant,
                     s.avg_payoff_violator,
                     s.audit_rate,
-                    s.false_positive_rate,
-                    s.detection_rate,
+                    s.compliant_audit_fraction,
+                    s.catch_rate,
                 ]
                 for col, v in enumerate(vals):
                     seed_sheet.write(
@@ -1138,7 +1138,7 @@ def export_sweep_to_excel(
                 pt.result.avg_price.mean,
                 pt.result.avg_net_payoff.mean,
                 pt.result.audit_rate.mean,
-                pt.result.detection_rate.mean,
+                pt.result.catch_rate.mean,
             ]
             for col, v in enumerate(vals):
                 sweep_sheet.write(
diff --git a/src/compute_permit_sim/vis/panels/batch_results.py b/src/compute_permit_sim/vis/panels/batch_results.py
index da01cff..20aecf6 100644
--- a/src/compute_permit_sim/vis/panels/batch_results.py
+++ b/src/compute_permit_sim/vis/panels/batch_results.py
@@ -100,9 +100,9 @@ def _MCResultsView() -> Any:
                     )
                     DownloadJSON(
                         "Download config JSON (for reproducibility)",
-                        lambda r=result: r.config.model_dump_json(indent=2).encode(
+                        lambda r=result: r.config.model_dump_json(indent=2).encode(  # type: ignore[misc]
                             "utf-8"
-                        ),  # type: ignore[misc]
+                        ),
                         f"mc_config_{safe}.json",
                     )
 
@@ -182,16 +182,16 @@ def _MCResultsView() -> Any:
                     "\u2014",
                 ),
                 (
-                    "False Positive Rate",
-                    f"{result.false_positive_rate.mean:.1%}",
-                    f"{result.false_positive_rate.std:.1%}",
+                    "Compliant Audit Fraction",
+                    f"{result.compliant_audit_fraction.mean:.1%}",
+                    f"{result.compliant_audit_fraction.std:.1%}",
                     "\u2014",
                     "\u2014",
                 ),
                 (
-                    "Detection Rate",
-                    f"{result.detection_rate.mean:.1%}",
-                    f"{result.detection_rate.std:.1%}",
+                    "Catch Rate (given audit)",
+                    f"{result.catch_rate.mean:.1%}",
+                    f"{result.catch_rate.std:.1%}",
                     "\u2014",
                     "\u2014",
                 ),
@@ -266,9 +266,9 @@ def _SweepResultsView() -> Any:
                     )
                     DownloadJSON(
                         "Download config JSON (for reproducibility)",
-                        lambda r=result: r.config.model_dump_json(indent=2).encode(
+                        lambda r=result: r.config.model_dump_json(indent=2).encode(  # type: ignore[misc]
                             "utf-8"
-                        ),  # type: ignore[misc]
+                        ),
                         f"sweep_config_{safe_s}_{safe_p}.json",
                     )
 

From f05785370d58d96bec6b7460dc6ab7d887205df4 Mon Sep 17 00:00:00 2001
From: Josh Tuffy <jtuffy117@gmail.com>
Date: Wed, 18 Mar 2026 15:23:58 -0400
Subject: [PATCH 5/6] ruff format

---
 src/compute_permit_sim/vis/export.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/compute_permit_sim/vis/export.py b/src/compute_permit_sim/vis/export.py
index 0ae7a9e..23b27a4 100644
--- a/src/compute_permit_sim/vis/export.py
+++ b/src/compute_permit_sim/vis/export.py
@@ -1247,7 +1247,10 @@ def export_grid_sweep_to_excel(
         grid_sheet = workbook.add_worksheet("Grid")
         grid_sheet.set_column("A:A", 20)
         grid_sheet.write(
-            0, 0, f"{result.param_x_label} \u2192 / {result.param_y_label} \u2193", header_fmt
+            0,
+            0,
+            f"{result.param_x_label} \u2192 / {result.param_y_label} \u2193",
+            header_fmt,
         )
         for x_idx, x in enumerate(result.x_values):
             grid_sheet.write(0, x_idx + 1, x, header_fmt)

From 8985fc21c56ed5f7c896ae35fe6de88ecb6ffa2b Mon Sep 17 00:00:00 2001
From: emlynsg <emlyn.graham@gmail.com>
Date: Thu, 19 Mar 2026 20:19:14 +0900
Subject: [PATCH 6/6] Fix stale p_catch formula in defaults.py, sync
 batch_test.json params

- defaults.py: correct p_catch comment to match nested detection model
- batch_test.json: sync params with scenario_4_feedback_compliance
  (base_prob=0.20, penalty=50, escalation=0.5, steps=50)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 scenarios/batch_test.json                  | 8 ++++----
 src/compute_permit_sim/schemas/defaults.py | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/scenarios/batch_test.json b/scenarios/batch_test.json
index f303c95..31270c1 100644
--- a/scenarios/batch_test.json
+++ b/scenarios/batch_test.json
@@ -3,21 +3,21 @@
   "description": "Demonstrates that feedback mechanisms (reputation, audit escalation) can drive compliance even under moderate enforcement.",
   "notes": "",
   "n_agents": 20,
-  "steps": 40,
+  "steps": 50,
   "flop_threshold": 1e25,
   "collateral_amount": 0.0,
   "audit": {
-    "base_prob": 0.3,
+    "base_prob": 0.20,
     "signal_dependent": false,
     "signal_exponent": 1.0,
     "false_positive_rate": 0.0,
     "false_negative_rate": 0.05,
-    "penalty_amount": 100.0,
+    "penalty_amount": 50.0,
     "backcheck_prob": 0.0,
     "whistleblower_prob": 0.0,
     "monitoring_prob": 0.0,
     "max_audits_per_step": null,
-    "audit_escalation": 1.5,
+    "audit_escalation": 0.5,
     "audit_decay_rate": 0.1
   },
   "market": {
diff --git a/src/compute_permit_sim/schemas/defaults.py b/src/compute_permit_sim/schemas/defaults.py
index d6de624..4c2aba2 100644
--- a/src/compute_permit_sim/schemas/defaults.py
+++ b/src/compute_permit_sim/schemas/defaults.py
@@ -77,7 +77,7 @@
 DEFAULT_SIGNAL_EXPONENT = 1.0
 #
 # Stage 2: AUDIT OUTCOME — given audit, does it find a violation?
-#   p_catch_if_audited = (1 - FNR) + FNR × backcheck_prob
+#   p_catch_if_audited = 1 - FNR × (1 - backcheck_prob) × (1 - p_w) × (1 - p_m)
 DEFAULT_AUDIT_FALSE_POS_RATE = 0.0  # alpha: P(false alarm | compliant firm audited)
 DEFAULT_AUDIT_FALSE_NEG_RATE = 0.40  # beta: 40% miss rate in Minimal env
 # Penalty structure: