From 06f557251d635049842855100ad6132d8cf28cac Mon Sep 17 00:00:00 2001 From: Josh Tuffy Date: Sun, 15 Mar 2026 18:45:15 -0400 Subject: [PATCH 1/6] heatmap --- .agents/rules/amendments.md | 34 ++ .agents/rules/project.md | 23 +- .agents/workflows/gen-figures.md | 112 +++++++ .agents/workflows/researcher.md | 313 ++++++------------ src/compute_permit_sim/schemas/batch.py | 95 +++++- .../schemas/sweep_params.py | 40 +++ src/compute_permit_sim/services/sweep.py | 117 ++++++- .../vis/components/history.py | 9 +- .../vis/components/history_items.py | 57 +++- src/compute_permit_sim/vis/export.py | 142 +++++++- src/compute_permit_sim/vis/page.py | 14 +- src/compute_permit_sim/vis/panels/batch.py | 270 ++++++++++++++- .../vis/panels/batch_results.py | 116 ++++++- src/compute_permit_sim/vis/plotting.py | 148 ++++++++- src/compute_permit_sim/vis/state/history.py | 12 +- src/compute_permit_sim/vis/state/run_state.py | 11 +- tests/factories.py | 36 +- tests/services/test_sweep.py | 100 ++++++ tests/vis/test_export.py | 48 +++ 19 files changed, 1440 insertions(+), 257 deletions(-) create mode 100644 .agents/workflows/gen-figures.md diff --git a/.agents/rules/amendments.md b/.agents/rules/amendments.md index 6552644..72890f0 100644 --- a/.agents/rules/amendments.md +++ b/.agents/rules/amendments.md @@ -35,3 +35,37 @@ Sessionfriction identified during prune-repo + cleanup work: - **Logging config** (`vis/logging_config.py` is canonical; never configure in `page.py`) - **Schema field removal checklist** (grep callers, confirm never populated, no `list[dict]` placeholders) - `python.md` sync-guard test bullet was vague. Expanded with the concrete pattern: compare `model_fields` against the reactive registry at test time. + +## 2026-03-12 — Always use vis/plotting.py for paper figures + +`vis/plotting.py` is the single source of truth for all chart functions. 
When generating +figures for papers, scripts, or exports, **always call functions from there** — never write +custom matplotlib from scratch in agent_workspace scripts. + +Available functions to reach for first: +- `plot_sweep_curve(SweepResult)` — 1D sweep line chart with tipping point annotation +- `plot_mc_trajectory(MonteCarloResult)` — compliance over steps, mean ± SD +- `plot_mc_violator_trajectory`, `plot_mc_audit_trajectory`, `plot_mc_payoff_comparison` + +If a needed figure type does not exist in `vis/plotting.py` (e.g. a 2D heatmap), **add it +there** following the `create_figure()` style, then use it from both the UI and scripts. +Do not create ad-hoc matplotlib code in agent_workspace when an equivalent function +already exists or could be added once and shared. + +## 2026-03-14 — Reflect: plotting discipline and scripting infrastructure + +Three friction sources identified, all patched this session: + +1. **`project.md` Plots section was too sparse** — 2 lines with no function inventory. + Replaced with the full table of all 12 public functions and a mandatory "check before + writing any matplotlib" gate. The agent cannot now claim ignorance of what exists. + +2. **`researcher.md` step 4 had zero mention of `vis/plotting.py`** — meaning every + research visualisation session was allowed to invent ad-hoc matplotlib. Added an + `[!IMPORTANT]` callout before the visualise step enforcing the same gate. + +3. **No `/gen-figures` workflow existed** — figure generation for paper sections was + improvised each time. Created `.agents/workflows/gen-figures.md` with a step-by-step + thin-caller checklist, a copy-paste script template, and a `// turbo` run step. + Also created `scripts/README.md` as the cross-session script index so existing + scripts are discoverable rather than silently re-invented. 
diff --git a/.agents/rules/project.md b/.agents/rules/project.md index a646e01..9a403d2 100644 --- a/.agents/rules/project.md +++ b/.agents/rules/project.md @@ -100,7 +100,28 @@ All export functions return `bytes` for Solara's `FileDownload`. Key functions: ## Plots (`vis/plotting.py`) -Accept typed result objects, return `matplotlib.Figure`, never import Solara. Use `fig_to_png(fig)` from `results.py` to convert to bytes for downloads. Standard figsize `(7, 4)`. +**Before writing any matplotlib code**, check this inventory. If the function you need exists here, call it. If it doesn't exist, add it here following the `create_figure()` style — then use it from both scripts and the UI. + +All functions accept typed result objects, return `matplotlib.Figure`, never import Solara. Use `fig_to_png(fig)` from `results.py` to convert to bytes for downloads. + +| Function | Input | Use for | +|---|---|---| +| `plot_sweep_curve(result, metric, reference_lines)` | `SweepResult` | 1D sweep line chart with tipping point + optional scenario markers | +| `plot_sweep_heatmap(grid, x_values, y_values, ...)` | 2D `list[list[float]]` | 2D compliance heatmap (joint sensitivity) | +| `plot_mc_trajectory(result)` | `MonteCarloResult` | Compliance mean ± SD over steps | +| `plot_mc_violator_trajectory(result)` | `MonteCarloResult` | Violator count mean ± SD over steps | +| `plot_mc_audit_trajectory(result)` | `MonteCarloResult` | Audit rate band over steps | +| `plot_mc_payoff_comparison(result)` | `MonteCarloResult` | Compliant vs. violating lab payoff bar chart | +| `plot_compliance_distribution(df)` | agents DataFrame | Bar chart: Compliant / Uncaught / Caught-by-source | +| `plot_audit_source_distribution(df)` | agents DataFrame | Bar chart: labs caught per AuditSource channel | +| `plot_audit_targeting(rates, counts, ...)` | scalar rates | Compliant vs. 
non-compliant audit rate bar | +| `plot_audit_coefficient_distribution(df)` | agents DataFrame | Histogram of per-lab audit coefficients | +| `plot_time_series(data, label, color_key)` | `pd.Series` | Generic single-series step chart | +| `plot_scatter(df, x_col, y_col, ...)` | DataFrame | Scatter with compliance coloring | + +All figures are created via `create_figure()` (standardized style, `Agg` backend). Never call `plt.figure()` or `plt.subplots()` in scripts. + +**Committed figure scripts** — see `scripts/README.md` for an index of existing scripts. Always check there before re-creating a script. ## Testing diff --git a/.agents/workflows/gen-figures.md b/.agents/workflows/gen-figures.md new file mode 100644 index 0000000..c52733d --- /dev/null +++ b/.agents/workflows/gen-figures.md @@ -0,0 +1,112 @@ +--- +description: Generate one or more figures for the paper or a report — enforces the thin-caller pattern where all plot logic lives in vis/plotting.py. +--- + +# Gen-Figures Workflow + +Use this workflow whenever you need to produce `.png` figures for the paper, a report, +or any committed output. Do **not** improvise — follow these steps in order. + +## Step 1 — Check `vis/plotting.py` first + +Open `project.md` and read the **Plots** section inventory table. +Find the function that matches the figure you need. + +- **Exists?** → go to Step 3. +- **Doesn't exist?** → you must add it to `vis/plotting.py` first (Step 2), then proceed. + +**Never write raw `plt.figure()` or `plt.subplots()` in a script or agent_workspace file.** +Use `create_figure()` from `vis/plotting.py` at minimum, and prefer a proper named function. + +## Step 2 — Add a missing function to `vis/plotting.py` (if needed) + +1. Follow the `create_figure()` style exactly — see existing functions for the pattern. +2. Accept typed result objects (`SweepResult`, `MonteCarloResult`, `pd.DataFrame`) — no raw dicts. +3. 
Return `matplotlib.Figure` (never call `plt.show()` or `plt.savefig()` inside the function). +4. Add it to the inventory table in `project.md` → Plots section. +5. Run `uv run ruff check . --fix && uv run mypy .` — fix any issues before proceeding. + +## Step 3 — Check `scripts/README.md` for an existing script + +Open `scripts/README.md`. +If a script already generates the figures you need (or close to it), **run that script** rather than writing a new one. + +```bash +uv run python scripts/<script_name>.py --out-dir agent_workspace/figures +``` + +If the existing script's parameters or scenarios need adjustment, edit it in place — don't create a duplicate. + +## Step 4 — Write a thin-caller script (if no existing script covers it) + +Create a new script in `scripts/` following the naming convention `gen_<section>_figs.py`. + +The script must follow the **thin-caller pattern**: +- All imports from `vis.plotting`, `services.*`, `schemas.*` +- No matplotlib setup — no `plt.figure()`, `plt.subplots()`, `matplotlib.use()` +- Each figure: call the `vis/plotting.py` function → `fig.savefig(out_dir / "name.png", dpi=150, bbox_inches="tight")` +- Accept `--out-dir` as a CLI argument (default: `agent_workspace/figures`) +- Print progress lines so it's easy to monitor + +Minimal template: +```python +"""Generate figures for the paper. + +Thin caller only — all plot logic lives in vis/plotting.py.
+Output: agent_workspace/figures/<name>.png + +Usage: + uv run python scripts/gen_<section>_figs.py [--out-dir PATH] +""" +from __future__ import annotations +import argparse +from pathlib import Path + + +def main(out_dir: Path) -> None: + out_dir.mkdir(parents=True, exist_ok=True) + + from compute_permit_sim.services.config_manager import load_scenario + from compute_permit_sim.services.sweep import run_sweep + from compute_permit_sim.vis.plotting import plot_sweep_curve # add as needed + + base = load_scenario("basic/<scenario>.json") + result = run_sweep(base, "audit.base_prob", [...], n_runs=50) + fig = plot_sweep_curve(result) + fig.savefig(out_dir / "fig_<name>.png", dpi=150, bbox_inches="tight") + print("Done.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--out-dir", type=Path, default=Path("agent_workspace/figures")) + args = parser.parse_args() + main(args.out_dir) +``` + +## Step 5 — Update `scripts/README.md` + +After writing or modifying a script, update the index in `scripts/README.md`: + +``` +| gen_<section>_figs.py | Generates <figures> for Section X. Scenarios: <...>. | +``` + +## Step 6 — Run and verify + +// turbo +```bash +uv run python scripts/<script_name>.py --out-dir agent_workspace/figures +``` + +Check that: +- All expected `.png` files are created in `out_dir` +- No matplotlib warnings or errors in output +- Figures look correct (open them and inspect) + +## Step 7 — Commit the script + +```bash +git add scripts/<script_name>.py scripts/README.md +git commit -m "scripts: add figure generator" +``` diff --git a/.agents/workflows/researcher.md b/.agents/workflows/researcher.md index d8d6618..4595ce2 100644 --- a/.agents/workflows/researcher.md +++ b/.agents/workflows/researcher.md @@ -1,249 +1,146 @@ --- -description: Researcher workflow — run experiments against the simulation, analyze results, and iterate toward interesting insights. Mimics a domain researcher's scientific process.
+description: Researcher workflow — run parameter sweep experiments against the simulation, analyze results, and iterate toward interesting insights. Produces reproducible figures and a findings log. --- # Researcher Workflow -Use this workflow when the goal is not to build software but to **generate knowledge** -about the simulation: discover interesting parameter regimes, confirm or refute -hypotheses, or produce results suitable for the paper. +Use this workflow when the goal is to **generate knowledge** from the simulation through +parameter sweeps — discover interesting compliance regimes, confirm or refute hypotheses, +and produce figures suitable for the paper. -The agent acts as a computational researcher. It forms a hypothesis, designs an -experiment, runs it, analyzes the output, **generates and evaluates figures**, updates -its understanding, and iterates. +> [!IMPORTANT] +> **Read `agent_workspace/research/synthesis.md` first** — this is the single source of +> truth for what is already known. Do not repeat experiments already answered there. -## Workspace - -All research artifacts live in `agent_workspace/` (gitignored, Docker-excluded): +## Workspace Structure ``` agent_workspace/ -├── scripts/ # reusable helpers — kept across sessions -│ ├── analyze_scenarios.py # run canonical scenarios, print metrics -│ └── collect_code.py # dump codebase to text for LLM context -└── research/ # one subfolder per research session - └── YYYY-MM-DD_slug/ # e.g. 2026-03-07_audit-tipping-point - ├── exp1_.py # experiment scripts - ├── exp2_.py - ├── findings.md # this session's findings (canonical output) - ├── *.png # figures saved by experiment scripts - └── scenarios/ # draft scenario JSONs for this session - └── *.json +├── sections/ +│ └── / # e.g. 
section_43 +│ ├── explore_sweep.py # thin-caller sweep runner — the main experiment tool +│ ├── findings.md # running log of all experiments and verdicts +│ └── figures/ +│ └── run_NNN/ # one folder per research session / batch of runs +│ ├── *.png # generated sweep figures +│ └── params.json # complete parameter + result record for every figure +└── research/ + └── synthesis.md # cross-session synthesis (update with /synthesize-research) ``` -**Session folder contract** — every researcher workflow invocation produces: -1. `findings.md` — hypothesis, result, interpretation, next steps -2. At least one experiment script (`exp_.py`) that is re-runnable -3. Any generated figures (`.png`) from matplotlib -4. Draft scenario JSONs scoped under `scenarios/` if new configs were explored - -**Never write to `outputs/`** — that's the user's UI export directory. -Research outputs go under `agent_workspace/research//`. - -## Setup - -Before running experiments, orient yourself: - -1. **Read `agent_workspace/research/synthesis.md`** first — this is the single source - of truth for what is already known. It tells you what has been confirmed, what the - interesting regime is, and which open questions remain. Do not repeat experiments - that are already answered there. - -2. **Check the previous session folder** (`agent_workspace/research/*/findings.md`) - for the most recent raw findings, in case the synthesis hasn't been updated yet. - -3. **Load the default scenario** as your baseline. All experiments should be expressed - as deltas from it so results are comparable. - - > [!IMPORTANT] - > The **default `ScenarioConfig` is degenerate** (100% compliance). More broadly, - > **any config where `permit_cap ≥ n_agents` is degenerate** — agents can always - > obtain a permit, so compliance costs nothing regardless of enforcement parameters. - > The interesting regime is **`permit_cap < n_agents`** (forced scarcity). 
Compliance - > tracks the Q/N ratio approximately linearly: `avg_compliance ≈ cap / n_agents`. - > Always start from `research_margin_baseline.json` with a tightened cap (e.g. cap=10, - > n_agents=15) as the canonical interesting starting point. - > - > **`detection_rate = nan` is a diagnostic signal**, not missing data. It means zero - > violations occurred — which confirms a degenerate config. Treat it as a hard - > signal to rethink the parameter regime, not as an experiment result. - -4. **Check the service API** — experiments call services directly (no Solara dependency): - ```python - from compute_permit_sim.services.simulation_runner import run_single - from compute_permit_sim.services.monte_carlo import run_monte_carlo - from compute_permit_sim.services.sweep import run_sweep - from compute_permit_sim.schemas import ScenarioConfig - ``` - -5. **Scenario file lifecycle:** - - During research: write to `agent_workspace/research/scenarios/` - - When finalized and validated: promote to `scenarios/basic/` with a clear, non-`research_` name - - Never write draft scenario files to `scenarios/basic/` — that directory is for user-facing configs +**Run folder contract**: every invocation of `explore_sweep.py` either creates a new +`run_NNN/` folder (auto-incremented) or adds to an existing one (using `--run N`). +Each run folder contains the figures and a `params.json` that records every experiment +in that folder for reproducibility. +## Setup (first time in a repo) -## Service API Reference - -All services are pure Python with no Solara dependency. Call them directly in experiment scripts. 
- -### Basic run — `run_single` -```python -from compute_permit_sim.services.simulation_runner import run_single -from compute_permit_sim.schemas import ScenarioConfig - -result = run_single(config) # returns SimulationRun -print(result.metrics.avg_compliance) -print(result.metrics.final_compliance) -print(result.metrics.detection_rate) -``` - -### Monte Carlo — `run_monte_carlo` -```python -from compute_permit_sim.services.monte_carlo import run_monte_carlo - -result = run_monte_carlo( - config=cfg, - n_runs=30, # replications - store_raw=True, # include per-seed rows in result.raw_seeds - seeds=[0..n-1], # optional: explicit seed list -) -# Key fields: -# result.avg_compliance.mean / .std -# result.final_compliance.mean / .std -# result.p10_compliance, result.p90_compliance -# result.pct_runs_full_compliance -# result.detection_rate.mean -# result.step_compliance — list[MetricStats], one per step -# result.raw_seeds — list[SeedResult] if store_raw=True -``` - -### Parameter sweep — `run_sweep` -```python -from compute_permit_sim.services.sweep import run_sweep - -result = run_sweep( - base_config=cfg, - param_path="audit.base_prob", # dot-path into ScenarioConfig - values=[0.02, 0.05, 0.10, 0.20], # explicit value list - param_label="Base Audit Rate π₀", # optional, for display/export - n_runs=20, -) -# result.points — list[SweepPoint], each has .param_value + .result (MonteCarloResult) -for pt in result.points: - print(pt.param_value, pt.result.avg_compliance.mean) -``` - -To generate a sweep value range from param registry defaults: -```python -from compute_permit_sim.schemas.sweep_params import get_param, generate_values -param = get_param("audit.base_prob") -values = generate_values(param, min_val=0.02, max_val=0.30, step=0.02) -``` +1. Ensure the simulator installs cleanly: + ```bash + uv sync + ``` -### Loading scenario files -```python -from compute_permit_sim.services.config_manager import load_scenario +2. 
Create the section folder and figures directory if they don't exist: + ```bash + mkdir -p agent_workspace/sections/<section>/figures + ``` -# From scenarios/basic/ (committed): -cfg = load_scenario("basic/scenario_2_strict.json") +3. Create `findings.md` in the section folder with this header: + ```markdown + # Sweep Findings Log -# From agent_workspace (use absolute path): -from pathlib import Path -cfg = load_scenario(str(Path("agent_workspace/research/2026-03-07_audit-tipping-point/scenarios/research_margin_baseline.json"))) -``` + | # | Scenario | Param | Range | n | Verdict | Notes | Figure | + |---|---|---|---|---|---|---|---| + ``` -### Inline config construction + overrides -```python -from compute_permit_sim.schemas import ScenarioConfig +4. The sweep runner script `explore_sweep.py` must exist (copy from another section or + create fresh — the pattern is documented below). -# Inline — good for one-off experiments: -cfg = ScenarioConfig(name="My Test", steps=60, n_agents=15) +## Running an Experiment -# Override one field on a loaded scenario (frozen model): -from compute_permit_sim.schemas.sweep_params import override_config -cfg2 = override_config(cfg, "audit.base_prob", 0.12) +```bash +uv run python agent_workspace/sections/<section>/explore_sweep.py \ + --scenario basic/scenario_2_strict.json \ + --param market.fixed_price \ + --min 5 --max 200 --step 10 \ + --n-runs 30 \ + --out crisis_fixed_price.png \ + --ref "70|orange" # optional: x-value|color for scenario default marker ``` +Key flags: +| Flag | Required | Description | +|---|---|---| +| `--scenario` | ✓ | Path relative to `scenarios/`, e.g. `basic/scenario_2_strict.json` | +| `--param` | ✓ | Dot-path into `ScenarioConfig`, e.g.
`market.fixed_price` | +| `--min/--max/--step` | ✓ | Value range for the sweep | +| `--n-runs` | | Monte Carlo replications per point (default: 30) | +| `--out` | ✓ | PNG filename, saved inside the current run folder | +| `--run N` | | Add to existing `run_N` instead of auto-incrementing | +| `--ref "X\|color"` | | Mark a scenario's default value on the curve (pipe-delimited; NO `$` in the arg — bash expands it) | -Each iteration is one experiment. Aim for 3–5 iterations before synthesising conclusions. - -### 1 — Hypothesise - -State a falsifiable hypothesis in plain English, e.g.: -> "Increasing `audit.base_prob` beyond 0.15 produces diminishing compliance returns -> regardless of `collateral_amount`." +> [!CAUTION] +> Never use `$` in `--ref` labels. Bash expands `$70` to empty before Python sees it. +> The plotting code appends the x-value and scenario name automatically: `"Strict Enforcement default: 70"`. -A good hypothesis: -- Names the mechanism it expects to activate -- Predicts the direction and rough magnitude of an effect -- Is refutable by the numbers you will produce +## Interpreting Results -### 2 — Design +The script prints a `VERDICT` on exit: +- **INTERESTING**: compliance range > 20 pp OR max SD > 7 pp across the sweep +- **FLAT**: sweep produces no meaningful variation — move on -Choose the right experiment type: +**What makes a sweep interesting for the paper:** +- Non-linear effect: knee, plateau, phase transition +- Tipping point where ≥ 95% compliance is first achieved or lost +- Wide compliance range (> 30 pp) with low SD (signal not noise) +- Result that contradicts an intuitive expectation -| Goal | Tool | -|---|---| -| Point-in-time result for one config | `run_single` | -| Distribution of outcomes across seeds | `run_monte_carlo` (N ≥ 30) | -| How one parameter shifts compliance | `run_sweep` | +**What to skip:** +- Flat lines — even if mechanistically correct, they don't tell a visual story +- Sweeps where SD > mean × 0.5 — variance 
dominates, result is noise -Write the script in `agent_workspace/research/exp__.py`. +## Tools Available -### 3 — Run and observe +All are in `vis/plotting.py` — check there before writing any matplotlib: -```bash -uv run python agent_workspace/research/exp_1_baseline.py +```python +from compute_permit_sim.vis.plotting import plot_sweep_curve +from compute_permit_sim.services.sweep import run_sweep +from compute_permit_sim.services.config_manager import load_scenario +from compute_permit_sim.schemas.sweep_params import get_param, generate_values ``` -- Note anomalies: did anything behave unexpectedly? Runtime? NaN values? -- Jot raw numbers as inline comments in the script before moving to analysis. - -### 4 — Visualise and evaluate - -After getting numeric results, generate figures. Ask yourself: +`plot_sweep_curve(result, metric="avg_compliance", reference_lines=[(x, label, color)])`: +- Plots mean ± 1 SD band +- Annotates every 95% threshold crossing as "Tipping (95%) ≈ X" +- Marks scenario default values as diamonds on the curve, labelled with scenario name +- Reference line labels are ignored (label is auto-derived); only x-value and color matter -**Is this graph interesting?** -- Does it show a non-linear effect? (knee, saturation, phase transition) -- Does it contradict the expected direction? -- Does variance dominate? (if SD > mean × 0.5 the result is noise — don't graph it) -- Would a policy-maker care about the magnitude? +## Findings Log -**If visually boring** (flat line, constant slope, trivial intercept), skip saving it -and iterate the hypothesis. An uninteresting result is useful information — note it. +After each experiment, add a row to `findings.md`: -**If promising**, save the PNG to `agent_workspace/research/` and use `generate_image` -to visualise what an ideal version of that graph would look like (different color scheme, -better annotations, additional reference lines) — then iterate toward it in matplotlib. 
- -### 5 — Analyse - -- Compute the effect size relative to baseline (% change, not just absolute). -- Check whether variance swamps the signal: if SD > mean × 0.5, the result is noise. -- Look for non-linearities: does the curve have a knee? Is there a saturation point? -- Cross-reference against `SweepResult.tipping_point()` if applicable. - -### 6 — Update understanding and iterate +``` +| NN | Scenario | param.path | min–max | n_runs | FLAT/INTERESTING | one-line note | run_NNN/filename.png | +``` -- Re-state whether the hypothesis was confirmed, refuted, or inconclusive. -- Refine the next hypothesis based on what surprised you. -- Stop when you have confident results across ≥ 2 related dimensions, - or when the last two iterations produce no new insight. +Keep the log as the source of truth for what has been run. The `params.json` in each +run folder is the reproducibility record; the findings log is the research narrative. -## Synthesis +## Iteration Loop -After the loop, produce a markdown summary saved to `agent_workspace/research/findings_.md`: +1. **Hypothesise**: pick a scenario + parameter expected to show non-linear behaviour +2. **Run**: `explore_sweep.py --scenario ... --param ... --min ... --max ...` +3. **View**: read the PNG with `view_file` — check labels, tipping points, curve shape +4. **Judge**: INTERESTING (save, log, continue) or FLAT (note why, pick different param) +5. **Repeat** until you have 2–3 interesting curves per scenario for the paper -- One paragraph per experiment: hypothesis → result → interpretation -- A table of key numeric findings -- Recommended parameter ranges for the paper's figures -- Any caveats (e.g. sensitivity to `n_runs`, boundary effects) -- Embed the most interesting figures as images +**Typical interesting dimensions per scenario type:** +- *High-price scenario* (e.g. Crisis): sweep `market.fixed_price` and `lab.economic_value_max` +- *Enforcement scenario* (e.g. 
Maxwell): sweep `audit.base_prob` and `audit.penalty_amount` +- *Supply-constrained* (e.g. Lawless): sweep `market.permit_cap` +- *Dynamic* (e.g. Dynamic Escalation): sweep `audit.audit_escalation` -## Notes on scale +## After the Session -- Monte Carlo with `n_runs=50` takes ~10–30 s for 100 steps. -- Sweeps over 10+ points with `n_runs=20` each can take several minutes. -- For quick orientation experiments, use `n_runs=10` and `steps=50`; scale up to - confirm final results. -- All services are pure Python with no Solara dependency — safe to call headlessly. +Run `/synthesize-research` to merge findings into `agent_workspace/research/synthesis.md`. diff --git a/src/compute_permit_sim/schemas/batch.py b/src/compute_permit_sim/schemas/batch.py index 97cf69e..11cd984 100644 --- a/src/compute_permit_sim/schemas/batch.py +++ b/src/compute_permit_sim/schemas/batch.py @@ -30,6 +30,12 @@ class BatchColumnNames: PARAM_VALUE = "param_value" N_RUNS = "n_runs" + # 2-D grid sweep — per-axis identifiers + PARAM_X_PATH = "param_x_path" + PARAM_X_VALUE = "param_x_value" + PARAM_Y_PATH = "param_y_path" + PARAM_Y_VALUE = "param_y_value" + # Compliance COMPLIANCE_RATE = "compliance_rate" N_VIOLATORS = "n_violators" @@ -201,15 +207,90 @@ def compliance_series(self) -> list[tuple[float, float, float]]: ] def tipping_point(self, threshold: float = 0.95) -> float | None: - """Return first param value where mean avg_compliance >= threshold. + """Return the boundary param value where mean avg_compliance crosses threshold. + + Direction-aware: detects whether compliance rises or falls with the + parameter and returns the appropriate boundary. + + - Upward sweep (compliance rises with param, e.g. audit rate): + returns first param_value where compliance >= threshold. + - Downward sweep (compliance falls with param, e.g. permit price): + returns last param_value where compliance >= threshold, + i.e. the ceiling before compliance drops below threshold. 
Args: - threshold: Compliance fraction to consider as 'achieved' (default 0.95). + threshold: Compliance fraction to consider as the boundary + (default 0.95). Returns: - First param_value meeting the threshold, or None if never reached. + Boundary param_value, or None if compliance never reaches threshold. """ - for pt in self.points: - if pt.result.avg_compliance.mean >= threshold: - return pt.param_value - return None + if not self.points: + return None + + means = [pt.result.avg_compliance.mean for pt in self.points] + + # Detect direction: compare first and last point + # Use a simple heuristic: if the last mean < first mean, it's a downward sweep. + is_downward = means[-1] < means[0] + + if is_downward: + # Last point where compliance is still at or above the threshold + result = None + for pt in self.points: + if pt.result.avg_compliance.mean >= threshold: + result = pt.param_value + else: + break # First drop below threshold — stop here + return result + else: + # First point where compliance reaches or exceeds the threshold + for pt in self.points: + if pt.result.avg_compliance.mean >= threshold: + return pt.param_value + return None + + +@dataclass(frozen=True) +class GridSweepResult: + """Results of a 2D joint-sensitivity parameter sweep over a scenario. + + Stores mean compliance at every (x, y) grid cell. + + Attributes: + grid: ``grid[y_idx][x_idx]`` = mean compliance fraction (0–1) + over ``n_runs`` seeds at parameter values + ``(x_values[x_idx], y_values[y_idx])``. + """ + + scenario_name: str + param_x_path: str # e.g. "audit.base_prob" + param_x_label: str # human-readable, e.g. "Base Audit Probability" + param_y_path: str # e.g. "collateral_amount" + param_y_label: str # human-readable, e.g. 
"Collateral K (M$)" + config: ScenarioConfig + x_values: list[float] # ordered x-axis values + y_values: list[float] # ordered y-axis values + grid: list[list[float]] # [y_idx][x_idx] = mean compliance in [0, 1] + n_runs: int + # Short unique identifier matching SimulationRun.sim_id convention + id: str = field(default_factory=lambda: str(uuid4())[:8]) + + def compliance_at(self, x: float, y: float) -> float | None: + """Return mean compliance for an exact (x, y) cell, or None if not found.""" + try: + x_idx = self.x_values.index(x) + y_idx = self.y_values.index(y) + except ValueError: + return None + return self.grid[y_idx][x_idx] + + @property + def compliance_min(self) -> float: + """Minimum mean compliance across all grid cells.""" + return min(v for row in self.grid for v in row) + + @property + def compliance_max(self) -> float: + """Maximum mean compliance across all grid cells.""" + return max(v for row in self.grid for v in row) diff --git a/src/compute_permit_sim/schemas/sweep_params.py b/src/compute_permit_sim/schemas/sweep_params.py index ec59138..d5fa652 100644 --- a/src/compute_permit_sim/schemas/sweep_params.py +++ b/src/compute_permit_sim/schemas/sweep_params.py @@ -170,6 +170,46 @@ class SweepParam: description="Upper bound of risk appetite multiplier (>1 = risk-seeking).", category="Agents", ), + SweepParam( + path="lab.capability_value", + label="Capability Race Premium V_b", + unit="M$", + default_min=0.0, + default_max=300.0, + default_step=20.0, + description="Strategic value of model capabilities from training (arms-race premium added to gain from cheating).", + category="Agents", + ), + SweepParam( + path="lab.racing_factor", + label="Racing Factor c_r", + unit="", + default_min=0.0, + default_max=5.0, + default_step=0.25, + description="Urgency multiplier on capability value; higher = stronger competitive pressure to cheat.", + category="Agents", + ), + SweepParam( + path="lab.reputation_escalation_factor", + label="Reputation Escalation 
def run_grid_sweep(
    base_config: ScenarioConfig,
    param_x_path: str,
    param_y_path: str,
    x_values: list[float],
    y_values: list[float],
    param_x_label: str | None = None,
    param_y_label: str | None = None,
    n_runs: int = 20,
    seeds: list[int] | None = None,
) -> GridSweepResult:
    """Run a 2D joint-sensitivity sweep over two parameters.

    Each (x, y) cell is evaluated with the same list of Monte Carlo seeds, so
    that parameter variation, not sampling noise, drives differences between
    cells. Results are stored as ``grid[y_idx][x_idx] = mean_compliance``.

    Args:
        base_config: Base scenario configuration. It is stored unchanged on
            the returned result as ``config``.
        param_x_path: Dot-path for the x-axis parameter, e.g. ``"audit.base_prob"``.
        param_y_path: Dot-path for the y-axis parameter, e.g. ``"collateral_amount"``.
            Must differ from ``param_x_path``.
        x_values: Ordered, non-empty x-axis values.
        y_values: Ordered, non-empty y-axis values.
        param_x_label: Human-readable x-axis label; defaults to ``param_x_path``.
        param_y_label: Human-readable y-axis label; defaults to ``param_y_path``.
        n_runs: MC replications per cell. Ignored if ``seeds`` is provided.
        seeds: Explicit seeds; overrides ``n_runs`` if given.

    Returns:
        :class:`~compute_permit_sim.schemas.batch.GridSweepResult` with the 2D
        compliance grid and axis metadata. ``n_runs`` on the result is the
        number of seeds actually used.

    Raises:
        ValueError: If both axes refer to the same parameter path, or if
            either axis has no values.
    """
    # The y override is applied after the x override, so sweeping the same
    # path on both axes would silently discard every x value and produce a
    # grid whose columns are all identical. Reject it instead.
    if param_x_path == param_y_path:
        raise ValueError(
            f"Grid sweep needs two different parameters; got {param_x_path!r} on both axes."
        )
    # An empty axis yields an empty grid, which has no min/max compliance.
    if len(x_values) == 0 or len(y_values) == 0:
        raise ValueError("Grid sweep needs at least one value on each axis.")

    label_x = param_x_label or param_x_path
    label_y = param_y_label or param_y_path
    run_seeds = seeds if seeds is not None else list(range(n_runs))

    # grid[y_idx][x_idx] = mean compliance
    grid: list[list[float]] = []
    for y in y_values:
        row: list[float] = []
        for x in x_values:
            cfg = override_config(base_config, param_x_path, x)
            cfg = override_config(cfg, param_y_path, y)
            mc = run_monte_carlo(cfg, seeds=run_seeds)
            row.append(mc.avg_compliance.mean)
        grid.append(row)

    return GridSweepResult(
        scenario_name=base_config.name,
        param_x_path=param_x_path,
        param_x_label=label_x,
        param_y_path=param_y_path,
        param_y_label=label_y,
        config=base_config,
        # Copy the axes so later mutation of the caller's lists cannot
        # desynchronise them from the grid.
        x_values=list(x_values),
        y_values=list(y_values),
        grid=grid,
        n_runs=len(run_seeds),
    )
+ param_x_path: Dot-path registered in ``SWEEPABLE_PARAMS`` for x-axis. + param_y_path: Dot-path registered in ``SWEEPABLE_PARAMS`` for y-axis. + x_min/x_max/x_step: Override registry defaults for x-axis. + y_min/y_max/y_step: Override registry defaults for y-axis. + n_runs: MC replications per cell. + seeds: Explicit seeds (overrides n_runs if provided). + + Returns: + :class:`~compute_permit_sim.schemas.batch.GridSweepResult`. + + Raises: + KeyError: If either path is not in the registry. + """ + from compute_permit_sim.schemas.sweep_params import generate_values, get_param + + px = get_param(param_x_path) + py = get_param(param_y_path) + x_values = generate_values(px, min_val=x_min, max_val=x_max, step=x_step) + y_values = generate_values(py, min_val=y_min, max_val=y_max, step=y_step) + return run_grid_sweep( + base_config, + param_x_path=px.path, + param_y_path=py.path, + x_values=x_values, + y_values=y_values, + param_x_label=px.label, + param_y_label=py.label, + n_runs=n_runs, + seeds=seeds, + ) diff --git a/src/compute_permit_sim/vis/components/history.py b/src/compute_permit_sim/vis/components/history.py index f8b81c6..30c2fcb 100644 --- a/src/compute_permit_sim/vis/components/history.py +++ b/src/compute_permit_sim/vis/components/history.py @@ -27,12 +27,13 @@ def UnifiedHistoryList() -> None: (BatchHistoryList + RunHistoryList) pattern which caused double-nested ``run-history-compact`` for batch items and mismatched styling. """ - from compute_permit_sim.vis.state.run_state import mc_run, sweep_run + from compute_permit_sim.vis.state.run_state import grid_run, mc_run, sweep_run batch_results = session_history.batch_results.value run_history = session_history.run_history.value mc_current = mc_run.value.result sweep_current = sweep_run.value.result + grid_current = grid_run.value.result # Use Markdown for empty state — matches RunHistoryList convention and avoids # alternating root container types (Column A vs Column B) which reacton rejects. 
@@ -42,7 +43,11 @@ def UnifiedHistoryList() -> None: with solara.Column(classes=["run-history-compact"]): for result in batch_results: - is_current = (result is mc_current) or (result is sweep_current) + is_current = ( + (result is mc_current) + or (result is sweep_current) + or (result is grid_current) + ) BatchHistoryItem(result, is_current) for run in run_history: is_selected = (session_history.selected_run.value is not None) and ( diff --git a/src/compute_permit_sim/vis/components/history_items.py b/src/compute_permit_sim/vis/components/history_items.py index 2ff8eb4..b810677 100644 --- a/src/compute_permit_sim/vis/components/history_items.py +++ b/src/compute_permit_sim/vis/components/history_items.py @@ -11,7 +11,11 @@ import solara.lab from compute_permit_sim.schemas import SimulationRun -from compute_permit_sim.schemas.batch import MonteCarloResult, SweepResult +from compute_permit_sim.schemas.batch import ( + GridSweepResult, + MonteCarloResult, + SweepResult, +) from compute_permit_sim.services.config_manager import save_scenario from compute_permit_sim.vis.components.dialogs import RunConfigDialog from compute_permit_sim.vis.components.results import DownloadCSV, DownloadExcel @@ -143,12 +147,17 @@ def perform_save() -> None: @solara.component def BatchHistoryItem(result: BatchResult, is_current: bool) -> None: - """One-line history row for an MC or Sweep batch result. + """One-line history row for an MC, Sweep, or Grid Sweep batch result. - Mirrors ``RunHistoryItem`` exactly: type-icon | ⓘ | id-label | save | Excel | CSV | JSON. - The short ``result.id`` is displayed as the label; full details are in the ⓘ dialog. + Mirrors ``RunHistoryItem`` exactly: type-icon | \u24d8 | id-label | save | Excel | CSV | JSON. + The short ``result.id`` is displayed as the label; full details are in the \u24d8 dialog. 
""" - from compute_permit_sim.vis.state.run_state import RunState, mc_run, sweep_run + from compute_permit_sim.vis.state.run_state import ( + RunState, + grid_run, + mc_run, + sweep_run, + ) if isinstance(result, MonteCarloResult): type_icon = "mdi-chart-bell-curve-cumulative" @@ -181,7 +190,7 @@ def dl_excel() -> bytes | str: return export_monte_carlo_to_excel(result, output_path="") - else: # SweepResult + elif isinstance(result, SweepResult): type_icon = "mdi-trending-up" dialog_title = f"Sweep Run: {result.id}" tp = result.tipping_point() @@ -217,6 +226,42 @@ def dl_excel() -> bytes | str: return export_sweep_to_excel(result, output_path="") + else: # GridSweepResult + type_icon = "mdi-view-grid" + dialog_title = f"Grid Sweep: {result.id}" + safe_s = result.scenario_name.lower().replace(" ", "_") + safe_x = result.param_x_path.replace(".", "_") + safe_y = result.param_y_path.replace(".", "_") + batch_summary = ( + f"**{len(result.x_values)}×{len(result.y_values)} grid sweep**" + f" · {result.scenario_name} \n" + f"X: **{result.param_x_label}** \n" + f"Y: **{result.param_y_label}** \n" + f"Compliance: {result.compliance_min:.1%}–{result.compliance_max:.1%}" + ) + csv_fname = f"grid_{safe_s}_{safe_x}_x_{safe_y}_{result.id}.csv" + xlsx_fname = f"grid_{safe_s}_{safe_x}_x_{safe_y}_{result.id}.xlsx" + + def view() -> None: + session_history.selected_run.value = None # clear basic run highlight + mc_run.set(RunState[MonteCarloResult](phase="idle")) + sweep_run.set(RunState[SweepResult](phase="idle")) + grid_run.set(RunState[GridSweepResult](phase="ready", result=result)) + + def dl_csv() -> bytes | str: + from compute_permit_sim.vis.export import ( + export_grid_sweep_to_csv, # noqa: PLC0415 + ) + + return export_grid_sweep_to_csv(result, output_path="") + + def dl_excel() -> bytes | str: + from compute_permit_sim.vis.export import ( + export_grid_sweep_to_excel, # noqa: PLC0415 + ) + + return export_grid_sweep_to_excel(result, output_path="") + # use_state calls must 
be unconditional (Solara hook rules) — always before any return show_save, set_show_save = solara.use_state(False) save_name, set_save_name = solara.use_state(f"scenario_{result.id}") diff --git a/src/compute_permit_sim/vis/export.py b/src/compute_permit_sim/vis/export.py index 57213da..d78d3e5 100644 --- a/src/compute_permit_sim/vis/export.py +++ b/src/compute_permit_sim/vis/export.py @@ -21,6 +21,10 @@ Sweep: export_sweep_to_excel — Excel with Config/Sweep/Graphs export_sweep_to_csv — one row per parameter value + +Grid Sweep: + export_grid_sweep_to_excel — Excel with Config/Grid pivot/Heatmap PNG + export_grid_sweep_to_csv — long-format CSV, one row per (x, y) cell """ import io @@ -35,12 +39,13 @@ BatchColumnNames as _BCN, ) from compute_permit_sim.schemas.batch import ( - MetricStats as _MetricStats, -) -from compute_permit_sim.schemas.batch import ( + GridSweepResult, MonteCarloResult, SweepResult, ) +from compute_permit_sim.schemas.batch import ( + MetricStats as _MetricStats, +) from compute_permit_sim.schemas.columns import ColumnNames from compute_permit_sim.services.metrics import calculate_compliance from compute_permit_sim.vis.plotting import ( @@ -744,6 +749,137 @@ def export_sweep_to_csv( return output_path +def export_grid_sweep_to_csv( + result: GridSweepResult, + output_path: str | None = None, +) -> "str | bytes": + """Export a GridSweepResult to long-format CSV with one row per grid cell. + + Columns: scenario, param_x_path, param_x_value, param_y_path, + param_y_value, n_runs, compliance. + + Args: + result: A ``GridSweepResult`` instance. + output_path: ``None`` = auto-generate, ``""`` = return bytes. 
def export_grid_sweep_to_excel(
    result: GridSweepResult,
    output_path: str | None = None,
) -> "str | bytes":
    """Export a GridSweepResult to a formatted Excel workbook.

    Sheets:
        ``Config``  — base scenario configuration (only when ``result.config``
                      is not ``None``)
        ``Grid``    — pivot table: rows=y_values, cols=x_values, cells=compliance%
        ``Heatmap`` — embedded PNG of the compliance heatmap

    Args:
        result: A ``GridSweepResult`` instance.
        output_path: ``None`` = auto-generate a path under ``outputs/``,
            ``""`` = build the workbook in memory and return its bytes,
            anything else = write to that path.

    Returns:
        The workbook bytes when ``output_path`` is ``""``; otherwise the path
        the workbook was written to.
    """
    import io as _io

    import xlsxwriter as _xlsxwriter

    from compute_permit_sim.vis.plotting import plot_sweep_heatmap

    # Exactly one of ``buffer`` / ``output_path`` is the workbook destination.
    buffer: _io.BytesIO | None = _io.BytesIO() if output_path == "" else None
    if output_path is None:
        _os.makedirs("outputs", exist_ok=True)
        safe_s = result.scenario_name.lower().replace(" ", "_")
        safe_x = result.param_x_path.replace(".", "_")
        safe_y = result.param_y_path.replace(".", "_")
        output_path = f"outputs/grid_{safe_s}_{safe_x}_x_{safe_y}.xlsx"

    workbook = _xlsxwriter.Workbook(buffer if buffer is not None else output_path)
    header_fmt = workbook.add_format(
        {"bold": True, "bg_color": "#2196F3", "font_color": "white", "border": 1}
    )
    data_fmt = workbook.add_format({"border": 1})
    pct_fmt = workbook.add_format({"border": 1, "num_format": "0.0%"})

    try:
        # === Config sheet ===
        if result.config is not None:
            cfg_sheet = workbook.add_worksheet("Config")
            _write_config_sheet(cfg_sheet, result.config, header_fmt, data_fmt)

        # === Grid (pivot) sheet ===
        grid_sheet = workbook.add_worksheet("Grid")
        grid_sheet.set_column("A:A", 20)
        # Corner cell names both axes; it is written exactly once.
        grid_sheet.write(
            0, 0, f"{result.param_x_label} → / {result.param_y_label} ↓", header_fmt
        )
        # Header row: x-axis values
        for x_idx, x in enumerate(result.x_values):
            grid_sheet.write(0, x_idx + 1, x, header_fmt)
        # Data rows: y-axis value, then that row's compliance cells
        for y_idx, y in enumerate(result.y_values):
            grid_sheet.write(y_idx + 1, 0, y, data_fmt)
            for x_idx, compliance in enumerate(result.grid[y_idx]):
                grid_sheet.write(y_idx + 1, x_idx + 1, compliance, pct_fmt)

        # === Heatmap sheet ===
        heatmap_sheet = workbook.add_worksheet("Heatmap")
        fig = plot_sweep_heatmap(
            compliance_grid=result.grid,
            x_values=result.x_values,
            y_values=result.y_values,
            x_param_label=result.param_x_label,
            y_param_label=result.param_y_label,
            # plot_sweep_heatmap's default ticks format x as a percentage and
            # y as "$…M", which is only right for audit-rate × collateral.
            # Any registered parameter can be on either axis here, so use
            # neutral numeric labels; the axis labels carry the units.
            x_tick_labels=[f"{x:.4g}" for x in result.x_values],
            y_tick_labels=[f"{y:.4g}" for y in result.y_values],
            title=f"Compliance Heatmap — {result.scenario_name}",
        )
        heatmap_sheet.insert_image(
            0, 0, "heatmap.png", {"image_data": _fig_to_bytes(fig)}
        )

    finally:
        # close() is what actually serialises the workbook, so it must run
        # even if building a sheet failed.
        workbook.close()

    if buffer is not None:
        return buffer.getvalue()
    return output_path
def _run_grid_background(
    scenario_name: str,
    param_x: SweepParam,
    param_y: SweepParam,
    x_values: list[float],
    y_values: list[float],
    n_runs: int,
) -> None:
    """Run a 2D grid sweep off the event loop thread and update ``grid_run``.

    On success the result is appended to the session history, ``grid_run`` is
    set to ``phase="ready"`` with the result, and ``_grid_status`` gets a
    one-line summary. If the scenario cannot be loaded, or anything raises,
    ``grid_run`` is reset to ``phase="idle"`` and ``_grid_status`` carries the
    error text; the exception is logged with its traceback and not re-raised.

    Args:
        scenario_name: Name passed to ``_load_scenario_by_name``.
        param_x: Registry entry for the x-axis; its ``path`` and ``label`` are used.
        param_y: Registry entry for the y-axis; its ``path`` and ``label`` are used.
        x_values: Values to sweep on the x-axis.
        y_values: Values to sweep on the y-axis.
        n_runs: Monte Carlo replications per grid cell.
    """
    import logging  # noqa: PLC0415

    from compute_permit_sim.schemas.batch import GridSweepResult
    from compute_permit_sim.services.sweep import run_grid_sweep

    try:
        config = _load_scenario_by_name(scenario_name)

        if config is None:
            _grid_status.set(f"Scenario '{scenario_name}' not found.")
            grid_run.set(RunState[GridSweepResult](phase="idle"))
            return

        n_cells = len(x_values) * len(y_values)
        _grid_status.set(
            f"Grid {len(x_values)}×{len(y_values)} = {n_cells} cells × {n_runs} runs..."
        )

        result = run_grid_sweep(
            config,
            param_x_path=param_x.path,
            param_y_path=param_y.path,
            x_values=x_values,
            y_values=y_values,
            param_x_label=param_x.label,
            param_y_label=param_y.label,
            n_runs=n_runs,
        )

        # Build the summary before publishing anything: if it fails (e.g. an
        # empty grid has no min/max) the result must not end up in history
        # while the run state is rolled back to idle.
        done_message = (
            f"Done: {len(x_values)}×{len(y_values)} grid — "
            f"compliance {result.compliance_min:.1%}–{result.compliance_max:.1%}"
        )

        from compute_permit_sim.vis.state.history import (
            session_history,  # noqa: PLC0415
        )

        session_history.add_batch_result(result)
        grid_run.set(RunState[GridSweepResult](phase="ready", result=result))
        _grid_status.set(done_message)
    except Exception as e:  # noqa: BLE001
        # Thread boundary: nothing above us will report this, so keep the
        # traceback in the log and surface the message in the sidebar.
        logging.getLogger(__name__).exception(
            "Grid sweep failed for scenario %r", scenario_name
        )
        grid_run.set(RunState[GridSweepResult](phase="idle"))
        _grid_status.set(f"Error: {e}")
else "") + params_y = params_for_category(cat_y) + path_y, set_path_y = solara.use_state(params_y[0].path if params_y else "") + param_y = _PARAM_MAP.get(path_y) + min_y, set_min_y = solara.use_state(param_y.default_min if param_y else 0.0) + max_y, set_max_y = solara.use_state(param_y.default_max if param_y else 1.0) + step_y, set_step_y = solara.use_state(param_y.default_step if param_y else 0.1) + + n_runs, set_n_runs = solara.use_state(20) + + is_running = grid_run.value.is_running + status = _grid_status.value + + # Compute preview — both axes must be valid + n_pts_x, n_pts_y = 0, 0 + preview_error = "" + if param_x and step_x > 0 and min_x <= max_x: + try: + n_pts_x = len(generate_values(param_x, min_x, max_x, step_x)) + except Exception: + preview_error = "Invalid X range" + if param_y and step_y > 0 and min_y <= max_y: + try: + n_pts_y = len(generate_values(param_y, min_y, max_y, step_y)) + except Exception: + preview_error = "Invalid Y range" + + def _on_cat_x(cat: str) -> None: + set_cat_x(cat) + ps = params_for_category(cat) + if ps: + set_path_x(ps[0].path) + set_min_x(ps[0].default_min) + set_max_x(ps[0].default_max) + set_step_x(ps[0].default_step) + + def _on_path_x(label: str) -> None: + path = {p.label: p.path for p in params_for_category(cat_x)}.get(label, "") + set_path_x(path) + p = _PARAM_MAP.get(path) + if p: + set_min_x(p.default_min) + set_max_x(p.default_max) + set_step_x(p.default_step) + + def _on_cat_y(cat: str) -> None: + set_cat_y(cat) + ps = params_for_category(cat) + if ps: + set_path_y(ps[0].path) + set_min_y(ps[0].default_min) + set_max_y(ps[0].default_max) + set_step_y(ps[0].default_step) + + def _on_path_y(label: str) -> None: + path = {p.label: p.path for p in params_for_category(cat_y)}.get(label, "") + set_path_y(path) + p = _PARAM_MAP.get(path) + if p: + set_min_y(p.default_min) + set_max_y(p.default_max) + set_step_y(p.default_step) + + def on_run() -> None: + if not param_x or not param_y: + return + try: + x_vals = 
generate_values(param_x, min_x, max_x, step_x) + y_vals = generate_values(param_y, min_y, max_y, step_y) + except ValueError: + _grid_status.set("Invalid range — check min/max/step for both axes.") + return + from compute_permit_sim.schemas.batch import ( # noqa: PLC0415 + GridSweepResult, + MonteCarloResult, + SweepResult, + ) + + grid_run.set(RunState[GridSweepResult](phase="running")) + mc_run.set(RunState[MonteCarloResult](phase="idle")) + sweep_run.set(RunState[SweepResult](phase="idle")) + _grid_status.set("Starting...") + threading.Thread( + target=_run_grid_background, + args=(selected_scenario, param_x, param_y, x_vals, y_vals, n_runs), + daemon=True, + ).start() + + with solara.Card(title="Grid Sweep"): + if not scenario_names: + with solara.Column(classes=["sidebar-empty-text"]): + solara.Text("No scenarios found.") + return + + solara.Select( + label="Scenario", + values=scenario_names, + value=selected_scenario, + on_value=set_selected_scenario, + dense=True, + ) + + # ── X-axis ────────────────────────────────────────────────────────── + with solara.Column(classes=["sidebar-hint-text"]): + solara.Text("X-axis parameter") + with solara.Row(style="gap: 4px;"): + solara.Select( + label="Category", + values=all_categories, + value=cat_x, + on_value=_on_cat_x, + dense=True, + ) + labels_x = [p.label for p in params_for_category(cat_x)] + solara.Select( + label="Parameter", + values=labels_x, + value=param_x.label if param_x else (labels_x[0] if labels_x else ""), + on_value=_on_path_x, + dense=True, + ) + with solara.Row(style="gap: 4px;"): + unit_x = param_x.unit if param_x else "" + solara.InputFloat(label=f"Min ({unit_x})", value=min_x, on_value=set_min_x) + solara.InputFloat(label=f"Max ({unit_x})", value=max_x, on_value=set_max_x) + solara.InputFloat(label="Step", value=step_x, on_value=set_step_x) + + # ── Y-axis ────────────────────────────────────────────────────────── + with solara.Column(classes=["sidebar-hint-text"]): + solara.Text("Y-axis 
parameter") + with solara.Row(style="gap: 4px;"): + solara.Select( + label="Category", + values=all_categories, + value=cat_y, + on_value=_on_cat_y, + dense=True, + ) + labels_y = [p.label for p in params_for_category(cat_y)] + solara.Select( + label="Parameter", + values=labels_y, + value=param_y.label if param_y else (labels_y[0] if labels_y else ""), + on_value=_on_path_y, + dense=True, + ) + with solara.Row(style="gap: 4px;"): + unit_y = param_y.unit if param_y else "" + solara.InputFloat(label=f"Min ({unit_y})", value=min_y, on_value=set_min_y) + solara.InputFloat(label=f"Max ({unit_y})", value=max_y, on_value=set_max_y) + solara.InputFloat(label="Step", value=step_y, on_value=set_step_y) + + # ── Replications + simulation count preview ────────────────────────── + solara.SliderInt( + label=f"Runs per cell: {n_runs}", + value=n_runs, + on_value=set_n_runs, + min=5, + max=100, + step=5, + ) + if preview_error: + with solara.Column(classes=["sidebar-error-text"]): + solara.Text(preview_error) + elif n_pts_x > 0 and n_pts_y > 0: + total = n_pts_x * n_pts_y * n_runs + with solara.Column(classes=["sidebar-hint-text"]): + solara.Text( + f"{n_pts_x}\u00d7{n_pts_y} = {n_pts_x * n_pts_y} cells" + f" \u00d7 {n_runs} = {total:,} total simulations" + ) + + solara.Button( + "Running..." 
@solara.component
def _GridSweepResultsView() -> Any:
    """Render the result held in ``grid_run``: summary, heatmap, per-cell table.

    Shows a placeholder text and returns early when ``grid_run`` has no
    result. Otherwise renders three cards: a summary row of metric chips with
    the download actions (CSV, Excel, config JSON), the compliance heatmap,
    and a Markdown table with one row per y value and one column per x value.
    """
    from compute_permit_sim.vis.export import (
        export_grid_sweep_to_csv,
        export_grid_sweep_to_excel,
    )
    from compute_permit_sim.vis.plotting import plot_sweep_heatmap

    result = grid_run.value.result
    if result is None:
        solara.Text("No grid sweep result to display.")
        return

    # Filename-safe fragments shared by every download below.
    safe_s = result.scenario_name.lower().replace(" ", "_")
    safe_x = result.param_x_path.replace(".", "_")
    safe_y = result.param_y_path.replace(".", "_")

    fig = plot_sweep_heatmap(
        compliance_grid=result.grid,
        x_values=result.x_values,
        y_values=result.y_values,
        x_param_label=result.param_x_label,
        y_param_label=result.param_y_label,
        # The plotting defaults format x ticks as percentages and y ticks as
        # "$…M", which mislabels most parameter pairs. Use the same neutral
        # ".4g" formatting as the per-cell table below so the two agree.
        x_tick_labels=[f"{x:.4g}" for x in result.x_values],
        y_tick_labels=[f"{y:.4g}" for y in result.y_values],
        title=f"Compliance Heatmap — {result.scenario_name}",
    )

    with solara.Column(classes=["analysis-panel"]):
        with solara.Card("Summary", style="margin-bottom: 12px;"):
            with solara.Row(
                style="align-items: center; justify-content: space-between; flex-wrap: wrap;"
            ):
                with solara.Row(style="gap: 24px; flex-wrap: wrap; flex: 1;"):
                    MetricChip("Scenario", result.scenario_name)
                    MetricChip("X-axis", result.param_x_label)
                    MetricChip("Y-axis", result.param_y_label)
                    MetricChip(
                        "Grid size",
                        f"{len(result.x_values)}\u00d7{len(result.y_values)}",
                    )
                    MetricChip("Seeds per cell", str(result.n_runs))
                    MetricChip(
                        "Compliance range",
                        f"{result.compliance_min:.1%}\u2013{result.compliance_max:.1%}",
                    )

                with ResultsActions():
                    RunConfigDialog(
                        config=result.config,
                        title=f"Grid Sweep: {result.id}",
                        batch_summary=(
                            f"**{len(result.x_values)}\u00d7{len(result.y_values)} grid sweep**"
                            f" \u00b7 {result.scenario_name} \n"
                            f"X: **{result.param_x_label}** \n"
                            f"Y: **{result.param_y_label}** \n"
                            f"Compliance range: "
                            f"{result.compliance_min:.1%}\u2013{result.compliance_max:.1%}"
                        ),
                    )
                    # ``r=result`` binds the current result as a default so
                    # each callback exports the result it was rendered with.
                    DownloadCSV(
                        "Download grid CSV",
                        lambda r=result: export_grid_sweep_to_csv(  # type: ignore[misc]
                            r, output_path=""
                        ),
                        f"grid_{safe_s}_{safe_x}_x_{safe_y}.csv",
                    )
                    DownloadExcel(
                        "Download Excel workbook",
                        lambda r=result: export_grid_sweep_to_excel(  # type: ignore[misc]
                            r, output_path=""
                        ),
                        f"grid_{safe_s}_{safe_x}_x_{safe_y}.xlsx",
                    )
                    DownloadJSON(
                        "Download config JSON (for reproducibility)",
                        lambda r=result: r.config.model_dump_json(  # type: ignore[misc]
                            indent=2
                        ).encode("utf-8"),
                        f"grid_config_{safe_s}.json",
                    )

        with solara.Card("Results", style="margin-top: 0;"):
            ExpandableChart(
                fig,
                download_filename=f"grid_{safe_s}_{safe_x}_x_{safe_y}.png",
            )

        with solara.Card("Per-Cell Compliance", style="margin-top: 0;"):
            # Header: x-axis label in the corner, then the x-axis values
            x_hdrs = [result.param_x_label] + [f"{x:.4g}" for x in result.x_values]
            header = "| " + " | ".join(x_hdrs) + " |"
            sep = "|" + "|".join(["---"] * len(x_hdrs)) + "|"
            # One row per y value — compliance as percentage
            data_rows = []
            for y_idx, y in enumerate(result.y_values):
                cells = [f"{y:.4g}"] + [
                    f"{result.grid[y_idx][x_idx]:.1%}"
                    for x_idx in range(len(result.x_values))
                ]
                data_rows.append("| " + " | ".join(cells) + " |")
            y_label_row = f"*Y: {result.param_y_label}*"
            solara.Markdown(y_label_row)
            solara.Markdown("\n".join([header, sep] + data_rows))
""" mc = mc_run.value sw = sweep_run.value + gr = grid_run.value if mc.is_ready and mc.result is not None: _MCResultsView() elif sw.is_ready and sw.result is not None: _SweepResultsView() + elif gr.is_ready and gr.result is not None: + _GridSweepResultsView() else: solara.Text("No batch results to display.") diff --git a/src/compute_permit_sim/vis/plotting.py b/src/compute_permit_sim/vis/plotting.py index ba5d4cc..0084963 100644 --- a/src/compute_permit_sim/vis/plotting.py +++ b/src/compute_permit_sim/vis/plotting.py @@ -15,6 +15,7 @@ import textwrap import matplotlib +import numpy as np import pandas as pd from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -642,15 +643,22 @@ def plot_mc_payoff_comparison(result) -> "Figure": return fig -def plot_sweep_curve(result, metric: str = "avg_compliance") -> "Figure": +def plot_sweep_curve( + result, + metric: str = "avg_compliance", + reference_lines: list[tuple[float, str, str]] | None = None, +) -> "Figure": """Plot a 1D parameter sweep curve: param value on X, metric on Y. Renders the mean as a line with ± 1 SD shading. Annotates the tipping - point (first value where compliance ≥ 95 %) if present. + point (first value where compliance ≥ 95 %) if present. Args: result: A ``SweepResult`` instance. metric: Attribute name on ``MonteCarloResult`` to plot (default: avg_compliance). + reference_lines: Optional list of ``(x_value, label, color)`` tuples + for annotating known calibration points (e.g. scenario pa values). + Each draws a vertical dotted line with a small text label. Returns: Matplotlib Figure. 
@@ -670,19 +678,7 @@ def plot_sweep_curve(result, metric: str = "avg_compliance") -> "Figure": color = CHART_COLOR_MAP.get("compliant", "#42A5F5") ax.plot(xs, means, color=color, linewidth=2, marker="o", markersize=5, label="Mean") - ax.fill_between(xs, lows, highs, alpha=0.18, color=color, label="± 1 SD") - - tp = result.tipping_point(threshold=0.95) - if tp is not None: - ax.axvline(tp, color="#FFA726", linewidth=1.5, linestyle="--") - ax.annotate( - f"Tipping ≈ {tp:.3f}", - xy=(tp, 0.95), - xytext=(tp, 0.70), - fontsize=8, - color="#FFA726", - arrowprops={"arrowstyle": "->", "color": "#FFA726"}, - ) + ax.fill_between(xs, lows, highs, alpha=0.18, color=color, label="\u00b1 1 SD") is_compliance = "compliance" in metric if is_compliance: @@ -694,10 +690,130 @@ def plot_sweep_curve(result, metric: str = "avg_compliance") -> "Figure": ax.set_xlabel(_wrap(result.param_label, width=40)) ax.set_title( - _wrap(f"Sensitivity: {result.param_label} — {result.scenario_name}"), + _wrap(f"Sensitivity: {result.param_label} \u2014 {result.scenario_name}"), fontsize=11, fontweight="600", ) ax.legend(fontsize=9) fig.tight_layout() return fig + + +def plot_sweep_heatmap( + compliance_grid: list[list[float]], + x_values: list[float], + y_values: list[float], + x_param_label: str = "Base Audit Rate \u03c0\u2080", + y_param_label: str = "Collateral K (M$)", + x_tick_labels: list[str] | None = None, + y_tick_labels: list[str] | None = None, + title: str | None = None, + highlight: tuple[float, float] | None = None, + highlight_label: str = "Calibration", +) -> "Figure": + """Heatmap of average compliance over a 2D parameter grid. + + Renders each cell with its mean compliance rate as a shaded colour and an + inline percentage annotation. Designed for joint-sensitivity analysis + (e.g. pa x K grid) and re-usable for any two-parameter sweep. + + Args: + compliance_grid: 2D list ``[y_idx][x_idx]`` of mean compliance fractions. + x_values: Parameter values along the x-axis (e.g. 
audit rates). + y_values: Parameter values along the y-axis (e.g. collateral amounts). + x_param_label: Human-readable x-axis label. + y_param_label: Human-readable y-axis label. + x_tick_labels: Optional custom tick labels for x-axis; defaults to + auto-formatted ``x_values`` as percentages. + y_tick_labels: Optional custom tick labels for y-axis; defaults to + auto-formatted ``y_values`` as dollar amounts. + title: Optional chart title. + highlight: Optional ``(x_val, y_val)`` calibration point to outline + with a red border. + highlight_label: Label shown adjacent to the highlighted cell. + + Returns: + Matplotlib Figure. + """ + import matplotlib.patches as mpatches + + fig, ax = create_figure(figsize=(7, 5)) + data = np.array(compliance_grid) # shape: (n_y, n_x) + + im = ax.imshow( + data, + aspect="auto", + origin="lower", + cmap="Blues", + vmin=0.0, + vmax=1.0, + interpolation="nearest", + ) + + # Colorbar with shared percent formatter + cbar = fig.colorbar( + im, ax=ax, format=matplotlib.ticker.PercentFormatter(xmax=1), shrink=0.85 + ) + cbar.set_label("Mean Compliance Rate", fontsize=10) + + # Tick labels — default to % for x (audit rate) and $M for y (collateral) + xt_labels = x_tick_labels or [f"{v:.0%}" for v in x_values] + yt_labels = y_tick_labels or [f"${v:.0f}M" for v in y_values] + ax.set_xticks(range(len(x_values))) + ax.set_xticklabels(xt_labels, fontsize=8, rotation=45, ha="right") + ax.set_yticks(range(len(y_values))) + ax.set_yticklabels(yt_labels, fontsize=8) + + # Per-cell compliance annotation + for yi in range(len(y_values)): + for xi in range(len(x_values)): + val = float(data[yi, xi]) + text_color = "white" if val > 0.65 else "#333333" + ax.text( + xi, + yi, + f"{val:.0%}", + ha="center", + va="center", + fontsize=7, + color=text_color, + fontweight="500", + ) + + # Optional highlight: red border around a calibration cell + if highlight is not None: + hx_val, hy_val = highlight + hx_idx = min(range(len(x_values)), key=lambda i: 
abs(x_values[i] - hx_val)) + hy_idx = min(range(len(y_values)), key=lambda i: abs(y_values[i] - hy_val)) + rect = mpatches.FancyBboxPatch( + (hx_idx - 0.45, hy_idx - 0.45), + 0.9, + 0.9, + boxstyle="square,pad=0", + linewidth=2.5, + edgecolor=CHART_COLOR_MAP.get("violator", "#EF5350"), + facecolor="none", + zorder=3, + ) + ax.add_patch(rect) + ax.text( + hx_idx, + hy_idx + 0.52, + highlight_label, + ha="center", + va="bottom", + fontsize=7, + color=CHART_COLOR_MAP.get("violator", "#EF5350"), + fontweight="bold", + zorder=4, + ) + + ax.set_xlabel(_wrap(x_param_label, width=40), fontsize=11, fontweight="500") + ax.set_ylabel(_wrap(y_param_label, width=30), fontsize=11, fontweight="500") + if title: + ax.set_title(_wrap(title), fontsize=11, fontweight="600") + + # Suppress grid — imshow cells provide visual separation + ax.grid(False) + fig.tight_layout() + return fig diff --git a/src/compute_permit_sim/vis/state/history.py b/src/compute_permit_sim/vis/state/history.py index 30e5c85..fd91502 100644 --- a/src/compute_permit_sim/vis/state/history.py +++ b/src/compute_permit_sim/vis/state/history.py @@ -5,10 +5,14 @@ import solara from compute_permit_sim.schemas import SimulationRun -from compute_permit_sim.schemas.batch import MonteCarloResult, SweepResult - -# Union type for batch results — MC aggregate or sweep aggregate. -BatchResult = MonteCarloResult | SweepResult +from compute_permit_sim.schemas.batch import ( + GridSweepResult, + MonteCarloResult, + SweepResult, +) + +# Union type for batch results — MC aggregate, 1D sweep, or 2D grid sweep. 
+BatchResult = MonteCarloResult | SweepResult | GridSweepResult class SessionHistory: diff --git a/src/compute_permit_sim/vis/state/run_state.py b/src/compute_permit_sim/vis/state/run_state.py index aad1c45..126fe28 100644 --- a/src/compute_permit_sim/vis/state/run_state.py +++ b/src/compute_permit_sim/vis/state/run_state.py @@ -25,7 +25,11 @@ from pydantic import BaseModel, ConfigDict from compute_permit_sim.schemas import SimulationRun -from compute_permit_sim.schemas.batch import MonteCarloResult, SweepResult +from compute_permit_sim.schemas.batch import ( + GridSweepResult, + MonteCarloResult, + SweepResult, +) T = TypeVar("T") @@ -69,3 +73,8 @@ def is_ready(self) -> bool: sweep_run: solara.Reactive[RunState[SweepResult]] = solara.reactive( RunState[SweepResult]() ) + +#: 2D grid sweep batch run state +grid_run: solara.Reactive[RunState[GridSweepResult]] = solara.reactive( + RunState[GridSweepResult]() +) diff --git a/tests/factories.py b/tests/factories.py index 6ba5739..804ff05 100644 --- a/tests/factories.py +++ b/tests/factories.py @@ -1,6 +1,11 @@ """Test data factories for generating valid schema objects.""" -from typing import Any +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from compute_permit_sim.schemas.batch import GridSweepResult from compute_permit_sim.schemas import ( AgentSnapshot, @@ -60,3 +65,32 @@ def create_scenario_config( } data = {**defaults, **kwargs} return ScenarioConfig(name=name, **data) + + +def create_grid_sweep_result( + n_x: int = 3, + n_y: int = 2, + scenario_name: str = "Test Scenario", +) -> "GridSweepResult": + """Create a minimal GridSweepResult for testing — no simulation run needed.""" + from compute_permit_sim.schemas.batch import GridSweepResult + + x_values = [float(i) * 0.1 for i in range(1, n_x + 1)] + y_values = [float(j) * 10.0 for j in range(1, n_y + 1)] + # grid[y_idx][x_idx] = synthetic compliance value in [0, 1] + grid = [ + [float(y_idx * n_x + x_idx) / (n_x 
* n_y) for x_idx in range(n_x)] + for y_idx in range(n_y) + ] + return GridSweepResult( + scenario_name=scenario_name, + param_x_path="audit.base_prob", + param_x_label="Base Audit Probability", + param_y_path="collateral_amount", + param_y_label="Collateral K (M$)", + config=create_scenario_config(name=scenario_name), + x_values=x_values, + y_values=y_values, + grid=grid, + n_runs=5, + ) diff --git a/tests/services/test_sweep.py b/tests/services/test_sweep.py index 8597bfb..3f7dd26 100644 --- a/tests/services/test_sweep.py +++ b/tests/services/test_sweep.py @@ -86,3 +86,103 @@ def test_default_param_label(self) -> None: cfg = self._base() result = run_sweep(cfg, "audit.base_prob", [0.1], n_runs=2) assert result.param_label == "audit.base_prob" + + +class TestRunGridSweep: + def _base(self) -> ScenarioConfig: + return ScenarioConfig(n_agents=4, steps=3) + + def test_grid_shape(self) -> None: + from compute_permit_sim.services.sweep import run_grid_sweep + + x_values = [0.05, 0.10, 0.15] + y_values = [0.0, 10.0] + result = run_grid_sweep( + self._base(), + "audit.base_prob", + "collateral_amount", + x_values, + y_values, + n_runs=2, + ) + assert len(result.grid) == len(y_values) + assert all(len(row) == len(x_values) for row in result.grid) + + def test_grid_values_in_range(self) -> None: + from compute_permit_sim.services.sweep import run_grid_sweep + + result = run_grid_sweep( + self._base(), + "audit.base_prob", + "collateral_amount", + [0.05, 0.20], + [0.0, 5.0], + n_runs=2, + ) + for row in result.grid: + for v in row: + assert 0.0 <= v <= 1.0 + + def test_metadata(self) -> None: + from compute_permit_sim.services.sweep import run_grid_sweep + + cfg = ScenarioConfig(name="GridTest", n_agents=2, steps=2) + result = run_grid_sweep( + cfg, + "audit.base_prob", + "collateral_amount", + [0.1], + [0.0], + param_x_label="X Label", + param_y_label="Y Label", + n_runs=2, + ) + assert result.scenario_name == "GridTest" + assert result.param_x_path == "audit.base_prob" + 
assert result.param_y_path == "collateral_amount" + assert result.param_x_label == "X Label" + assert result.param_y_label == "Y Label" + assert result.n_runs == 2 + + def test_compliance_at(self) -> None: + from compute_permit_sim.services.sweep import run_grid_sweep + + x_vals = [0.05, 0.20] + y_vals = [0.0, 10.0] + result = run_grid_sweep( + self._base(), + "audit.base_prob", + "collateral_amount", + x_vals, + y_vals, + n_runs=2, + ) + for x in x_vals: + for y in y_vals: + val = result.compliance_at(x, y) + assert val is not None + assert 0.0 <= val <= 1.0 + # Non-existent cell returns None + assert result.compliance_at(0.99, 99.0) is None + + def test_reproducible(self) -> None: + from compute_permit_sim.services.sweep import run_grid_sweep + + seeds = [0, 1, 2] + r1 = run_grid_sweep( + self._base(), + "audit.base_prob", + "collateral_amount", + [0.05], + [0.0], + seeds=seeds, + ) + r2 = run_grid_sweep( + self._base(), + "audit.base_prob", + "collateral_amount", + [0.05], + [0.0], + seeds=seeds, + ) + assert abs(r1.grid[0][0] - r2.grid[0][0]) < 1e-10 diff --git a/tests/vis/test_export.py b/tests/vis/test_export.py index 2870237..5395d12 100644 --- a/tests/vis/test_export.py +++ b/tests/vis/test_export.py @@ -109,3 +109,51 @@ def test_export_run_to_excel_creates_file(sample_run: SimulationRun) -> None: # Header row is parsed, we expect 2 agents assert len(df_agents) == 2 assert "Agent's base economic value (v_i)" in df_agents.columns + + +# --------------------------------------------------------------------------- +# Grid sweep export tests +# --------------------------------------------------------------------------- + + +def test_export_grid_sweep_to_csv_bytes() -> None: + """CSV export returns bytes with n_x * n_y rows and expected columns.""" + from compute_permit_sim.vis.export import export_grid_sweep_to_csv + from tests.factories import create_grid_sweep_result + + n_x, n_y = 3, 2 + result = create_grid_sweep_result(n_x=n_x, n_y=n_y) + csv_bytes = 
export_grid_sweep_to_csv(result, output_path="") + assert isinstance(csv_bytes, bytes) + + import io + + df = pd.read_csv(io.BytesIO(csv_bytes)) + assert len(df) == n_x * n_y + required_cols = { + "param_x_path", + "param_x_value", + "param_y_path", + "param_y_value", + "n_runs", + "compliance_rate", + } + assert required_cols.issubset(set(df.columns)) + + +def test_export_grid_sweep_to_excel_bytes() -> None: + """Excel export returns non-empty bytes with Config, Grid, and Heatmap sheets.""" + from compute_permit_sim.vis.export import export_grid_sweep_to_excel + from tests.factories import create_grid_sweep_result + + result = create_grid_sweep_result(n_x=2, n_y=2) + xlsx_bytes = export_grid_sweep_to_excel(result, output_path="") + assert isinstance(xlsx_bytes, bytes) + assert len(xlsx_bytes) > 0 + + import io + + with pd.ExcelFile(io.BytesIO(xlsx_bytes)) as xl: + assert "Config" in xl.sheet_names + assert "Grid" in xl.sheet_names + assert "Heatmap" in xl.sheet_names From 53eccf6f05c2dcf47edf1fd07c25fec6121934e3 Mon Sep 17 00:00:00 2001 From: Josh Tuffy Date: Sun, 15 Mar 2026 20:29:28 -0400 Subject: [PATCH 2/6] update --- src/compute_permit_sim/vis/plotting.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/compute_permit_sim/vis/plotting.py b/src/compute_permit_sim/vis/plotting.py index 0084963..d0ca874 100644 --- a/src/compute_permit_sim/vis/plotting.py +++ b/src/compute_permit_sim/vis/plotting.py @@ -817,3 +817,18 @@ def plot_sweep_heatmap( ax.grid(False) fig.tight_layout() return fig + + +def save_figure(fig: Figure, path: str, dpi: int = 150) -> None: + """Save a Figure to *path* using canonical export settings. + + Single source of truth for dpi and bbox behaviour across all scripts and + agent_workspace callers. Never call ``fig.savefig(...)`` directly in + workspace scripts — use this instead. + + Args: + fig: A ``matplotlib.figure.Figure`` returned by any plotting function. + path: Destination file path (PNG recommended). 
+ dpi: Resolution; default 150 for paper-quality output. + """ + fig.savefig(path, dpi=dpi, bbox_inches="tight") From 728492e4f65e92263f7b807b5517eea2f2016eea Mon Sep 17 00:00:00 2001 From: Josh Tuffy Date: Tue, 17 Mar 2026 18:35:58 -0400 Subject: [PATCH 3/6] fix scenarios --- scenarios/basic/scenario_2_strict.json | 5 +++-- scenarios/basic/scenario_3_smart.json | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/scenarios/basic/scenario_2_strict.json b/scenarios/basic/scenario_2_strict.json index 9375a1f..d2bb04d 100644 --- a/scenarios/basic/scenario_2_strict.json +++ b/scenarios/basic/scenario_2_strict.json @@ -12,9 +12,10 @@ }, "lab": { "capability_value": 40.0, - "racing_factor": 2.0 + "racing_factor": 2.0, + "audit_coefficient": 0.1 }, - "collateral_amount": 100.0, + "collateral_amount": 15.75, "market": { "fixed_price": 70.0 } diff --git a/scenarios/basic/scenario_3_smart.json b/scenarios/basic/scenario_3_smart.json index 12617d3..797c393 100644 --- a/scenarios/basic/scenario_3_smart.json +++ b/scenarios/basic/scenario_3_smart.json @@ -4,10 +4,13 @@ "steps": 10, "n_agents": 20, "audit": { - "base_prob": 0.2, + "base_prob": 0.1, "monitoring_prob": 0.2, "signal_dependent": true }, + "lab": { + "audit_coefficient": 0.5 + }, "collateral_amount": 15.75, "market": { "fixed_price": 2.0, From 33e51739ed57a8e8480ac09176f02df4142252fe Mon Sep 17 00:00:00 2001 From: Josh Tuffy Date: Tue, 17 Mar 2026 21:16:44 -0400 Subject: [PATCH 4/6] code review notes --- pyproject.toml | 1 + src/compute_permit_sim/core/market.py | 19 +++++++++++-- src/compute_permit_sim/schemas/batch.py | 16 +++++------ src/compute_permit_sim/services/mesa_model.py | 4 ++- .../services/monte_carlo.py | 22 +++++++-------- src/compute_permit_sim/vis/export.py | 28 +++++++++---------- .../vis/panels/batch_results.py | 20 ++++++------- 7 files changed, 63 insertions(+), 47 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 01d9a80..742d309 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -51,3 +51,4 @@ python_files = ["test_*.py"] ignore_missing_imports = true check_untyped_defs = true plugins = ["pydantic.mypy"] +exclude = ["agent_workspace"] diff --git a/src/compute_permit_sim/core/market.py b/src/compute_permit_sim/core/market.py index d3cd15f..62884e8 100644 --- a/src/compute_permit_sim/core/market.py +++ b/src/compute_permit_sim/core/market.py @@ -16,18 +16,28 @@ class SimpleClearingMarket: Attributes: max_supply: Total permits available (Q). current_price: The most recent clearing price. + _rng: Seeded RNG for deterministic permit lottery (fixed-price mode). """ - def __init__(self, permit_cap: float, fixed_price: float | None = None) -> None: + def __init__( + self, + permit_cap: float, + fixed_price: float | None = None, + rng: random.Random | None = None, + ) -> None: """ Args: permit_cap: Total permits available (Q). fixed_price: Optional fixed price; if set, all qualifying bidders pay this price instead of the auction-cleared rate. + rng: Seeded RNG for reproducible permit lottery in fixed-price + over-subscription. Defaults to the global random module when + None — callers should always pass the model-level RNG. """ self.max_supply: float = permit_cap self.current_price: float = 0.0 self.fixed_price: float | None = fixed_price + self._rng: random.Random | None = rng def set_fixed_price(self, price: float) -> None: """Set a fixed price for the market. @@ -125,8 +135,11 @@ def allocate( for lab_id, qty in qualifying: allocations[lab_id] = qty else: - # Over-subscribed: randomly sample up to permit_cap units - winners = random.sample(fp_units, available) + # Over-subscribed: randomly sample up to permit_cap units. + # Use the model-level RNG for reproducibility; fall back to + # global random only in non-Mesa (standalone test) contexts. 
+ _rng = self._rng if self._rng is not None else random + winners = _rng.sample(fp_units, available) for lab_id in winners: allocations[lab_id] += 1 diff --git a/src/compute_permit_sim/schemas/batch.py b/src/compute_permit_sim/schemas/batch.py index 11cd984..9e4f8b8 100644 --- a/src/compute_permit_sim/schemas/batch.py +++ b/src/compute_permit_sim/schemas/batch.py @@ -64,10 +64,10 @@ class BatchColumnNames: AUDIT_RATE = "audit_rate" AUDIT_RATE_MEAN = "audit_rate_mean" AUDIT_RATE_STD = "audit_rate_std" - FALSE_POSITIVE_RATE_MEAN = "false_positive_rate_mean" - FALSE_POSITIVE_RATE_STD = "false_positive_rate_std" - DETECTION_RATE_MEAN = "detection_rate_mean" - DETECTION_RATE_STD = "detection_rate_std" + COMPLIANT_AUDIT_FRACTION_MEAN = "compliant_audit_fraction_mean" + COMPLIANT_AUDIT_FRACTION_STD = "compliant_audit_fraction_std" + CATCH_RATE_MEAN = "catch_rate_mean" + CATCH_RATE_STD = "catch_rate_std" @dataclass(frozen=True) @@ -87,8 +87,8 @@ class PerSeedResult: avg_payoff_compliant: float # NaN if no compliant labs avg_payoff_violator: float # NaN if no violators audit_rate: float - false_positive_rate: float - detection_rate: float # NaN if no audited violators + compliant_audit_fraction: float + catch_rate: float # NaN if no audited violators @dataclass(frozen=True) @@ -167,8 +167,8 @@ class MonteCarloResult: # --- Audit burden --- audit_rate: MetricStats # audits / total lab-steps - false_positive_rate: MetricStats # audits on compliant / total audits - detection_rate: MetricStats # caught / audits on violators + compliant_audit_fraction: MetricStats # audits on compliant / total audits + catch_rate: MetricStats # caught / audits on violators # --- Raw per-seed data (optional, set store_raw=True in run_monte_carlo) --- raw_seeds: list[PerSeedResult] = field(default_factory=list) diff --git a/src/compute_permit_sim/services/mesa_model.py b/src/compute_permit_sim/services/mesa_model.py index 4abe0ba..a96dc27 100644 --- a/src/compute_permit_sim/services/mesa_model.py 
+++ b/src/compute_permit_sim/services/mesa_model.py @@ -88,7 +88,9 @@ def __init__(self, config: ScenarioConfig | None = None, **kwargs) -> None: self.config = config self.running = True - self.market = SimpleClearingMarket(permit_cap=config.market.permit_cap) + self.market = SimpleClearingMarket( + permit_cap=config.market.permit_cap, rng=self.random + ) if config.market.fixed_price is not None: self.market.set_fixed_price(config.market.fixed_price) self.auditor = Auditor(config.audit, rng=self.random) diff --git a/src/compute_permit_sim/services/monte_carlo.py b/src/compute_permit_sim/services/monte_carlo.py index 555994c..3a88851 100644 --- a/src/compute_permit_sim/services/monte_carlo.py +++ b/src/compute_permit_sim/services/monte_carlo.py @@ -48,8 +48,8 @@ class _RunResult(NamedTuple): # Audit burden audit_rate: float - false_positive_rate: float - detection_rate: float # NaN if 0 audited violators + compliant_audit_fraction: float + catch_rate: float # NaN if 0 audited violators def _run_once(config: ScenarioConfig, seed: int) -> _RunResult: @@ -135,10 +135,10 @@ def _run_once(config: ScenarioConfig, seed: int) -> _RunResult: avg_payoff_compliant=avg_payoff_compliant, avg_payoff_violator=avg_payoff_violator, audit_rate=total_audits / total_lab_steps if total_lab_steps else 0.0, - false_positive_rate=( + compliant_audit_fraction=( audits_on_compliant / total_audits if total_audits else 0.0 ), - detection_rate=( + catch_rate=( violations_caught / audits_on_violators if audits_on_violators else float("nan") @@ -269,14 +269,14 @@ def run_monte_carlo( else MetricStats.nan() ), audit_rate=MetricStats.from_values([r.audit_rate for r in raw]), - false_positive_rate=MetricStats.from_values( - [r.false_positive_rate for r in raw] + compliant_audit_fraction=MetricStats.from_values( + [r.compliant_audit_fraction for r in raw] ), - detection_rate=( + catch_rate=( MetricStats.from_values( - [r.detection_rate for r in raw if not math.isnan(r.detection_rate)] + [r.catch_rate 
for r in raw if not math.isnan(r.catch_rate)] ) - if any(not math.isnan(r.detection_rate) for r in raw) + if any(not math.isnan(r.catch_rate) for r in raw) else MetricStats.nan() ), raw_seeds=[ @@ -289,8 +289,8 @@ def run_monte_carlo( avg_payoff_compliant=r.avg_payoff_compliant, avg_payoff_violator=r.avg_payoff_violator, audit_rate=r.audit_rate, - false_positive_rate=r.false_positive_rate, - detection_rate=r.detection_rate, + compliant_audit_fraction=r.compliant_audit_fraction, + catch_rate=r.catch_rate, ) for s, r in zip(run_seeds, raw) ] diff --git a/src/compute_permit_sim/vis/export.py b/src/compute_permit_sim/vis/export.py index d78d3e5..ab18f3f 100644 --- a/src/compute_permit_sim/vis/export.py +++ b/src/compute_permit_sim/vis/export.py @@ -534,10 +534,10 @@ def export_monte_carlo_to_csv( _BCN.PAYOFF_VIOLATOR_STD: r.payoff_violator.std, _BCN.AUDIT_RATE_MEAN: r.audit_rate.mean, _BCN.AUDIT_RATE_STD: r.audit_rate.std, - _BCN.FALSE_POSITIVE_RATE_MEAN: r.false_positive_rate.mean, - _BCN.FALSE_POSITIVE_RATE_STD: r.false_positive_rate.std, - _BCN.DETECTION_RATE_MEAN: r.detection_rate.mean, - _BCN.DETECTION_RATE_STD: r.detection_rate.std, + _BCN.COMPLIANT_AUDIT_FRACTION_MEAN: r.compliant_audit_fraction.mean, + _BCN.COMPLIANT_AUDIT_FRACTION_STD: r.compliant_audit_fraction.std, + _BCN.CATCH_RATE_MEAN: r.catch_rate.mean, + _BCN.CATCH_RATE_STD: r.catch_rate.std, } for r in results ] @@ -585,8 +585,8 @@ def export_mc_per_seed_to_csv( _BCN.PAYOFF_COMPLIANT_MEAN: s.avg_payoff_compliant, _BCN.PAYOFF_VIOLATOR_MEAN: s.avg_payoff_violator, _BCN.AUDIT_RATE_MEAN: s.audit_rate, - _BCN.FALSE_POSITIVE_RATE_MEAN: s.false_positive_rate, - _BCN.DETECTION_RATE_MEAN: s.detection_rate, + _BCN.COMPLIANT_AUDIT_FRACTION_MEAN: s.compliant_audit_fraction, + _BCN.CATCH_RATE_MEAN: s.catch_rate, } for s in result.raw_seeds ] @@ -731,8 +731,8 @@ def export_sweep_to_csv( _BCN.PAYOFF_VIOLATOR_MEAN: pt.result.payoff_violator.mean, _BCN.AUDIT_RATE_MEAN: pt.result.audit_rate.mean, _BCN.AUDIT_RATE_STD: 
pt.result.audit_rate.std, - _BCN.FALSE_POSITIVE_RATE_MEAN: pt.result.false_positive_rate.mean, - _BCN.DETECTION_RATE_MEAN: pt.result.detection_rate.mean, + _BCN.COMPLIANT_AUDIT_FRACTION_MEAN: pt.result.compliant_audit_fraction.mean, + _BCN.CATCH_RATE_MEAN: pt.result.catch_rate.mean, } for pt in result.points ] @@ -964,10 +964,10 @@ def export_monte_carlo_to_excel( ("Audit Rate", result.audit_rate.mean, result.audit_rate.std), ( "False Positive Rate", - result.false_positive_rate.mean, - result.false_positive_rate.std, + result.compliant_audit_fraction.mean, + result.compliant_audit_fraction.std, ), - ("Detection Rate", result.detection_rate.mean, result.detection_rate.std), + ("Detection Rate", result.catch_rate.mean, result.catch_rate.std), ] for label, mean_val, std_val in _mc_summary_rows: is_pct = ( @@ -1023,8 +1023,8 @@ def export_monte_carlo_to_excel( s.avg_payoff_compliant, s.avg_payoff_violator, s.audit_rate, - s.false_positive_rate, - s.detection_rate, + s.compliant_audit_fraction, + s.catch_rate, ] for col, v in enumerate(vals): seed_sheet.write( @@ -1138,7 +1138,7 @@ def export_sweep_to_excel( pt.result.avg_price.mean, pt.result.avg_net_payoff.mean, pt.result.audit_rate.mean, - pt.result.detection_rate.mean, + pt.result.catch_rate.mean, ] for col, v in enumerate(vals): sweep_sheet.write( diff --git a/src/compute_permit_sim/vis/panels/batch_results.py b/src/compute_permit_sim/vis/panels/batch_results.py index da01cff..20aecf6 100644 --- a/src/compute_permit_sim/vis/panels/batch_results.py +++ b/src/compute_permit_sim/vis/panels/batch_results.py @@ -100,9 +100,9 @@ def _MCResultsView() -> Any: ) DownloadJSON( "Download config JSON (for reproducibility)", - lambda r=result: r.config.model_dump_json(indent=2).encode( + lambda r=result: r.config.model_dump_json(indent=2).encode( # type: ignore[misc] "utf-8" - ), # type: ignore[misc] + ), f"mc_config_{safe}.json", ) @@ -182,16 +182,16 @@ def _MCResultsView() -> Any: "\u2014", ), ( - "False Positive Rate", - 
f"{result.false_positive_rate.mean:.1%}", - f"{result.false_positive_rate.std:.1%}", + "Compliant Audit Fraction", + f"{result.compliant_audit_fraction.mean:.1%}", + f"{result.compliant_audit_fraction.std:.1%}", "\u2014", "\u2014", ), ( - "Detection Rate", - f"{result.detection_rate.mean:.1%}", - f"{result.detection_rate.std:.1%}", + "Catch Rate (given audit)", + f"{result.catch_rate.mean:.1%}", + f"{result.catch_rate.std:.1%}", "\u2014", "\u2014", ), @@ -266,9 +266,9 @@ def _SweepResultsView() -> Any: ) DownloadJSON( "Download config JSON (for reproducibility)", - lambda r=result: r.config.model_dump_json(indent=2).encode( + lambda r=result: r.config.model_dump_json(indent=2).encode( # type: ignore[misc] "utf-8" - ), # type: ignore[misc] + ), f"sweep_config_{safe_s}_{safe_p}.json", ) From f05785370d58d96bec6b7460dc6ab7d887205df4 Mon Sep 17 00:00:00 2001 From: Josh Tuffy Date: Wed, 18 Mar 2026 15:23:58 -0400 Subject: [PATCH 5/6] ruf format --- src/compute_permit_sim/vis/export.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/compute_permit_sim/vis/export.py b/src/compute_permit_sim/vis/export.py index 0ae7a9e..23b27a4 100644 --- a/src/compute_permit_sim/vis/export.py +++ b/src/compute_permit_sim/vis/export.py @@ -1247,7 +1247,10 @@ def export_grid_sweep_to_excel( grid_sheet = workbook.add_worksheet("Grid") grid_sheet.set_column("A:A", 20) grid_sheet.write( - 0, 0, f"{result.param_x_label} \u2192 / {result.param_y_label} \u2193", header_fmt + 0, + 0, + f"{result.param_x_label} \u2192 / {result.param_y_label} \u2193", + header_fmt, ) for x_idx, x in enumerate(result.x_values): grid_sheet.write(0, x_idx + 1, x, header_fmt) From 8985fc21c56ed5f7c896ae35fe6de88ecb6ffa2b Mon Sep 17 00:00:00 2001 From: emlynsg Date: Thu, 19 Mar 2026 20:19:14 +0900 Subject: [PATCH 6/6] Fix stale p_catch formula in defaults.py, sync batch_test.json params - defaults.py: correct p_catch comment to match nested detection model - batch_test.json: sync params with 
scenario_4_feedback_compliance (base_prob=0.20, penalty=50, escalation=0.5, steps=50) Co-Authored-By: Claude Opus 4.6 --- scenarios/batch_test.json | 8 ++++---- src/compute_permit_sim/schemas/defaults.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scenarios/batch_test.json b/scenarios/batch_test.json index f303c95..31270c1 100644 --- a/scenarios/batch_test.json +++ b/scenarios/batch_test.json @@ -3,21 +3,21 @@ "description": "Demonstrates that feedback mechanisms (reputation, audit escalation) can drive compliance even under moderate enforcement.", "notes": "", "n_agents": 20, - "steps": 40, + "steps": 50, "flop_threshold": 1e25, "collateral_amount": 0.0, "audit": { - "base_prob": 0.3, + "base_prob": 0.20, "signal_dependent": false, "signal_exponent": 1.0, "false_positive_rate": 0.0, "false_negative_rate": 0.05, - "penalty_amount": 100.0, + "penalty_amount": 50.0, "backcheck_prob": 0.0, "whistleblower_prob": 0.0, "monitoring_prob": 0.0, "max_audits_per_step": null, - "audit_escalation": 1.5, + "audit_escalation": 0.5, "audit_decay_rate": 0.1 }, "market": { diff --git a/src/compute_permit_sim/schemas/defaults.py b/src/compute_permit_sim/schemas/defaults.py index d6de624..4c2aba2 100644 --- a/src/compute_permit_sim/schemas/defaults.py +++ b/src/compute_permit_sim/schemas/defaults.py @@ -77,7 +77,7 @@ DEFAULT_SIGNAL_EXPONENT = 1.0 # # Stage 2: AUDIT OUTCOME — given audit, does it find a violation? -# p_catch_if_audited = (1 - FNR) + FNR × backcheck_prob +# p_catch_if_audited = 1 - FNR × (1 - backcheck_prob) × (1 - p_w) × (1 - p_m) DEFAULT_AUDIT_FALSE_POS_RATE = 0.0 # alpha: P(false alarm | compliant firm audited) DEFAULT_AUDIT_FALSE_NEG_RATE = 0.40 # beta: 40% miss rate in Minimal env # Penalty structure: