From 3a41d44266fb2fe4068b7b915b5760de1b293d29 Mon Sep 17 00:00:00 2001 From: emlynsg Date: Thu, 12 Mar 2026 20:30:40 +0100 Subject: [PATCH 1/3] Fix naming, RNG seed, and scenario files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Metric rename (batch.py, monte_carlo.py, export.py, batch_results.py): false_positive_rate → compliant_audit_fraction detection_rate → detection_rate_given_audit Avoids collision with the input config parameter false_positive_rate (alpha) and clarifies that the metric was conditional on audit, not population-level. RNG fix (market.py, game_loop.py): fixed-price over-subscription path used global random.sample(); now accepts and threads the caller's seeded rng so exact reproduction is possible across all audit steps. Scenario files: scenario_5_ratchet → scenario_5_reputation_ratchet scenario_7_oscillation → scenario_6_enforcement_cycles scenario_6_lawless deleted Sweep configs updated to match. Adds scripts/generate_figure_data.py to regenerate compliance_ratchet.csv / compliance_oscillation.csv. Minor doc fixes: AuditConfig p_catch formula (was missing p_w, p_m terms); game_loop docstring six-phase → seven-phase; Makefile figure-data target. Also move type: ignore[misc] to lambda definition line (pre-existing mypy false negative caused by comment being on the wrong line of a multi-line expr). Co-Authored-By: Claude Sonnet 4.6 --- Makefile | 8 ++- .../basic/scenario_5_reputation_ratchet.json | 31 ++++++++ .../basic/scenario_6_enforcement_cycles.json | 31 ++++++++ scenarios/sweeps/sweep_decay_oscillation.json | 9 +++ .../sweeps/sweep_escalation_dynamic.json | 9 +++ scripts/generate_figure_data.py | 72 +++++++++++++++++++ src/compute_permit_sim/core/game_loop.py | 4 +- src/compute_permit_sim/core/market.py | 6 +- src/compute_permit_sim/schemas/batch.py | 18 ++--- src/compute_permit_sim/schemas/config.py | 5 +- .../services/monte_carlo.py | 26 ++++--- src/compute_permit_sim/vis/export.py | 40 ++++++----- .../vis/panels/batch_results.py | 20 +++--- 13 files changed, 224 insertions(+), 55 deletions(-) create mode 100644 scenarios/basic/scenario_5_reputation_ratchet.json create mode 100644 scenarios/basic/scenario_6_enforcement_cycles.json create mode 100644 scenarios/sweeps/sweep_decay_oscillation.json create mode 100644 scenarios/sweeps/sweep_escalation_dynamic.json create mode 100644 scripts/generate_figure_data.py diff --git a/Makefile b/Makefile index 9157d8f..fee2663 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,13 @@ # Makefile for Compute Permit Simulator -.PHONY: run viz app heatmap solara lint format test check clean help +.PHONY: run viz app heatmap solara lint format test check clean help figure-data help: @echo "Available commands:" @echo " make run - Run the simulation once (all scenarios)" @echo " make mc - Monte Carlo: 50 runs per scenario, exports CSV + LaTeX table" - @echo " make sweep - Sensitivity sweep: π₀ on Lawless scenario" + @echo " make sweep - Sensitivity sweeps (pi_0 and collateral on minimal scenario)" + @echo " make figure-data - Generate documents/figures/ CSVs for paper plots (200 runs)" @echo " make paper-results - Run MC + sweep and print LaTeX table to stdout" @echo " make app - Run the Solara interactive dashboard (alias: viz)" @echo " make lint - Run linters (ruff check)" @@ -27,6 +28,9 @@ sweep: uv run main.py --sweep-file sweep_pi0_lawless.json uv run main.py --sweep-file sweep_collateral_lawless.json +figure-data: + uv run scripts/generate_figure_data.py + list-sweeps: @echo "Available sweep files:" @ls scenarios/sweeps/*.json 2>/dev/null || echo " (none)" diff --git a/scenarios/basic/scenario_5_reputation_ratchet.json b/scenarios/basic/scenario_5_reputation_ratchet.json new file mode 100644 index 0000000..5c240a8 --- /dev/null +++ b/scenarios/basic/scenario_5_reputation_ratchet.json @@ -0,0 +1,31 @@ +{ + "name": "Reputation Ratchet", + "description": "Reputation-driven convergence to full compliance. Random audits (pi_0=0.20, signal-independent) with strong reputation escalation (epsilon=1.5, no audit-coefficient dynamics). Labs start non-compliant (expected burden ~11M$ << gain ~80M$), but each catch permanently multiplies perceived reputation cost by 2.5x. After ~4-5 catches the expected burden exceeds even the highest-value gain, and labs comply permanently. Full compliance reached in expectation by step 30-40.", + "steps": 50, + "n_agents": 20, + "audit": { + "base_prob": 0.20, + "signal_dependent": false, + "false_negative_rate": 0.05, + "penalty_amount": 50.0, + "audit_escalation": 0.0, + "audit_decay_rate": 0.0 + }, + "lab": { + "compute_capacity_min": 1e26, + "compute_capacity_max": 1e26, + "economic_value_min": 70.0, + "economic_value_max": 100.0, + "capability_value": 0.0, + "racing_factor": 0.0, + "risk_profile_min": 1.0, + "risk_profile_max": 1.0, + "reputation_sensitivity": 10.0, + "reputation_escalation_factor": 1.5, + "audit_coefficient": 1.0 + }, + "market": { + "permit_cap": 8.0 + }, + "collateral_amount": 0.0 +} diff --git a/scenarios/basic/scenario_6_enforcement_cycles.json b/scenarios/basic/scenario_6_enforcement_cycles.json new file mode 100644 index 0000000..4410410 --- /dev/null +++ b/scenarios/basic/scenario_6_enforcement_cycles.json @@ -0,0 +1,31 @@ +{ + "name": "Enforcement Cycles", + "description": "Audit-coefficient escalation with fast decay produces persistent enforcement cycles. Labs start with low audit coefficient (c_base=0.3): initial detection probability ~32%, expected penalty ~$31.8M < gain ~$80M so labs cheat. On catch, coefficient spikes by +2.0 (to ~2.3, detection ~95%): lab complies. But 60% per-step decay rapidly returns coefficient toward 0.3: after 2 steps coefficient ~0.62 (detection ~61%, expected penalty ~$61M < $80M) so lab cheats again. Cycle period ~4-5 steps per lab. No reputation escalation — pure audit-coefficient dynamics. Requires signal_dependent=True so the coefficient affects detection probability.", + "steps": 60, + "n_agents": 20, + "audit": { + "base_prob": 0.05, + "signal_dependent": true, + "false_negative_rate": 0.05, + "penalty_amount": 100.0, + "audit_escalation": 2.0, + "audit_decay_rate": 0.60 + }, + "lab": { + "compute_capacity_min": 1e26, + "compute_capacity_max": 1e26, + "economic_value_min": 75.0, + "economic_value_max": 90.0, + "capability_value": 0.0, + "racing_factor": 0.0, + "risk_profile_min": 1.0, + "risk_profile_max": 1.0, + "reputation_sensitivity": 0.0, + "reputation_escalation_factor": 0.0, + "audit_coefficient": 0.3 + }, + "market": { + "permit_cap": 5.0 + }, + "collateral_amount": 0.0 +} diff --git a/scenarios/sweeps/sweep_decay_oscillation.json b/scenarios/sweeps/sweep_decay_oscillation.json new file mode 100644 index 0000000..86a3675 --- /dev/null +++ b/scenarios/sweeps/sweep_decay_oscillation.json @@ -0,0 +1,9 @@ +{ + "scenario_file": "basic/scenario_6_enforcement_cycles.json", + "param_path": "audit.audit_decay_rate", + "param_label": "Audit Decay Rate \u03b4", + "min_val": 0.0, + "max_val": 0.90, + "interval": 0.10, + "n_runs": 50 +} diff --git a/scenarios/sweeps/sweep_escalation_dynamic.json b/scenarios/sweeps/sweep_escalation_dynamic.json new file mode 100644 index 0000000..b626a53 --- /dev/null +++ b/scenarios/sweeps/sweep_escalation_dynamic.json @@ -0,0 +1,9 @@ +{ + "scenario_file": "basic/scenario_6_enforcement_cycles.json", + "param_path": "audit.audit_escalation", + "param_label": "Audit Escalation Factor \u0394", + "min_val": 0.0, + "max_val": 3.0, + "interval": 0.25, + "n_runs": 50 +} diff --git a/scripts/generate_figure_data.py b/scripts/generate_figure_data.py new file mode 100644 index 0000000..fe8f68d --- /dev/null +++ b/scripts/generate_figure_data.py @@ -0,0 +1,72 @@ +"""Generate CSV data for the two dynamic scenario plots in documents/figures/. + +Outputs: + documents/figures/compliance_ratchet.csv (scenario_5_reputation_ratchet) + documents/figures/compliance_oscillation.csv (scenario_6_enforcement_cycles) + +Each CSV has columns: step, mean, lower (mean-std), upper (mean+std) + +Usage: + uv run scripts/generate_figure_data.py [--runs N] +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +from compute_permit_sim.services.config_manager import load_scenario +from compute_permit_sim.services.monte_carlo import run_monte_carlo + + +def write_csv(path: Path, step_compliance: list) -> None: + """Write step,mean,lower,upper CSV from a list of MetricStats.""" + with path.open("w") as f: + f.write("step,mean,lower,upper\n") + for i, stats in enumerate(step_compliance): + mean = stats.mean + std = stats.std + lower = max(0.0, mean - std) + upper = min(1.0, mean + std) + f.write(f"{i + 1},{mean:.4f},{lower:.4f},{upper:.4f}\n") + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--runs", + type=int, + default=200, + help="Monte Carlo runs per scenario (default: 200)", + ) + args = parser.parse_args() + + out_dir = Path(__file__).parent.parent / "documents" / "figures" + out_dir.mkdir(parents=True, exist_ok=True) + + scenarios = [ + ( + "basic/scenario_5_reputation_ratchet.json", + "compliance_ratchet.csv", + "Reputation Ratchet", + ), + ( + "basic/scenario_6_enforcement_cycles.json", + "compliance_oscillation.csv", + "Enforcement Cycles", + ), + ] + + for scenario_file, csv_name, label in scenarios: + print(f"Running {label} ({args.runs} seeds)...", flush=True) + config = load_scenario(scenario_file) + result = run_monte_carlo(config, n_runs=args.runs) + out_path = out_dir / csv_name + write_csv(out_path, result.step_compliance) + print( + f" -> {out_path} (final compliance: {result.final_compliance.mean:.1%})" + ) + + +if __name__ == "__main__": + main() diff --git a/src/compute_permit_sim/core/game_loop.py b/src/compute_permit_sim/core/game_loop.py index ec5dd7d..bf9cbca 100644 --- a/src/compute_permit_sim/core/game_loop.py +++ b/src/compute_permit_sim/core/game_loop.py @@ -1,6 +1,6 @@ """Core game loop — pure business logic for one simulation step. -Orchestrates the six-phase turn sequence: +Orchestrates the seven-phase turn sequence: 0. Collateral posting (above-threshold labs only) 1. Trading (bids + market allocation, above-threshold labs only) 2. Compliance decisions (above-threshold labs with excess only) @@ -130,7 +130,7 @@ def execute_step( outcome.agent_outcomes[lab.lab_id].bid_price = bid_per outcome.agent_outcomes[lab.lab_id].permits_wanted = qty - clearing_price, allocations = market.allocate(bids) + clearing_price, allocations = market.allocate(bids, rng=_rng) outcome.clearing_price = clearing_price for lab in above: diff --git a/src/compute_permit_sim/core/market.py b/src/compute_permit_sim/core/market.py index d3cd15f..1ce12f5 100644 --- a/src/compute_permit_sim/core/market.py +++ b/src/compute_permit_sim/core/market.py @@ -68,7 +68,9 @@ def resolve_price(self, bids: list[float]) -> float: return clearing_price def allocate( - self, bids: list[tuple[int, int, float]] + self, + bids: list[tuple[int, int, float]], + rng: random.Random | None = None, ) -> tuple[float, dict[int, int]]: """Resolve price and allocate permits via uniform-price auction. @@ -126,7 +128,7 @@ def allocate( allocations[lab_id] = qty else: # Over-subscribed: randomly sample up to permit_cap units - winners = random.sample(fp_units, available) + winners = (rng or random).sample(fp_units, available) for lab_id in winners: allocations[lab_id] += 1 diff --git a/src/compute_permit_sim/schemas/batch.py b/src/compute_permit_sim/schemas/batch.py index 97cf69e..7f13e4e 100644 --- a/src/compute_permit_sim/schemas/batch.py +++ b/src/compute_permit_sim/schemas/batch.py @@ -58,10 +58,10 @@ class BatchColumnNames: AUDIT_RATE = "audit_rate" AUDIT_RATE_MEAN = "audit_rate_mean" AUDIT_RATE_STD = "audit_rate_std" - FALSE_POSITIVE_RATE_MEAN = "false_positive_rate_mean" - FALSE_POSITIVE_RATE_STD = "false_positive_rate_std" - DETECTION_RATE_MEAN = "detection_rate_mean" - DETECTION_RATE_STD = "detection_rate_std" + COMPLIANT_AUDIT_FRACTION_MEAN = "compliant_audit_fraction_mean" + COMPLIANT_AUDIT_FRACTION_STD = "compliant_audit_fraction_std" + DETECTION_RATE_GIVEN_AUDIT_MEAN = "detection_rate_given_audit_mean" + DETECTION_RATE_GIVEN_AUDIT_STD = "detection_rate_given_audit_std" @dataclass(frozen=True) @@ -81,8 +81,8 @@ class PerSeedResult: avg_payoff_compliant: float # NaN if no compliant labs avg_payoff_violator: float # NaN if no violators audit_rate: float - false_positive_rate: float - detection_rate: float # NaN if no audited violators + compliant_audit_fraction: float # audits on compliant / total audits + detection_rate_given_audit: float # NaN if no audited violators @dataclass(frozen=True) @@ -161,8 +161,10 @@ class MonteCarloResult: # --- Audit burden --- audit_rate: MetricStats # audits / total lab-steps - false_positive_rate: MetricStats # audits on compliant / total audits - detection_rate: MetricStats # caught / audits on violators + compliant_audit_fraction: MetricStats # audits on compliant / total audits + detection_rate_given_audit: ( + MetricStats # caught / audits on violators (given audit) + ) # --- Raw per-seed data (optional, set store_raw=True in run_monte_carlo) --- raw_seeds: list[PerSeedResult] = field(default_factory=list) diff --git a/src/compute_permit_sim/schemas/config.py b/src/compute_permit_sim/schemas/config.py index 191b5f8..4777cd2 100644 --- a/src/compute_permit_sim/schemas/config.py +++ b/src/compute_permit_sim/schemas/config.py @@ -67,8 +67,9 @@ class AuditConfig(BaseModel): 2. AUDIT OUTCOME: Whether an audit catches a violator (if one exists) - false_positive_rate (alpha): P(false alarm | compliant firm audited) - - false_negative_rate (beta): P(miss | non-compliant firm audited) - - p_catch = (1 - beta) + beta × backcheck_prob + - false_negative_rate (beta): P(miss | non-compliant firm, direct pass) + - p_catch = 1 - beta × (1 - backcheck_prob) × (1 - p_w) × (1 - p_m) + where p_w = whistleblower_prob, p_m = monitoring_prob """ base_prob: float = Field( diff --git a/src/compute_permit_sim/services/monte_carlo.py b/src/compute_permit_sim/services/monte_carlo.py index 555994c..4026f95 100644 --- a/src/compute_permit_sim/services/monte_carlo.py +++ b/src/compute_permit_sim/services/monte_carlo.py @@ -48,8 +48,8 @@ class _RunResult(NamedTuple): # Audit burden audit_rate: float - false_positive_rate: float - detection_rate: float # NaN if 0 audited violators + compliant_audit_fraction: float # audits on compliant / total audits + detection_rate_given_audit: float # NaN if 0 audited violators def _run_once(config: ScenarioConfig, seed: int) -> _RunResult: @@ -135,10 +135,10 @@ def _run_once(config: ScenarioConfig, seed: int) -> _RunResult: avg_payoff_compliant=avg_payoff_compliant, avg_payoff_violator=avg_payoff_violator, audit_rate=total_audits / total_lab_steps if total_lab_steps else 0.0, - false_positive_rate=( + compliant_audit_fraction=( audits_on_compliant / total_audits if total_audits else 0.0 ), - detection_rate=( + detection_rate_given_audit=( violations_caught / audits_on_violators if audits_on_violators else float("nan") @@ -269,14 +269,18 @@ def run_monte_carlo( else MetricStats.nan() ), audit_rate=MetricStats.from_values([r.audit_rate for r in raw]), - false_positive_rate=MetricStats.from_values( - [r.false_positive_rate for r in raw] + compliant_audit_fraction=MetricStats.from_values( + [r.compliant_audit_fraction for r in raw] ), - detection_rate=( + detection_rate_given_audit=( MetricStats.from_values( - [r.detection_rate for r in raw if not math.isnan(r.detection_rate)] + [ + r.detection_rate_given_audit + for r in raw + if not math.isnan(r.detection_rate_given_audit) + ] ) - if any(not math.isnan(r.detection_rate) for r in raw) + if any(not math.isnan(r.detection_rate_given_audit) for r in raw) else MetricStats.nan() ), raw_seeds=[ @@ -289,8 +293,8 @@ def run_monte_carlo( avg_payoff_compliant=r.avg_payoff_compliant, avg_payoff_violator=r.avg_payoff_violator, audit_rate=r.audit_rate, - false_positive_rate=r.false_positive_rate, - detection_rate=r.detection_rate, + compliant_audit_fraction=r.compliant_audit_fraction, + detection_rate_given_audit=r.detection_rate_given_audit, ) for s, r in zip(run_seeds, raw) ] diff --git a/src/compute_permit_sim/vis/export.py b/src/compute_permit_sim/vis/export.py index 57213da..06d91a7 100644 --- a/src/compute_permit_sim/vis/export.py +++ b/src/compute_permit_sim/vis/export.py @@ -529,10 +529,10 @@ def export_monte_carlo_to_csv( _BCN.PAYOFF_VIOLATOR_STD: r.payoff_violator.std, _BCN.AUDIT_RATE_MEAN: r.audit_rate.mean, _BCN.AUDIT_RATE_STD: r.audit_rate.std, - _BCN.FALSE_POSITIVE_RATE_MEAN: r.false_positive_rate.mean, - _BCN.FALSE_POSITIVE_RATE_STD: r.false_positive_rate.std, - _BCN.DETECTION_RATE_MEAN: r.detection_rate.mean, - _BCN.DETECTION_RATE_STD: r.detection_rate.std, + _BCN.COMPLIANT_AUDIT_FRACTION_MEAN: r.compliant_audit_fraction.mean, + _BCN.COMPLIANT_AUDIT_FRACTION_STD: r.compliant_audit_fraction.std, + _BCN.DETECTION_RATE_GIVEN_AUDIT_MEAN: r.detection_rate_given_audit.mean, + _BCN.DETECTION_RATE_GIVEN_AUDIT_STD: r.detection_rate_given_audit.std, } for r in results ] @@ -580,8 +580,8 @@ def export_mc_per_seed_to_csv( _BCN.PAYOFF_COMPLIANT_MEAN: s.avg_payoff_compliant, _BCN.PAYOFF_VIOLATOR_MEAN: s.avg_payoff_violator, _BCN.AUDIT_RATE_MEAN: s.audit_rate, - _BCN.FALSE_POSITIVE_RATE_MEAN: s.false_positive_rate, - _BCN.DETECTION_RATE_MEAN: s.detection_rate, + _BCN.COMPLIANT_AUDIT_FRACTION_MEAN: s.compliant_audit_fraction, + _BCN.DETECTION_RATE_GIVEN_AUDIT_MEAN: s.detection_rate_given_audit, } for s in result.raw_seeds ] @@ -726,8 +726,8 @@ def export_sweep_to_csv( _BCN.PAYOFF_VIOLATOR_MEAN: pt.result.payoff_violator.mean, _BCN.AUDIT_RATE_MEAN: pt.result.audit_rate.mean, _BCN.AUDIT_RATE_STD: pt.result.audit_rate.std, - _BCN.FALSE_POSITIVE_RATE_MEAN: pt.result.false_positive_rate.mean, - _BCN.DETECTION_RATE_MEAN: pt.result.detection_rate.mean, + _BCN.COMPLIANT_AUDIT_FRACTION_MEAN: pt.result.compliant_audit_fraction.mean, + _BCN.DETECTION_RATE_GIVEN_AUDIT_MEAN: pt.result.detection_rate_given_audit.mean, } for pt in result.points ] @@ -827,11 +827,15 @@ def export_monte_carlo_to_excel( ), ("Audit Rate", result.audit_rate.mean, result.audit_rate.std), ( - "False Positive Rate", - result.false_positive_rate.mean, - result.false_positive_rate.std, + "Compliant Audit Fraction", + result.compliant_audit_fraction.mean, + result.compliant_audit_fraction.std, + ), + ( + "Detection Rate (given audit)", + result.detection_rate_given_audit.mean, + result.detection_rate_given_audit.std, ), - ("Detection Rate", result.detection_rate.mean, result.detection_rate.std), ] for label, mean_val, std_val in _mc_summary_rows: is_pct = ( @@ -871,8 +875,8 @@ def export_monte_carlo_to_excel( "Payoff Compliant", "Payoff Violator", "Audit Rate", - "False Positive Rate", - "Detection Rate", + "Compliant Audit Fraction", + "Detection Rate (given audit)", ] for col, h in enumerate(seed_headers): seed_sheet.write(0, col, h, header_fmt) @@ -887,8 +891,8 @@ def export_monte_carlo_to_excel( s.avg_payoff_compliant, s.avg_payoff_violator, s.audit_rate, - s.false_positive_rate, - s.detection_rate, + s.compliant_audit_fraction, + s.detection_rate_given_audit, ] for col, v in enumerate(vals): seed_sheet.write( @@ -985,7 +989,7 @@ def export_sweep_to_excel( "Avg Price", "Avg Net Payoff", "Audit Rate", - "Detection Rate", + "Detection Rate (given audit)", ] for col, h in enumerate(sweep_headers): sweep_sheet.write(0, col, h, header_fmt) @@ -1002,7 +1006,7 @@ def export_sweep_to_excel( pt.result.avg_price.mean, pt.result.avg_net_payoff.mean, pt.result.audit_rate.mean, - pt.result.detection_rate.mean, + pt.result.detection_rate_given_audit.mean, ] for col, v in enumerate(vals): sweep_sheet.write( diff --git a/src/compute_permit_sim/vis/panels/batch_results.py b/src/compute_permit_sim/vis/panels/batch_results.py index 5116a6a..1f24d44 100644 --- a/src/compute_permit_sim/vis/panels/batch_results.py +++ b/src/compute_permit_sim/vis/panels/batch_results.py @@ -100,9 +100,9 @@ def _MCResultsView() -> Any: ) DownloadJSON( "Download config JSON (for reproducibility)", - lambda r=result: r.config.model_dump_json(indent=2).encode( + lambda r=result: r.config.model_dump_json(indent=2).encode( # type: ignore[misc] "utf-8" - ), # type: ignore[misc] + ), f"mc_config_{safe}.json", ) @@ -182,16 +182,16 @@ def _MCResultsView() -> Any: "\u2014", ), ( - "False Positive Rate", - f"{result.false_positive_rate.mean:.1%}", - f"{result.false_positive_rate.std:.1%}", + "Compliant Audit Fraction", + f"{result.compliant_audit_fraction.mean:.1%}", + f"{result.compliant_audit_fraction.std:.1%}", "\u2014", "\u2014", ), ( - "Detection Rate", - f"{result.detection_rate.mean:.1%}", - f"{result.detection_rate.std:.1%}", + "Detection Rate (given audit)", + f"{result.detection_rate_given_audit.mean:.1%}", + f"{result.detection_rate_given_audit.std:.1%}", "\u2014", "\u2014", ), @@ -266,9 +266,9 @@ def _SweepResultsView() -> Any: ) DownloadJSON( "Download config JSON (for reproducibility)", - lambda r=result: r.config.model_dump_json(indent=2).encode( + lambda r=result: r.config.model_dump_json(indent=2).encode( # type: ignore[misc] "utf-8" - ), # type: ignore[misc] + ), f"sweep_config_{safe_s}_{safe_p}.json", ) From 411f77513570e73a69e0c99939e5cd58ec909f30 Mon Sep 17 00:00:00 2001 From: emlynsg Date: Thu, 12 Mar 2026 21:34:48 +0100 Subject: [PATCH 2/3] Remove scripts/ from git, update agent docs for metric rename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove generate_figure_data.py from tracking (local tooling, gitignored) - Remove figure-data Makefile target that referenced it - Update .agents/ docs: detection_rate → detection_rate_given_audit Co-Authored-By: Claude Opus 4.6 --- .agents/rules/project.md | 2 +- .agents/workflows/researcher.md | 6 +-- .gitignore | 1 + Makefile | 6 +-- scripts/generate_figure_data.py | 72 --------------------------------- 5 files changed, 6 insertions(+), 81 deletions(-) delete mode 100644 scripts/generate_figure_data.py diff --git a/.agents/rules/project.md b/.agents/rules/project.md index a646e01..a7cc495 100644 --- a/.agents/rules/project.md +++ b/.agents/rules/project.md @@ -138,7 +138,7 @@ The simulation services are all headless (no Solara dependency) and callable dir | `override_config(cfg, path, value)` | `schemas.sweep_params` | Mutate one field on a frozen config | **Key result fields** (both `run_single` → `SimulationRun` and `run_monte_carlo` → `MonteCarloResult`): -- `metrics.avg_compliance`, `metrics.final_compliance`, `metrics.detection_rate` +- `metrics.avg_compliance`, `metrics.final_compliance`, `metrics.detection_rate_given_audit` - MC only: `.avg_compliance.mean/.std`, `.step_compliance` (per-step), `.raw_seeds` (if `store_raw=True`) **Session folder contract** — every `/researcher` invocation produces a `YYYY-MM-DD_slug/` folder under `agent_workspace/research/` containing: diff --git a/.agents/workflows/researcher.md b/.agents/workflows/researcher.md index d8d6618..0115200 100644 --- a/.agents/workflows/researcher.md +++ b/.agents/workflows/researcher.md @@ -64,7 +64,7 @@ Before running experiments, orient yourself: > Always start from `research_margin_baseline.json` with a tightened cap (e.g. cap=10, > n_agents=15) as the canonical interesting starting point. > - > **`detection_rate = nan` is a diagnostic signal**, not missing data. It means zero + > **`detection_rate_given_audit = nan` is a diagnostic signal**, not missing data. It means zero > violations occurred — which confirms a degenerate config. Treat it as a hard > signal to rethink the parameter regime, not as an experiment result. @@ -94,7 +94,7 @@ from compute_permit_sim.schemas import ScenarioConfig result = run_single(config) # returns SimulationRun print(result.metrics.avg_compliance) print(result.metrics.final_compliance) -print(result.metrics.detection_rate) +print(result.metrics.detection_rate_given_audit) ``` ### Monte Carlo — `run_monte_carlo` @@ -112,7 +112,7 @@ result = run_monte_carlo( # result.final_compliance.mean / .std # result.p10_compliance, result.p90_compliance # result.pct_runs_full_compliance -# result.detection_rate.mean +# result.detection_rate_given_audit.mean # result.step_compliance — list[MetricStats], one per step # result.raw_seeds — list[SeedResult] if store_raw=True ``` diff --git a/.gitignore b/.gitignore index f6cc16d..b7dcd34 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,4 @@ agent_workspace/ # Other .claude +scripts/ diff --git a/Makefile b/Makefile index fee2663..a71e402 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,12 @@ # Makefile for Compute Permit Simulator -.PHONY: run viz app heatmap solara lint format test check clean help figure-data +.PHONY: run viz app heatmap solara lint format test check clean help help: @echo "Available commands:" @echo " make run - Run the simulation once (all scenarios)" @echo " make mc - Monte Carlo: 50 runs per scenario, exports CSV + LaTeX table" @echo " make sweep - Sensitivity sweeps (pi_0 and collateral on minimal scenario)" - @echo " make figure-data - Generate documents/figures/ CSVs for paper plots (200 runs)" @echo " make paper-results - Run MC + sweep and print LaTeX table to stdout" @echo " make app - Run the Solara interactive dashboard (alias: viz)" @echo " make lint - Run linters (ruff check)" @@ -28,9 +27,6 @@ sweep: uv run main.py --sweep-file sweep_pi0_lawless.json uv run main.py --sweep-file sweep_collateral_lawless.json -figure-data: - uv run scripts/generate_figure_data.py - list-sweeps: @echo "Available sweep files:" @ls scenarios/sweeps/*.json 2>/dev/null || echo " (none)" diff --git a/scripts/generate_figure_data.py b/scripts/generate_figure_data.py deleted file mode 100644 index fe8f68d..0000000 --- a/scripts/generate_figure_data.py +++ /dev/null @@ -1,72 +0,0 @@ -"""Generate CSV data for the two dynamic scenario plots in documents/figures/. - -Outputs: - documents/figures/compliance_ratchet.csv (scenario_5_reputation_ratchet) - documents/figures/compliance_oscillation.csv (scenario_6_enforcement_cycles) - -Each CSV has columns: step, mean, lower (mean-std), upper (mean+std) - -Usage: - uv run scripts/generate_figure_data.py [--runs N] -""" - -from __future__ import annotations - -import argparse -from pathlib import Path - -from compute_permit_sim.services.config_manager import load_scenario -from compute_permit_sim.services.monte_carlo import run_monte_carlo - - -def write_csv(path: Path, step_compliance: list) -> None: - """Write step,mean,lower,upper CSV from a list of MetricStats.""" - with path.open("w") as f: - f.write("step,mean,lower,upper\n") - for i, stats in enumerate(step_compliance): - mean = stats.mean - std = stats.std - lower = max(0.0, mean - std) - upper = min(1.0, mean + std) - f.write(f"{i + 1},{mean:.4f},{lower:.4f},{upper:.4f}\n") - - -def main() -> None: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "--runs", - type=int, - default=200, - help="Monte Carlo runs per scenario (default: 200)", - ) - args = parser.parse_args() - - out_dir = Path(__file__).parent.parent / "documents" / "figures" - out_dir.mkdir(parents=True, exist_ok=True) - - scenarios = [ - ( - "basic/scenario_5_reputation_ratchet.json", - "compliance_ratchet.csv", - "Reputation Ratchet", - ), - ( - "basic/scenario_6_enforcement_cycles.json", - "compliance_oscillation.csv", - "Enforcement Cycles", - ), - ] - - for scenario_file, csv_name, label in scenarios: - print(f"Running {label} ({args.runs} seeds)...", flush=True) - config = load_scenario(scenario_file) - result = run_monte_carlo(config, n_runs=args.runs) - out_path = out_dir / csv_name - write_csv(out_path, result.step_compliance) - print( - f" -> {out_path} (final compliance: {result.final_compliance.mean:.1%})" - ) - - -if __name__ == "__main__": - main() From 06b6e4e435a6ba4da6e061c84f3358463044ad22 Mon Sep 17 00:00:00 2001 From: emlynsg Date: Fri, 13 Mar 2026 22:59:27 +0100 Subject: [PATCH 3/3] Consolidate scenarios, rename sweep files, clean up old names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Merge scenario 4 (dynamic) + 5 (reputation ratchet) into scenario 4 "Feedback-Driven Compliance" with weaker params (base_prob=0.20, penalty=$50M) — still converges to ~97% compliance - Rename scenario 6 → 5 (enforcement cycles) - Rename sweep files: lawless→minimal, maxwell→smart - Replace all remaining lawless/crisis/maxwell references in code - Add PR.md Co-Authored-By: Claude Opus 4.6 --- Makefile | 4 +-- PR.md | 17 ++++++++++ main.py | 4 +-- ...on => scenario_4_feedback_compliance.json} | 14 ++++----- ...son => scenario_5_enforcement_cycles.json} | 0 .../basic/scenario_5_reputation_ratchet.json | 31 ------------------- scenarios/batch_test.json | 4 +-- ...ess.json => sweep_collateral_minimal.json} | 0 scenarios/sweeps/sweep_decay_oscillation.json | 2 +- .../sweeps/sweep_escalation_dynamic.json | 2 +- ...i0_lawless.json => sweep_pi0_minimal.json} | 0 ...ce_maxwell.json => sweep_price_smart.json} | 0 src/compute_permit_sim/schemas/defaults.py | 6 ++-- .../services/config_manager.py | 2 +- 14 files changed, 36 insertions(+), 50 deletions(-) create mode 100644 PR.md rename scenarios/basic/{scenario_4_dynamic.json => scenario_4_feedback_compliance.json} (52%) rename scenarios/basic/{scenario_6_enforcement_cycles.json => scenario_5_enforcement_cycles.json} (100%) delete mode 100644 scenarios/basic/scenario_5_reputation_ratchet.json rename scenarios/sweeps/{sweep_collateral_lawless.json => sweep_collateral_minimal.json} (100%) rename scenarios/sweeps/{sweep_pi0_lawless.json => sweep_pi0_minimal.json} (100%) rename scenarios/sweeps/{sweep_price_maxwell.json => sweep_price_smart.json} (100%) diff --git a/Makefile b/Makefile index a71e402..2326d44 100644 --- a/Makefile +++ b/Makefile @@ -24,8 +24,8 @@ mc: uv run main.py --monte-carlo 50 sweep: - uv run main.py --sweep-file sweep_pi0_lawless.json - uv run main.py --sweep-file sweep_collateral_lawless.json + uv run main.py --sweep-file sweep_pi0_minimal.json + uv run main.py --sweep-file sweep_collateral_minimal.json list-sweeps: @echo "Available sweep files:" diff --git a/PR.md b/PR.md new file mode 100644 index 0000000..07ee79c --- /dev/null +++ b/PR.md @@ -0,0 +1,17 @@ +## Summary + +Fixes from code review of #11: seeded RNG bug, misleading metric names, and scenario cleanup. + +## What changed + +- **RNG fix**: fixed-price market tie-breaking now uses the sim's seeded RNG instead of the global one, so results are reproducible +- **Metric renames**: `false_positive_rate` → `compliant_audit_fraction` (no longer clashes with the config param), `detection_rate` → `detection_rate_given_audit` (clarifies it's conditional on audit). Updated everywhere — schemas, MC service, CSV/Excel export, dashboard UI. +- **Scenario consolidation**: merged scenario 4 (dynamic) and 5 (reputation ratchet) into a single **Scenario 4 — Feedback-Driven Compliance** with weaker parameters (base_prob=0.20, penalty=$50M) that still converges to ~97% compliance. Renamed scenario 6 → 5 (enforcement cycles). Removed lawless scenario (degenerate, already covered by scenario 1). +- **Name cleanup**: replaced all remaining "lawless/crisis/maxwell" references with "minimal/strict/smart" across Makefile, main.py, defaults, config_manager, sweep files +- **Docstring fixes**: corrected `p_catch` formula in AuditConfig, "six-phase" → "seven-phase" in game loop + +## Test plan + +- [x] 132 tests pass +- [ ] Run `make mc` and check CSV column headers +- [ ] Run scenarios 4 and 5 in dashboard, verify ratchet vs oscillation behaviour diff --git a/main.py b/main.py index 6373487..f6671ef 100644 --- a/main.py +++ b/main.py @@ -3,7 +3,7 @@ Usage: make run # single run, all scenarios make mc # Monte Carlo (50 runs per scenario) - make sweep # π₀ × K sensitivity sweep on Lawless + make sweep # π₀ × K sensitivity sweep on Minimal make paper-results # MC + sweep, outputs LaTeX snippet uv run main.py --runs 1 # single run (default) @@ -233,7 +233,7 @@ def _parse_args() -> argparse.Namespace: "--sweep-file", metavar="SWEEP_FILE", type=str, - help="Run a sweep from a JSON file in scenarios/sweeps/ (e.g. sweep_pi0_lawless.json).", + help="Run a sweep from a JSON file in scenarios/sweeps/ (e.g. sweep_pi0_minimal.json).", ) parser.add_argument( diff --git a/scenarios/basic/scenario_4_dynamic.json b/scenarios/basic/scenario_4_feedback_compliance.json similarity index 52% rename from scenarios/basic/scenario_4_dynamic.json rename to scenarios/basic/scenario_4_feedback_compliance.json index 8f6fbf1..2377818 100644 --- a/scenarios/basic/scenario_4_dynamic.json +++ b/scenarios/basic/scenario_4_feedback_compliance.json @@ -1,14 +1,14 @@ { - "name": "Dynamic Escalation (Time-Dependent)", - "description": "Demonstrates shifting deterrence via feedback loops: labs start non-compliant, get caught, and face escalating audit probabilities and reputation costs. Initial cheating gives way to compliance as enforcement ratchets up.", - "steps": 40, + "name": "Feedback-Driven Compliance", + "description": "Demonstrates that even modest enforcement can drive near-full compliance when reputation costs accumulate permanently. Base audit rate is only 20%, penalty is moderate ($50M), and audit escalation is weak (0.5). Despite these soft parameters, reputation sensitivity (10.0) ensures that after a few catches each lab's perceived burden exceeds its gain, producing a monotonic ratchet to ~97% compliance by step 50.", + "steps": 50, "n_agents": 20, "audit": { - "base_prob": 0.3, + "base_prob": 0.20, "signal_dependent": false, "false_negative_rate": 0.05, - "penalty_amount": 100.0, - "audit_escalation": 1.5, + "penalty_amount": 50.0, + "audit_escalation": 0.5, "audit_decay_rate": 0.1 }, "lab": { @@ -28,4 +28,4 @@ "permit_cap": 5.0 }, "collateral_amount": 0.0 -} \ No newline at end of file +} diff --git a/scenarios/basic/scenario_6_enforcement_cycles.json b/scenarios/basic/scenario_5_enforcement_cycles.json similarity index 100% rename from scenarios/basic/scenario_6_enforcement_cycles.json rename to scenarios/basic/scenario_5_enforcement_cycles.json diff --git a/scenarios/basic/scenario_5_reputation_ratchet.json b/scenarios/basic/scenario_5_reputation_ratchet.json deleted file mode 100644 index 5c240a8..0000000 --- a/scenarios/basic/scenario_5_reputation_ratchet.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "name": "Reputation Ratchet", - "description": "Reputation-driven convergence to full compliance. Random audits (pi_0=0.20, signal-independent) with strong reputation escalation (epsilon=1.5, no audit-coefficient dynamics). Labs start non-compliant (expected burden ~11M$ << gain ~80M$), but each catch permanently multiplies perceived reputation cost by 2.5x. After ~4-5 catches the expected burden exceeds even the highest-value gain, and labs comply permanently. Full compliance reached in expectation by step 30-40.", - "steps": 50, - "n_agents": 20, - "audit": { - "base_prob": 0.20, - "signal_dependent": false, - "false_negative_rate": 0.05, - "penalty_amount": 50.0, - "audit_escalation": 0.0, - "audit_decay_rate": 0.0 - }, - "lab": { - "compute_capacity_min": 1e26, - "compute_capacity_max": 1e26, - "economic_value_min": 70.0, - "economic_value_max": 100.0, - "capability_value": 0.0, - "racing_factor": 0.0, - "risk_profile_min": 1.0, - "risk_profile_max": 1.0, - "reputation_sensitivity": 10.0, - "reputation_escalation_factor": 1.5, - "audit_coefficient": 1.0 - }, - "market": { - "permit_cap": 8.0 - }, - "collateral_amount": 0.0 -} diff --git a/scenarios/batch_test.json b/scenarios/batch_test.json index 6395ece..f303c95 100644 --- a/scenarios/batch_test.json +++ b/scenarios/batch_test.json @@ -1,6 +1,6 @@ { - "name": "Dynamic Escalation (Time-Dependent)", - "description": "Demonstrates shifting deterrence via feedback loops: labs start non-compliant, get caught, and face escalating audit probabilities and reputation costs. Initial cheating gives way to compliance as enforcement ratchets up.", + "name": "Feedback-Driven Compliance", + "description": "Demonstrates that feedback mechanisms (reputation, audit escalation) can drive compliance even under moderate enforcement.", "notes": "", "n_agents": 20, "steps": 40, diff --git a/scenarios/sweeps/sweep_collateral_lawless.json b/scenarios/sweeps/sweep_collateral_minimal.json similarity index 100% rename from scenarios/sweeps/sweep_collateral_lawless.json rename to scenarios/sweeps/sweep_collateral_minimal.json diff --git a/scenarios/sweeps/sweep_decay_oscillation.json b/scenarios/sweeps/sweep_decay_oscillation.json index 86a3675..af37df9 100644 --- a/scenarios/sweeps/sweep_decay_oscillation.json +++ b/scenarios/sweeps/sweep_decay_oscillation.json @@ -1,5 +1,5 @@ { - "scenario_file": "basic/scenario_6_enforcement_cycles.json", + "scenario_file": "basic/scenario_5_enforcement_cycles.json", "param_path": "audit.audit_decay_rate", "param_label": "Audit Decay Rate \u03b4", "min_val": 0.0, diff --git a/scenarios/sweeps/sweep_escalation_dynamic.json b/scenarios/sweeps/sweep_escalation_dynamic.json index b626a53..513ae71 100644 --- a/scenarios/sweeps/sweep_escalation_dynamic.json +++ b/scenarios/sweeps/sweep_escalation_dynamic.json @@ -1,5 +1,5 @@ { - "scenario_file": "basic/scenario_6_enforcement_cycles.json", + "scenario_file": "basic/scenario_5_enforcement_cycles.json", "param_path": "audit.audit_escalation", "param_label": "Audit Escalation Factor \u0394", "min_val": 0.0, diff --git a/scenarios/sweeps/sweep_pi0_lawless.json b/scenarios/sweeps/sweep_pi0_minimal.json similarity index 100% rename from scenarios/sweeps/sweep_pi0_lawless.json rename to scenarios/sweeps/sweep_pi0_minimal.json diff --git a/scenarios/sweeps/sweep_price_maxwell.json b/scenarios/sweeps/sweep_price_smart.json similarity index 100% rename from scenarios/sweeps/sweep_price_maxwell.json rename to scenarios/sweeps/sweep_price_smart.json diff --git a/src/compute_permit_sim/schemas/defaults.py b/src/compute_permit_sim/schemas/defaults.py index 6f27b62..d6de624 100644 --- a/src/compute_permit_sim/schemas/defaults.py +++ b/src/compute_permit_sim/schemas/defaults.py @@ -79,7 +79,7 @@ # Stage 2: AUDIT OUTCOME — given audit, does it find a violation? # p_catch_if_audited = (1 - FNR) + FNR × backcheck_prob DEFAULT_AUDIT_FALSE_POS_RATE = 0.0 # alpha: P(false alarm | compliant firm audited) -DEFAULT_AUDIT_FALSE_NEG_RATE = 0.40 # beta: 40% miss rate in Lawless env +DEFAULT_AUDIT_FALSE_NEG_RATE = 0.40 # beta: 40% miss rate in Minimal env # Penalty structure: DEFAULT_AUDIT_PENALTY_AMOUNT = 200.0 # M$: flat penalty (default/fallback) DEFAULT_AUDIT_BACKCHECK_PROB = 0.0 # p_b: historical audit discovery rate @@ -93,7 +93,7 @@ # Seized on verified violation; returned otherwise. 0 = disabled. # Reference: Christoph (2026) Section 2.5, Proposition 3 # P_eff = min(K + phi, L) where K = collateral, phi = ex post fine, L = liability -DEFAULT_COLLATERAL_AMOUNT = 0.0 # M$: Lawless = 0 collateral +DEFAULT_COLLATERAL_AMOUNT = 0.0 # M$: Minimal = 0 collateral # --- Market Defaults --- DEFAULT_MARKET_PERMIT_CAP = 20.0 # Number of permits available @@ -118,7 +118,7 @@ # Reputation sensitivity: perceived reputation cost if caught (M$) DEFAULT_LAB_REPUTATION_SENSITIVITY = 0.0 # R: brand/trust damage # Audit coefficient: firm-specific audit rate scaling (dimensionless) -DEFAULT_LAB_AUDIT_COEFFICIENT = 0.8 # c(i): Lawless = 0.8 evasion factor +DEFAULT_LAB_AUDIT_COEFFICIENT = 0.8 # c(i): Minimal = 0.8 evasion factor # --- Dynamic Factor Defaults --- # All default to 0.0 (static behavior). Set > 0 to activate. # diff --git a/src/compute_permit_sim/services/config_manager.py b/src/compute_permit_sim/services/config_manager.py index 24b6553..acf4922 100644 --- a/src/compute_permit_sim/services/config_manager.py +++ b/src/compute_permit_sim/services/config_manager.py @@ -94,7 +94,7 @@ def load_sweep(filename: str) -> SweepConfig: } Args: - filename: Name of the sweep file (e.g. 'sweep_pi0_lawless.json'). + filename: Name of the sweep file (e.g. 'sweep_pi0_minimal.json'). Returns: Validated SweepConfig dataclass.