JTuffy · JTuffy · Mar 18, 2026 · Mar 12, 2026 · Mar 12, 2026 · Mar 13, 2026
diff --git a/.agents/rules/project.md b/.agents/rules/project.md
@@ -138,7 +138,7 @@ The simulation services are all headless (no Solara dependency) and callable dir
 | `override_config(cfg, path, value)` | `schemas.sweep_params` | Mutate one field on a frozen config |
 
 **Key result fields** (both `run_single` → `SimulationRun` and `run_monte_carlo` → `MonteCarloResult`):
-- `metrics.avg_compliance`, `metrics.final_compliance`, `metrics.detection_rate`
+- `metrics.avg_compliance`, `metrics.final_compliance`, `metrics.detection_rate_given_audit`
 - MC only: `.avg_compliance.mean/.std`, `.step_compliance` (per-step), `.raw_seeds` (if `store_raw=True`)
 
 **Session folder contract** — every `/researcher` invocation produces a `YYYY-MM-DD_slug/` folder under `agent_workspace/research/` containing:

diff --git a/.agents/workflows/researcher.md b/.agents/workflows/researcher.md
@@ -64,7 +64,7 @@ Before running experiments, orient yourself:
    > Always start from `research_margin_baseline.json` with a tightened cap (e.g. cap=10,
    > n_agents=15) as the canonical interesting starting point.
    >
-   > **`detection_rate = nan` is a diagnostic signal**, not missing data. It means zero
+   > **`detection_rate_given_audit = nan` is a diagnostic signal**, not missing data. It means no
    > violators were audited (usually because zero violations occurred) — a sign of a degenerate config. Treat it as a hard
    > signal to rethink the parameter regime, not as an experiment result.
 
@@ -94,7 +94,7 @@ from compute_permit_sim.schemas import ScenarioConfig
 result = run_single(config)  # returns SimulationRun
 print(result.metrics.avg_compliance)
 print(result.metrics.final_compliance)
-print(result.metrics.detection_rate)
+print(result.metrics.detection_rate_given_audit)
 ```
 
 ### Monte Carlo — `run_monte_carlo`
@@ -112,7 +112,7 @@ result = run_monte_carlo(
 # result.final_compliance.mean / .std
 # result.p10_compliance, result.p90_compliance
 # result.pct_runs_full_compliance
-# result.detection_rate.mean
+# result.detection_rate_given_audit.mean
 # result.step_compliance   — list[MetricStats], one per step
 # result.raw_seeds         — list[SeedResult] if store_raw=True
 ```

diff --git a/.gitignore b/.gitignore
@@ -27,3 +27,4 @@ agent_workspace/
 
 # Other
 .claude
+scripts/
diff --git a/Makefile b/Makefile
@@ -6,7 +6,7 @@ help:
 	@echo "Available commands:"
 	@echo "  make run           - Run the simulation once (all scenarios)"
 	@echo "  make mc            - Monte Carlo: 50 runs per scenario, exports CSV + LaTeX table"
-	@echo "  make sweep         - Sensitivity sweep: π₀ on Lawless scenario"
+	@echo "  make sweep         - Sensitivity sweeps (pi_0 and collateral on minimal scenario)"
 	@echo "  make paper-results - Run MC + sweep and print LaTeX table to stdout"
 	@echo "  make app           - Run the Solara interactive dashboard (alias: viz)"
 	@echo "  make lint          - Run linters (ruff check)"
@@ -24,8 +24,8 @@ mc:
 	uv run main.py --monte-carlo 50
 
 sweep:
-	uv run main.py --sweep-file sweep_pi0_lawless.json
-	uv run main.py --sweep-file sweep_collateral_lawless.json
+	uv run main.py --sweep-file sweep_pi0_minimal.json
+	uv run main.py --sweep-file sweep_collateral_minimal.json
 
 list-sweeps:
 	@echo "Available sweep files:"

diff --git a/PR.md b/PR.md
@@ -0,0 +1,17 @@
+## Summary
+
+Fixes from code review of #11: seeded RNG bug, misleading metric names, and scenario cleanup.
+
+## What changed
+
+- **RNG fix**: fixed-price market tie-breaking now uses the sim's seeded RNG instead of the global one, so results are reproducible
+- **Metric renames**: `false_positive_rate` → `compliant_audit_fraction` (no longer clashes with the config param), `detection_rate` → `detection_rate_given_audit` (clarifies it's conditional on audit). Updated everywhere — schemas, MC service, CSV/Excel export, dashboard UI.
+- **Scenario consolidation**: merged scenario 4 (dynamic) and 5 (reputation ratchet) into a single **Scenario 4 — Feedback-Driven Compliance** with weaker parameters (base_prob=0.20, penalty=$50M) that still converges to ~97% compliance. Renamed scenario 6 → 5 (enforcement cycles). Removed lawless scenario (degenerate, already covered by scenario 1).
+- **Name cleanup**: replaced all remaining "lawless/crisis/maxwell" references with "minimal/strict/smart" across Makefile, main.py, defaults, config_manager, sweep files
+- **Docstring fixes**: corrected `p_catch` formula in AuditConfig, "six-phase" → "seven-phase" in game loop
+
+## Test plan
+
+- [x] 132 tests pass
+- [ ] Run `make mc` and check CSV column headers
+- [ ] Run scenarios 4 and 5 in dashboard, verify ratchet vs oscillation behaviour
diff --git a/main.py b/main.py
@@ -3,7 +3,7 @@
 Usage:
     make run                         # single run, all scenarios
     make mc                          # Monte Carlo (50 runs per scenario)
-    make sweep                       # π₀ × K sensitivity sweep on Lawless
+    make sweep                       # π₀ and collateral (K) sensitivity sweeps on Minimal
     make paper-results               # MC + sweep, outputs LaTeX snippet
 
     uv run main.py --runs 1          # single run (default)
@@ -233,7 +233,7 @@ def _parse_args() -> argparse.Namespace:
         "--sweep-file",
         metavar="SWEEP_FILE",
         type=str,
-        help="Run a sweep from a JSON file in scenarios/sweeps/ (e.g. sweep_pi0_lawless.json).",
+        help="Run a sweep from a JSON file in scenarios/sweeps/ (e.g. sweep_pi0_minimal.json).",
     )
 
     parser.add_argument(

diff --git a/scenarios/basic/scenario_4_dynamic.json → ...basic/scenario_4_feedback_compliance.json b/scenarios/basic/scenario_4_dynamic.json → ...basic/scenario_4_feedback_compliance.json
@@ -1,14 +1,14 @@
 {
-    "name": "Dynamic Escalation (Time-Dependent)",
-    "description": "Demonstrates shifting deterrence via feedback loops: labs start non-compliant, get caught, and face escalating audit probabilities and reputation costs. Initial cheating gives way to compliance as enforcement ratchets up.",
-    "steps": 40,
+    "name": "Feedback-Driven Compliance",
+    "description": "Demonstrates that even modest enforcement can drive near-full compliance when reputation costs accumulate permanently. Base audit rate is only 20%, penalty is moderate ($50M), and audit escalation is weak (0.5). Despite these soft parameters, reputation sensitivity (10.0) ensures that after a few catches each lab's perceived burden exceeds its gain, producing a monotonic ratchet to ~97% compliance by step 50.",
+    "steps": 50,
     "n_agents": 20,
     "audit": {
-        "base_prob": 0.3,
+        "base_prob": 0.20,
         "signal_dependent": false,
         "false_negative_rate": 0.05,
-        "penalty_amount": 100.0,
-        "audit_escalation": 1.5,
+        "penalty_amount": 50.0,
+        "audit_escalation": 0.5,
         "audit_decay_rate": 0.1
     },
     "lab": {
@@ -28,4 +28,4 @@
         "permit_cap": 5.0
     },
     "collateral_amount": 0.0
-}
+}
diff --git a/scenarios/basic/scenario_5_enforcement_cycles.json b/scenarios/basic/scenario_5_enforcement_cycles.json
@@ -0,0 +1,31 @@
+{
+    "name": "Enforcement Cycles",
+    "description": "Audit-coefficient escalation with fast decay produces persistent enforcement cycles. Labs start with low audit coefficient (c_base=0.3): initial detection probability ~32%, expected penalty ~$31.8M < gain ~$80M so labs cheat. On catch, coefficient spikes by +2.0 (to ~2.3, detection ~95%): lab complies. But 60% per-step decay rapidly returns coefficient toward 0.3: after 2 steps coefficient ~0.62 (detection ~61%, expected penalty ~$61M < $80M) so lab cheats again. Cycle period ~4-5 steps per lab. No reputation escalation — pure audit-coefficient dynamics. Requires signal_dependent=True so the coefficient affects detection probability.",
+    "steps": 60,
+    "n_agents": 20,
+    "audit": {
+        "base_prob": 0.05,
+        "signal_dependent": true,
+        "false_negative_rate": 0.05,
+        "penalty_amount": 100.0,
+        "audit_escalation": 2.0,
+        "audit_decay_rate": 0.60
+    },
+    "lab": {
+        "compute_capacity_min": 1e26,
+        "compute_capacity_max": 1e26,
+        "economic_value_min": 75.0,
+        "economic_value_max": 90.0,
+        "capability_value": 0.0,
+        "racing_factor": 0.0,
+        "risk_profile_min": 1.0,
+        "risk_profile_max": 1.0,
+        "reputation_sensitivity": 0.0,
+        "reputation_escalation_factor": 0.0,
+        "audit_coefficient": 0.3
+    },
+    "market": {
+        "permit_cap": 5.0
+    },
+    "collateral_amount": 0.0
+}
diff --git a/scenarios/batch_test.json b/scenarios/batch_test.json
@@ -1,6 +1,6 @@
 {
-  "name": "Dynamic Escalation (Time-Dependent)",
-  "description": "Demonstrates shifting deterrence via feedback loops: labs start non-compliant, get caught, and face escalating audit probabilities and reputation costs. Initial cheating gives way to compliance as enforcement ratchets up.",
+  "name": "Feedback-Driven Compliance",
+  "description": "Demonstrates that feedback mechanisms (reputation, audit escalation) can drive compliance even under moderate enforcement.",
   "notes": "",
   "n_agents": 20,
   "steps": 40,

diff --git a/...rios/sweeps/sweep_collateral_lawless.json → ...rios/sweeps/sweep_collateral_minimal.json b/...rios/sweeps/sweep_collateral_lawless.json → ...rios/sweeps/sweep_collateral_minimal.json
diff --git a/scenarios/sweeps/sweep_decay_oscillation.json b/scenarios/sweeps/sweep_decay_oscillation.json
@@ -0,0 +1,9 @@
+{
+    "scenario_file": "basic/scenario_5_enforcement_cycles.json",
+    "param_path": "audit.audit_decay_rate",
+    "param_label": "Audit Decay Rate \u03b4",
+    "min_val": 0.0,
+    "max_val": 0.90,
+    "interval": 0.10,
+    "n_runs": 50
+}
diff --git a/scenarios/sweeps/sweep_escalation_dynamic.json b/scenarios/sweeps/sweep_escalation_dynamic.json
@@ -0,0 +1,9 @@
+{
+    "scenario_file": "basic/scenario_5_enforcement_cycles.json",
+    "param_path": "audit.audit_escalation",
+    "param_label": "Audit Escalation Factor \u0394",
+    "min_val": 0.0,
+    "max_val": 3.0,
+    "interval": 0.25,
+    "n_runs": 50
+}
diff --git a/scenarios/sweeps/sweep_pi0_lawless.json → scenarios/sweeps/sweep_pi0_minimal.json b/scenarios/sweeps/sweep_pi0_lawless.json → scenarios/sweeps/sweep_pi0_minimal.json
diff --git a/scenarios/sweeps/sweep_price_maxwell.json → scenarios/sweeps/sweep_price_smart.json b/scenarios/sweeps/sweep_price_maxwell.json → scenarios/sweeps/sweep_price_smart.json
diff --git a/src/compute_permit_sim/core/game_loop.py b/src/compute_permit_sim/core/game_loop.py
@@ -1,6 +1,6 @@
 """Core game loop — pure business logic for one simulation step.
 
-Orchestrates the six-phase turn sequence:
+Orchestrates the seven-phase turn sequence:
     0. Collateral posting (above-threshold labs only)
     1. Trading (bids + market allocation, above-threshold labs only)
     2. Compliance decisions (above-threshold labs with excess only)
@@ -130,7 +130,7 @@ def execute_step(
                 outcome.agent_outcomes[lab.lab_id].bid_price = bid_per
                 outcome.agent_outcomes[lab.lab_id].permits_wanted = qty
 
-        clearing_price, allocations = market.allocate(bids)
+        clearing_price, allocations = market.allocate(bids, rng=_rng)
         outcome.clearing_price = clearing_price
 
         for lab in above:

diff --git a/src/compute_permit_sim/core/market.py b/src/compute_permit_sim/core/market.py
@@ -68,7 +68,9 @@ def resolve_price(self, bids: list[float]) -> float:
         return clearing_price
 
     def allocate(
-        self, bids: list[tuple[int, int, float]]
+        self,
+        bids: list[tuple[int, int, float]],
+        rng: random.Random | None = None,
     ) -> tuple[float, dict[int, int]]:
         """Resolve price and allocate permits via uniform-price auction.
 
@@ -126,7 +128,7 @@ def allocate(
                     allocations[lab_id] = qty
             else:
                 # Over-subscribed: randomly sample up to permit_cap units
-                winners = random.sample(fp_units, available)
+                winners = (rng or random).sample(fp_units, available)
                 for lab_id in winners:
                     allocations[lab_id] += 1
 

diff --git a/src/compute_permit_sim/schemas/batch.py b/src/compute_permit_sim/schemas/batch.py
@@ -58,10 +58,10 @@ class BatchColumnNames:
     AUDIT_RATE = "audit_rate"
     AUDIT_RATE_MEAN = "audit_rate_mean"
     AUDIT_RATE_STD = "audit_rate_std"
-    FALSE_POSITIVE_RATE_MEAN = "false_positive_rate_mean"
-    FALSE_POSITIVE_RATE_STD = "false_positive_rate_std"
-    DETECTION_RATE_MEAN = "detection_rate_mean"
-    DETECTION_RATE_STD = "detection_rate_std"
+    COMPLIANT_AUDIT_FRACTION_MEAN = "compliant_audit_fraction_mean"
+    COMPLIANT_AUDIT_FRACTION_STD = "compliant_audit_fraction_std"
+    DETECTION_RATE_GIVEN_AUDIT_MEAN = "detection_rate_given_audit_mean"
+    DETECTION_RATE_GIVEN_AUDIT_STD = "detection_rate_given_audit_std"
 
 
 @dataclass(frozen=True)
@@ -81,8 +81,8 @@ class PerSeedResult:
     avg_payoff_compliant: float  # NaN if no compliant labs
     avg_payoff_violator: float  # NaN if no violators
     audit_rate: float
-    false_positive_rate: float
-    detection_rate: float  # NaN if no audited violators
+    compliant_audit_fraction: float  # audits on compliant / total audits
+    detection_rate_given_audit: float  # NaN if no audited violators
 
 
 @dataclass(frozen=True)
@@ -161,8 +161,10 @@ class MonteCarloResult:
 
     # --- Audit burden ---
     audit_rate: MetricStats  # audits / total lab-steps
-    false_positive_rate: MetricStats  # audits on compliant / total audits
-    detection_rate: MetricStats  # caught / audits on violators
+    compliant_audit_fraction: MetricStats  # audits on compliant / total audits
+    detection_rate_given_audit: (
+        MetricStats  # caught / audits on violators (given audit)
+    )
 
     # --- Raw per-seed data (optional, set store_raw=True in run_monte_carlo) ---
     raw_seeds: list[PerSeedResult] = field(default_factory=list)

diff --git a/src/compute_permit_sim/schemas/config.py b/src/compute_permit_sim/schemas/config.py
@@ -67,8 +67,9 @@ class AuditConfig(BaseModel):
 
     2. AUDIT OUTCOME: Whether an audit catches a violator (if one exists)
        - false_positive_rate (alpha): P(false alarm | compliant firm audited)
-       - false_negative_rate (beta): P(miss | non-compliant firm audited)
-       - p_catch = (1 - beta) + beta × backcheck_prob
+       - false_negative_rate (beta): P(miss | non-compliant firm, direct pass)
+       - p_catch = 1 - beta × (1 - backcheck_prob) × (1 - p_w) × (1 - p_m)
+         where p_w = whistleblower_prob, p_m = monitoring_prob
     """
 
     base_prob: float = Field(

diff --git a/src/compute_permit_sim/schemas/defaults.py b/src/compute_permit_sim/schemas/defaults.py
@@ -79,7 +79,7 @@
 # Stage 2: AUDIT OUTCOME — given audit, does it find a violation?
 #   p_catch_if_audited = 1 - FNR × (1 - backcheck_prob) × (1 - whistleblower_prob) × (1 - monitoring_prob)
 DEFAULT_AUDIT_FALSE_POS_RATE = 0.0  # alpha: P(false alarm | compliant firm audited)
-DEFAULT_AUDIT_FALSE_NEG_RATE = 0.40  # beta: 40% miss rate in Lawless env
+DEFAULT_AUDIT_FALSE_NEG_RATE = 0.40  # beta: 40% miss rate in Minimal env
 # Penalty structure:
 DEFAULT_AUDIT_PENALTY_AMOUNT = 200.0  # M$: flat penalty (default/fallback)
 DEFAULT_AUDIT_BACKCHECK_PROB = 0.0  # p_b: historical audit discovery rate
@@ -93,7 +93,7 @@
 # Seized on verified violation; returned otherwise. 0 = disabled.
 # Reference: Christoph (2026) Section 2.5, Proposition 3
 #   P_eff = min(K + phi, L) where K = collateral, phi = ex post fine, L = liability
-DEFAULT_COLLATERAL_AMOUNT = 0.0  # M$: Lawless = 0 collateral
+DEFAULT_COLLATERAL_AMOUNT = 0.0  # M$: Minimal = 0 collateral
 
 # --- Market Defaults ---
 DEFAULT_MARKET_PERMIT_CAP = 20.0  # Number of permits available
@@ -118,7 +118,7 @@
 # Reputation sensitivity: perceived reputation cost if caught (M$)
 DEFAULT_LAB_REPUTATION_SENSITIVITY = 0.0  # R: brand/trust damage
 # Audit coefficient: firm-specific audit rate scaling (dimensionless)
-DEFAULT_LAB_AUDIT_COEFFICIENT = 0.8  # c(i): Lawless = 0.8 evasion factor
+DEFAULT_LAB_AUDIT_COEFFICIENT = 0.8  # c(i): Minimal = 0.8 evasion factor
 # --- Dynamic Factor Defaults ---
 # All default to 0.0 (static behavior). Set > 0 to activate.
 #

diff --git a/src/compute_permit_sim/services/config_manager.py b/src/compute_permit_sim/services/config_manager.py
@@ -94,7 +94,7 @@ def load_sweep(filename: str) -> SweepConfig:
         }
 
     Args:
-        filename: Name of the sweep file (e.g. 'sweep_pi0_lawless.json').
+        filename: Name of the sweep file (e.g. 'sweep_pi0_minimal.json').
 
     Returns:
         Validated SweepConfig dataclass.

diff --git a/src/compute_permit_sim/services/monte_carlo.py b/src/compute_permit_sim/services/monte_carlo.py
@@ -48,8 +48,8 @@ class _RunResult(NamedTuple):
 
     # Audit burden
     audit_rate: float
-    false_positive_rate: float
-    detection_rate: float  # NaN if 0 audited violators
+    compliant_audit_fraction: float  # audits on compliant / total audits
+    detection_rate_given_audit: float  # NaN if 0 audited violators
 
 
 def _run_once(config: ScenarioConfig, seed: int) -> _RunResult:
@@ -135,10 +135,10 @@ def _run_once(config: ScenarioConfig, seed: int) -> _RunResult:
         avg_payoff_compliant=avg_payoff_compliant,
         avg_payoff_violator=avg_payoff_violator,
         audit_rate=total_audits / total_lab_steps if total_lab_steps else 0.0,
-        false_positive_rate=(
+        compliant_audit_fraction=(
             audits_on_compliant / total_audits if total_audits else 0.0
         ),
-        detection_rate=(
+        detection_rate_given_audit=(
             violations_caught / audits_on_violators
             if audits_on_violators
             else float("nan")
@@ -269,14 +269,18 @@ def run_monte_carlo(
             else MetricStats.nan()
         ),
         audit_rate=MetricStats.from_values([r.audit_rate for r in raw]),
-        false_positive_rate=MetricStats.from_values(
-            [r.false_positive_rate for r in raw]
+        compliant_audit_fraction=MetricStats.from_values(
+            [r.compliant_audit_fraction for r in raw]
         ),
-        detection_rate=(
+        detection_rate_given_audit=(
             MetricStats.from_values(
-                [r.detection_rate for r in raw if not math.isnan(r.detection_rate)]
+                [
+                    r.detection_rate_given_audit
+                    for r in raw
+                    if not math.isnan(r.detection_rate_given_audit)
+                ]
             )
-            if any(not math.isnan(r.detection_rate) for r in raw)
+            if any(not math.isnan(r.detection_rate_given_audit) for r in raw)
             else MetricStats.nan()
         ),
         raw_seeds=[
@@ -289,8 +293,8 @@ def run_monte_carlo(
                 avg_payoff_compliant=r.avg_payoff_compliant,
                 avg_payoff_violator=r.avg_payoff_violator,
                 audit_rate=r.audit_rate,
-                false_positive_rate=r.false_positive_rate,
-                detection_rate=r.detection_rate,
+                compliant_audit_fraction=r.compliant_audit_fraction,
+                detection_rate_given_audit=r.detection_rate_given_audit,
             )
             for s, r in zip(run_seeds, raw)
         ]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -27,3 +27,4 @@ agent_workspace/

		# Other
		.claude
		scripts/