From e4bc03739d1767a9e73b77ecf147ba79073f28bf Mon Sep 17 00:00:00 2001
From: dadachi <maurois@mac.com>
Date: Fri, 22 May 2026 21:08:23 +0900
Subject: [PATCH] feat(repair): wire the bounded self-repair loop (opt-in)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SPEC §5's 5-iteration self-repair loop was documented in CLAUDE.md and
rendered by the report (repairSection) but never implemented — failures
just surfaced and the agent exited. This wires it end-to-end.

- src/repair-loop.ts: runRepairLoop — pure, dependency-injected control
  flow. Targets the highest-priority code-repairable failure (all Layer 1
  leftover-token misses before any Layer 2 build miss), repairs, re-validates
  that platform, records a RepairAttempt, repeats until green or the cap.
  Hard-capped at REPAIR_ITERATION_CAP=5 (CLAUDE.md). Layer 3 (vision) and
  the contract reviewer are surfaced, not auto-repaired — a Layer 3 miss is
  often environmental, not a source bug.
- src/agents/repair.ts: runRepair — a Claude Agent SDK query() pass scoped
  (cwd) to the failing generated project, Read/Edit/Grep/Glob (plus Bash
  for Layer 2 only), bypassPermissions, hermetic settingSources:[], bounded
  maxTurns. Stub path via isStub("repair"); "repair" added to AgentName.
- dispatch: after a failing first judge, if NATIVEAPPTEMPLATE_REPAIR is set
  (on / positive int, clamped to 5), run the loop with real repair +
  per-platform Layer1/Layer2 revalidation, then fold the result back into the
  judge + thread RepairAttempt[] into the report. Off by default; skipped in
  stub mode.
- report: BuildRunReportInput.repairAttempts → RunReport.repairAttempts, so
  the existing repairSection populates.
- tests: 7 new — loop resolves after one pass; gives up at the cap; clamps
  maxIterations to 5; fixes Layer 1 before Layer 2; no-ops on an
  unrepairable Layer 3 miss; report carries/omits repairAttempts.
- docs: SPEC §5 row → Shipped (opt-in); README flag; CLAUDE.md pointer.

The loop logic is unit-tested with injected fakes (no LLM/device). The
real repair-agent path is opt-in and not exercised in CI; an end-to-end
real-failure demo (hackathon stretch) is a follow-up.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md             |   2 +-
 README.md             |   1 +
 docs/SPEC.md          |   2 +-
 src/agents/repair.ts  | 141 ++++++++++++++++++++++++++++++++++
 src/agents/types.ts   |   2 +-
 src/dispatch.ts       |  92 ++++++++++++++++++++++-
 src/repair-loop.ts    | 103 +++++++++++++++++++++++++
 src/report/collect.ts |   6 +-
 tests/smoke.test.ts   | 171 +++++++++++++++++++++++++++++++++++++++++-
 9 files changed, 512 insertions(+), 8 deletions(-)
 create mode 100644 src/agents/repair.ts
 create mode 100644 src/repair-loop.ts
diff --git a/CLAUDE.md b/CLAUDE.md
index d53077e..90b1b39 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -91,7 +91,7 @@ Three clean operations on the substrate (details in `docs/SPEC.md` section 4):
 
 ## Guardrails
 
-- **Self-repair loop hard-capped at 5 iterations** per generated project. On exceed, surface residuals and exit.
+- **Self-repair loop hard-capped at 5 iterations** per generated project, opt-in via `NATIVEAPPTEMPLATE_REPAIR` (targets Layer 1/2 only; Layer 3 + reviewer are surfaced, not auto-repaired). On exceed, surface residuals and exit. Code: `src/repair-loop.ts` + `src/agents/repair.ts`.
 - Known-cryptic failure modes: Jetpack Compose compilation, Hilt DI. Slow down and verify rather than pattern-match on those.
 - **Do not invent tests for the generated code.** The substrate already has tests; use them.
 - **Never modify the substrate repos** — clone them fresh into `./out/<slug>/{rails,ios,android}` before editing.
diff --git a/README.md b/README.md
index f1c1efe..5451c91 100644
--- a/README.md
+++ b/README.md
@@ -139,6 +139,7 @@ The agent will also be available as a Claude Code plugin.
 
 - `NATIVEAPPTEMPLATE_VISUAL=1` — opts the run into Stage 1 visual judging (Layer 3). When set, Layer 2 runs in **build mode** instead of fast mode (full `xcodebuild build` + `./gradlew assembleDebug`), then for each platform the agent installs the app on the booted sim/emulator, captures the home screen, and judges it with Opus 4.7 vision against `DEFAULT_STAGE1_RUBRIC`. Adds 60-180s per platform depending on cold-build time. Requires a sim/emulator booted for each platform you want judged. Off by default — `npm run dev` keeps the existing fast path.
 - `NATIVEAPPTEMPLATE_VISUAL=2` — implies `=1` and additionally runs **Stage 2**: the agent boots the generated Rails app under `mise exec -- bin/dev` (after `bundle install` + `bin/rails db:prepare` + `bin/rails db:seed_fu`), waits for it to listen, then drives the iOS sim and Android emulator through the parameterized queue scenario (Sign Up → email-confirm via `bin/rails runner` → Sign In → drill into auto-seeded sample). Layer 3 then judges the last captured screenshot against `DEFAULT_STAGE2_RUBRIC` (domain content + no substrate-token leak). Adds 2–4 minutes per platform on top of `=1`. Requires both sims/emulators booted and the substrate's `mise` toolchain installed for `bin/dev`.
+- `NATIVEAPPTEMPLATE_REPAIR` — opts into the bounded self-repair loop. Set `on` (or a positive integer N, hard-capped at 5) to enable. When the first validation pass fails on a **code-repairable** layer — Layer 1 leftover substrate tokens or Layer 2 build/compile errors — the agent runs a Claude Agent SDK repair pass scoped to the failing generated project (Read/Edit/Grep/Glob inside `out/<slug>/<platform>/` only, plus Bash for Layer 2), re-validates that platform, and repeats up to the cap. Each attempt is recorded in the validation report's self-repair table. Layer 3 (vision) and contract-reviewer misses are surfaced but not auto-repaired (a Layer 3 miss is usually environmental, not a source bug). Off by default; when the loop can't close the failures the agent still exits non-zero.
 - `NATIVEAPPTEMPLATE_BRIDGE=off` — skip writing to `~/.gradle/gradle.properties`. The agent normally mirrors `NATIVEAPPTEMPLATE_API_*` (HOST/PORT/SCHEME) into renamed-product variants (`<PRODUCT>_API_*`) at run time so the generated Android app picks them up via `gradle.properties` and the iOS sim launch picks them up via `SIMCTL_CHILD_*`. Set this to disable the file write (process.env injection still runs for child-spawn paths).
 - `NATIVEAPPTEMPLATE_BRIDGE_DRY_RUN=1` — log what would be written to `~/.gradle/gradle.properties` instead of writing. Useful before granting the bridge write access to your user-global gradle.
 - `NATIVEAPPTEMPLATE_AGENT_ANTHROPIC_KEY` — dedicated workspace key, see [Security](#security).
diff --git a/docs/SPEC.md b/docs/SPEC.md
index 2bcc83e..20b1144 100644
--- a/docs/SPEC.md
+++ b/docs/SPEC.md
@@ -22,7 +22,7 @@ This document was originally a **pre-hackathon specification** (v1.0). It's pres
 | §5 Vision-guided self-repair, Stage 1 | **Shipped** | `NATIVEAPPTEMPLATE_VISUAL=1` opts in. Layer 2 escalates to build mode (`xcodebuild build` + `./gradlew assembleDebug`); home-screen judged with `DEFAULT_STAGE1_RUBRIC`. |
 | §5 Vision-guided self-repair, Stage 2 | **Shipped** | `NATIVEAPPTEMPLATE_VISUAL=2` opts in. The agent boots Rails under `mise exec -- bin/dev` (after `bundle install` + `db:prepare` + `db:seed_fu`), then drives the parameterized queue scenario (Welcome → Sign Up → email-confirm via `bin/rails runner` → Sign In → drill into auto-seeded sample) on both platforms via `mobile-mcp`. Layer 3 judges the post-walk screenshot against `DEFAULT_STAGE2_RUBRIC` (domain content + no substrate-token leak). |
 | §5 Vision-guided self-repair, Stage 3 | **Not shipped** | Multi-step CRUD (sign-up → CRUD → state transitions → logout) deferred. The Stage 2 walk currently stops at "drill into auto-seeded sample"; full Add/Toggle/Delete steps are a known follow-up. |
-| §5 Self-repair iteration cap | **Not shipped** | The 5-iteration self-repair loop is documented in CLAUDE.md but not yet implemented as a coded retry loop. Failures currently surface and the agent exits. |
+| §5 Self-repair iteration cap | **Shipped (opt-in)** | `NATIVEAPPTEMPLATE_REPAIR` opts in (`on` / a positive integer; hard-capped at 5). On a failing first judge pass the loop patches the highest-priority code-repairable failure (Layer 1 leftover tokens, then Layer 2 build errors) with a Claude Agent SDK repair pass scoped to the failing platform, re-validates, and records each `RepairAttempt` in the report — until green or the cap. Layer 3 (vision) and contract-reviewer misses are surfaced, not auto-repaired (a Layer 3 miss is often environmental, not a source bug). Off by default; unresolved failures still surface and the agent exits non-zero. |
 | §6 Layer 1 — structural (ripgrep + OpenAPI) | **Shipped** | Both ripgrep token scan and the three-way OpenAPI parity reviewer (Phase 1–3, PRs #46–#48) are in production. |
 | §6 Layer 2 Stage 1 (boot, build, launch) | **Shipped** | Default behavior. |
 | §6 Layer 2 Stage 2 (UI-driven scenario) | **Shipped** | Behind `NATIVEAPPTEMPLATE_VISUAL=2`; see §5 Stage 2 row. The original spec mentioned an HTTP-tail watcher for 4xx/5xx; the actual implementation walks the UI directly and lets the scenario `wait_for_text`/`assert_text` catch error states. |
diff --git a/src/agents/repair.ts b/src/agents/repair.ts
new file mode 100644
index 0000000..bf3b90a
--- /dev/null
+++ b/src/agents/repair.ts
@@ -0,0 +1,141 @@
+import { query } from "@anthropic-ai/claude-agent-sdk";
+import { trace } from "../trace.js";
+import { isStub } from "../stub.js";
+import type { DomainSpec, Platform } from "./types.js";
+
+const MODEL = "claude-opus-4-7";
+
+// Which validation layer this repair attempt targets. Layer 1 (leftover
+// substrate tokens) and Layer 2 (build/compile failures) are the
+// code-repairable, cheaply re-checkable layers. Layer 3 (vision) and the
+// contract reviewer are surfaced but not auto-repaired in this loop.
+export type RepairLayer = "layer1" | "layer2";
+
+export type RepairTarget = {
+  platform: Platform;
+  // Absolute path to out/<slug>/<platform> — the repair agent's cwd. It
+  // edits only inside this generated project, never the substrate.
+  outDir: string;
+  layer: RepairLayer;
+  // Failure context handed to the agent: the leftover-token findings
+  // (layer1) or the compiler stderr tail (layer2).
+  detail: string;
+  // Layer 1 only: the substrate tokens that must not remain.
+  forbiddenTokens?: readonly string[];
+};
+
+export type RepairOutcome = {
+  // A short, human-readable summary of what the agent changed, shown in
+  // the report's self-repair table. Whether the fix actually worked is
+  // decided by re-validation, not by this string.
+  action: string;
+};
+
+// One repair pass over one failing platform. Drives the Claude Agent SDK's loop
+// (Read/Edit/Grep/Glob, plus Bash for Layer 2) scoped to the generated project,
+// then returns a summary. The caller re-validates and records resolved/unresolved.
+export async function runRepair(target: RepairTarget, domain: DomainSpec): Promise<RepairOutcome> {
+  if (isStub("repair")) return runStubRepair(target);
+
+  const apiKey = process.env["NATIVEAPPTEMPLATE_AGENT_ANTHROPIC_KEY"] ?? process.env["ANTHROPIC_API_KEY"];
+  if (!apiKey) {
+    return { action: "skipped — no Anthropic API key in env" };
+  }
+
+  trace("repair", `${target.platform}/${target.layer}: invoking repair agent in ${target.outDir}`);
+
+  // Layer 2 may need to re-run the compiler to confirm; Layer 1 is a pure
+  // source edit, so it gets no shell.
+  const allowedTools =
+    target.layer === "layer2"
+      ? ["Read", "Edit", "Grep", "Glob", "Bash"]
+      : ["Read", "Edit", "Grep", "Glob"];
+
+  const response = query({
+    prompt: buildPrompt(target, domain),
+    options: {
+      cwd: target.outDir,
+      model: MODEL,
+      systemPrompt: SYSTEM_PROMPT,
+      allowedTools,
+      permissionMode: "bypassPermissions",
+      allowDangerouslySkipPermissions: true,
+      maxTurns: target.layer === "layer2" ? 40 : 20,
+      // Hermetic: don't inherit the developer's ~/.claude settings, project
+      // CLAUDE.md, or custom agents — the repair agent runs only with the
+      // system prompt below.
+      settingSources: [],
+      env: { ...stringEnv(process.env), ANTHROPIC_API_KEY: apiKey },
+    },
+  });
+
+  let action = `attempted ${target.layer} fix`;
+  let turns = 0;
+  for await (const message of response) {
+    if (message.type === "result") {
+      turns = message.num_turns;
+      if (message.subtype === "success" && !message.is_error) {
+        action = firstLine(message.result) || action;
+      } else {
+        action = `repair agent did not converge (${message.subtype})`;
+      }
+    }
+  }
+
+  trace("repair", `${target.platform}/${target.layer}: ${turns} turns — ${action}`);
+  return { action };
+}
+
+const SYSTEM_PROMPT = `You are a repair agent for a generated three-platform SaaS project (Rails 8.1 API, SwiftUI iOS, Jetpack Compose Android). A generated project failed one validation layer; your job is to make the smallest correct edit that fixes it. You operate ONLY inside the current working directory (one generated platform project) — never touch any other path.
+
+Two failure classes:
+- Layer 1 (structural): leftover substrate tokens (e.g. Shop, Shopkeeper, ItemTag, NativeAppTemplate and derived forms) survived the rename. Replace each remaining occurrence with its renamed equivalent, consistently, preserving case style (PascalCase→PascalCase, snake_case→snake_case). Do not rename anything that is NOT a substrate token. Do not introduce a token that collides with a language/framework reserved word.
+- Layer 2 (runtime): the project failed to build/compile. Read the compiler error, find the root cause, and fix it with a minimal, idiomatic change.
+
+Known-cryptic failure modes — slow down and verify rather than pattern-match:
+- Jetpack Compose compilation errors (often a missing import, a @Composable context mismatch, or a type-inference failure).
+- Hilt dependency-injection errors (missing @Inject / @Provides / module binding, or a scope mismatch).
+
+Make targeted edits; do not refactor unrelated code, add dependencies, or rewrite files wholesale. When done, reply with ONE concise sentence describing exactly what you changed.`;
+
+function buildPrompt(target: RepairTarget, domain: DomainSpec): string {
+  const renamePlan = domain.renamePlan.map((r) => `${r.from} → ${r.to}`).join(", ");
+  if (target.layer === "layer1") {
+    const forbidden = (target.forbiddenTokens ?? []).join(", ");
+    return `This generated ${target.platform} project still contains leftover substrate tokens that must not appear. Forbidden tokens: ${forbidden || "(see findings)"}. The intended renames are: ${renamePlan}.
+
+Leftover findings (token · file:line · excerpt):
+${target.detail}
+
+Replace every leftover occurrence with its renamed equivalent, then confirm none remain.`;
+  }
+  return `This generated ${target.platform} project failed to build. The intended domain renames were: ${renamePlan}.
+
+Compiler error (stderr tail):
+${target.detail}
+
+Diagnose and fix the root cause with a minimal edit. If you have a shell available, you may re-run the build to confirm, but keep it bounded.`;
+}
+
+// process.env is Record<string, string | undefined>; the SDK env option
+// wants string values only. Drop undefined entries.
+function stringEnv(env: NodeJS.ProcessEnv): Record<string, string> {
+  const out: Record<string, string> = {};
+  for (const [k, v] of Object.entries(env)) {
+    if (typeof v === "string") out[k] = v;
+  }
+  return out;
+}
+
+function firstLine(text: string): string {
+  const line = text.trim().split("\n")[0] ?? "";
+  return line.length > 200 ? `${line.slice(0, 197)}…` : line;
+}
+
+const delay = (ms: number): Promise<void> => new Promise((r) => { setTimeout(r, ms); });
+
+async function runStubRepair(target: RepairTarget): Promise<RepairOutcome> {
+  trace("repair", `(stub mode) ${target.platform}/${target.layer}`);
+  await delay(50);
+  return { action: `stub repair: no-op for ${target.platform} ${target.layer}` };
+}
diff --git a/src/agents/types.ts b/src/agents/types.ts
index 85fa576..bf2174e 100644
--- a/src/agents/types.ts
+++ b/src/agents/types.ts
@@ -28,7 +28,7 @@ export type RenamePair = {
 
 export type Platform = "rails" | "ios" | "android";
 
-export type AgentName = "planner" | Platform | "reviewer" | "judge" | "dispatch";
+export type AgentName = "planner" | Platform | "reviewer" | "judge" | "dispatch" | "repair";
 
 export type WorkerResult = {
   platform: Platform;
diff --git a/src/dispatch.ts b/src/dispatch.ts
index 5b4dd72..03b5e6f 100644
--- a/src/dispatch.ts
+++ b/src/dispatch.ts
@@ -11,8 +11,12 @@ import { isStub } from "./stub.js";
 import { trace } from "./trace.js";
 import { buildRunReport, writeReport, type ReportFormat, type ReportPaths } from "./report/collect.js";
 import { readPackageVersion } from "./version.js";
-import type { RunReport } from "./report/model.js";
-import type { JudgeResult } from "./agents/types.js";
+import { runRepairLoop, REPAIR_ITERATION_CAP, type RepairLoopDeps } from "./repair-loop.js";
+import { runRepair } from "./agents/repair.js";
+import { runLayer1 } from "./validation/layer1.js";
+import { runLayer2, type Layer2Mode } from "./validation/layer2.js";
+import type { RepairAttempt, RunReport } from "./report/model.js";
+import type { JudgeResult, Platform, PlatformDetail, WorkerResult } from "./agents/types.js";
 
 export type DispatchReportOptions = {
   enabled?: boolean;
@@ -83,6 +87,10 @@ export async function dispatch(spec: string, options: DispatchOptions = {}): Pro
   //       already launched after Stage 1. Off by default.
   const visualLevelRaw = process.env['NATIVEAPPTEMPLATE_VISUAL'] ?? "";
   const visualLevel = visualLevelRaw === "2" ? 2 : visualLevelRaw === "1" ? 1 : 0;
+  // Visual levels force build mode so Stage 1 has an artifact to launch;
+  // level 0 stays in the cheaper fast mode. The repair loop re-validates
+  // Layer 2 in the same mode the judge used.
+  const layer2Mode: Layer2Mode = visualLevel >= 1 ? "build" : "fast";
   const visual: VisualJudgeConfig | undefined = visualLevel >= 1
     ? {
         iosDir: resolve(process.cwd(), ios.outDir),
@@ -126,7 +134,7 @@ export async function dispatch(spec: string, options: DispatchOptions = {}): Pro
       ios,
       android,
       reviewer,
-      ...(visualLevel >= 1 ? { layer2Mode: "build" as const } : {}),
+      layer2Mode,
       ...(visual ? { visual } : {}),
     });
   } finally {
@@ -137,6 +145,66 @@ export async function dispatch(spec: string, options: DispatchOptions = {}): Pro
     }
   }
 
+  // Self-repair loop (opt-in via NATIVEAPPTEMPLATE_REPAIR). When the first
+  // judge pass fails on a code-repairable layer (Layer 1 leftover tokens or
+  // Layer 2 build errors), iterate: patch the failing platform with the
+  // repair agent, re-validate, record the attempt — bounded by the cap. Off
+  // by default; skipped in stub mode (no real judge/agent to drive).
+  let repairAttempts: readonly RepairAttempt[] = [];
+  const repairMax = parseRepairMax(process.env['NATIVEAPPTEMPLATE_REPAIR']);
+  if (repairMax > 0 && !judge.overallPass && judge.platforms && judge.platforms.length > 0 && !isStub("judge")) {
+    const workers: Record<Platform, WorkerResult> = { rails, ios, android };
+    const deps: RepairLoopDeps = {
+      repair: async (platform, layer, detail) => {
+        const w = workers[platform];
+        const outDir = resolve(process.cwd(), w.outDir);
+        const detailStr = layer === "layer1"
+          ? formatFindings(detail.layer1.findings)
+          : detail.layer2.stderrTail ?? "(no stderr captured)";
+        return runRepair(
+          {
+            platform,
+            outDir,
+            layer,
+            detail: detailStr,
+            ...(layer === "layer1" ? { forbiddenTokens: w.renamedFrom } : {}),
+          },
+          domain,
+        );
+      },
+      revalidate: async (platform) => {
+        const w = workers[platform];
+        const outDir = resolve(process.cwd(), w.outDir);
+        const [layer1, layer2] = await Promise.all([
+          runLayer1({ projectDir: outDir, forbiddenTokens: w.renamedFrom }),
+          runLayer2({ platform, outDir, mode: layer2Mode }),
+        ]);
+        return {
+          layer1: { pass: layer1.pass, findings: layer1.findings },
+          layer2: {
+            pass: layer2.pass,
+            command: layer2.command,
+            mode: layer2Mode,
+            exitCode: layer2.exitCode,
+            durationMs: layer2.durationMs,
+            ...(layer2.stderrTail !== undefined ? { stderrTail: layer2.stderrTail } : {}),
+          },
+        };
+      },
+    };
+    trace("dispatch", `self-repair: enabled (cap ${repairMax}); first pass failed — entering loop`);
+    const loop = await runRepairLoop({
+      platforms: judge.platforms,
+      reviewerPass: reviewer.contractParity === "pass",
+      maxIterations: repairMax,
+      deps,
+    });
+    repairAttempts = loop.attempts;
+    judge = { ...judge, overallPass: loop.overallPass, summary: loop.summary, platforms: loop.platforms };
+    const resolved = loop.attempts.filter((a) => a.resolved).length;
+    trace("dispatch", `self-repair: ${loop.attempts.length} attempt(s), ${resolved} resolved — overall now ${loop.overallPass ? "PASS" : "FAIL"}`);
+  }
+
   const report = buildRunReport({
     spec,
     domain,
@@ -147,6 +215,7 @@ export async function dispatch(spec: string, options: DispatchOptions = {}): Pro
     visualLevel: visualLevel as 0 | 1 | 2,
     startedAt,
     finishedAt: Date.now(),
+    repairAttempts,
   });
 
   // Default off in stub mode so the test suite never writes into ./out.
@@ -166,3 +235,20 @@ export async function dispatch(spec: string, options: DispatchOptions = {}): Pro
 
   return { ...judge, report, reportPaths };
 }
+
+// NATIVEAPPTEMPLATE_REPAIR control: unset / "0" / "off" / "false" → disabled;
+// "on" / "true" → run up to the cap; a positive integer N → up to min(N, cap).
+function parseRepairMax(raw: string | undefined): number {
+  if (!raw) return 0;
+  const lowered = raw.trim().toLowerCase();
+  if (lowered === "" || lowered === "0" || lowered === "off" || lowered === "false") return 0;
+  if (lowered === "on" || lowered === "true") return REPAIR_ITERATION_CAP;
+  const n = Number.parseInt(lowered, 10);
+  if (Number.isFinite(n) && n > 0) return Math.min(n, REPAIR_ITERATION_CAP);
+  return 0;
+}
+
+function formatFindings(findings: PlatformDetail["layer1"]["findings"]): string {
+  if (findings.length === 0) return "(no findings recorded)";
+  return findings.map((f) => `${f.token} · ${f.file}:${f.line} · ${f.text}`).join("\n");
+}
diff --git a/src/repair-loop.ts b/src/repair-loop.ts
new file mode 100644
index 0000000..2a1acdb
--- /dev/null
+++ b/src/repair-loop.ts
@@ -0,0 +1,103 @@
+import type { Platform, PlatformDetail } from "./agents/types.js";
+import type { RepairAttempt } from "./report/model.js";
+import type { RepairLayer } from "./agents/repair.js";
+
+// Re-validation of a single platform after a repair pass: a fresh Layer 1
+// (token scan) + Layer 2 (build) result. Layer 3 is intentionally not
+// re-run here — it's not code-repairable in this loop (see runRepairLoop).
+export type RevalidateResult = Pick<PlatformDetail, "layer1" | "layer2">;
+
+export type RepairLoopDeps = {
+  // Make one repair pass over a failing platform/layer; returns a summary
+  // of what changed. Whether it worked is decided by revalidate, not here.
+  repair: (platform: Platform, layer: RepairLayer, detail: PlatformDetail) => Promise<{ action: string }>;
+  // Re-run Layer 1 + Layer 2 for one platform.
+  revalidate: (platform: Platform) => Promise<RevalidateResult>;
+};
+
+export type RepairLoopInput = {
+  platforms: readonly PlatformDetail[];
+  reviewerPass: boolean;
+  maxIterations: number;
+  deps: RepairLoopDeps;
+};
+
+export type RepairLoopResult = {
+  platforms: PlatformDetail[];
+  attempts: RepairAttempt[];
+  overallPass: boolean;
+  summary: string;
+};
+
+type TargetRef = { platform: Platform; layer: RepairLayer };
+
+// The CLAUDE.md hard cap: never iterate more than this regardless of the
+// requested maxIterations.
+export const REPAIR_ITERATION_CAP = 5;
+
+// Bounded self-repair: while a code-repairable layer is failing, repair the
+// highest-priority failure, re-validate that platform, and record the
+// attempt — until everything passes or the iteration cap is hit. Pure
+// control flow: all I/O (the repair agent, the validators) is injected via
+// deps, so this is unit-testable without the LLM or a device.
+//
+// Scope: Layer 1 (leftover tokens) then Layer 2 (build) are the
+// code-repairable, cheaply re-checkable layers. Layer 3 (vision) and the
+// contract reviewer are surfaced but not auto-repaired — a Layer 3 miss is
+// often environmental (e.g. a first-launch system dialog), not a source bug.
+export async function runRepairLoop(input: RepairLoopInput): Promise<RepairLoopResult> {
+  const cap = Math.min(input.maxIterations, REPAIR_ITERATION_CAP);
+  const platforms: PlatformDetail[] = input.platforms.map((p) => ({ ...p }));
+  const attempts: RepairAttempt[] = [];
+
+  for (let iteration = 1; iteration <= cap; iteration++) {
+    const target = nextTarget(platforms);
+    if (!target) break; // no code-repairable failure remains
+
+    const detail = platforms.find((p) => p.platform === target.platform)!;
+    const { action } = await input.deps.repair(target.platform, target.layer, detail);
+
+    const revalidated = await input.deps.revalidate(target.platform);
+    const idx = platforms.findIndex((p) => p.platform === target.platform);
+    platforms[idx] = { ...platforms[idx]!, layer1: revalidated.layer1, layer2: revalidated.layer2 };
+
+    const resolved = target.layer === "layer1" ? revalidated.layer1.pass : revalidated.layer2.pass;
+    attempts.push({ iteration, failingLayer: target.layer, platform: target.platform, action, resolved });
+
+    if (computeOverall(platforms, input.reviewerPass)) break;
+  }
+
+  return {
+    platforms,
+    attempts,
+    overallPass: computeOverall(platforms, input.reviewerPass),
+    summary: summarize(platforms, input.reviewerPass),
+  };
+}
+
+// Highest-priority code-repairable failure: all Layer 1 misses before any
+// Layer 2 miss (leftover tokens routinely cause the build error, so fixing
+// structure first avoids chasing a downstream symptom).
+function nextTarget(platforms: readonly PlatformDetail[]): TargetRef | undefined {
+  for (const p of platforms) if (!p.layer1.pass) return { platform: p.platform, layer: "layer1" };
+  for (const p of platforms) if (!p.layer2.pass) return { platform: p.platform, layer: "layer2" };
+  return undefined;
+}
+
+function computeOverall(platforms: readonly PlatformDetail[], reviewerPass: boolean): boolean {
+  const layer1And2 = platforms.every((p) => p.layer1.pass && p.layer2.pass);
+  const layer3 = platforms.every((p) => p.layer3 === undefined || p.layer3.pass);
+  return layer1And2 && layer3 && reviewerPass;
+}
+
+// Mirrors the judge's one-line summary so the post-repair report reads
+// identically to a first-pass report.
+function summarize(platforms: readonly PlatformDetail[], reviewerPass: boolean): string {
+  const total = platforms.length;
+  const l1 = platforms.filter((p) => p.layer1.pass).length;
+  const l2 = platforms.filter((p) => p.layer2.pass).length;
+  const l3Plats = platforms.filter((p) => p.layer3 !== undefined);
+  const l3 = l3Plats.filter((p) => p.layer3!.pass).length;
+  const l3Summary = l3Plats.length > 0 ? `Layer 3 ${l3}/${l3Plats.length} pass` : "Layer 3 skipped";
+  return `Layer 1 ${l1}/${total} pass · Layer 2 ${l2}/${total} pass · ${l3Summary} · reviewer ${reviewerPass ? "PASS" : "FAIL"}`;
+}
diff --git a/src/report/collect.ts b/src/report/collect.ts
index 5a2e6d1..ab05f00 100644
--- a/src/report/collect.ts
+++ b/src/report/collect.ts
@@ -2,7 +2,7 @@ import { copyFile, mkdir, readFile, writeFile } from "node:fs/promises";
 import { basename, isAbsolute, join, resolve } from "node:path";
 import type { DomainSpec, JudgeResult, ReviewerResult } from "../agents/types.js";
 import { renderReport } from "./render.js";
-import type { AssetMap, RunReport } from "./model.js";
+import type { AssetMap, RepairAttempt, RunReport } from "./model.js";
 
 export type ReportFormat = "html" | "json" | "both";
 
@@ -16,6 +16,7 @@ export type BuildRunReportInput = {
   visualLevel: 0 | 1 | 2;
   startedAt: number;
   finishedAt: number;
+  repairAttempts?: readonly RepairAttempt[];
 };
 
 // Pure assembly: fold the run's pieces into the single RunReport
@@ -53,6 +54,9 @@ export function buildRunReport(input: BuildRunReportInput): RunReport {
         ...(e.states !== undefined ? { states: e.states } : {}),
       })),
     },
+    ...(input.repairAttempts && input.repairAttempts.length > 0
+      ? { repairAttempts: input.repairAttempts }
+      : {}),
   };
 }
 
diff --git a/tests/smoke.test.ts b/tests/smoke.test.ts
index 756815e..09589b2 100644
--- a/tests/smoke.test.ts
+++ b/tests/smoke.test.ts
@@ -6,7 +6,8 @@ import { runReviewer } from "../src/agents/reviewer.js";
 import { canonicalizeEndpoint, diffContracts } from "../src/agents/contract-extract.js";
 import { renderReport } from "../src/report/render.js";
 import { buildRunReport, writeReport, collectScreenshotPaths } from "../src/report/collect.js";
-import type { DomainSpec, JudgeResult, ReviewerResult } from "../src/agents/types.js";
+import type { DomainSpec, JudgeResult, ReviewerResult, Platform, PlatformDetail } from "../src/agents/types.js";
+import { runRepairLoop, REPAIR_ITERATION_CAP, type RepairLoopDeps, type RevalidateResult } from "../src/repair-loop.js";
 import { mkdtempSync, writeFileSync, readFileSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
@@ -1323,3 +1324,171 @@ test("parseArgs ignores an invalid --report-format value", async () => {
   assert.equal(parsed.spec, "spec");
   assert.equal(parsed.report.format, undefined);
 });
+
+// --- self-repair loop (src/repair-loop.ts) ---
+
+function platDetail(platform: Platform, l1: boolean, l2: boolean, l3?: boolean): PlatformDetail {
+  return {
+    platform,
+    layer1: { pass: l1, findings: l1 ? [] : [{ token: "Shop", file: "X.kt", line: 1, text: "class Shop" }] },
+    layer2: {
+      pass: l2,
+      command: "build",
+      mode: "build",
+      exitCode: l2 ? 0 : 1,
+      durationMs: 10,
+      ...(l2 ? {} : { stderrTail: "Unresolved reference: Shop" }),
+    },
+    ...(l3 !== undefined ? { layer3: { pass: l3 } } : {}),
+  };
+}
+
+function passLayers(platform: Platform): RevalidateResult {
+  return platDetail(platform, true, true);
+}
+
+test("runRepairLoop resolves a Layer 2 failure after one repair pass", async () => {
+  const repaired: string[] = [];
+  const deps: RepairLoopDeps = {
+    repair: async (platform, layer) => {
+      repaired.push(`${platform}/${layer}`);
+      return { action: `patched ${platform}` };
+    },
+    revalidate: async (platform) => passLayers(platform),
+  };
+  const result = await runRepairLoop({
+    platforms: [platDetail("rails", true, false)],
+    reviewerPass: true,
+    maxIterations: 5,
+    deps,
+  });
+  assert.equal(result.attempts.length, 1);
+  assert.deepEqual(result.attempts[0], {
+    iteration: 1,
+    failingLayer: "layer2",
+    platform: "rails",
+    action: "patched rails",
+    resolved: true,
+  });
+  assert.equal(result.overallPass, true);
+  assert.match(result.summary, /Layer 2 1\/1 pass/);
+  assert.deepEqual(repaired, ["rails/layer2"]);
+});
+
+test("runRepairLoop gives up after the cap when repair never resolves", async () => {
+  let repairCalls = 0;
+  const deps: RepairLoopDeps = {
+    repair: async () => {
+      repairCalls += 1;
+      return { action: "tried" };
+    },
+    // Never fixes anything — layer1 stays failing.
+    revalidate: async (platform) => platDetail(platform, false, true),
+  };
+  const result = await runRepairLoop({
+    platforms: [platDetail("android", false, true)],
+    reviewerPass: true,
+    maxIterations: 5,
+    deps,
+  });
+  assert.equal(result.attempts.length, 5);
+  assert.equal(repairCalls, 5);
+  assert.ok(result.attempts.every((a) => a.failingLayer === "layer1" && a.resolved === false));
+  assert.equal(result.overallPass, false);
+});
+
+test("runRepairLoop clamps maxIterations to the CLAUDE.md cap of 5", async () => {
+  const deps: RepairLoopDeps = {
+    repair: async () => ({ action: "x" }),
+    revalidate: async (platform) => platDetail(platform, false, true),
+  };
+  const result = await runRepairLoop({
+    platforms: [platDetail("ios", false, true)],
+    reviewerPass: true,
+    maxIterations: 99,
+    deps,
+  });
+  assert.equal(result.attempts.length, REPAIR_ITERATION_CAP);
+});
+
+test("runRepairLoop fixes Layer 1 before Layer 2 on a platform failing both", async () => {
+  let call = 0;
+  const deps: RepairLoopDeps = {
+    repair: async (_platform, _layer, detail) => ({ action: `saw ${detail.platform}` }),
+    revalidate: async (platform) => {
+      call += 1;
+      // First revalidate: layer1 now clean, layer2 still broken.
+      // Second revalidate: both clean.
+      return call === 1 ? platDetail(platform, true, false) : platDetail(platform, true, true);
+    },
+  };
+  const result = await runRepairLoop({
+    platforms: [platDetail("android", false, false)],
+    reviewerPass: true,
+    maxIterations: 5,
+    deps,
+  });
+  assert.equal(result.attempts.length, 2);
+  assert.equal(result.attempts[0]?.failingLayer, "layer1");
+  assert.equal(result.attempts[1]?.failingLayer, "layer2");
+  assert.equal(result.overallPass, true);
+});
+
+test("runRepairLoop with a Layer 3 failure it can't repair surfaces FAIL and makes no attempts", async () => {
+  let repairCalls = 0;
+  const deps: RepairLoopDeps = {
+    repair: async () => {
+      repairCalls += 1;
+      return { action: "should not run" };
+    },
+    revalidate: async (platform) => passLayers(platform),
+  };
+  // Layers 1 + 2 pass; only Layer 3 fails — not code-repairable here.
+  const result = await runRepairLoop({
+    platforms: [platDetail("rails", true, true), platDetail("ios", true, true, false)],
+    reviewerPass: true,
+    maxIterations: 5,
+    deps,
+  });
+  assert.equal(repairCalls, 0);
+  assert.equal(result.attempts.length, 0);
+  assert.equal(result.overallPass, false);
+});
+
+test("buildRunReport carries repairAttempts and renderReport shows the self-repair section", () => {
+  const report = buildRunReport({
+    spec: "a vet clinic queue",
+    domain: reportDomain,
+    judge: mixedJudge(),
+    reviewer: failReviewer,
+    agentVersion: "9.9.9",
+    judgeModel: "claude-opus-4-7",
+    visualLevel: 1,
+    startedAt: 1000,
+    finishedAt: 4000,
+    repairAttempts: [
+      { iteration: 1, failingLayer: "layer2", platform: "android", action: "added missing Hilt @Provides", resolved: true },
+    ],
+  });
+  assert.equal(report.repairAttempts?.length, 1);
+  const html = renderReport(report);
+  assert.ok(html.includes("Self-repair"), "repair section heading present");
+  assert.ok(html.includes("added missing Hilt @Provides"), "repair action rendered");
+});
+
+test("buildRunReport omits repairAttempts when none were made", () => {
+  const report = buildRunReport({
+    spec: "x",
+    domain: reportDomain,
+    judge: mixedJudge(),
+    reviewer: failReviewer,
+    agentVersion: "1.0.0",
+    judgeModel: "claude-opus-4-7",
+    visualLevel: 0,
+    startedAt: 0,
+    finishedAt: 1,
+    repairAttempts: [],
+  });
+  assert.equal(report.repairAttempts, undefined);
+  assert.ok(!renderReport(report).includes("Self-repair"));
+});