nativeapptemplate · dadachi · May 22, 2026 · May 22, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -91,7 +91,7 @@ Three clean operations on the substrate (details in `docs/SPEC.md` section 4):
 
 ## Guardrails
 
-- **Self-repair loop hard-capped at 5 iterations** per generated project. On exceed, surface residuals and exit.
+- **Self-repair loop hard-capped at 5 iterations** per generated project, opt-in via `NATIVEAPPTEMPLATE_REPAIR` (targets Layer 1/2 only; Layer 3 + reviewer are surfaced, not auto-repaired). On exceed, surface residuals and exit. Code: `src/repair-loop.ts` + `src/agents/repair.ts`.
 - Known-cryptic failure modes: Jetpack Compose compilation, Hilt DI. Slow down and verify rather than pattern-match on those.
 - **Do not invent tests for the generated code.** The substrate already has tests; use them.
 - **Never modify the substrate repos** — clone them fresh into `./out/<slug>/{rails,ios,android}` before editing.

diff --git a/README.md b/README.md
@@ -139,6 +139,7 @@ The agent will also be available as a Claude Code plugin.
 
 - `NATIVEAPPTEMPLATE_VISUAL=1` — opts the run into Stage 1 visual judging (Layer 3). When set, Layer 2 runs in **build mode** instead of fast mode (full `xcodebuild build` + `./gradlew assembleDebug`), then for each platform the agent installs the app on the booted sim/emulator, captures the home screen, and judges it with Opus 4.7 vision against `DEFAULT_STAGE1_RUBRIC`. Adds 60-180s per platform depending on cold-build time. Requires a sim/emulator booted for each platform you want judged. Off by default — `npm run dev` keeps the existing fast path.
 - `NATIVEAPPTEMPLATE_VISUAL=2` — implies `=1` and additionally runs **Stage 2**: the agent boots the generated Rails app under `mise exec -- bin/dev` (after `bundle install` + `bin/rails db:prepare` + `bin/rails db:seed_fu`), waits for it to listen, then drives the iOS sim and Android emulator through the parameterized queue scenario (Sign Up → email-confirm via `bin/rails runner` → Sign In → drill into auto-seeded sample). Layer 3 then judges the last captured screenshot against `DEFAULT_STAGE2_RUBRIC` (domain content + no substrate-token leak). Adds 2–4 minutes per platform on top of `=1`. Requires both sims/emulators booted and the substrate's `mise` toolchain installed for `bin/dev`.
+- `NATIVEAPPTEMPLATE_REPAIR` — opts into the bounded self-repair loop. Set `on` / `true` (or a positive integer N, hard-capped at 5) to enable. When the first validation pass fails on a **code-repairable** layer — Layer 1 leftover substrate tokens or Layer 2 build/compile errors — the agent runs a Claude Agent SDK repair pass with the failing generated project (`out/<slug>/<platform>/`) as its working directory (Read/Edit/Grep/Glob tools, plus Bash for Layer 2 only), re-validates that platform, and repeats up to the cap. Each attempt is recorded in the validation report's self-repair table. Layer 3 (vision) and contract-reviewer misses are surfaced but not auto-repaired (a Layer 3 miss is often environmental, not a source bug). Off by default; when the loop can't close the failures the agent still exits non-zero.
 - `NATIVEAPPTEMPLATE_BRIDGE=off` — skip writing to `~/.gradle/gradle.properties`. The agent normally mirrors `NATIVEAPPTEMPLATE_API_*` (HOST/PORT/SCHEME) into renamed-product variants (`<PRODUCT>_API_*`) at run time so the generated Android app picks them up via `gradle.properties` and the iOS sim launch picks them up via `SIMCTL_CHILD_*`. Set this to disable the file write (process.env injection still runs for child-spawn paths).
 - `NATIVEAPPTEMPLATE_BRIDGE_DRY_RUN=1` — log what would be written to `~/.gradle/gradle.properties` instead of writing. Useful before granting the bridge write access to your user-global gradle.
 - `NATIVEAPPTEMPLATE_AGENT_ANTHROPIC_KEY` — dedicated workspace key, see [Security](#security).

diff --git a/docs/SPEC.md b/docs/SPEC.md
@@ -22,7 +22,7 @@ This document was originally a **pre-hackathon specification** (v1.0). It's pres
 | §5 Vision-guided self-repair, Stage 1 | **Shipped** | `NATIVEAPPTEMPLATE_VISUAL=1` opts in. Layer 2 escalates to build mode (`xcodebuild build` + `./gradlew assembleDebug`); home-screen judged with `DEFAULT_STAGE1_RUBRIC`. |
 | §5 Vision-guided self-repair, Stage 2 | **Shipped** | `NATIVEAPPTEMPLATE_VISUAL=2` opts in. The agent boots Rails under `mise exec -- bin/dev` (after `bundle install` + `db:prepare` + `db:seed_fu`), then drives the parameterized queue scenario (Welcome → Sign Up → email-confirm via `bin/rails runner` → Sign In → drill into auto-seeded sample) on both platforms via `mobile-mcp`. Layer 3 judges the post-walk screenshot against `DEFAULT_STAGE2_RUBRIC` (domain content + no substrate-token leak). |
 | §5 Vision-guided self-repair, Stage 3 | **Not shipped** | Multi-step CRUD (sign-up → CRUD → state transitions → logout) deferred. The Stage 2 walk currently stops at "drill into auto-seeded sample"; full Add/Toggle/Delete steps are a known follow-up. |
-| §5 Self-repair iteration cap | **Not shipped** | The 5-iteration self-repair loop is documented in CLAUDE.md but not yet implemented as a coded retry loop. Failures currently surface and the agent exits. |
+| §5 Self-repair iteration cap | **Shipped (opt-in)** | `NATIVEAPPTEMPLATE_REPAIR` opts in (`on` / a positive integer; hard-capped at 5). On a failing first judge pass the loop patches the highest-priority code-repairable failure (Layer 1 leftover tokens, then Layer 2 build errors) with a Claude Agent SDK repair pass scoped to the failing platform, re-validates, and records each `RepairAttempt` in the report — until green or the cap. Layer 3 (vision) and contract-reviewer misses are surfaced, not auto-repaired (a Layer 3 miss is often environmental, not a source bug). Off by default; unresolved failures still surface and the agent exits non-zero. |
 | §6 Layer 1 — structural (ripgrep + OpenAPI) | **Shipped** | Both ripgrep token scan and the three-way OpenAPI parity reviewer (Phase 1–3, PRs #46–#48) are in production. |
 | §6 Layer 2 Stage 1 (boot, build, launch) | **Shipped** | Default behavior. |
 | §6 Layer 2 Stage 2 (UI-driven scenario) | **Shipped** | Behind `NATIVEAPPTEMPLATE_VISUAL=2`; see §5 Stage 2 row. The original spec mentioned an HTTP-tail watcher for 4xx/5xx; the actual implementation walks the UI directly and lets the scenario `wait_for_text`/`assert_text` catch error states. |

diff --git a/src/agents/repair.ts b/src/agents/repair.ts
@@ -0,0 +1,141 @@
+import { query } from "@anthropic-ai/claude-agent-sdk";
+import { trace } from "../trace.js";
+import { isStub } from "../stub.js";
+import type { DomainSpec, Platform } from "./types.js";
+
+// Model id passed as `model` to the Agent SDK query() call for every repair pass.
+const MODEL = "claude-opus-4-7";
+
+/**
+ * Which validation layer this repair attempt targets. Layer 1 (leftover
+ * substrate tokens) and Layer 2 (build/compile failures) are the
+ * code-repairable, cheaply re-checkable layers. Layer 3 (vision) and the
+ * contract reviewer are surfaced but not auto-repaired in this loop.
+ */
+export type RepairLayer = "layer1" | "layer2";
+
+/** Everything one repair pass needs to know about the failure it is fixing. */
+export type RepairTarget = {
+  /** Platform whose generated project failed validation. */
+  platform: Platform;
+  /**
+   * Absolute path to out/<slug>/<platform> — the repair agent's cwd. It
+   * edits only inside this generated project, never the substrate.
+   */
+  outDir: string;
+  /** Validation layer being repaired. */
+  layer: RepairLayer;
+  /**
+   * Failure context handed to the agent: the leftover-token findings
+   * (layer1) or the compiler stderr tail (layer2).
+   */
+  detail: string;
+  /** Layer 1 only: the substrate tokens that must not remain. */
+  forbiddenTokens?: readonly string[];
+};
+
+/** Result of a single repair pass, as reported by the repair agent itself. */
+export type RepairOutcome = {
+  /**
+   * A short, human-readable summary of what the agent changed, shown in
+   * the report's self-repair table. Whether the fix actually worked is
+   * decided by re-validation, not by this string.
+   */
+  action: string;
+};
+
+/**
+ * Runs one repair pass over a single failing platform project.
+ *
+ * Drives the Claude Agent SDK's agentic loop with `target.outDir` as the
+ * working directory, then returns a one-line summary of what the agent says
+ * it changed. The caller re-validates and records resolved/unresolved; this
+ * function never decides whether the fix worked.
+ *
+ * @param target - The failing platform, its generated project directory, the
+ *   layer being repaired, and the failure context handed to the agent.
+ * @param domain - Domain spec; its `renamePlan` is quoted in the prompt.
+ * @returns A `RepairOutcome` whose `action` is the first line of the agent's
+ *   final message, a "did not converge" note when the SDK reports a
+ *   non-success result, or a "skipped" note when no API key is configured.
+ */
+export async function runRepair(target: RepairTarget, domain: DomainSpec): Promise<RepairOutcome> {
+  if (isStub("repair")) return runStubRepair(target);
+
+  // Prefer the dedicated workspace key and fall back to the generic one. `||`
+  // (not `??`) so a key that is set but blank — e.g. an empty `KEY=` line in a
+  // dotenv file — does not mask a valid ANTHROPIC_API_KEY.
+  const apiKey =
+    process.env["NATIVEAPPTEMPLATE_AGENT_ANTHROPIC_KEY"]?.trim() ||
+    process.env["ANTHROPIC_API_KEY"]?.trim();
+  if (!apiKey) {
+    return { action: "skipped — no Anthropic API key in env" };
+  }
+
+  trace("repair", `${target.platform}/${target.layer}: invoking repair agent in ${target.outDir}`);
+
+  // Layer 2 may need to re-run the compiler to confirm; Layer 1 is a pure
+  // source edit, so it gets no shell.
+  const allowedTools =
+    target.layer === "layer2"
+      ? ["Read", "Edit", "Grep", "Glob", "Bash"]
+      : ["Read", "Edit", "Grep", "Glob"];
+
+  const response = query({
+    prompt: buildPrompt(target, domain),
+    options: {
+      cwd: target.outDir,
+      model: MODEL,
+      systemPrompt: SYSTEM_PROMPT,
+      allowedTools,
+      permissionMode: "bypassPermissions",
+      allowDangerouslySkipPermissions: true,
+      // Build failures get a larger turn budget than token clean-ups.
+      maxTurns: target.layer === "layer2" ? 40 : 20,
+      // Hermetic: don't inherit the developer's ~/.claude settings, project
+      // CLAUDE.md, or custom agents — the repair agent runs only with the
+      // system prompt below.
+      settingSources: [],
+      env: { ...stringEnv(process.env), ANTHROPIC_API_KEY: apiKey },
+    },
+  });
+
+  // Fallback summary, kept when the stream ends without a usable result text.
+  let action = `attempted ${target.layer} fix`;
+  let turns = 0;
+  for await (const message of response) {
+    // Only "result" messages carry the turn count and the final summary.
+    if (message.type === "result") {
+      turns = message.num_turns;
+      if (message.subtype === "success" && !message.is_error) {
+        action = firstLine(message.result) || action;
+      } else {
+        action = `repair agent did not converge (${message.subtype})`;
+      }
+    }
+  }
+
+  trace("repair", `${target.platform}/${target.layer}: ${turns} turns — ${action}`);
+  return { action };
+}
+
+// System prompt for the repair agent, passed as `systemPrompt` on every
+// query() call. It describes both failure classes because the same prompt is
+// used whichever layer is being repaired; the per-failure details travel in
+// the user prompt produced by buildPrompt().
+const SYSTEM_PROMPT = `You are a repair agent for a generated three-platform SaaS project (Rails 8.1 API, SwiftUI iOS, Jetpack Compose Android). A generated project failed one validation layer; your job is to make the smallest correct edit that fixes it. You operate ONLY inside the current working directory (one generated platform project) — never touch any other path.
+
+Two failure classes:
+- Layer 1 (structural): leftover substrate tokens (e.g. Shop, Shopkeeper, ItemTag, NativeAppTemplate and derived forms) survived the rename. Replace each remaining occurrence with its renamed equivalent, consistently, preserving case style (PascalCase→PascalCase, snake_case→snake_case). Do not rename anything that is NOT a substrate token. Do not introduce a token that collides with a language/framework reserved word.
+- Layer 2 (runtime): the project failed to build/compile. Read the compiler error, find the root cause, and fix it with a minimal, idiomatic change.
+
+Known-cryptic failure modes — slow down and verify rather than pattern-match:
+- Jetpack Compose compilation errors (often a missing import, a @Composable context mismatch, or a type-inference failure).
+- Hilt dependency-injection errors (missing @Inject / @Provides / module binding, or a scope mismatch).
+
+Make targeted edits; do not refactor unrelated code, add dependencies, or rewrite files wholesale. When done, reply with ONE concise sentence describing exactly what you changed.`;
+
+// Builds the user prompt for one repair pass. Both variants quote the
+// domain's rename plan and embed `target.detail` verbatim; the Layer 1
+// variant additionally lists the forbidden tokens.
+function buildPrompt(target: RepairTarget, domain: DomainSpec): string {
+  const pairs: string[] = [];
+  for (const rename of domain.renamePlan) {
+    pairs.push(`${rename.from} → ${rename.to}`);
+  }
+  const renamePlan = pairs.join(", ");
+
+  // Anything that is not Layer 1 is treated as a build failure.
+  if (target.layer !== "layer1") {
+    return [
+      `This generated ${target.platform} project failed to build. The intended domain renames were: ${renamePlan}.`,
+      "",
+      "Compiler error (stderr tail):",
+      `${target.detail}`,
+      "",
+      "Diagnose and fix the root cause with a minimal edit. If you have a shell available, you may re-run the build to confirm, but keep it bounded.",
+    ].join("\n");
+  }
+
+  // With no token list to show, point the agent at the findings instead.
+  const tokens = target.forbiddenTokens ?? [];
+  const forbidden = tokens.join(", ") || "(see findings)";
+  return [
+    `This generated ${target.platform} project still contains leftover substrate tokens that must not appear. Forbidden tokens: ${forbidden}. The intended renames are: ${renamePlan}.`,
+    "",
+    "Leftover findings (token · file:line · excerpt):",
+    `${target.detail}`,
+    "",
+    "Replace every leftover occurrence with its renamed equivalent, then confirm none remain.",
+  ].join("\n");
+}
+
+// The SDK's `env` option accepts string values only, whereas process.env is
+// Record<string, string | undefined>. Copy across just the entries that
+// actually hold a string.
+function stringEnv(env: NodeJS.ProcessEnv): Record<string, string> {
+  return Object.keys(env).reduce<Record<string, string>>((kept, name) => {
+    const value = env[name];
+    if (typeof value === "string") kept[name] = value;
+    return kept;
+  }, {});
+}
+
+// Returns the first line of `text` (surrounding whitespace removed), capped at
+// 200 UTF-16 code units: longer lines are cut to 197 units plus an ellipsis.
+function firstLine(text: string): string {
+  // Split on CRLF as well as LF so a Windows-style line ending does not leave
+  // a stray "\r" on the summary.
+  const line = (text.trim().split(/\r?\n/, 1)[0] ?? "").trimEnd();
+  if (line.length <= 200) return line;
+
+  let head = line.slice(0, 197);
+  // Never end the cut on the first half of a surrogate pair — that would emit
+  // a lone surrogate (rendered as a replacement character) in the report.
+  const last = head.charCodeAt(head.length - 1);
+  if (last >= 0xd800 && last <= 0xdbff) head = head.slice(0, -1);
+  return `${head}…`;
+}
+
+// Promise-based sleep: resolves (with no value) once `ms` milliseconds elapse.
+const delay = (ms: number): Promise<void> =>
+  new Promise((wake) => {
+    setTimeout(wake, ms);
+  });
+
+// Stub-mode stand-in for the repair agent: traces the call, pauses for 50 ms,
+// and reports a no-op.
+async function runStubRepair(target: RepairTarget): Promise<RepairOutcome> {
+  const { platform, layer } = target;
+  trace("repair", `(stub mode) ${platform}/${layer}`);
+  await delay(50);
+  const outcome: RepairOutcome = { action: `stub repair: no-op for ${platform} ${layer}` };
+  return outcome;
+}
diff --git a/src/agents/types.ts b/src/agents/types.ts
@@ -28,7 +28,7 @@ export type RenamePair = {
 
 export type Platform = "rails" | "ios" | "android";
 
-export type AgentName = "planner" | Platform | "reviewer" | "judge" | "dispatch";
+// Identifiers for each agent role; "repair" names the self-repair agent in
+// src/agents/repair.ts, which passes it to trace() and isStub().
+// NOTE(review): presumably the parameter type of trace()/isStub() — confirm.
+export type AgentName = "planner" | Platform | "reviewer" | "judge" | "dispatch" | "repair";
 
 export type WorkerResult = {
   platform: Platform;

diff --git a/src/dispatch.ts b/src/dispatch.ts
@@ -11,8 +11,12 @@ import { isStub } from "./stub.js";
 import { trace } from "./trace.js";
 import { buildRunReport, writeReport, type ReportFormat, type ReportPaths } from "./report/collect.js";
 import { readPackageVersion } from "./version.js";
-import type { RunReport } from "./report/model.js";
-import type { JudgeResult } from "./agents/types.js";
+import { runRepairLoop, REPAIR_ITERATION_CAP, type RepairLoopDeps } from "./repair-loop.js";
+import { runRepair } from "./agents/repair.js";
+import { runLayer1 } from "./validation/layer1.js";
+import { runLayer2, type Layer2Mode } from "./validation/layer2.js";
+import type { RepairAttempt, RunReport } from "./report/model.js";
+import type { JudgeResult, Platform, PlatformDetail, WorkerResult } from "./agents/types.js";
 
 export type DispatchReportOptions = {
   enabled?: boolean;
@@ -83,6 +87,10 @@ export async function dispatch(spec: string, options: DispatchOptions = {}): Pro
   //       already launched after Stage 1. Off by default.
   const visualLevelRaw = process.env['NATIVEAPPTEMPLATE_VISUAL'] ?? "";
   const visualLevel = visualLevelRaw === "2" ? 2 : visualLevelRaw === "1" ? 1 : 0;
+  // Visual levels force build mode so Stage 1 has an artifact to launch;
+  // level 0 stays in the cheaper fast mode. The repair loop re-validates
+  // Layer 2 in the same mode the judge used.
+  const layer2Mode: Layer2Mode = visualLevel >= 1 ? "build" : "fast";
   const visual: VisualJudgeConfig | undefined = visualLevel >= 1
     ? {
         iosDir: resolve(process.cwd(), ios.outDir),
@@ -126,7 +134,7 @@ export async function dispatch(spec: string, options: DispatchOptions = {}): Pro
       ios,
       android,
       reviewer,
-      ...(visualLevel >= 1 ? { layer2Mode: "build" as const } : {}),
+      layer2Mode,
       ...(visual ? { visual } : {}),
     });
   } finally {
@@ -137,6 +145,66 @@ export async function dispatch(spec: string, options: DispatchOptions = {}): Pro
     }
   }
 
+  // Self-repair loop (opt-in via NATIVEAPPTEMPLATE_REPAIR). When the first
+  // judge pass fails on a code-repairable layer (Layer 1 leftover tokens or
+  // Layer 2 build errors), iterate: patch the failing platform with the
+  // repair agent, re-validate, record the attempt — bounded by the cap. Off
+  // by default; skipped in stub mode (no real judge/agent to drive).
+  let repairAttempts: readonly RepairAttempt[] = [];
+  const repairMax = parseRepairMax(process.env['NATIVEAPPTEMPLATE_REPAIR']);
+  if (repairMax > 0 && !judge.overallPass && judge.platforms && judge.platforms.length > 0 && !isStub("judge")) {
+    const workers: Record<Platform, WorkerResult> = { rails, ios, android };
+    const deps: RepairLoopDeps = {
+      repair: async (platform, layer, detail) => {
+        const w = workers[platform];
+        const outDir = resolve(process.cwd(), w.outDir);
+        const detailStr = layer === "layer1"
+          ? formatFindings(detail.layer1.findings)
+          : detail.layer2.stderrTail ?? "(no stderr captured)";
+        return runRepair(
+          {
+            platform,
+            outDir,
+            layer,
+            detail: detailStr,
+            ...(layer === "layer1" ? { forbiddenTokens: w.renamedFrom } : {}),
+          },
+          domain,
+        );
+      },
+      revalidate: async (platform) => {
+        const w = workers[platform];
+        const outDir = resolve(process.cwd(), w.outDir);
+        const [layer1, layer2] = await Promise.all([
+          runLayer1({ projectDir: outDir, forbiddenTokens: w.renamedFrom }),
+          runLayer2({ platform, outDir, mode: layer2Mode }),
+        ]);
+        return {
+          layer1: { pass: layer1.pass, findings: layer1.findings },
+          layer2: {
+            pass: layer2.pass,
+            command: layer2.command,
+            mode: layer2Mode,
+            exitCode: layer2.exitCode,
+            durationMs: layer2.durationMs,
+            ...(layer2.stderrTail !== undefined ? { stderrTail: layer2.stderrTail } : {}),
+          },
+        };
+      },
+    };
+    trace("dispatch", `self-repair: enabled (cap ${repairMax}); first pass failed — entering loop`);
+    const loop = await runRepairLoop({
+      platforms: judge.platforms,
+      reviewerPass: reviewer.contractParity === "pass",
+      maxIterations: repairMax,
+      deps,
+    });
+    repairAttempts = loop.attempts;
+    judge = { ...judge, overallPass: loop.overallPass, summary: loop.summary, platforms: loop.platforms };
+    const resolved = loop.attempts.filter((a) => a.resolved).length;
+    trace("dispatch", `self-repair: ${loop.attempts.length} attempt(s), ${resolved} resolved — overall now ${loop.overallPass ? "PASS" : "FAIL"}`);
+  }
+
   const report = buildRunReport({
     spec,
     domain,
@@ -147,6 +215,7 @@ export async function dispatch(spec: string, options: DispatchOptions = {}): Pro
     visualLevel: visualLevel as 0 | 1 | 2,
     startedAt,
     finishedAt: Date.now(),
+    repairAttempts,
   });
 
   // Default off in stub mode so the test suite never writes into ./out.
@@ -166,3 +235,20 @@ export async function dispatch(spec: string, options: DispatchOptions = {}): Pro
 
   return { ...judge, report, reportPaths };
 }
+
+// NATIVEAPPTEMPLATE_REPAIR control: unset / "0" / "off" / "false" → disabled;
+// "on" / "true" → run up to the cap; a positive integer N → up to min(N, cap).
+// Any other value — including partially numeric ones such as "3x" or "2.5" —
+// leaves the loop disabled rather than guessing at the intent.
+function parseRepairMax(raw: string | undefined): number {
+  if (!raw) return 0;
+  const lowered = raw.trim().toLowerCase();
+  if (lowered === "on" || lowered === "true") return REPAIR_ITERATION_CAP;
+  // Require the whole value to be a decimal integer. Number.parseInt on its
+  // own stops at the first non-digit, so "3x" would silently enable 3 passes.
+  if (!/^\+?\d+$/.test(lowered)) return 0;
+  const n = Number.parseInt(lowered, 10);
+  return n > 0 ? Math.min(n, REPAIR_ITERATION_CAP) : 0;
+}
+
+// Renders Layer 1 findings as one "token · file:line · text" row per finding,
+// or a placeholder when there are none.
+function formatFindings(findings: PlatformDetail["layer1"]["findings"]): string {
+  const rows: string[] = [];
+  for (const finding of findings) {
+    rows.push(`${finding.token} · ${finding.file}:${finding.line} · ${finding.text}`);
+  }
+  return rows.length > 0 ? rows.join("\n") : "(no findings recorded)";
+}