diff --git a/AGENTS.md b/AGENTS.md
index 8a30035..942b34e 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -18,6 +18,7 @@ deterministic mp4 render. Human edits survive AI regeneration of the base.
 - `pnpm reframe compile <scene.ts|.json> [-o out.json] [--stdin] [--code "<src>"] [--json]` — bundle + validate eDSL source into SceneIR JSON, NO render (no ffmpeg/chromium; fast). On failure: a concise classified error (`bundle`/`eval`/`validation`), never the base64 bundle; `--json` makes it `{ok:false,error,kind,issues?}` where `issues` is the structured validation problems (each `{code,path,message}` — e.g. `code:"unknown-blend", path:"nodes.box"`). The in-process equivalent is exported as `reframe-video/compile` (`loadScene`/`loadSceneFromCode`/`checkDeterminism`, server-only); a thrown `SceneValidationError` carries `.issues` (and `.problems` for back-compat), and `SceneLoadError.issues` propagates them across the scene bundle. Entry `packages/render-cli/src/compile.ts`; loader `loadScene.ts`.
 - `pnpm reframe frame <scene.ts|.json> [--t <sec>] [-o out.png]` — render ONE frame at time `t` to a PNG (same renderer as `render`, no ffmpeg muxing; chromium only). For an agentic render-and-look loop (feed the frame back to a model). Reuses `renderFrameAt` (`frameLoop.ts`); entry `packages/render-cli/src/frame.ts`.
 - `pnpm reframe assemble <media...> [-o name] [--title "…"] [--bgm <synth>] [--hold s] [--seed N]` — the **files → scene** path: probe each image/video for its real duration (ffprobe) and scaffold an editable montage scene `.ts` wiring `photoMontage` (clip-aware holds, no freeze) + an optional `title` + a music bed. Probed numbers are baked in → the emitted scene is a normal deterministic scene. Probe `packages/render-cli/src/media/probe.ts`; entry `assemble.ts`.
+- `pnpm reframe narrate <scene.ts|.json> [--voice <name>] [--lang <code>] [--max-speed n] [--script <path>] [--dry-run]` — **scene-fitted Kokoro voiceover**. Reads a sibling `<scene>-vo/script.json` of `{ at, text }` lines (imported into `audio.narration`), computes each line's slot from the compiled label clock, synthesizes it with a Kokoro python sidecar (`narrate.py`), and **auto-fits** its speech rate to the slot (bounded by `--max-speed`, default 1.3; warns if even the max speed overruns). Bakes `file`/`voice`/`speed`/`duration` back into `script.json` (like `assemble` bakes ffprobe numbers); the scene then plays each line as a label-anchored `file` cue (survives retiming/regen) with the bed ducking under the whole utterance. `--dry-run` prints the fit table from a length *estimate* (no synthesis, no Kokoro needed). Kokoro is an **optional dep** (`pip install kokoro` + espeak-ng), preflighted like ffmpeg/chromium; the `.wav` files are external assets (same-machine, not golden) — commit `script.json` + wavs together. Entry `packages/render-cli/src/narrate.ts` + sidecar `narrate.py`; the IR field is `AudioIR.narration` (`packages/core/src/ir.ts`), resolved in `resolveAudioPlan` (`audio.ts`). See `examples/scenes/narrated-demo.ts`.
 - `pnpm reframe manifest <scene.ts|.json> [--json]` — dump the scene's **addressable surface**: every node (+ its `editableProps` and `animatedProps`), state, timeline label (+ `patchable` params), beat, and behavior, each with the overlay address that reaches it. The map an AI/human editor reads to patch a scene surgically (vs regenerating). Core `sceneManifest(compiled)` (`packages/core/src/manifest.ts`, exported); entry `packages/render-cli/src/manifest.ts`.
 - `pnpm reframe lint <scene.ts|.json> [--json] [--strict]` — the **studio-readiness gate**: (a) flag un-addressable motion (a tween/to/motionPath with no `label` can't be retimed by an overlay and a regen can silently drop it) + a `motionAddressableRatio` summary, and (b) for a `.ts` source, verify the scene is a **pure function of time** (`non-deterministic-render` finding) — it bundles once and evaluates TWICE, reporting the first IR address that differs (e.g. a `Math.random()`/`Date` baked into a prop), since a non-pure scene silently compiles to a different IR each time. `--strict` exits non-zero on findings (CI gate). Core `lintScene(compiled)` (addressability); `checkDeterminism(path)` (purity, exported via `reframe-video/compile`, `packages/render-cli/src/determinism.ts`); entry `lint.ts`.
 - `pnpm reframe verify-overlay <base.ts|.json> <overlay.json>... [--json]` — compose an overlay onto a base and report applied-vs-orphaned, NO render. The regen-survival check: run vs the original base (all applied), then vs the AI-regenerated base — any orphan is a broken stable address. Non-zero exit on orphans (CI gate). Reuses `composeScene`/`formatComposeReport`; entry `verifyOverlay.ts`.
@@ -290,7 +291,11 @@ addition to the git push, so both channels carry the same skill.
 - Audio: `scene.audio` cues anchor to timeline labels (they survive retiming);
   sfx are procedurally synthesized, CC0 samples live in `assets/sfx/`
   (LICENSE.md records provenance). Determinism contract covers the AudioPlan
-  and WAV bytes, not AAC-encoded mp4 bytes.
+  and WAV bytes, not AAC-encoded mp4 bytes. `audio.narration` lines (spoken VO)
+  resolve to label-anchored `file` cues after `reframe narrate` synthesizes them;
+  the Kokoro `.wav` files are **external assets** (same-machine, version-dependent, like
+  images) — NOT part of the golden contract. Synthesis is out-of-band; only the
+  AudioPlan (cue timing + the baked `duration`-sized duck window) is deterministic.
 - Golden snapshots in `packages/core/test/__snapshots__` encode the determinism
   contract; if they change unexpectedly, that's a regression, not noise.
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 440b74d..589404f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,27 @@ versions may change them.
 
 ## [Unreleased]
 
+## [0.6.44] - 2026-06-21
+
+### Added
+
+#### Scene-fitted Kokoro narration (`reframe narrate` + `audio.narration`)
+
+- New IR field **`AudioIR.narration`** — spoken voiceover lines (`{ at, text, voice? }`)
+  authored as a sibling `<scene>-vo/script.json` the scene imports. Each line resolves to a
+  **label-anchored `file` cue** (so VO survives retiming/regen), with a baked `duration`
+  sizing the bed's duck window. Additive + golden-safe (no narration → byte-identical plan).
+- New command **`reframe narrate <scene> [--voice] [--max-speed] [--dry-run]`** — reads the
+  compiled **label clock**, synthesizes each line with a Kokoro python sidecar (`narrate.py`),
+  and **auto-fits** its speech rate to the slot between its anchor and the next line (bounded;
+  warns if even max speed overruns). Bakes `file`/`voice`/`speed`/`duration` back into
+  `script.json` (like `assemble` bakes ffprobe numbers). `--dry-run` prints the fit table from
+  a length estimate with no synthesis.
+- Kokoro is an **optional dependency** (`pip install kokoro` + espeak-ng), preflighted like
+  ffmpeg/chromium. The `.wav` files are external assets (same-machine, not golden) — the determinism
+  contract still covers the AudioPlan, not the synthesized audio bytes.
+- Example `examples/scenes/narrated-demo.ts` (+ `narrated-demo-vo/script.json`).
+
 ## [0.6.43] - 2026-06-21
 
 ### Added
diff --git a/README.md b/README.md
index e211f7f..6251291 100644
--- a/README.md
+++ b/README.md
@@ -280,6 +280,7 @@ your scene.
 | `pnpm reframe verify-overlay <base> <overlay>... [--json]` | compose an overlay onto a base and report applied-vs-orphaned, no render — the regen-survival check (non-zero exit on orphans) |
 | `pnpm reframe labels <scene.ts\|.json>` | print the compiled event clock (every timeline label → exact seconds) — the timing source for audio cues |
 | `pnpm reframe assemble <media...> [-o name]` | probe images/videos (ffprobe) and scaffold an editable montage scene `.ts` wired with `photoMontage` |
+| `pnpm reframe narrate <scene.ts\|.json> [--voice <name>] [--max-speed n] [--dry-run]` | scene-fitted Kokoro voiceover: synth each `audio.narration` line and auto-fit its rate to the slot (needs python + `kokoro`) |
 | `pnpm reframe player <scene.ts\|.json> [-o out.html]` | bundle a scene into one self-contained HTML that plays the motion live in any browser |
 | `pnpm reframe logo <logo.svg\|brand-slug> [--motion <preset>]` | animate a logo (or a simple-icons brand) into a sting |
 | `pnpm reframe diff <ref-image> [scene.ts] [--t <sec>] [--mode side\|blend\|diff\|grid]` | compare a render against a reference image |
@@ -362,7 +363,7 @@ site. The [`docs/`](docs/) folder is its [Mintlify](https://mintlify.com) source
 |---|---|
 | [Introduction](docs/introduction.mdx) · [Quickstart](docs/quickstart.mdx) · [The loop](docs/the-loop.mdx) | the pitch, install, and the AI-write / human-edit / deterministic-render model |
 | [Gallery](docs/gallery.mdx) | a curated visual reel of scenes |
-| [Examples](examples/README.md) | all 67 example scenes, by category |
+| [Examples](examples/README.md) | all 68 example scenes, by category |
 | [Guides](docs/guides/) | the eDSL, directing, HTML/GSAP, and regeneration-contract guides (also `pnpm reframe guide`) |
 
 Curated renders live in [`docs/assets/gallery/`](docs/assets/gallery) and accumulate via `pnpm gallery` (the committed home; `out/` stays scratch).
@@ -375,7 +376,7 @@ Curated renders live in [`docs/assets/gallery/`](docs/assets/gallery) and accumu
 | `packages/renderer-canvas` | DisplayList → Canvas 2D (browser + capture shared) |
 | `packages/render-cli` | Playwright capture + ffmpeg encode; also renders arbitrary HTML/GSAP deterministically via a virtual clock |
 | `packages/preview` | the Vite editor |
-| `examples/` | 67 example scenes (see [`examples/README.md`](examples/README.md)), overlays, compositions, the edit-survival demo |
+| `examples/` | 68 example scenes (see [`examples/README.md`](examples/README.md)), overlays, compositions, the edit-survival demo |
 | `labs/` | experiments and product probes (live-data → baked scene → render), kept out of `examples/` so it stays purely demonstrative |
 | `docs/` | the [Mintlify](https://mintlify.com)-ready docs site + the authoring guides (also `pnpm reframe guide`) |
 | `benchmark/` | **measurement artifacts, not product code**: LLM generation benchmark (RESULTS/ANALYSIS.md), regeneration-contract experiment (regen/), calibrated motion profiler (harness/motion/, MOTION.md) |
diff --git a/docs/cli-reference.mdx b/docs/cli-reference.mdx
index 909f223..ea6a3c0 100644
--- a/docs/cli-reference.mdx
+++ b/docs/cli-reference.mdx
@@ -15,6 +15,7 @@ Run any command with `npx reframe-video <command>` (no clone needed) or `pnpm re
 | `player <scene.ts\|.json> [-o out.html]` | bundle a scene into one self-contained HTML that plays the motion live in any browser |
 | `logo <logo.svg\|brand-slug> [--motion <preset>] [--energy n] [--seed n]` | animate a logo (or a simple-icons brand) into a sting |
 | `assemble <media...> [-o name] [--title "…"] [--bgm <synth>]` | probe images/videos (ffprobe) and scaffold an editable montage scene `.ts` |
+| `narrate <scene.ts\|.json> [--voice <name>] [--max-speed n] [--dry-run]` | scene-fitted Kokoro voiceover — synth each `audio.narration` line and auto-fit its rate to the slot (needs python + `kokoro`; `--dry-run` estimates without synthesis) |
 
 ## Inspect & validate
 
diff --git a/docs/examples.mdx b/docs/examples.mdx
index 9d880a3..ad7661b 100644
--- a/docs/examples.mdx
+++ b/docs/examples.mdx
@@ -1,6 +1,6 @@
 ---
 title: Examples
-description: "All 67 example scenes, by category — each a single self-contained file you can render."
+description: "All 68 example scenes, by category — each a single self-contained file you can render."
 ---
 
 Every scene is one `.ts` file in [`examples/scenes/`](https://github.com/kiyeonjeon21/reframe/tree/main/examples/scenes) — self-contained and dependency-free. Render any of them:
@@ -35,7 +35,7 @@ The [gallery](/gallery) has the curated visual reel; the [repo README](https://g
 [annual-report](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/annual-report.ts) · [chart-buildup](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/chart-buildup.ts) · [data-explainer](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/data-explainer.ts) · [flow-diagram](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/flow-diagram.ts) · [github-year](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/github-year.ts)
 
 ## Audio
-[audio-visualizer](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/audio-visualizer.ts) · [auto-foley-demo](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/auto-foley-demo.ts) · [sample-showcase](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/sample-showcase.ts) · [sfx-compare](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/sfx-compare.ts) · [sfx-showcase](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/sfx-showcase.ts)
+[audio-visualizer](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/audio-visualizer.ts) · [auto-foley-demo](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/auto-foley-demo.ts) · [narrated-demo](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/narrated-demo.ts) · [sample-showcase](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/sample-showcase.ts) · [sfx-compare](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/sfx-compare.ts) · [sfx-showcase](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/sfx-showcase.ts)
 
 ## Logo stings
 [logo-reveal](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/logo-reveal.ts) · [logo-reveal-regen](https://github.com/kiyeonjeon21/reframe/blob/main/examples/scenes/logo-reveal-regen.ts)
diff --git a/docs/gallery.mdx b/docs/gallery.mdx
index eabb65d..d02507f 100644
--- a/docs/gallery.mdx
+++ b/docs/gallery.mdx
@@ -3,7 +3,7 @@ title: Gallery
 description: "A reel of reframe scenes — each a few-line declaration, each a deterministic render."
 ---
 
-Every clip below is a scene in [`examples/scenes/`](https://github.com/kiyeonjeon21/reframe/tree/main/examples/scenes). Render any of them yourself with `npx reframe-video render examples/scenes/<name>.ts`. The full list — 67 scenes by category — is on the [Examples](/examples) page.
+Every clip below is a scene in [`examples/scenes/`](https://github.com/kiyeonjeon21/reframe/tree/main/examples/scenes). Render any of them yourself with `npx reframe-video render examples/scenes/<name>.ts`. The full list — 68 scenes by category — is on the [Examples](/examples) page.
 
 <Note>
   These gifs are the curated showcase. New renders accumulate here via `pnpm gallery` (the committed home, vs the gitignored `out/` scratch).
diff --git a/examples/README.md b/examples/README.md
index 2e1d3eb..2a7672a 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,6 +1,6 @@
 # Examples
 
-67 curated scenes, one per `.ts` file in [`scenes/`](scenes). Each is a single, self-contained, dependency-free document — render any of them:
+68 curated scenes, one per `.ts` file in [`scenes/`](scenes). Each is a single, self-contained, dependency-free document — render any of them:
 
 ```bash
 pnpm reframe render examples/scenes/<scene>.ts        # in this repo
@@ -90,6 +90,7 @@ Also here: [`overlays/`](overlays) (human-edit layers), [`compositions/`](compos
 |---|---|
 | `audio-visualizer` | "THE DROP": radial spectrum bars, a pulsing core, a particle burst. |
 | `auto-foley-demo` | `autoFoley` scoring motion — whoosh / thud / pop following the tweens. |
+| `narrated-demo` | Scene-fitted Kokoro voiceover: `audio.narration` from a sibling `script.json`, each line auto-fitted to its slot by `reframe narrate`, bed ducking under it. |
 | `sample-showcase` | The CC0 sample library: keypress / footstep / click / confirm / UI sounds. |
 | `sfx-compare` | Synth vs sample A/B for the six original names. |
 | `sfx-showcase` | The procedural SFX palette, per-cue seeded variation as a little melody. |
diff --git a/examples/scenes/narrated-demo-vo/close.wav b/examples/scenes/narrated-demo-vo/close.wav
new file mode 100644
index 0000000..ebc8a70
Binary files /dev/null and b/examples/scenes/narrated-demo-vo/close.wav differ
diff --git a/examples/scenes/narrated-demo-vo/intro.wav b/examples/scenes/narrated-demo-vo/intro.wav
new file mode 100644
index 0000000..98df9e0
Binary files /dev/null and b/examples/scenes/narrated-demo-vo/intro.wav differ
diff --git a/examples/scenes/narrated-demo-vo/point.wav b/examples/scenes/narrated-demo-vo/point.wav
new file mode 100644
index 0000000..c30fdf0
Binary files /dev/null and b/examples/scenes/narrated-demo-vo/point.wav differ
diff --git a/examples/scenes/narrated-demo-vo/script.json b/examples/scenes/narrated-demo-vo/script.json
new file mode 100644
index 0000000..ec84705
--- /dev/null
+++ b/examples/scenes/narrated-demo-vo/script.json
@@ -0,0 +1,23 @@
+[
+  {
+    "at": "intro",
+    "text": "This is reframe.",
+    "file": "narrated-demo-vo/intro.wav",
+    "voice": "af_heart",
+    "duration": 1.775
+  },
+  {
+    "at": "point",
+    "text": "Anchored to the timeline, it survives.",
+    "file": "narrated-demo-vo/point.wav",
+    "voice": "af_heart",
+    "duration": 2.775
+  },
+  {
+    "at": "close",
+    "text": "Open source.",
+    "file": "narrated-demo-vo/close.wav",
+    "voice": "af_heart",
+    "duration": 1.65
+  }
+]
diff --git a/examples/scenes/narrated-demo.ts b/examples/scenes/narrated-demo.ts
new file mode 100644
index 0000000..736c133
--- /dev/null
+++ b/examples/scenes/narrated-demo.ts
@@ -0,0 +1,45 @@
+// Narrated demo — a pure-vector scene whose voiceover is authored as a sibling
+// `narrated-demo-vo/script.json` (imported into `audio.narration`) and synthesized
+// + fitted to the timeline by `reframe narrate`. Each line anchors to a timeline
+// label, so the VO survives retiming/regen; `narrate` reads the label clock and
+// fits each line's speech rate to its slot.
+//
+//   reframe narrate examples/scenes/narrated-demo.ts --dry-run   # fit table (no synth)
+//   reframe narrate examples/scenes/narrated-demo.ts             # synth + fit (python+kokoro)
+//   reframe render  examples/scenes/narrated-demo.ts             # mp4, bed ducks under the VO
+//
+// The .wav are out-of-band assets (not bundled to npm, not golden) — commit
+// script.json + the generated wavs together. Image/audio file cues don't render
+// in player/artifacts; mp4 only.
+
+import { scene, rect, text, seq, tween, wait, linearGradient } from "@reframe/core";
+import vo from "./narrated-demo-vo/script.json";
+
+const W = 1920, H = 1080;
+
+export default scene({
+  id: "narrated-demo",
+  size: { width: W, height: H },
+  fps: 30,
+  background: "#06070C",
+  nodes: [
+    rect({ id: "bg", x: 0, y: 0, width: W, height: H, fill: linearGradient(["#0A1430", "#06070C"], { angle: 90 }) }),
+    text({ id: "title", x: W / 2, y: H / 2 - 40, anchor: "center", content: "reframe", fontFamily: "Inter", fontSize: 200, fontWeight: 800, fill: "#FFFFFF", opacity: 0 }),
+    text({ id: "sub", x: W / 2, y: H / 2 + 110, anchor: "center", content: "voice that fits the scene", fontFamily: "Inter", fontSize: 46, fontWeight: 500, fill: "#7FB4FF", opacity: 0 }),
+  ],
+  // labels (intro / point / close) are the stable anchors the narration lines bind to
+  timeline: seq(
+    wait(0.4),
+    tween("title", { opacity: 1 }, { duration: 0.6, ease: "easeOutCubic", label: "intro" }),
+    wait(2.4),
+    tween("sub", { opacity: 1 }, { duration: 0.5, ease: "easeOutCubic", label: "point" }),
+    wait(3.2),
+    tween("title", { opacity: 0 }, { duration: 0.6, ease: "easeInCubic", label: "close" }),
+    tween("sub", { opacity: 0 }, { duration: 0.6, ease: "easeInCubic", label: "close-sub" }),
+    wait(0.6),
+  ),
+  audio: {
+    bgm: { synth: "ambient-pad", gain: 0.25, fadeIn: 1, fadeOut: 1.5, duck: { depth: 0.6 } },
+    narration: vo,
+  },
+});
diff --git a/examples/tsconfig.json b/examples/tsconfig.json
index 5919ed7..51bc6cf 100644
--- a/examples/tsconfig.json
+++ b/examples/tsconfig.json
@@ -2,7 +2,8 @@
   "extends": "../tsconfig.base.json",
   "compilerOptions": {
     "lib": ["ES2022", "DOM", "DOM.Iterable"],
-    "types": ["node"]
+    "types": ["node"],
+    "resolveJsonModule": true
   },
   "include": ["scenes", "scripts"]
 }
diff --git a/packages/core/src/audio.ts b/packages/core/src/audio.ts
index 907e167..7919529 100644
--- a/packages/core/src/audio.ts
+++ b/packages/core/src/audio.ts
@@ -115,7 +115,8 @@ export function resolveAudioPlan(compiled: CompiledScene): AudioPlan | null {
     ? autoFoley(compiled, audio.autoFoley === true ? {} : audio.autoFoley)
     : [];
   const manualCues = [...(audio?.cues ?? []), ...autoCues];
-  if (!audio || (!audio.bgm && manualCues.length === 0)) {
+  const narrationLines = audio?.narration ?? [];
+  if (!audio || (!audio.bgm && manualCues.length === 0 && narrationLines.length === 0)) {
     // a scene with only video-clip audio still gets a plan
     return clipAudio.length === 0
       ? null
@@ -160,6 +161,46 @@ export function resolveAudioPlan(compiled: CompiledScene): AudioPlan | null {
         : { kind: "file", path: cue.file! },
     });
   }
+
+  // Narration lines render as label-anchored file cues (after `reframe narrate`
+  // bakes their wav). Each carries a real `duration`, so the bed ducks under the
+  // whole utterance. An un-synthesized line (no `file`) warns and is skipped.
+  for (const [index, line] of narrationLines.entries()) {
+    let anchor: number;
+    if (typeof line.at === "number") {
+      anchor = line.at;
+    } else {
+      const span = compiled.labelTimes.get(line.at);
+      if (!span) {
+        warnings.push(`narration[${index}]: unknown label "${line.at}" — dropped`);
+        continue;
+      }
+      anchor = span.t0;
+    }
+    if (!line.file) {
+      warnings.push(`narration "${line.at}" not synthesized — run reframe narrate`);
+      continue;
+    }
+    const t = Math.max(0, anchor + (line.offset ?? 0));
+    if (t >= duration) {
+      warnings.push(`narration "${line.at}" at ${t.toFixed(2)}s starts past the scene end (${duration.toFixed(2)}s) — dropped`);
+      continue;
+    }
+    const lineDuration = line.duration ?? FILE_CUE_DURATION;
+    if (t + lineDuration > duration) {
+      warnings.push(`narration "${line.at}" at ${t.toFixed(2)}s extends past the scene end — it will be truncated`);
+    }
+    cues.push({
+      t,
+      gain: line.gain ?? 1.15,
+      duration: lineDuration,
+      fadeIn: 0,
+      fadeOut: 0,
+      pan: 0,
+      source: { kind: "file", path: line.file },
+    });
+  }
+
   cues.sort((a, b) => a.t - b.t);
 
   return {
diff --git a/packages/core/src/ir.ts b/packages/core/src/ir.ts
index b668971..b3ae22a 100644
--- a/packages/core/src/ir.ts
+++ b/packages/core/src/ir.ts
@@ -468,6 +468,33 @@ export interface AudioCueIR {
   params?: Record<string, number>;
 }
 
+/**
+ * A narration line — a spoken voiceover anchored to the timeline, fitted to the
+ * scene. The author writes `at` + `text` (+ optional `voice`/`gain`); the
+ * `reframe narrate` generator reads the label clock, synthesizes the line with a
+ * Kokoro TTS sidecar, fits its speech rate to the slot, and bakes `file`/`speed`/
+ * `duration` back. At render it behaves as a label-anchored `file` cue (so it
+ * survives retiming/regen), with `duration` sizing the bed's duck window.
+ */
+export interface NarrationLineIR {
+  /** Anchor: a known timeline label (the step's start) or absolute seconds (must be >= 0). */
+  at: string | number;
+  /** The line to speak (required; validation rejects empty or whitespace-only text). */
+  text: string;
+  /** Kokoro voice (e.g. "af_heart", "am_michael"); default chosen by `narrate`. */
+  voice?: string;
+  /** Linear gain (must be >= 0); resolves to 1.15 when omitted (voiceover sits above the bed). */
+  gain?: number;
+  /** Seconds relative to the anchor (default 0). */
+  offset?: number;
+  /** BAKED by `reframe narrate`: scene-relative wav path (e.g. "demo-vo/intro.wav"); a line without it is skipped with a warning. */
+  file?: string;
+  /** BAKED by `reframe narrate`: the fitted speech rate (must be > 0; default 1). */
+  speed?: number;
+  /** BAKED by `reframe narrate`: measured wav length (s) — sizes the duck window; when absent the generic file-cue duration is used. */
+  duration?: number;
+}
+
 export interface AudioIR {
   bgm?: {
     file?: string;
@@ -480,6 +507,12 @@ export interface AudioIR {
     duck?: { depth?: number; attack?: number; release?: number } | false;
   };
   cues?: AudioCueIR[];
+  /**
+   * Spoken voiceover lines, each anchored to the timeline and fitted to the scene
+   * by `reframe narrate`. Render-equivalent to label-anchored `file` cues (after
+   * synthesis), so they survive retiming/regen. See {@link NarrationLineIR}.
+   */
+  narration?: NarrationLineIR[];
   /**
    * Auto-generate sound cues from node motion (move→whoosh, settle→impact,
    * scale-in→pop, panned by position). Deterministic + retime-safe (re-derived
diff --git a/packages/core/src/validate.ts b/packages/core/src/validate.ts
index 8377148..be907cf 100644
--- a/packages/core/src/validate.ts
+++ b/packages/core/src/validate.ts
@@ -332,6 +332,25 @@ export function validateScene(ir: SceneIR): void {
       add("audio-range", cp, `${cp}: pan must be in [-1, 1] (-1 left … +1 right)`);
     }
   }
+  for (const [i, line] of (ir.audio?.narration ?? []).entries()) {
+    const np = `audio.narration[${i}]`;
+    if (typeof line.at === "string" && !labels.has(line.at)) {
+      add("unknown-timeline-label", np, `${np}: unknown timeline label "${line.at}" — known labels: ${[...labels].join(", ") || "(none)"}`);
+    }
+    if (typeof line.at === "number" && line.at < 0) {
+      add("bad-duration", np, `${np}: "at" must be >= 0`);
+    }
+    if (typeof line.text !== "string" || line.text.trim() === "") {
+      add("narration-text", np, `${np}: "text" is required and must be non-empty`);
+    }
+    if (line.gain !== undefined && line.gain < 0) {
+      add("audio-range", np, `${np}: gain must be >= 0`);
+    }
+    if (line.speed !== undefined && line.speed <= 0) {
+      add("narration-speed", np, `${np}: speed must be > 0`);
+    }
+  }
+
   const duck = ir.audio?.bgm?.duck;
   if (typeof duck === "object" && duck !== null && duck.depth !== undefined && (duck.depth < 0 || duck.depth > 1)) {
     add("audio-range", "audio.bgm.duck.depth", "audio.bgm.duck.depth must be in [0, 1]");
diff --git a/packages/core/test/audio.test.ts b/packages/core/test/audio.test.ts
index 2b6c286..f3c58a4 100644
--- a/packages/core/test/audio.test.ts
+++ b/packages/core/test/audio.test.ts
@@ -103,6 +103,42 @@ describe("resolveAudioPlan", () => {
   });
 });
 
+describe("narration", () => {
+  it("resolves a synthesized line to a label-anchored file cue with a real duck window", () => {
+    const plan = resolveAudioPlan(
+      base({
+        narration: [{ at: "move", text: "hello", file: "t-vo/move.wav", duration: 0.8 }],
+      }),
+    )!;
+    expect(plan.cues).toHaveLength(1);
+    expect(plan.cues[0]).toMatchObject({
+      t: 0.5,
+      gain: 1.15,
+      duration: 0.8,
+      source: { kind: "file", path: "t-vo/move.wav" },
+    });
+    // the bed ducks under the whole utterance, not the 0.4s file default
+    expect(plan.duckWindows).toEqual([{ t0: 0.5, t1: 1.3 }]);
+  });
+
+  it("warns and drops an un-synthesized line (no file yet)", () => {
+    const plan = resolveAudioPlan(base({ narration: [{ at: "fade", text: "not yet" }] }))!;
+    expect(plan).not.toBeNull();
+    expect(plan.cues).toEqual([]);
+    expect(plan.warnings.some((w) => w.includes('narration "fade" not synthesized'))).toBe(true);
+  });
+
+  it("coexists with cues and sorts by time", () => {
+    const plan = resolveAudioPlan(
+      base({
+        cues: [{ at: "tail", sfx: "pop" }],
+        narration: [{ at: "lead", text: "intro", file: "t-vo/lead.wav", duration: 0.4 }],
+      }),
+    )!;
+    expect(plan.cues.map((c) => c.t)).toEqual([0, 2.0]);
+  });
+});
+
 describe("clip audio (video nodes)", () => {
   const vscene = (props: Record<string, unknown>, audio?: AudioIR) =>
     compileScene(
@@ -165,4 +201,16 @@ describe("audio validation", () => {
     expect(() => make({ cues: [{ at: "w", sfx: "pop", file: "x.wav" }] })).toThrowError(/exactly one/);
     expect(() => make({ cues: [{ at: "w", sfx: "kaboom" as never }] })).toThrowError(/unknown sfx "kaboom"/);
   });
+
+  it("validates narration: known label, non-empty text, positive speed", () => {
+    expect(() => make({ narration: [{ at: "nope", text: "hi" }] })).toThrowError(
+      /unknown timeline label "nope"/,
+    );
+    expect(() => make({ narration: [{ at: "w", text: "  " }] })).toThrowError(
+      /"text" is required and must be non-empty/,
+    );
+    expect(() => make({ narration: [{ at: "w", text: "hi", speed: 0 }] })).toThrowError(
+      /speed must be > 0/,
+    );
+  });
 });
diff --git a/packages/reframe-video/package.json b/packages/reframe-video/package.json
index 77e869a..01ac88e 100644
--- a/packages/reframe-video/package.json
+++ b/packages/reframe-video/package.json
@@ -1,6 +1,6 @@
 {
   "name": "reframe-video",
-  "version": "0.6.43",
+  "version": "0.6.44",
   "description": "Declarative motion graphics that AI can write and humans can tweak — human edits survive AI regeneration. Deterministic mp4 renders from a plain-data scene format.",
   "keywords": [
     "motion-graphics",
diff --git a/packages/reframe-video/scripts/build.ts b/packages/reframe-video/scripts/build.ts
index 57242d6..2263c56 100644
--- a/packages/reframe-video/scripts/build.ts
+++ b/packages/reframe-video/scripts/build.ts
@@ -36,6 +36,7 @@ const nodeBundles: [entry: string, out: string][] = [
   ["packages/render-cli/src/labels.ts", "labels.js"],
   ["packages/render-cli/src/compile.ts", "compile.js"],
   ["packages/render-cli/src/assemble.ts", "assemble.js"],
+  ["packages/render-cli/src/narrate.ts", "narrate.js"],
   ["packages/render-cli/src/manifest.ts", "manifest.js"],
   ["packages/render-cli/src/lint.ts", "lint.js"],
   ["packages/render-cli/src/verifyOverlay.ts", "verifyOverlay.js"],
@@ -175,6 +176,9 @@ if (!/from\s*["']reframe-video["']/.test(rcJs)) throw new Error("renderer-canvas
 // --- assets & guides -------------------------------------------------------
 await cp(join(REPO, "assets", "fonts"), join(PKG, "assets", "fonts"), { recursive: true });
 await cp(join(REPO, "assets", "sfx"), join(PKG, "assets", "sfx"), { recursive: true });
+// the Kokoro TTS sidecar ships beside dist/narrate.js (narrate.ts resolves it via
+// `new URL("./narrate.py", import.meta.url)`).
+await cp(join(REPO, "packages/render-cli/src/narrate.py"), join(PKG, "dist", "narrate.py"));
 await mkdir(join(PKG, "guides"), { recursive: true });
 // Guides ship flat under guides/; sources are the authoring docs under docs/.
 // Keep this set in sync with the GUIDE map in render-cli/src/reframe.ts.
diff --git a/packages/render-cli/src/narrate.py b/packages/render-cli/src/narrate.py
new file mode 100644
index 0000000..6b39267
--- /dev/null
+++ b/packages/render-cli/src/narrate.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+# Kokoro-TTS sidecar for `reframe narrate`. Reads a JSON request on stdin:
+#   { "outDir": "...", "lang": "a", "lines": [{ "stem", "text", "voice", "speed" }] }
+# synthesizes each line at the given speed, writes <outDir>/<stem>.wav @24kHz,
+# and prints { "durations": { "<stem>": <seconds> } } on stdout.
+# On failure it prints { "error": "<message>" } on stdout and exits non-zero
+# (3 = numpy/soundfile/kokoro not importable, 4 = a line produced no audio).
+#
+# Out-of-band by design: the .wav are external assets (not part of reframe's
+# golden/determinism contract), like images. Requires `kokoro` + espeak-ng.
+import sys, os, json, warnings
+
+warnings.filterwarnings("ignore")
+
+# Sample rate (Hz) used both to write each wav and to turn sample counts into seconds.
+SAMPLE_RATE = 24000
+
+
+def main():
+    """Synthesize every requested line and report each wav's duration.
+
+    Reads the JSON request from stdin, writes <outDir>/<stem>.wav per line, and
+    prints {"durations": {...}} on stdout. Exits 3 with {"error": ...} when
+    numpy/soundfile/kokoro cannot be imported, and 4 when a line yields no audio.
+    """
+    req = json.load(sys.stdin)
+    out_dir = req["outDir"]
+    lang = req.get("lang", "a")
+    os.makedirs(out_dir, exist_ok=True)
+
+    # Imported inside main so a missing optional dependency becomes a structured
+    # error on stdout instead of an import-time traceback.
+    try:
+        import numpy as np
+        import soundfile as sf
+        from kokoro import KPipeline
+    except Exception as e:  # pragma: no cover - environment dependent
+        print(json.dumps({"error": f"kokoro import failed: {e}"}))
+        sys.exit(3)
+
+    pipe = KPipeline(lang_code=lang)
+    durations = {}
+    for line in req["lines"]:
+        stem = line["stem"]
+        text = line["text"]
+        voice = line.get("voice", "af_heart")
+        speed = float(line.get("speed", 1.0))
+        # Each item the pipeline yields unpacks to three values; the third is the audio.
+        chunks = [a for _, _, a in pipe(text, voice=voice, speed=speed)]
+        if not chunks:
+            # No audio came back (presumably text with nothing speakable): report it
+            # instead of crashing with an IndexError on chunks[0] below.
+            print(json.dumps({"error": f'no audio synthesized for line "{stem}"'}))
+            sys.exit(4)
+        audio = np.concatenate(chunks) if len(chunks) > 1 else chunks[0]
+        path = os.path.join(out_dir, f"{stem}.wav")
+        sf.write(path, audio, SAMPLE_RATE)
+        durations[stem] = round(len(audio) / SAMPLE_RATE, 4)
+
+    print(json.dumps({"durations": durations}))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/packages/render-cli/src/narrate.ts b/packages/render-cli/src/narrate.ts
new file mode 100644
index 0000000..7524171
--- /dev/null
+++ b/packages/render-cli/src/narrate.ts
@@ -0,0 +1,217 @@
+#!/usr/bin/env tsx
+/**
+ * `reframe narrate <scene.ts|.json> [--voice <name>] [--lang a] [--max-speed 1.3]
+ *  [--script <path>] [--dry-run]` — scene-fitted Kokoro voiceover.
+ *
+ * Reads a narration script (a sibling `<scene>-vo/script.json` of `{ at, text }`
+ * lines the scene imports into `audio.narration`), computes each line's time slot
+ * from the compiled label clock, synthesizes it with a Kokoro python sidecar, and
+ * AUTO-FITS its speech rate so it fits the slot (bounded; warns if even the max
+ * speed overruns). Bakes `file` / `voice` / `speed` / `duration` back into the
+ * script.json — the scene then plays each line as a label-anchored `file` cue that
+ * survives retiming/regen, with the bed ducking under the whole utterance.
+ *
+ * Determinism: the .wav are external assets (same-machine, Kokoro-version
+ * dependent), not part of the golden contract — commit script.json + wavs together.
+ */
+import { spawn } from "node:child_process";
+import { existsSync } from "node:fs";
+import { mkdir, readFile, writeFile } from "node:fs/promises";
+import { basename, dirname, isAbsolute, join, relative, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+import { compileScene } from "@reframe/core";
+import { loadScene } from "./loadScene.js";
+
+const NARRATE_PY = fileURLToPath(new URL("./narrate.py", import.meta.url)); // sidecar script, resolved next to this module
+const CWD = process.env.INIT_CWD ?? process.cwd(); // INIT_CWD wins when set — presumably the directory the user invoked the package script from (verify)
+const userPath = (p: string) => (isAbsolute(p) ? p : resolve(CWD, p)); // user-supplied paths: absolute as-is, relative against CWD
+
+interface Line {
+  at: string | number; // timeline label name, or a numeric time used directly as the anchor
+  text: string; // what the voice says
+  voice?: string; // per-line voice; falls back to --voice
+  gain?: number; // not read by this CLI — presumably applied where the scene resolves audio (verify)
+  offset?: number; // added to the anchor (the result is clamped to ≥ 0)
+  file?: string; // baked: wav path relative to the scene's directory, forward slashes
+  speed?: number; // baked: synthesis rate, present only when the line was sped up (> 1)
+  duration?: number; // baked: measured length of the wav, rounded to 3 decimals
+}
+
+interface Args {
+  scene?: string; // first positional argument
+  voice: string; // --voice, default "af_heart"
+  lang: string; // --lang, forwarded to the sidecar as its language code (default "a")
+  maxSpeed: number; // --max-speed, upper bound for the fitted speech rate (default 1.3)
+  script?: string; // --script, overrides the default <scene>-vo/script.json location
+  dryRun: boolean; // --dry-run, print an estimated fit table without synthesizing
+}
+
+function fail(msg: string): never {
+  console.error(`error: ${msg}`); // user-facing message goes to stderr, prefixed "error: "
+  process.exit(1); // never returns — the `never` return type lets callers use fail() inside expressions
+}
+
+function parseArgs(argv: string[]): Args { // flags in any order; the first bare argument is the scene; bad input exits via fail()
+  const a: Args = { voice: "af_heart", lang: "a", maxSpeed: 1.3, dryRun: false };
+  for (let i = 0; i < argv.length; i++) {
+    const arg = argv[i]!;
+    const next = () => argv[++i] ?? fail(`${arg} needs a value`); // consumes the following argv entry
+    if (arg === "--voice") a.voice = next();
+    else if (arg === "--lang") a.lang = next();
+    else if (arg === "--max-speed") a.maxSpeed = Number(next());
+    else if (arg === "--script") a.script = next();
+    else if (arg === "--dry-run") a.dryRun = true;
+    else if (arg.startsWith("-")) fail(`unknown flag "${arg}"`);
+    else a.scene = a.scene ? fail(`unexpected argument "${arg}"`) : arg;
+  }
+  if (!Number.isFinite(a.maxSpeed) || a.maxSpeed < 1) fail("--max-speed must be a number ≥ 1"); // NaN or < 1 would silently disable fitting
+  return a;
+}
+
+const slug = (s: string) => s.replace(/[^a-zA-Z0-9_-]+/g, "-").replace(/^-+|-+$/g, "") || "line"; // filename-safe: other chars → "-", edges trimmed; "line" if nothing is left
+const stemOf = (line: Line, i: number) => (typeof line.at === "string" ? slug(line.at) : `line${i}`); // wav basename: slugged label, or line<index> for numeric anchors
+const posix = (p: string) => p.split("\\").join("/"); // backslashes → forward slashes
+// ~2.6 words/sec is a typical narration pace — a rough length estimate for --dry-run (never below 0.4).
+const estimateSecs = (text: string) => Math.max(0.4, text.trim().split(/\s+/).length / 2.6);
+
+/** Run narrate.py with a JSON request on stdin; resolve the last stdout line when it is a `{durations}` / `{error}` object. */
+function synth(req: unknown): Promise<{ durations?: Record<string, number>; error?: string }> {
+  return new Promise((res, rej) => {
+    const proc = spawn("python3", [NARRATE_PY], { stdio: ["pipe", "pipe", "inherit"] });
+    let stdout = "";
+    proc.stdout.on("data", (d: Buffer) => (stdout += d.toString()));
+    proc.on("error", rej); // ENOENT — python3 missing
+    proc.stdin.on("error", () => {}); // child gone before reading (EPIPE): the close/error handlers report it
+    proc.on("close", (code) => {
+      let out: unknown = null;
+      try { out = JSON.parse(stdout.trim().split("\n").pop() ?? "{}"); } catch { /* not JSON — rejected below */ }
+      const ok = typeof out === "object" && out !== null && ("error" in out || (code === 0 && "durations" in out)); // only our own protocol object counts
+      if (ok) res(out as { durations?: Record<string, number>; error?: string });
+      else rej(new Error(`narrate.py produced no JSON result (exit ${code})`));
+    });
+    proc.stdin.end(JSON.stringify(req));
+  });
+}
+
+/** CLI entry: resolve each line's slot on the label clock, synthesize and fit it, then bake the results into script.json. */
+async function main() {
+  const args = parseArgs(process.argv.slice(2));
+  if (!args.scene) fail('narrate needs a scene file\nusage: reframe narrate <scene.ts|.json> [--voice <name>] [--lang a] [--max-speed 1.3] [--script <path>] [--dry-run]');
+  const scenePath = userPath(args.scene);
+  if (!existsSync(scenePath)) fail(`no such file: ${scenePath}`);
+
+  // the label clock — every line's slot is a window in the scene's own timeline
+  const scene = await loadScene(scenePath);
+  const compiled = compileScene(scene);
+
+  const sceneDir = dirname(scenePath);
+  const sceneBase = basename(scenePath).replace(/\.(ts|json)$/, "");
+  const scriptPath = args.script ? userPath(args.script) : join(sceneDir, `${sceneBase}-vo`, "script.json");
+  if (!existsSync(scriptPath)) {
+    fail(`no narration script at ${scriptPath}\n` +
+      `create it as a JSON array and import it into your scene's audio.narration, e.g.:\n` +
+      `  [ { "at": "<label>", "text": "Your line." } ]`);
+  }
+  const voDir = dirname(scriptPath);
+  const voBase = posix(relative(sceneDir, voDir)); // e.g. "demo-vo"
+
+  const lines: Line[] = JSON.parse(await readFile(scriptPath, "utf8"));
+  if (!Array.isArray(lines) || lines.length === 0) fail(`${scriptPath}: expected a non-empty JSON array of narration lines`);
+
+  // resolve each line's anchor (label start + offset) and its slot (gap to the
+  // next line, or to the scene end for the last line)
+  const taken = new Set<string>(); // stems already handed out
+  const resolved = lines.map((line, i) => {
+    if (typeof line?.text !== "string" || line.text.trim() === "") fail(`line ${i}: "text" must be a non-empty string`);
+    let anchor: number;
+    if (typeof line.at === "number") anchor = line.at;
+    else {
+      const span = compiled.labelTimes.get(line.at);
+      if (!span) fail(`line ${i}: unknown timeline label "${line.at}" (run \`reframe labels ${args.scene}\`)`);
+      anchor = span.t0;
+    }
+    // a stem names the wav and keys the durations map — suffix repeats (same label twice, labels that slug alike)
+    let stem = stemOf(line, i);
+    for (let n = 2; taken.has(stem); n++) stem = `${stemOf(line, i)}-${n}`;
+    taken.add(stem);
+    return { line, i, stem, anchor: Math.max(0, anchor + (line.offset ?? 0)) };
+  });
+  const order = [...resolved].sort((a, b) => a.anchor - b.anchor);
+  const slots = new Map<number, number>();
+  for (let k = 0; k < order.length; k++) {
+    const start = order[k]!.anchor;
+    const end = k + 1 < order.length ? order[k + 1]!.anchor : compiled.duration;
+    slots.set(order[k]!.i, Math.max(0.1, end - start));
+  }
+  const slotOf = (i: number) => slots.get(i)!;
+  const rows: { stem: string; at: string; slot: number; text: string; len: number; speed: number; warn?: string }[] = [];
+
+  if (args.dryRun) {
+    for (const r of resolved) {
+      const len = estimateSecs(r.line.text);
+      const slot = slotOf(r.i);
+      const speed = Math.min(args.maxSpeed, Math.max(1, len / slot));
+      const fitted = len / speed;
+      rows.push({ stem: r.stem, at: String(r.line.at), slot, text: r.line.text, len: fitted, speed, ...(fitted > slot + 0.05 ? { warn: "overruns" } : {}) });
+    }
+    printTable(rows, true);
+    console.log(`\n(dry run — estimates only, no synthesis. drop --dry-run to generate.)`);
+    return;
+  }
+
+  await mkdir(voDir, { recursive: true });
+
+  // PASS 1 — synth every line at natural speed, measure
+  const pass1 = await synth({ outDir: voDir, lang: args.lang, lines: resolved.map((r) => ({ stem: r.stem, text: r.line.text, voice: r.line.voice ?? args.voice, speed: 1 })) });
+  if (pass1.error) fail(pass1.error);
+  const dur1 = pass1.durations!;
+
+  // PASS 2 — re-synth only the lines that overrun their slot, sped up to fit
+  const refit = resolved
+    .map((r) => ({ r, speed: Math.min(args.maxSpeed, Math.max(1, +(dur1[r.stem]! / slotOf(r.i)).toFixed(3))) }))
+    .filter(({ speed }) => speed > 1.001);
+  let dur2: Record<string, number> = {};
+  if (refit.length > 0) {
+    const p2 = await synth({ outDir: voDir, lang: args.lang, lines: refit.map(({ r, speed }) => ({ stem: r.stem, text: r.line.text, voice: r.line.voice ?? args.voice, speed })) });
+    if (p2.error) fail(p2.error);
+    dur2 = p2.durations!;
+  }
+  const speedFor = new Map(refit.map(({ r, speed }) => [r.stem, speed]));
+
+  // bake file / voice / speed / duration back into each line
+  for (const r of resolved) {
+    const speed = speedFor.get(r.stem) ?? 1;
+    const len = (speed > 1 ? dur2[r.stem] : dur1[r.stem]) ?? dur1[r.stem]!;
+    const slot = slotOf(r.i);
+    r.line.file = posix(join(voBase, `${r.stem}.wav`));
+    r.line.voice = r.line.voice ?? args.voice;
+    if (speed > 1) r.line.speed = speed; else delete r.line.speed;
+    r.line.duration = +len.toFixed(3);
+    rows.push({ stem: r.stem, at: String(r.line.at), slot, text: r.line.text, len, speed, ...(len > slot + 0.05 ? { warn: "overruns" } : {}) });
+  }
+
+  await writeFile(scriptPath, JSON.stringify(lines, null, 2) + "\n");
+  printTable(rows.sort((a, b) => Number(a.warn ? 1 : 0) - Number(b.warn ? 1 : 0)), false);
+  const warned = rows.filter((r) => r.warn).length;
+  console.log(`\nwrote ${resolved.length} wav → ${voBase}/  ·  baked ${basename(scriptPath)}`);
+  if (warned > 0) console.log(`⚠ ${warned} line(s) overrun their slot even at ${args.maxSpeed}× — shorten the text or retime the beat.`);
+  console.log(`  next: reframe render ${args.scene}`);
+}
+
+/** Print the fit table: one row per line with a ✓/⚠ mark, label, slot, length, speed and the (truncated) text. */
+function printTable(rows: { at: string; slot: number; text: string; len: number; speed: number; warn?: string }[], dry: boolean) {
+  console.log(`# narration ${dry ? "(estimated)" : "fit"} — label · slot · length · speed`);
+  for (const { at, slot, text, len, speed, warn } of rows) {
+    const rate = (speed > 1.001 ? `${speed.toFixed(2)}×` : "1.0×").padStart(5);
+    const shown = JSON.stringify(text.length > 40 ? text.slice(0, 37) + "…" : text); // cap long lines at 37 chars + ellipsis
+    console.log(`${warn ? "⚠" : "✓"} ${at.padEnd(16)} slot ${slot.toFixed(2)}s  len ${len.toFixed(2)}s  ${rate}  ${shown}`);
+  }
+}
+
+main().catch((err: unknown) => {
+  const msg = err instanceof Error ? err.message : String(err);
+  // Only a failed spawn of python3 means "python missing"; any other ENOENT (a missing
+  // scene import, an unreadable script, …) must surface with its own message.
+  if (/spawn python3 ENOENT/.test(msg)) fail("python3 not found on PATH — install Python 3, then: pip install kokoro && (macOS) brew install espeak-ng");
+  fail(msg);
+});
diff --git a/packages/render-cli/src/reframe.ts b/packages/render-cli/src/reframe.ts
index 9f23637..0dee912 100644
--- a/packages/render-cli/src/reframe.ts
+++ b/packages/render-cli/src/reframe.ts
@@ -41,6 +41,9 @@ const COMPILE = PACKAGED
 const ASSEMBLE = PACKAGED
   ? join(ROOT, "dist", "assemble.js")
   : join(ROOT, "packages", "render-cli", "src", "assemble.ts");
+const NARRATE = PACKAGED // narrate entry script: dist/narrate.js when PACKAGED, otherwise the TS source
+  ? join(ROOT, "dist", "narrate.js")
+  : join(ROOT, "packages", "render-cli", "src", "narrate.ts");
 const MANIFEST = PACKAGED
   ? join(ROOT, "dist", "manifest.js")
   : join(ROOT, "packages", "render-cli", "src", "manifest.ts");
@@ -99,6 +102,8 @@ usage:
   ${CMD} new <scene-name>        scaffold <scene-name>.ts in your directory
   ${CMD} assemble <media...> [-o name] [--title "…"] [--bgm <synth>] [--hold s] [--seed N]
                                  probe images/videos → scaffold a clip-aware montage scene .ts (then render it)
+  ${CMD} narrate <scene.ts|.json> [--voice <name>] [--max-speed n] [--dry-run]
+                                 scene-fitted Kokoro voiceover: synth each line, fit its rate to the slot (needs python+kokoro)
   ${CMD} labels <scene.ts|.json>  print the event clock (label → exact seconds; for sound design / timing)
   ${CMD} manifest <scene.ts|.json> [--json]  list the editable surface (node/state/label/beat/behavior addresses + patchable props)
   ${CMD} lint <scene.ts|.json> [--json] [--strict]  flag un-addressable motion (regen-unsafe) + an addressability summary
@@ -127,6 +132,15 @@ function preflightFfmpeg() {
   }
 }
 
+/** Exit with install hints unless `python3 -c "import kokoro"` runs and succeeds. */
+function preflightKokoro() {
+  // `.error` is only set when python3 cannot be spawned; a failed import is a non-zero status (null if not spawned)
+  if (spawnSync("python3", ["-c", "import kokoro"], { stdio: "ignore" }).status !== 0) {
+    fail("Kokoro TTS not available — narrate needs Python 3 with the `kokoro` package.\n" +
+      "  pip install kokoro    (then, for phonemes: macOS `brew install espeak-ng`, debian `apt install espeak-ng`)");
+  }
+}
+
 /** Run a child, mirroring output and appending a hint on known failures. */
 function run(
   cmd: string,
@@ -280,6 +294,19 @@ async function main() {
       );
     }
 
+    case "narrate": {
+      // scene-fitted Kokoro voiceover. Path resolution happens in the entry
+      // (relative to INIT_CWD) — forward the args. Synthesis needs python+kokoro;
+      // skip the preflight for --dry-run (estimate only, no synthesis).
+      // The scene is the first bare argument that is not a flag's value (`--voice af_bella scene.ts`).
+      const valued = new Set(["--voice", "--lang", "--max-speed", "--script"]);
+      const input = rest.find((a, i) => !a.startsWith("-") && !valued.has(rest[i - 1] ?? ""));
+      if (!input) fail(`narrate needs a scene file\n\n${USAGE}`);
+      if (!existsSync(userPath(input))) fail(`no such file: ${userPath(input)}`);
+      if (!rest.includes("--dry-run")) preflightKokoro();
+      process.exit(await (PACKAGED ? run(process.execPath, [NARRATE, ...rest]) : run("npx", ["tsx", NARRATE, ...rest])));
+    }
+
     case "manifest":
     case "lint": {
       // read-only introspection — no ffmpeg/chromium. Scene file + pass-through flags.