app: hoist fenced code out of list items — enriched #243 workaround

yyq1025 · claude · yyq1025 · commit 22a4fd877750 · 2026-06-12T11:05:55.000-05:00
enriched repeats the list marker on every line of a code block nested in a list item (software-mansion-labs/react-native-enriched-markdown#243, open). Lists whose item tree contains a fence are now split at fence boundaries by a line scanner over the list token's raw text:

- concatenating the pieces reproduces the source exactly; ordered-list numbering survives because continuation runs keep their literal "2."/"3." markers and GFM honors a list's first number
- covers task lists, nested lists, fence-on-marker-line, ~~~ fences, and a streaming-open tail fence (closed=false until the close lands)
- scanner-conservative: unrecognized shapes (e.g. fences inside a nested blockquote) stay in the run — worst case is pre-hoist behavior
- blockquotes deliberately not hoisted: #243 is list-marker-specific and a hoist would break the quote bar around the code

Hoisted blocks gain shiki highlight + horizontal scroll + the themed container, same as top-level code. Known costs: full-width rendering (list indent lost) and post-code item content becomes a plain paragraph.

Also reorders isFencedCodeClosed so single-line indented code counts as closed (it now runs for every segment, not just the last).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
diff --git a/packages/app/src/lib/markdown/markdown-chunking.ts b/packages/app/src/lib/markdown/markdown-chunking.ts
@@ -1,3 +1,4 @@
+import type { Token, Tokens } from "marked";
 import { lexer } from "marked";
 import { useMemo, useRef } from "react";
 
@@ -14,6 +15,17 @@ import { useMemo, useRef } from "react";
  *    flag (close-time-gated work, e.g. a long-block highlight fuse, can
  *    hang off it).
  *
+ * LIST HOIST: fenced code NESTED in list items (incl. task items and
+ * nested lists) is also broken out — enriched repeats the list marker on
+ * every code line (software-mansion-labs/react-native-enriched-markdown
+ * #243, open). The list's raw is split at fence boundaries; ordered-list
+ * numbering survives because the continuation run still carries the
+ * literal "2."/"3." markers and GFM honors a list's first number. Known
+ * tradeoffs: hoisted code renders full-width (list indent lost), and any
+ * post-code content of the same item becomes a plain paragraph.
+ * Blockquotes are deliberately NOT hoisted — #243 is list-marker-specific
+ * and a hoist would visually break the quote bar around the code.
+ *
  * Stability model: segments are keyed POSITIONALLY (`i-kind`). Children
  * memo on `raw`, so the prefix-diff guarantee we rely on is "unchanged
  * raw ⇒ no re-render", not "unchanged key". Lists/setext headings can
@@ -45,12 +57,109 @@ export interface ChunkStats {
  *  LAST segment can ever be open. */
 function isFencedCodeClosed(raw: string): boolean {
   const t = raw.trimEnd();
-  const firstNewline = t.indexOf("\n");
-  if (firstNewline < 0) return false; // just the opening fence line
-  if (!t.startsWith("```") && !t.startsWith("~~~")) return true; // indented code
-  return /\n[ \t]*(```|~~~)[ \t]*$/.test(t);
+  // A fence opener may be indented by up to three spaces; anything else that
+  // reaches here as a code token is indented code, which has no close marker
+  // and therefore always counts as closed (even when it is a single line).
+  const opener = /^ {0,3}(`{3,}|~{3,})/.exec(t);
+  if (opener === null) return true; // indented code
+  const fence = opener[1] ?? "";
+
+  const lastNewline = t.lastIndexOf("\n");
+  if (lastNewline < 0) return false; // just the opening fence line
+
+  // The block is closed only when its final line is a bare fence run made of
+  // the SAME character as the opener and at least as long. This accepts a
+  // "````" closer for a "````" opener, and rejects an inner "```" (or a
+  // "~~~") line that merely happens to be the last line streamed so far.
+  // `t` is already right-trimmed, so no trailing whitespace can follow.
+  const closer = /^[ \t]*(`+|~+)$/.exec(t.slice(lastNewline + 1));
+  const closeRun = closer?.[1] ?? "";
+  return closeRun[0] === fence[0] && closeRun.length >= fence.length;
 }
 
+/** Reports whether any item of `list` — at any nesting depth of sub-lists —
+ *  directly contains a fenced code token. Blockquote children are not
+ *  descended into (deliberate — see header), and indented (non-fenced)
+ *  code does not count: the line scanner below only understands fences,
+ *  and LLM output is fenced in practice. */
+function listHasNestedFence(list: Tokens.List): boolean {
+  const startsWithFence = (text: string): boolean =>
+    /^(`{3,}|~{3,})/.test(text.trimStart());
+
+  return list.items.some((entry) =>
+    (entry.tokens as Token[]).some((child) => {
+      if (child.type === "code") return startsWithFence(child.raw);
+      // Only sub-lists are recursed; every other token kind is ignored.
+      return child.type === "list" && listHasNestedFence(child as Tokens.List);
+    }),
+  );
+}
+
+/** One slice of a list token's raw text as produced by the fence scanner.
+ *  - "run":  list source lines that are not part of a recognized fence.
+ *  - "code": one fenced block. `raw` is the fence's source lines verbatim
+ *    (opener through closer, or through the end of the list raw when no
+ *    closer was seen); `lang` is the trimmed info string; `code` is the
+ *    content with up to the opener's prefix width of leading spaces
+ *    removed from each line and the final newline dropped; `closed` says
+ *    whether a matching closing fence was found. */
+type ListPiece =
+  | { kind: "run"; raw: string }
+  | { kind: "code"; raw: string; lang: string; code: string; closed: boolean };
+
+/** A fence-open line inside list raw: indentation, optionally the list
+ *  marker itself ("1. ```ts" — fence as the item's first block), then the
+ *  fence and its info string. The captured prefix length is what GFM
+ *  strips from the content lines (continuation indent = marker width).
+ *
+ *  Capture groups: 1 = indentation plus the optional list marker and the
+ *  whitespace after it; 2 = the fence run (3+ backticks or 3+ tildes);
+ *  3 = the rest of the line (the untrimmed info string). Group 3 is
+ *  unrestricted, so the pattern by itself also matches a backtick run
+ *  whose remainder contains further backticks. Expects a single line
+ *  with its trailing newline already removed. */
+const LIST_FENCE_OPEN =
+  /^([ \t]*(?:(?:[-*+]|\d{1,9}[.)])[ \t]+)?)(`{3,}|~{3,})([^\n]*)$/;
+/** A candidate fence-close line: nothing but a fence run (captured as
+ *  group 1) with optional surrounding spaces/tabs. The pattern does not
+ *  itself tie the closer to a particular opener — matching the fence
+ *  character and minimum length is left to the caller. */
+const FENCE_CLOSE = /^[ \t]*(`{3,}|~{3,})[ \t]*$/;
+
+/** Strip at most `max` leading space characters from `line`. Only literal
+ *  spaces are removed — a tab (or any other character) stops the scan —
+ *  and a line with fewer than `max` leading spaces just loses the ones it
+ *  has. */
+function dedent(line: string, max: number): string {
+  let cut = 0;
+  for (; cut < max; cut++) {
+    if (line.charAt(cut) !== " ") break;
+  }
+  return line.slice(cut);
+}
+
+/** Split a list token's raw at fence boundaries. Pure text surgery on the
+ *  ORIGINAL raw (concatenating the pieces reproduces it exactly), so run
+ *  pieces keep their literal list markers. Scanner-conservative: a fence
+ *  shape it doesn't recognize (e.g. inside a nested blockquote, where the
+ *  line starts with ">") simply stays in the run — worst case is the
+ *  pre-hoist behavior. A backtick "opener" whose info string itself
+ *  contains a backtick (inline code such as "- use ```x``` here") is not
+ *  a fence per CommonMark and likewise stays in the run.
+ *
+ *  NOTE(review): once a fence is opened, every following line is taken
+ *  as code until a matching closer or the end of `raw`, so an unclosed
+ *  fence absorbs any later list items into its code piece.
+ *
+ *  @param raw  The list token's raw source text.
+ *  @returns Pieces in source order. "run" pieces are verbatim list text;
+ *    "code" pieces carry the fence's verbatim lines (`raw`), the trimmed
+ *    info string (`lang`), the dedented content without its final newline
+ *    (`code`), and whether a matching close fence was found (`closed`). */
+function splitListRaw(raw: string): ListPiece[] {
+  // Lines WITH their trailing newline (Hermes-safe; no lookbehind).
+  const lines = raw.match(/[^\n]*\n|[^\n]+/g) ?? [];
+  const pieces: ListPiece[] = [];
+  let run: string[] = [];
+  const flushRunPiece = () => {
+    if (run.length > 0) {
+      pieces.push({ kind: "run", raw: run.join("") });
+      run = [];
+    }
+  };
+
+  let i = 0;
+  while (i < lines.length) {
+    const line = lines[i] ?? "";
+    const open = LIST_FENCE_OPEN.exec(line.replace(/\n$/, ""));
+    // CommonMark: the info string of a BACKTICK fence may not contain a
+    // backtick. Without this check an inline span that starts an item's
+    // text ("- use ```foo``` here") would be taken for an opener and every
+    // following line would be swallowed into a bogus code piece. Tilde
+    // fences have no such restriction.
+    const isInlineBackticks =
+      open !== null &&
+      (open[2] ?? "").startsWith("`") &&
+      (open[3] ?? "").includes("`");
+    if (open === null || isInlineBackticks) {
+      run.push(line);
+      i++;
+      continue;
+    }
+    // Width of indentation (+ list marker, if the fence sits on the marker
+    // line): the amount of leading spaces stripped from each content line.
+    const prefixLen = open[1]?.length ?? 0;
+    const fence = open[2] ?? "";
+    const info = (open[3] ?? "").trim();
+    const rawLines: string[] = [line];
+    const codeLines: string[] = [];
+    let closed = false;
+    i++;
+    while (i < lines.length) {
+      const l = lines[i] ?? "";
+      rawLines.push(l);
+      i++;
+      const close = FENCE_CLOSE.exec(l.replace(/\n$/, ""));
+      // A closer must use the opener's fence character and be at least as
+      // long; anything else is ordinary code content.
+      if (
+        close !== null &&
+        close[1]?.[0] === fence[0] &&
+        (close[1]?.length ?? 0) >= fence.length
+      ) {
+        closed = true;
+        break;
+      }
+      codeLines.push(dedent(l, prefixLen));
+    }
+    // Emit any pending list text first so pieces stay in source order.
+    flushRunPiece();
+    pieces.push({
+      kind: "code",
+      raw: rawLines.join(""),
+      lang: info,
+      code: codeLines.join("").replace(/\n$/, ""),
+      closed,
+    });
+  }
+  flushRunPiece();
+  return pieces;
+}
+
 export function chunkMarkdown(
   markdown: string,
   streamDone: boolean,
@@ -68,28 +177,54 @@ export function chunkMarkdown(
     });
     runBuffer = "";
   };
+  const pushCode = (
+    raw: string,
+    lang: string,
+    code: string,
+    closed: boolean,
+  ) => {
+    flushRun();
+    segments.push({
+      kind: "code",
+      key: `${segments.length}-code`,
+      raw,
+      lang,
+      code,
+      closed,
+    });
+  };
 
   for (const token of tokens) {
     if (token.type === "code") {
-      flushRun();
-      segments.push({
-        kind: "code",
-        key: `${segments.length}-code`,
-        raw: token.raw,
-        lang: typeof token.lang === "string" ? token.lang : "",
-        code: typeof token.text === "string" ? token.text : "",
-        closed: true, // non-last segments are closed by construction
-      });
+      pushCode(
+        token.raw,
+        typeof token.lang === "string" ? token.lang : "",
+        typeof token.text === "string" ? token.text : "",
+        isFencedCodeClosed(token.raw),
+      );
+    } else if (
+      token.type === "list" &&
+      // `type` alone doesn't narrow past marked's Tokens.Generic.
+      listHasNestedFence(token as Tokens.List)
+    ) {
+      for (const piece of splitListRaw(token.raw)) {
+        if (piece.kind === "code") {
+          pushCode(piece.raw, piece.lang, piece.code, piece.closed);
+        } else {
+          runBuffer += piece.raw;
+        }
+      }
     } else {
       runBuffer += token.raw;
     }
   }
   flushRun();
 
-  // Only the tail can be open; everything before it has tokens after it.
+  // Only the tail can be open (unclosed fences swallow to EOF); when the
+  // stream is done, an EOF-unterminated fence counts as closed.
   const last = segments[segments.length - 1];
-  if (last?.kind === "code") {
-    last.closed = streamDone || isFencedCodeClosed(last.raw);
+  if (last?.kind === "code" && streamDone) {
+    last.closed = true;
   }
   return segments;
 }