Skip to content

Commit 22a4fd8

Browse files
yyq1025claude
andcommitted
app: hoist fenced code out of list items — enriched #243 workaround
enriched repeats the list marker on every line of a code block nested in a list item (software-mansion/react-native-enriched-markdown#243, open). Lists whose item tree contains a fence are now split at fence boundaries by a line scanner over the list token's raw text: - concatenating the pieces reproduces the source exactly; ordered-list numbering survives because continuation runs keep their literal "2."/"3." markers and GFM honors a list's first number - covers task lists, nested lists, fence-on-marker-line, ~~~ fences, and a streaming-open tail fence (closed=false until the close lands) - scanner-conservative: unrecognized shapes (e.g. fences inside a nested blockquote) stay in the run — worst case is pre-hoist behavior - blockquotes deliberately not hoisted: #243 is list-marker-specific and a hoist would break the quote bar around the code Hoisted blocks gain shiki highlight + horizontal scroll + the themed container, same as top-level code. Known costs: full-width rendering (list indent lost) and post-code item content becomes a plain paragraph. Also reorders isFencedCodeClosed so single-line indented code counts as closed (it now runs for every segment, not just the last). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
1 parent 7e2afd2 commit 22a4fd8

1 file changed

Lines changed: 149 additions & 14 deletions

File tree

packages/app/src/lib/markdown/markdown-chunking.ts

Lines changed: 149 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import type { Token, Tokens } from "marked";
12
import { lexer } from "marked";
23
import { useMemo, useRef } from "react";
34

@@ -14,6 +15,17 @@ import { useMemo, useRef } from "react";
1415
* flag (close-time-gated work, e.g. a long-block highlight fuse, can
1516
* hang off it).
1617
*
18+
* LIST HOIST: fenced code NESTED in list items (incl. task items and
19+
* nested lists) is also broken out — enriched repeats the list marker on
20+
* every code line (software-mansion-labs/react-native-enriched-markdown
21+
* #243, open). The list's raw is split at fence boundaries; ordered-list
22+
* numbering survives because the continuation run still carries the
23+
* literal "2."/"3." markers and GFM honors a list's first number. Known
24+
* tradeoffs: hoisted code renders full-width (list indent lost), and any
25+
* post-code content of the same item becomes a plain paragraph.
26+
* Blockquotes are deliberately NOT hoisted — #243 is list-marker-specific
27+
* and a hoist would visually break the quote bar around the code.
28+
*
1729
* Stability model: segments are keyed POSITIONALLY (`i-kind`). Children
1830
* memo on `raw`, so the prefix-diff guarantee we rely on is "unchanged
1931
* raw ⇒ no re-render", not "unchanged key". Lists/setext headings can
@@ -45,12 +57,109 @@ export interface ChunkStats {
4557
* LAST segment can ever be open. */
4658
function isFencedCodeClosed(raw: string): boolean {
  // Decides whether a code token's raw text is a complete block.
  //   - Indented (non-fenced) code has no closer to wait for: always closed.
  //   - Fenced code is closed only when its LAST line is a closing fence that
  //     uses the same character as the opener and is at least as long, so a
  //     "```" line inside a "````" block (or a "~~~" line inside a backtick
  //     block) is content, not the close.
  const t = raw.trimEnd();
  // An opening fence may be indented by up to three spaces; anything else
  // that reached here as a code token is indented code.
  const open = /^ {0,3}(`{3,}|~{3,})/.exec(t);
  if (open === null) return true; // indented code
  const fence = open[1] ?? "";
  const lastNewline = t.lastIndexOf("\n");
  if (lastNewline < 0) return false; // just the opening fence line
  // `t` is right-trimmed, so the last line carries no trailing whitespace.
  const close = /^[ \t]*(`{3,}|~{3,})$/.exec(t.slice(lastNewline + 1));
  if (close === null) return false;
  const closer = close[1] ?? "";
  return closer[0] === fence[0] && closer.length >= fence.length;
}
5364

65+
/** True when a FENCED code block sits anywhere down a list's item tree.
 * Nested lists are searched recursively; blockquotes deliberately are not
 * (see header). Indented (non-fenced) code inside items is excluded: the
 * line scanner that splits the list only understands fences, and LLM
 * output is fenced in practice.
 *
 * @param list A list token produced by marked's lexer.
 * @returns Whether any item (at any list depth) holds a fenced code token.
 */
function listHasNestedFence(list: Tokens.List): boolean {
  for (const item of list.items) {
    for (const t of item.tokens as Token[]) {
      // A fence opener is indented by at most three spaces. Four or more
      // means indented code, even when its text happens to start with
      // ``` or ~~~ — so test the raw as-is instead of trimming it first.
      if (t.type === "code" && /^ {0,3}(`{3,}|~{3,})/.test(t.raw)) {
        return true;
      }
      if (t.type === "list" && listHasNestedFence(t as Tokens.List)) {
        return true;
      }
    }
  }
  return false;
}
82+
83+
/** One slice of a list token's raw text: either list markdown to keep in
 * the surrounding run, or a fenced code block broken out of the list. */
type ListPiece =
  | { kind: "run"; raw: string }
  | { kind: "code"; raw: string; lang: string; code: string; closed: boolean };

/** A fence-open line inside list raw: indentation, optionally the list
 * marker itself ("1. ```ts" — fence as the item's first block), then the
 * fence and its info string. Group 1 is the prefix whose length is used
 * as the dedent width for the content lines, group 2 the fence, group 3
 * the info string. A backtick fence may not carry backticks in its info
 * string (CommonMark), so "- ```x``` text" — an inline code span — is not
 * mistaken for a fence opener. Lookahead only; no lookbehind. */
const LIST_FENCE_OPEN =
  /^([ \t]*(?:(?:[-*+]|\d{1,9}[.)])[ \t]+)?)(`{3,}(?=[^`\n]*$)|~{3,})([^\n]*)$/;
/** A line holding nothing but a fence (optionally indented / padded). */
const FENCE_CLOSE = /^[ \t]*(`{3,}|~{3,})[ \t]*$/;

/** Remove at most `max` leading spaces from `line`; tabs are left alone. */
function dedent(line: string, max: number): string {
  let n = 0;
  while (n < max && line[n] === " ") n++;
  return line.slice(n);
}

/** Split a list token's raw at fence boundaries. Pure text surgery on the
 * ORIGINAL raw (concatenating the pieces' `raw` reproduces it exactly), so
 * run pieces keep their literal list markers. The scanner is meant to be
 * conservative: a fence-open shape it doesn't recognize (e.g. inside a
 * nested blockquote, where the line starts with ">") stays in the run.
 *
 * @param raw The list token's raw markdown.
 * @returns Alternating run/code pieces in source order. A code piece has
 *   `closed=false` when the input ends before its closing fence.
 */
function splitListRaw(raw: string): ListPiece[] {
  // Lines WITH their trailing newline (Hermes-safe; no lookbehind).
  const lines = raw.match(/[^\n]*\n|[^\n]+/g) ?? [];
  const pieces: ListPiece[] = [];
  let run: string[] = [];
  const flushRunPiece = () => {
    if (run.length > 0) {
      pieces.push({ kind: "run", raw: run.join("") });
      run = [];
    }
  };

  let i = 0;
  while (i < lines.length) {
    const line = lines[i] ?? "";
    const open = LIST_FENCE_OPEN.exec(line.replace(/\n$/, ""));
    if (open === null) {
      // Ordinary list text: accumulate into the current run.
      run.push(line);
      i++;
      continue;
    }

    const prefixLen = open[1]?.length ?? 0;
    const fence = open[2] ?? "";
    const info = (open[3] ?? "").trim();
    const rawLines: string[] = [line];
    const codeLines: string[] = [];
    let closed = false;
    i++;
    // Consume up to and including the matching closer, or to end of input.
    while (i < lines.length) {
      const l = lines[i] ?? "";
      rawLines.push(l);
      i++;
      const close = FENCE_CLOSE.exec(l.replace(/\n$/, ""));
      // A closer uses the opener's character and is at least as long.
      if (
        close !== null &&
        close[1]?.[0] === fence[0] &&
        (close[1]?.length ?? 0) >= fence.length
      ) {
        closed = true;
        break;
      }
      codeLines.push(dedent(l, prefixLen));
    }
    flushRunPiece();
    pieces.push({
      kind: "code",
      raw: rawLines.join(""),
      lang: info,
      // Drop only the final newline; interior blank lines are content.
      code: codeLines.join("").replace(/\n$/, ""),
      closed,
    });
  }
  flushRunPiece();
  return pieces;
}
162+
54163
export function chunkMarkdown(
55164
markdown: string,
56165
streamDone: boolean,
@@ -68,28 +177,54 @@ export function chunkMarkdown(
68177
});
69178
runBuffer = "";
70179
};
180+
const pushCode = (
181+
raw: string,
182+
lang: string,
183+
code: string,
184+
closed: boolean,
185+
) => {
186+
flushRun();
187+
segments.push({
188+
kind: "code",
189+
key: `${segments.length}-code`,
190+
raw,
191+
lang,
192+
code,
193+
closed,
194+
});
195+
};
71196

72197
for (const token of tokens) {
73198
if (token.type === "code") {
74-
flushRun();
75-
segments.push({
76-
kind: "code",
77-
key: `${segments.length}-code`,
78-
raw: token.raw,
79-
lang: typeof token.lang === "string" ? token.lang : "",
80-
code: typeof token.text === "string" ? token.text : "",
81-
closed: true, // non-last segments are closed by construction
82-
});
199+
pushCode(
200+
token.raw,
201+
typeof token.lang === "string" ? token.lang : "",
202+
typeof token.text === "string" ? token.text : "",
203+
isFencedCodeClosed(token.raw),
204+
);
205+
} else if (
206+
token.type === "list" &&
207+
// `type` alone doesn't narrow past marked's Tokens.Generic.
208+
listHasNestedFence(token as Tokens.List)
209+
) {
210+
for (const piece of splitListRaw(token.raw)) {
211+
if (piece.kind === "code") {
212+
pushCode(piece.raw, piece.lang, piece.code, piece.closed);
213+
} else {
214+
runBuffer += piece.raw;
215+
}
216+
}
83217
} else {
84218
runBuffer += token.raw;
85219
}
86220
}
87221
flushRun();
88222

89-
// Only the tail can be open; everything before it has tokens after it.
223+
// Only the tail can be open (unclosed fences swallow to EOF); when the
224+
// stream is done, an EOF-unterminated fence counts as closed.
90225
const last = segments[segments.length - 1];
91-
if (last?.kind === "code") {
92-
last.closed = streamDone || isFencedCodeClosed(last.raw);
226+
if (last?.kind === "code" && streamDone) {
227+
last.closed = true;
93228
}
94229
return segments;
95230
}

0 commit comments

Comments
 (0)