app: stream-aware chunked markdown + launch-time shiki init + markdown grammar

yyq1025 · claude · yyq1025 · commit 3d1bd87df581 · 2026-06-12T08:54:20.000-05:00
- thread per-run `streaming` (last assistant block while the turn runs) so
  enriched's streamingAnimation is true ONLY for the live tail; settled
  messages hit the measurement cache (session-enter ~15→35 JS fps)
- init the highlighter at app launch (splash-covered) instead of lazily on
  first session enter; useCodeTokens hook replaces the per-component memo
  (a cached/time-sliced variant was A/B'd and dropped — the drawer-settle
  gate covers it)
- add the markdown fence grammar (65KB standalone, embedded langs are
  lazy) + md alias

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
diff --git a/packages/app/src/app/_layout.tsx b/packages/app/src/app/_layout.tsx
@@ -25,6 +25,7 @@ import {
   DaemonClientProvider,
   useDaemonClient,
 } from "@/lib/daemon-client-context";
+import { initHighlighter } from "@/lib/markdown/code-highlighter";
 import { queryClient } from "@/lib/query-client";
 import { SafeAreaView } from "@/lib/styled";
 import { getThemePreference } from "@/lib/theme-preference";
@@ -63,6 +64,15 @@ export default function RootLayout() {
     void getThemePreference().then((pref) => Uniwind.setTheme(pref));
   }, []);
 
+  // Shiki engine + grammar load (~10-50ms JSON.parse, once per run) at app
+  // start — the engine README's recommendation, and the upstream example's
+  // exact idiom (initialize() from the root component's effect). Launch is
+  // splash-covered, so the parse is invisible here; paying it lazily on the
+  // first session-enter navigation was visible jank.
+  useEffect(() => {
+    initHighlighter();
+  }, []);
+
   return (
     <GestureHandlerRootView style={{ flex: 1 }}>
       <SafeAreaProvider>
diff --git a/packages/app/src/components/transcript/chat-panel.tsx b/packages/app/src/components/transcript/chat-panel.tsx
@@ -302,8 +302,25 @@ export function ChatPanel({
         // triggering its recompute, which jumped the anchored message. Also
         // doubles as the gap below the last message.
         ListFooterComponent={<ThinkingIndicator active={isThinking} />}
-        renderItem={({ item }) => {
-          if (item.kind === "text") return <TextBlock block={item} />;
+        renderItem={({ item, index }) => {
+          if (item.kind === "text")
+            return (
+              <TextBlock
+                block={item}
+                // The streaming message = the LAST block, assistant role,
+                // while the daemon-pushed activity says the turn is
+                // running. No protocol settle signal exists; this
+                // derivation is correct at every boundary (tool_call
+                // append → no longer last; interrupt/idle → not running;
+                // cold resume → not running). Worst case of a wrong beat:
+                // one extra remend pass + a deferred exact re-measure.
+                streaming={
+                  isRunning &&
+                  item.role === "assistant" &&
+                  index === blocks.length - 1
+                }
+              />
+            );
           if (item.kind === "tool") return <ToolBlock block={item} />;
           // `compact_divider` — placeholder renders nothing for this
           // commit. Actual divider component (horizontal line + caption)
diff --git a/packages/app/src/components/transcript/text-block.tsx b/packages/app/src/components/transcript/text-block.tsx
@@ -12,13 +12,15 @@ import type { TextRenderBlock } from "@/lib/transcript-blocks";
  * `ChunkedMarkdown` — text/table runs still go through enriched (Fabric,
  * Yoga-self-sizing, so rows hit LegendList with real height on first
  * layout), while code blocks break out into shiki-highlighted native
- * Text. See `ChunkedMarkdown.tsx` header for the design.
+ * Text. See `chunked-markdown.tsx` header for the design.
  *
- * `streamDone` is hardcoded `true` for now: there is no per-message
- * settle signal yet (daemon ignores content_block_stop), and `true` is
- * exact parity with the previous whole-message ChatMarkdown (no remend).
- * When a streaming derivation lands (isLast && activity running), wire
- * it here to enable tail-run repair.
+ * `streaming` comes from ChatPanel's derivation (last block && assistant
+ * && turn running — there is no protocol-level settle signal; daemon
+ * ignores content_block_stop). It gates three things downstream: tail-run
+ * remend repair, EOF fence-close marking, and — performance-critical —
+ * enriched's streamingAnimation, which must be FALSE for settled messages
+ * (measurement cache) and TRUE only for actively-changing content (see
+ * ChatMarkdownProps.streaming).
  *
  * Bubble shape mirrors Claude Desktop's user-message look — pale blue
  * background, dark navy text. Role labels (YOU / CLAUDE) are
@@ -31,7 +33,13 @@ import type { TextRenderBlock } from "@/lib/transcript-blocks";
  * the blue bubble entirely; text-only messages skip the stack. Both
  * paths share the outer `items-end` so alignment stays consistent.
  */
-export function TextBlock({ block }: { block: TextRenderBlock }) {
+export function TextBlock({
+  block,
+  streaming = false,
+}: {
+  block: TextRenderBlock;
+  streaming?: boolean;
+}) {
   if (block.role === "user") {
     // Materialize base64 → file:// URIs inline. Sync (new
     // expo-file-system API), cache-hit fast path on re-render so this
@@ -61,7 +69,7 @@ export function TextBlock({ block }: { block: TextRenderBlock }) {
   }
   return (
     <View className="px-4 py-1.5">
-      <ChunkedMarkdown markdown={block.text} streamDone />
+      <ChunkedMarkdown markdown={block.text} streamDone={!streaming} />
     </View>
   );
 }
diff --git a/packages/app/src/lib/markdown/chat-markdown.tsx b/packages/app/src/lib/markdown/chat-markdown.tsx
@@ -226,9 +226,31 @@ const DARK_STYLE = buildStyle(DARK_PALETTE);
 export interface ChatMarkdownProps {
   /** Assistant message content. May be partial during streaming. */
   markdown: string;
+  /** True ONLY for content actively receiving deltas (in practice: the
+   *  tail run of the streaming message). Drives enriched's
+   *  `streamingAnimation`, which is a PERFORMANCE switch, not just a fade:
+   *
+   *  - `false` (settled) → enriched's measurement CACHE is active —
+   *    re-mounting a session is cache hits instead of mock-rendering every
+   *    message synchronously just to measure it (the dominant cost of the
+   *    session-enter jank, ~15→35+ JS fps measured 2026-06-12) — and the
+   *    view.bounds measure fast path never engages (the sub-pixel height
+   *    creep loop; see ShadowMeasurementUtils.h).
+   *  - `true` (streaming) → bounds fast path gives cheap re-measures
+   *    between deltas. NEVER use false here: every delta is a new string,
+   *    so the cache misses every tick and enriched would mock-render the
+   *    whole message per delta.
+   *
+   *  The true→false settle flip forces one exact re-measure upstream
+   *  (ENRMPropsNeedExactStreamingMeasurement), snapping away any rounding
+   *  drift accumulated while streaming. */
+  streaming?: boolean;
 }
 
-export function ChatMarkdown({ markdown }: ChatMarkdownProps) {
+export function ChatMarkdown({
+  markdown,
+  streaming = false,
+}: ChatMarkdownProps) {
   const colorScheme = useColorScheme() ?? "light";
   const markdownStyle = colorScheme === "dark" ? DARK_STYLE : LIGHT_STYLE;
   return (
@@ -238,7 +260,7 @@ export function ChatMarkdown({ markdown }: ChatMarkdownProps) {
         underline: true,
       }}
       flavor="github"
-      streamingAnimation
+      streamingAnimation={streaming}
       streamingConfig={{ tableMode: "progressive" }}
       markdownStyle={markdownStyle}
     />
diff --git a/packages/app/src/lib/markdown/chunked-markdown.tsx b/packages/app/src/lib/markdown/chunked-markdown.tsx
@@ -1,4 +1,4 @@
-import { memo, useMemo } from "react";
+import { memo } from "react";
 import {
   ScrollView,
   Text,
@@ -14,12 +14,7 @@ import {
   DARK_PALETTE,
   LIGHT_PALETTE,
 } from "./chat-markdown";
-import {
-  resolveLang,
-  type TokenLines,
-  tokenizeCode,
-  useHighlighter,
-} from "./code-highlighter";
+import { useCodeTokens } from "./code-highlighter";
 import type { ChunkStats } from "./markdown-chunking";
 import { type MarkdownSegment, useMarkdownBlocks } from "./markdown-chunking";
 import { useRemend } from "./remend";
@@ -66,13 +61,16 @@ const RunSegment = memo(function RunSegment({ raw }: { raw: string }) {
   );
 });
 
-/** Tail run while streaming: unterminated inline syntax gets repaired.
+/** Tail run while streaming: unterminated inline syntax gets repaired,
+ *  and this is the ONLY place `streaming` is true — enriched's bounds
+ *  fast path for cheap per-delta re-measures (see ChatMarkdownProps).
+ *  Completed runs render settled (measurement cache).
  *  Same load-bearing View wrapper as RunSegment (see comment there). */
 function TailRunSegment({ raw }: { raw: string }) {
   const processed = useRemend(raw);
   return (
     <View>
-      <ChatMarkdown markdown={processed} />
+      <ChatMarkdown markdown={processed} streaming />
     </View>
   );
 }
@@ -122,18 +120,11 @@ const CodeBlockSegment = memo(function CodeBlockSegment({
   code: string;
   onTokenizeMs?: (ms: number) => void;
 }) {
-  const hl = useHighlighter();
   const scheme = useColorScheme() === "dark" ? "dark" : "light";
   const palette = scheme === "dark" ? DARK_PALETTE : LIGHT_PALETTE;
-  const langId = resolveLang(lang);
-
-  const lines = useMemo<TokenLines | null>(() => {
-    if (!hl || !langId) return null;
-    const t0 = performance.now();
-    const result = tokenizeCode(hl, code, langId, scheme);
-    onTokenizeMs?.(performance.now() - t0);
-    return result;
-  }, [hl, langId, code, scheme, onTokenizeMs]);
+  // Sync tokenize — see useCodeTokens for why (drawer-settle gate keeps
+  // mount bursts out of animation windows). null → plain, same metrics.
+  const lines = useCodeTokens(code, lang, scheme, onTokenizeMs);
 
   return (
     <View
diff --git a/packages/app/src/lib/markdown/code-highlighter.ts b/packages/app/src/lib/markdown/code-highlighter.ts
@@ -4,6 +4,7 @@ import langDiff from "@shikijs/langs/diff";
 import langJson from "@shikijs/langs/json";
 import langJsonc from "@shikijs/langs/jsonc";
 import langKotlin from "@shikijs/langs/kotlin";
+import langMarkdown from "@shikijs/langs/markdown";
 import langPython from "@shikijs/langs/python";
 import langRust from "@shikijs/langs/rust";
 import langShellscript from "@shikijs/langs/shellscript";
@@ -14,7 +15,7 @@ import langTsx from "@shikijs/langs/tsx";
 import langYaml from "@shikijs/langs/yaml";
 import themeGithubDark from "@shikijs/themes/github-dark";
 import themeGithubLight from "@shikijs/themes/github-light";
-import { useSyncExternalStore } from "react";
+import { useMemo, useSyncExternalStore } from "react";
 import {
   createNativeEngine,
   isNativeEngineAvailable,
@@ -27,7 +28,7 @@ import {
  *
  * Grammars are imported per-language — NEVER `@shikijs/langs` wholesale
  * (7.7MB) or `html` (silently chains javascript+css, ~291KB). Core set
- * ~470KB source, chosen by (a) sidecode's domain (TS/Swift/Kotlin) and
+ * ~535KB source, chosen by (a) sidecode's domain (TS/Swift/Kotlin) and
  * (b) what LLMs actually write in fence info-strings. One tsx grammar
  * covers the whole JS family via aliases (superset syntax; saves the
  * 180KB-each typescript/javascript grammars). Unknown languages degrade
@@ -42,7 +43,12 @@ let highlighter: HighlighterCore | null = null;
 let initStarted = false;
 const listeners = new Set<() => void>();
 
-function ensureHighlighter() {
+/** Kick off engine + grammar loading. Idempotent. Called from the root
+ *  layout's mount effect at app launch (the engine README recommends
+ *  app-start init): the grammar JSON.parse burst (~535KB source) is
+ *  splash-covered there, unlike on the first session-enter navigation.
+ *  The useHighlighter subscribe path keeps a deferred call as fallback. */
+export function initHighlighter() {
   if (initStarted) return;
   initStarted = true;
   if (!isNativeEngineAvailable()) {
@@ -66,6 +72,11 @@ function ensureHighlighter() {
       langJson,
       langJsonc,
       langDiff,
+      // 65KB standalone — its embedded-language list is LAZY (no chained
+      // grammar imports, unlike html). Nested fences inside a ```markdown
+      // block highlight for whatever grammars are loaded above; the rest
+      // render plain.
+      langMarkdown,
     ],
     engine: createNativeEngine(),
   })
@@ -82,7 +93,9 @@ export function useHighlighter(): HighlighterCore | null {
   return useSyncExternalStore(
     (cb) => {
       listeners.add(cb);
-      ensureHighlighter();
+      // Fallback only — the root layout already ran this at app launch in
+      // any real flow; idempotent no-op here.
+      initHighlighter();
       return () => listeners.delete(cb);
     },
     () => highlighter,
@@ -118,6 +131,8 @@ const LANG_ALIASES: Record<string, string> = {
   jsonc: "jsonc",
   diff: "diff",
   patch: "diff",
+  markdown: "markdown",
+  md: "markdown",
 };
 
 /** Info-strings that MEAN plain text — skip highlight without the
@@ -161,3 +176,33 @@ export function tokenizeCode(
     theme: scheme === "dark" ? "github-dark" : "github-light",
   });
 }
+
+// ─── Tokenization hook ──────────────────────────────────────────────────────
+
+/** Tokenized lines for a code block, or null while the engine loads /
+ *  for unsupported languages (null → caller renders plain text with
+ *  identical metrics, so the colored flip is layout-neutral).
+ *
+ *  Deliberately SYNC on the render frame. A cached + time-sliced variant
+ *  was built and A/B-tested (2026-06-12) and removed as unneeded: the
+ *  drawer-settle gate in the session screen keeps mount bursts out of
+ *  animation windows, and per-block tokenize (~1-3.5ms measured) is within
+ *  budget at our session scale. Revisit (see
+ *  memory/project_transcript_chunked_markdown_plan) if profiling ever
+ *  shows tokenize back on a hot path. */
+export function useCodeTokens(
+  code: string,
+  infoString: string,
+  scheme: "light" | "dark",
+  onTokenizeMs?: (ms: number) => void,
+): TokenLines | null {
+  const hl = useHighlighter();
+  const langId = resolveLang(infoString);
+  return useMemo(() => {
+    if (hl === null || langId === null) return null;
+    const t0 = performance.now();
+    const lines = tokenizeCode(hl, code, langId, scheme);
+    onTokenizeMs?.(performance.now() - t0);
+    return lines;
+  }, [hl, langId, code, scheme, onTokenizeMs]);
+}