diff --git a/Examples/CoreMLLLMChat/CoreMLLLMChat.xcodeproj/xcshareddata/xcschemes/CoreMLLLMChat.xcscheme b/Examples/CoreMLLLMChat/CoreMLLLMChat.xcodeproj/xcshareddata/xcschemes/CoreMLLLMChat.xcscheme
index ab73af6..7158d8a 100644
--- a/Examples/CoreMLLLMChat/CoreMLLLMChat.xcodeproj/xcshareddata/xcschemes/CoreMLLLMChat.xcscheme
+++ b/Examples/CoreMLLLMChat/CoreMLLLMChat.xcodeproj/xcshareddata/xcschemes/CoreMLLLMChat.xcscheme
@@ -50,6 +50,18 @@
ReferencedContainer = "container:CoreMLLLMChat.xcodeproj">
+
+
+
+
+
+
AsyncStream {
+ guard let engine = gemma4StatefulMultimodalEngine,
+ let tok = gemma4StatefulMultimodalTokenizer
+ else {
+ throw NSError(domain: "LLMRunner", code: 42,
+ userInfo: [NSLocalizedDescriptionKey:
+ "Gemma 4 stateful multimodal not loaded"])
+ }
+ isGenerating = true
+ tokensPerSecond = 0
+
+ // Encode image once per distinct attachment. Cache hit (same
+ // CGImage instance) skips the ~30 s vision graph + lets the
+ // engine's cross-turn KV reuse hit the LCP fast path.
+ var imageFeatures: MLMultiArray? = nil
+ var imageNumTokens = 0
+ var imageChanged = false
+ if let img = image {
+ if cachedGemma4MMImage === img, let f = cachedGemma4MMImageFeatures {
+ imageFeatures = f
+ imageNumTokens = 256
+ } else {
+ imageFeatures = try engine.processImage(img)
+ imageNumTokens = 256
+ cachedGemma4MMImage = img
+ cachedGemma4MMImageFeatures = imageFeatures
+ imageChanged = true
+ }
+ } else if cachedGemma4MMImage != nil {
+ cachedGemma4MMImage = nil
+ cachedGemma4MMImageFeatures = nil
+ imageChanged = true
+ }
+
+ var audioFeatures: MLMultiArray? = nil
+ var audioNumTokens = 0
+ var audioChanged = false
+ if let pcm = audio {
+ // Cheap fingerprint: [count, first, last]. Re-encode on
+ // any mismatch.
+ let sig: [Float] = pcm.isEmpty
+ ? [0, 0, 0]
+ : [Float(pcm.count), pcm.first ?? 0, pcm.last ?? 0]
+ let sigMatches = (cachedGemma4MMAudioSig == sig)
+ if sigMatches, let f = cachedGemma4MMAudioFeatures {
+ audioFeatures = f
+ audioNumTokens = cachedGemma4MMAudioTokens
+ } else {
+ let (feat, n) = try engine.processAudio(pcm)
+ audioFeatures = feat
+ audioNumTokens = n
+ cachedGemma4MMAudioSig = sig
+ cachedGemma4MMAudioFeatures = feat
+ cachedGemma4MMAudioTokens = n
+ audioChanged = true
+ }
+ } else if cachedGemma4MMAudioFeatures != nil {
+ cachedGemma4MMAudioSig = nil
+ cachedGemma4MMAudioFeatures = nil
+ cachedGemma4MMAudioTokens = 0
+ audioChanged = true
+ }
+
+ // Attachment changed → drop persisted KV so the LCP match
+ // doesn't reuse stale image/audio rows from a prior turn.
+ if imageChanged || audioChanged { engine.resetPersistedState() }
+
+ // Build the Gemma 4 prompt. Image / audio blocks are pinned to
+ // the LAST user turn so cross-turn resume keeps the pad span at
+ // a fixed offset (same trick as the legacy gemma4 path).
+ let imageBlock = "<|image>"
+ + String(repeating: "<|image|>", count: 256)
+ + ""
+ let audioBlock = "<|audio>"
+ + String(repeating: "<|audio|>", count: audioNumTokens)
+ + "