f · samgutentag · Jul 1, 2026 · Jul 2, 2026
diff --git a/Textream/Textream/MarqueeTextView.swift b/Textream/Textream/MarqueeTextView.swift
@@ -246,24 +246,33 @@ struct SpeechScrollView: View {
         )
     }
 
+    /// Returns the viewport Y coordinate at which the active word is pinned.
+    /// With `smoothScroll` on (classic/silence-paused) the anchor sits at 70% of
+    /// the container height, so text already read stays visible above it while
+    /// upcoming lines remain visible below; otherwise it is the vertical center.
+    /// `recalcCenter` and `wordProgressAtCurrentOffset` must both use this value,
+    /// or releasing a manual scroll snaps the text by the difference.
+    private func readingAnchorY(containerHeight: CGFloat) -> CGFloat {
+        containerHeight * (smoothScroll ? 0.7 : 0.5)
+    }
+
     private func recalcCenter(containerHeight: CGFloat) {
-        let center = containerHeight * 0.5
+        let anchor = readingAnchorY(containerHeight: containerHeight)
 
         if smoothScroll {
-            // Classic/silence-paused: anchor active word near the bottom, scrolling up
-            let bottomAnchor = containerHeight - 20
+            // Classic/silence-paused: continuous word progress, interpolated
             let wordIdx = Int(smoothWordProgress)
             let fraction = smoothWordProgress - Double(wordIdx)
             let clampedIdx = max(0, min(wordIdx, words.count - 1))
             guard let wordY = wordYPositions[clampedIdx] else { return }
             let nextY = wordYPositions[clampedIdx + 1] ?? wordY
             let interpolatedY = wordY + (nextY - wordY) * CGFloat(fraction)
-            scrollOffset = bottomAnchor - interpolatedY
+            scrollOffset = anchor - interpolatedY
         } else {
-            // Word-tracking/voice-activated: active word at vertical center
+            // Word tracking: active word at vertical center
             let wordIdx = activeWordIndex()
             if let wordY = wordYPositions[wordIdx] {
-                let target = center - wordY
+                let target = anchor - wordY
                 // Only update if it actually changed to avoid redundant animations
                 if abs(scrollOffset - target) > 1 {
                     scrollOffset = target
@@ -274,9 +283,9 @@ struct SpeechScrollView: View {
 
     /// Find the word progress at the current visual position (scrollOffset + manualOffset)
     private func wordProgressAtCurrentOffset() -> Double {
-        let center = containerHeight * 0.5
-        // The Y position currently at the center of the view
-        let targetY = center - (scrollOffset + manualOffset)
+        let anchor = readingAnchorY(containerHeight: containerHeight)
+        // The Y position currently at the reading anchor line
+        let targetY = anchor - (scrollOffset + manualOffset)
 
         // Find the closest word and interpolate
         let sorted = wordYPositions.sorted { $0.key < $1.key }

diff --git a/Textream/Textream/SpeechRecognizer.swift b/Textream/Textream/SpeechRecognizer.swift
@@ -80,12 +80,26 @@ class SpeechRecognizer {
     var shouldDismiss: Bool = false
     var shouldAdvancePage: Bool = false
 
-    /// True when recent audio levels indicate the user is actively speaking
-    var isSpeaking: Bool {
+    /// True when recent audio levels indicate the user is actively speaking.
+    /// Updated by `updateSpeakingState()` using hysteresis: a level hovering near
+    /// one threshold would rapidly toggle this flag and stutter the silence-paused scroll timer.
+    private(set) var isSpeaking: Bool = false
+
+    private static let speakingOnLevel: CGFloat = 0.08   // not speaking → speaking when the average exceeds this
+    private static let speakingOffLevel: CGFloat = 0.05  // speaking → not speaking when the average falls below this
+
+    private func updateSpeakingState() {
         let recent = audioLevels.suffix(10)
-        guard !recent.isEmpty else { return false }
+        guard !recent.isEmpty else {
+            isSpeaking = false  // no samples at all: report silence
+            return
+        }
         let avg = recent.reduce(0, +) / CGFloat(recent.count)
-        return avg > 0.08
+        if isSpeaking {
+            if avg < Self.speakingOffLevel { isSpeaking = false }  // already speaking: only the lower level turns it off
+        } else if avg > Self.speakingOnLevel {  // not speaking: only the upper level turns it on
+            isSpeaking = true
+        }
     }
 
     private var speechRecognizer: SFSpeechRecognizer?
@@ -243,6 +257,9 @@ class SpeechRecognizer {
             audioEngine.stop()
         }
         audioEngine.inputNode.removeTap(onBus: 0)
+        // The tap no longer feeds audioLevels, so the speaking state would
+        // otherwise freeze at its last value.
+        isSpeaking = false
     }
 
     private func cleanupRecognition() {
@@ -371,10 +388,12 @@ class SpeechRecognizer {
             let level = CGFloat(min(rms * 5, 1.0))
 
             DispatchQueue.main.async {
-                self?.audioLevels.append(level)
-                if (self?.audioLevels.count ?? 0) > 30 {
-                    self?.audioLevels.removeFirst()
+                guard let self else { return }
+                self.audioLevels.append(level)
+                if self.audioLevels.count > 30 {
+                    self.audioLevels.removeFirst()
                 }
+                self.updateSpeakingState()
             }
         }
 
@@ -582,16 +601,20 @@ class SpeechRecognizer {
         // Strategy 2: word-level match (handles STT word substitutions)
         let wordResult = wordLevelMatch(spoken: spoken)
 
-        // Use agreement-based selection instead of blind max().
         // If both strategies agree within a tolerance, use the average.
-        // If they disagree wildly, use the more conservative (lower) result
-        // to avoid false-positive jumps.
+        // If they disagree wildly, trust the word-level matcher: the char
+        // matcher's 3-char resync cannot bridge word-level substitutions
+        // ("sits" transcribed as "says"), after which it wedges permanently
+        // and taking min() would veto the word matcher forever, freezing the
+        // highlight. Word-level movement requires consecutive fuzzy word
+        // matches, and the 2-of-3 agreement gate below still filters
+        // transient false jumps.
         let best: Int
         let tolerance = 20 // characters
         if abs(charResult - wordResult) <= tolerance {
             best = (charResult + wordResult) / 2
         } else {
-            best = min(charResult, wordResult)
+            best = wordResult
         }
 
         let newCount = matchStartOffset + best