Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 18 additions & 9 deletions Textream/Textream/MarqueeTextView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -246,24 +246,33 @@ struct SpeechScrollView: View {
)
}

/// Viewport Y coordinate at which the active word is pinned.
///
/// Smooth modes (classic / silence-paused) pin it in the lower third
/// (70% of the container height): already-read text remains visible above
/// and upcoming lines remain visible below. Pinning at the very bottom
/// would leave the speaker no lookahead. Every other mode pins the word
/// at the vertical midpoint.
///
/// `wordProgressAtCurrentOffset` must resolve positions against this same
/// anchor; otherwise releasing a manual scroll snaps the text by the
/// difference between the two anchors.
///
/// - Parameter containerHeight: Height of the scroll viewport.
/// - Returns: The anchor's Y offset from the top of the viewport.
private func readingAnchorY(containerHeight: CGFloat) -> CGFloat {
    let heightFraction: CGFloat = smoothScroll ? 0.7 : 0.5
    return containerHeight * heightFraction
}

private func recalcCenter(containerHeight: CGFloat) {
let center = containerHeight * 0.5
let anchor = readingAnchorY(containerHeight: containerHeight)

if smoothScroll {
// Classic/silence-paused: anchor active word near the bottom, scrolling up
let bottomAnchor = containerHeight - 20
// Classic/silence-paused: continuous word progress, interpolated
let wordIdx = Int(smoothWordProgress)
let fraction = smoothWordProgress - Double(wordIdx)
let clampedIdx = max(0, min(wordIdx, words.count - 1))
guard let wordY = wordYPositions[clampedIdx] else { return }
let nextY = wordYPositions[clampedIdx + 1] ?? wordY
let interpolatedY = wordY + (nextY - wordY) * CGFloat(fraction)
scrollOffset = bottomAnchor - interpolatedY
scrollOffset = anchor - interpolatedY
} else {
// Word-tracking/voice-activated: active word at vertical center
// Word tracking: active word at vertical center
let wordIdx = activeWordIndex()
if let wordY = wordYPositions[wordIdx] {
let target = center - wordY
let target = anchor - wordY
// Only update if it actually changed to avoid redundant animations
if abs(scrollOffset - target) > 1 {
scrollOffset = target
Expand All @@ -274,9 +283,9 @@ struct SpeechScrollView: View {

/// Find the word progress at the current visual position (scrollOffset + manualOffset)
private func wordProgressAtCurrentOffset() -> Double {
let center = containerHeight * 0.5
// The Y position currently at the center of the view
let targetY = center - (scrollOffset + manualOffset)
let anchor = readingAnchorY(containerHeight: containerHeight)
// The Y position currently at the reading anchor line
let targetY = anchor - (scrollOffset + manualOffset)

// Find the closest word and interpolate
let sorted = wordYPositions.sorted { $0.key < $1.key }
Expand Down
45 changes: 34 additions & 11 deletions Textream/Textream/SpeechRecognizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,26 @@ class SpeechRecognizer {
var shouldDismiss: Bool = false
var shouldAdvancePage: Bool = false

/// True when recent audio levels indicate the user is actively speaking.
///
/// Uses hysteresis: a level hovering around a single threshold would
/// rapidly toggle this flag and stutter the silence-paused scroll timer.
/// The setter is private, so only this type can change the value.
private(set) var isSpeaking: Bool = false

/// Average level that must be exceeded to flip from silent to speaking.
private static let speakingOnLevel: CGFloat = 0.08
/// Average level the signal must fall below to flip from speaking to silent.
private static let speakingOffLevel: CGFloat = 0.05

/// Recomputes `isSpeaking` from the mean of the latest audio level samples.
///
/// With no samples the state is forced to "not speaking". Otherwise the
/// flag only changes when the mean crosses the threshold for the opposite
/// state, so a mean between the two thresholds keeps the current value.
private func updateSpeakingState() {
    // Only the 10 most recent samples take part in the average.
    let recent = audioLevels.suffix(10)
    guard !recent.isEmpty else {
        isSpeaking = false
        return
    }
    let avg = recent.reduce(0, +) / CGFloat(recent.count)
    if isSpeaking {
        // Already speaking: require a drop below the lower threshold.
        if avg < Self.speakingOffLevel { isSpeaking = false }
    } else if avg > Self.speakingOnLevel {
        // Currently silent: require a rise above the higher threshold.
        isSpeaking = true
    }
}

private var speechRecognizer: SFSpeechRecognizer?
Expand Down Expand Up @@ -243,6 +257,9 @@ class SpeechRecognizer {
audioEngine.stop()
}
audioEngine.inputNode.removeTap(onBus: 0)
// The tap no longer feeds audioLevels, so the speaking state would
// otherwise freeze at its last value.
isSpeaking = false
}

private func cleanupRecognition() {
Expand Down Expand Up @@ -371,10 +388,12 @@ class SpeechRecognizer {
let level = CGFloat(min(rms * 5, 1.0))

DispatchQueue.main.async {
self?.audioLevels.append(level)
if (self?.audioLevels.count ?? 0) > 30 {
self?.audioLevels.removeFirst()
guard let self else { return }
self.audioLevels.append(level)
if self.audioLevels.count > 30 {
self.audioLevels.removeFirst()
}
self.updateSpeakingState()
}
}

Expand Down Expand Up @@ -582,16 +601,20 @@ class SpeechRecognizer {
// Strategy 2: word-level match (handles STT word substitutions)
let wordResult = wordLevelMatch(spoken: spoken)

// Use agreement-based selection instead of blind max().
// If both strategies agree within a tolerance, use the average.
// If they disagree wildly, use the more conservative (lower) result
// to avoid false-positive jumps.
// If they disagree wildly, trust the word-level matcher: the char
// matcher's 3-char resync cannot bridge word-level substitutions
// ("sits" transcribed as "says"), after which it wedges permanently
// and taking min() would veto the word matcher forever, freezing the
// highlight. Word-level movement requires consecutive fuzzy word
// matches, and the 2-of-3 agreement gate below still filters
// transient false jumps.
let best: Int
let tolerance = 20 // characters
if abs(charResult - wordResult) <= tolerance {
best = (charResult + wordResult) / 2
} else {
best = min(charResult, wordResult)
best = wordResult
}

let newCount = matchStartOffset + best
Expand Down