From 1acea31c6f5ff7e36e4b380f9e63f83e9a1d1c31 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:16:57 +0530 Subject: [PATCH 01/17] Tear down AVAudioEngine when idle and free buffer on stop The recorder held an AVAudioEngine for the lifetime of the process and kept the audio buffer's high-water capacity forever. For a menu-bar app that lives for days this kept the audio HAL warm and steadily grew RAM. - Make audioEngine optional, create on startRecording, nil on stop - Drop buffer capacity instead of removeAll(keepingCapacity: true) - Move buffer out via COW transfer instead of copying on stop AUDIOTYPE-1 --- AudioType/Core/AudioRecorder.swift | 38 ++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/AudioType/Core/AudioRecorder.swift b/AudioType/Core/AudioRecorder.swift index 452148c..701051f 100644 --- a/AudioType/Core/AudioRecorder.swift +++ b/AudioType/Core/AudioRecorder.swift @@ -2,7 +2,10 @@ import AVFoundation import os.log class AudioRecorder { - private let audioEngine = AVAudioEngine() + // Lazily created on startRecording and torn down on stopRecording so the + // audio HAL doesn't stay warm between recordings (big idle-energy win for + // a menu-bar app). + private var audioEngine: AVAudioEngine? private var audioBuffer: [Float] = [] private let bufferLock = NSLock() private var isRecording = false @@ -16,8 +19,8 @@ class AudioRecorder { private let targetSampleRate: Double = 16000 init() { - // Pre-allocate buffer for ~30 seconds of audio at 16kHz - audioBuffer.reserveCapacity(Int(targetSampleRate * 30)) + // Buffer is allocated on each startRecording so the recorder has zero + // footprint when idle. } func startRecording() throws { @@ -26,12 +29,17 @@ class AudioRecorder { return } - // Clear previous buffer + // Drop the buffer entirely (don't preserve capacity — see issue 1.4). 
bufferLock.lock() - audioBuffer.removeAll(keepingCapacity: true) + audioBuffer = [] + audioBuffer.reserveCapacity(Int(targetSampleRate * 30)) bufferLock.unlock() - let inputNode = audioEngine.inputNode + // Lazily create the audio engine on each recording. + let engine = AVAudioEngine() + audioEngine = engine + + let inputNode = engine.inputNode let inputFormat = inputNode.outputFormat(forBus: 0) logger.info("Input format: \(inputFormat.sampleRate)Hz, \(inputFormat.channelCount) channels") @@ -66,8 +74,8 @@ class AudioRecorder { } // Start audio engine - audioEngine.prepare() - try audioEngine.start() + engine.prepare() + try engine.start() isRecording = true logger.info("Recording started") @@ -79,15 +87,21 @@ class AudioRecorder { return nil } - // Stop and remove tap - audioEngine.inputNode.removeTap(onBus: 0) - audioEngine.stop() + // Stop and tear down the engine so the audio HAL releases its resources. + if let engine = audioEngine { + engine.inputNode.removeTap(onBus: 0) + engine.stop() + } + audioEngine = nil isRecording = false - // Return captured samples + // Move the buffer out of the recorder (zero-copy via COW transfer) and + // leave the recorder with a fresh empty array so it doesn't keep the + // recording's high-water capacity in memory. bufferLock.lock() let samples = audioBuffer + audioBuffer = [] bufferLock.unlock() logger.info( From cdc74604a6f71a869055f87cde4075fa145fcae1 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:17:03 +0530 Subject: [PATCH 02/17] Use clipboard paste for long inserts; cache CGEventSource Per-character keystroke synthesis with a 1 ms sleep was the dominant post-release latency for any non-trivial transcription, and a fresh CGEventSource was being allocated for every character. 
- Route text > 30 chars through the existing clipboard paste path - Create CGEventSource once per insertion, pass it to insertCharacter AUDIOTYPE-1 --- AudioType/Core/TextInserter.swift | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/AudioType/Core/TextInserter.swift b/AudioType/Core/TextInserter.swift index 34186c6..84e9061 100644 --- a/AudioType/Core/TextInserter.swift +++ b/AudioType/Core/TextInserter.swift @@ -6,24 +6,41 @@ import os.log class TextInserter { private let logger = Logger(subsystem: "com.audiotype", category: "TextInserter") + /// Above this length we paste via clipboard instead of synthesising one + /// keystroke per character. Per-char synthesis sleeps ~1 ms after each + /// character — for long dictations that's the dominant + /// post-recording latency the user feels. + private static let clipboardPasteThreshold = 30 + func insertText(_ text: String) { guard !text.isEmpty else { return } logger.info("Inserting text: \(text.prefix(50))...") - // Use CGEvent to simulate keyboard input - for char in text { - insertCharacter(char) - // Small delay between characters for reliability - usleep(1000) // 1ms + if text.count > Self.clipboardPasteThreshold { + insertTextViaClipboard(text) + } else { + insertTextViaKeystrokes(text) } logger.info("Text insertion complete") } - private func insertCharacter(_ char: Character) { + /// Per-character keystroke synthesis. Used for short strings where + /// clipboard paste's clipboard-restore quirks aren't worth it. + private func insertTextViaKeystrokes(_ text: String) { + // Cache the event source once for the whole insertion — creating one + // per character was a measurable hot path. let source = CGEventSource(stateID: .hidSystemState) + for char in text { + insertCharacter(char, source: source) + // Tiny delay so target apps don't drop events under load. 
+ usleep(1000) // 1ms + } + } + + private func insertCharacter(_ char: Character, source: CGEventSource?) { // Create key down event guard let keyDown = CGEvent(keyboardEventSource: source, virtualKey: 0, keyDown: true) else { logger.error("Failed to create keyDown event") From 5f8669892eafa51e8ad8aa85d46b25437a194e07 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:17:51 +0530 Subject: [PATCH 03/17] Cache RecordingOverlay hosting view; drive text via observable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each recording start and recording→processing transition was building a fresh NSHostingView, which leaked the SwiftUI graph and Metal layers and was a primary contributor to the +80 MB drift seen after long sessions. - Add overlayText to AudioLevelMonitor as @Published - Read text from the env object inside RecordingOverlay - Build the NSHostingView once and just mutate overlayText afterwards AUDIOTYPE-1 --- AudioType/App/MenuBarController.swift | 39 ++++++++++++++++----------- AudioType/UI/RecordingOverlay.swift | 3 +-- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/AudioType/App/MenuBarController.swift b/AudioType/App/MenuBarController.swift index e324448..26e3041 100644 --- a/AudioType/App/MenuBarController.swift +++ b/AudioType/App/MenuBarController.swift @@ -17,16 +17,20 @@ extension NSImage { } } -/// Shared observable for live audio level — drives the recording waveform. +/// Shared observable for live audio level - drives the recording waveform. +/// Also carries the overlay text so the hosting view doesn't have to be +/// rebuilt on every state change. class AudioLevelMonitor: ObservableObject { static let shared = AudioLevelMonitor() @Published var level: Float = 0.0 + @Published var overlayText: String = "Recording..." } class MenuBarController: NSObject, NSWindowDelegate { private weak var statusItem: NSStatusItem? 
private var transcriptionManager: TranscriptionManager private var recordingWindow: NSWindow? + private var recordingHostingView: NSHostingView? private var settingsWindow: NSWindow? init(transcriptionManager: TranscriptionManager) { @@ -115,7 +119,7 @@ class MenuBarController: NSObject, NSWindowDelegate { updateStatusMenuItem("Ready") case .recording: - // Tinted coral/red — non-template so the color shows through + // Tinted coral/red - non-template so the color shows through if let base = NSImage( systemSymbolName: "waveform.circle.fill", accessibilityDescription: "Recording") { button.image = base.tinted(with: AudioTypeTheme.nsRecordingRed) @@ -124,7 +128,7 @@ class MenuBarController: NSObject, NSWindowDelegate { updateStatusMenuItem("Recording...") case .processing: - // Tinted amber — "I'm thinking" + // Tinted amber - "I'm thinking" if let base = NSImage( systemSymbolName: "ellipsis.circle.fill", accessibilityDescription: "Processing") { button.image = base.tinted(with: AudioTypeTheme.nsAmber) @@ -175,22 +179,27 @@ class MenuBarController: NSObject, NSWindowDelegate { recordingWindow = window } - let hostingView = NSHostingView( - rootView: RecordingOverlay(text: "Recording...") - .environmentObject(AudioLevelMonitor.shared)) - hostingView.frame = NSRect(x: 0, y: 0, width: 180, height: 50) - recordingWindow?.contentView = hostingView + // Build the hosting view once; subsequent updates just mutate the + // observable state. Re-creating NSHostingView on every state change + // was leaking the SwiftUI graph and Metal layers. + if recordingHostingView == nil { + let hosting = NSHostingView( + rootView: AnyView( + RecordingOverlay() + .environmentObject(AudioLevelMonitor.shared) + ) + ) + hosting.frame = NSRect(x: 0, y: 0, width: 180, height: 50) + recordingHostingView = hosting + recordingWindow?.contentView = hosting + } + + AudioLevelMonitor.shared.overlayText = "Recording..." 
recordingWindow?.orderFront(nil) } private func updateRecordingIndicator(text: String) { - if let window = recordingWindow { - let hostingView = NSHostingView( - rootView: RecordingOverlay(text: text) - .environmentObject(AudioLevelMonitor.shared)) - hostingView.frame = NSRect(x: 0, y: 0, width: 180, height: 50) - window.contentView = hostingView - } + AudioLevelMonitor.shared.overlayText = text } private func hideRecordingIndicator() { diff --git a/AudioType/UI/RecordingOverlay.swift b/AudioType/UI/RecordingOverlay.swift index 39ceb5a..cc7b99e 100644 --- a/AudioType/UI/RecordingOverlay.swift +++ b/AudioType/UI/RecordingOverlay.swift @@ -1,11 +1,10 @@ import SwiftUI struct RecordingOverlay: View { - let text: String @EnvironmentObject var levelMonitor: AudioLevelMonitor private var isRecording: Bool { - text == "Recording..." + levelMonitor.overlayText == "Recording..." } var body: some View { From f9deded48a084093bb9a73becad89112a0a518d1 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:18:06 +0530 Subject: [PATCH 04/17] Remove notification observers in MenuBarController deinit Selector-based observers on NotificationCenter.default were never explicitly removed. While unzeroed-weak crashes are no longer a risk on modern macOS, explicit cleanup is best practice and matters if the controller is ever re-instantiated. 
AUDIOTYPE-1 --- AudioType/App/MenuBarController.swift | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/AudioType/App/MenuBarController.swift b/AudioType/App/MenuBarController.swift index 26e3041..9393d58 100644 --- a/AudioType/App/MenuBarController.swift +++ b/AudioType/App/MenuBarController.swift @@ -54,6 +54,10 @@ class MenuBarController: NSObject, NSWindowDelegate { ) } + deinit { + NotificationCenter.default.removeObserver(self) + } + func setupStatusItem(_ statusItem: NSStatusItem) { self.statusItem = statusItem From 635811dec0d2f801744475e3d441497d029ab294 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:18:36 +0530 Subject: [PATCH 05/17] Cache tinted status-bar icons instead of re-rendering per state NSImage.tinted uses lockFocus/unlockFocus, which allocates a fresh offscreen bitmap rep on every call. With four distinct icon states that's a fixed set; pre-render each one once and reuse. AUDIOTYPE-1 --- AudioType/App/MenuBarController.swift | 45 ++++++++++++++++----------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/AudioType/App/MenuBarController.swift b/AudioType/App/MenuBarController.swift index 9393d58..57f99f4 100644 --- a/AudioType/App/MenuBarController.swift +++ b/AudioType/App/MenuBarController.swift @@ -33,6 +33,29 @@ class MenuBarController: NSObject, NSWindowDelegate { private var recordingHostingView: NSHostingView? private var settingsWindow: NSWindow? + // Pre-rendered status-bar icons, built once. Calling NSImage.tinted on + // every state change re-rasterizes the symbol via lockFocus/unlockFocus + // and was a steady source of bitmap allocations. + private lazy var idleIcon: NSImage? = { + let img = NSImage( + systemSymbolName: "waveform.circle.fill", accessibilityDescription: "Ready") + img?.isTemplate = true + return img + }() + private lazy var recordingIcon: NSImage? = { + NSImage(systemSymbolName: "waveform.circle.fill", accessibilityDescription: "Recording")? 
+ .tinted(with: AudioTypeTheme.nsRecordingRed) + }() + private lazy var processingIcon: NSImage? = { + NSImage(systemSymbolName: "ellipsis.circle.fill", accessibilityDescription: "Processing")? + .tinted(with: AudioTypeTheme.nsAmber) + }() + private lazy var errorIcon: NSImage? = { + NSImage( + systemSymbolName: "exclamationmark.triangle.fill", accessibilityDescription: "Error")? + .tinted(with: .systemRed) + }() + init(transcriptionManager: TranscriptionManager) { self.transcriptionManager = transcriptionManager super.init() @@ -114,38 +137,24 @@ class MenuBarController: NSObject, NSWindowDelegate { switch state { case .idle: - let img = NSImage( - systemSymbolName: "waveform.circle.fill", accessibilityDescription: "Ready") - img?.isTemplate = true - button.image = img + button.image = idleIcon AudioLevelMonitor.shared.level = 0 hideRecordingIndicator() updateStatusMenuItem("Ready") case .recording: - // Tinted coral/red - non-template so the color shows through - if let base = NSImage( - systemSymbolName: "waveform.circle.fill", accessibilityDescription: "Recording") { - button.image = base.tinted(with: AudioTypeTheme.nsRecordingRed) - } + button.image = recordingIcon showRecordingIndicator() updateStatusMenuItem("Recording...") case .processing: - // Tinted amber - "I'm thinking" - if let base = NSImage( - systemSymbolName: "ellipsis.circle.fill", accessibilityDescription: "Processing") { - button.image = base.tinted(with: AudioTypeTheme.nsAmber) - } + button.image = processingIcon AudioLevelMonitor.shared.level = 0 updateRecordingIndicator(text: "Processing...") updateStatusMenuItem("Processing...") case .error(let message): - let img = NSImage( - systemSymbolName: "exclamationmark.triangle.fill", accessibilityDescription: "Error") - img?.isTemplate = false - button.image = img?.tinted(with: .systemRed) + button.image = errorIcon hideRecordingIndicator() updateStatusMenuItem("Error: \(message)") } From b85526b2c38a02c2a68e0400a5b8dddb353938bd Mon Sep 17 
00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:19:11 +0530 Subject: [PATCH 06/17] Return CGEvent unretained from event tap callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tap callback fires on every modifier-key change system-wide. Each return value used Unmanaged.passRetained(event), which adds a retain the system then has to release — wasted work per event. Apple's own sample code returns passUnretained because the event is already owned by the system. AUDIOTYPE-1 --- AudioType/Core/HotKeyManager.swift | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/AudioType/Core/HotKeyManager.swift b/AudioType/Core/HotKeyManager.swift index 075b053..b6c396c 100644 --- a/AudioType/Core/HotKeyManager.swift +++ b/AudioType/Core/HotKeyManager.swift @@ -36,7 +36,10 @@ class HotKeyManager { options: .defaultTap, eventsOfInterest: eventMask, callback: { proxy, type, event, refcon in - guard let refcon = refcon else { return Unmanaged.passRetained(event) } + // The event is owned by the system; pass it back unretained. + // Using passRetained here added a retain/release pair per event + // (i.e. on every modifier-key change system-wide). 
+ guard let refcon = refcon else { return Unmanaged.passUnretained(event) } let manager = Unmanaged.fromOpaque(refcon).takeUnretainedValue() return manager.handleEvent(proxy: proxy, type: type, event: event) }, @@ -85,7 +88,7 @@ class HotKeyManager { if let tap = eventTap { CGEvent.tapEnable(tap: tap, enable: true) } - return Unmanaged.passRetained(event) + return Unmanaged.passUnretained(event) } let flags = event.flags @@ -120,7 +123,7 @@ class HotKeyManager { } } - return Unmanaged.passRetained(event) + return Unmanaged.passUnretained(event) } deinit { From 59aa202642d4fdc090a63d070fd78a42d3e1fa28 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:20:28 +0530 Subject: [PATCH 07/17] Vectorise RMS and stop allocating per audio tap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The audio-tap callback fires every ~100 ms during recording. Each call was allocating an intermediate [Float] from the converter output, then appending it to the main buffer (a second copy), then computing RMS via a scalar Array.reduce. - Compute RMS with vDSP_measqv (vectorised, ~5-10× faster) - Append directly from UnsafeBufferPointer; no intermediate Array - Wrap all bufferLock acquires with defer { unlock() } for safety AUDIOTYPE-1 --- AudioType/Core/AudioRecorder.swift | 57 +++++++++++++++++++----------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/AudioType/Core/AudioRecorder.swift b/AudioType/Core/AudioRecorder.swift index 701051f..42ab4e5 100644 --- a/AudioType/Core/AudioRecorder.swift +++ b/AudioType/Core/AudioRecorder.swift @@ -1,4 +1,5 @@ import AVFoundation +import Accelerate import os.log class AudioRecorder { @@ -30,10 +31,12 @@ class AudioRecorder { } // Drop the buffer entirely (don't preserve capacity — see issue 1.4). 
- bufferLock.lock() - audioBuffer = [] - audioBuffer.reserveCapacity(Int(targetSampleRate * 30)) - bufferLock.unlock() + do { + bufferLock.lock() + defer { bufferLock.unlock() } + audioBuffer = [] + audioBuffer.reserveCapacity(Int(targetSampleRate * 30)) + } // Lazily create the audio engine on each recording. let engine = AVAudioEngine() @@ -99,10 +102,13 @@ class AudioRecorder { // Move the buffer out of the recorder (zero-copy via COW transfer) and // leave the recorder with a fresh empty array so it doesn't keep the // recording's high-water capacity in memory. - bufferLock.lock() - let samples = audioBuffer - audioBuffer = [] - bufferLock.unlock() + let samples: [Float] + do { + bufferLock.lock() + defer { bufferLock.unlock() } + samples = audioBuffer + audioBuffer = [] + } logger.info( "Recording stopped, captured \(samples.count) samples (\(Double(samples.count) / self.targetSampleRate, format: .fixed(precision: 2))s)" @@ -114,10 +120,7 @@ class AudioRecorder { private func processAudioBuffer( _ buffer: AVAudioPCMBuffer, converter: AVAudioConverter?, targetFormat: AVAudioFormat ) { - var samplesArray: [Float] - if let converter = converter { - // Need to convert to target format let frameCount = AVAudioFrameCount( Double(buffer.frameLength) * targetSampleRate / buffer.format.sampleRate ) @@ -143,25 +146,37 @@ class AudioRecorder { } guard let channelData = convertedBuffer.floatChannelData else { return } - samplesArray = Array( - UnsafeBufferPointer(start: channelData[0], count: Int(convertedBuffer.frameLength))) + let count = Int(convertedBuffer.frameLength) + consume(samples: channelData[0], count: count) } else { - // Already in correct format guard let channelData = buffer.floatChannelData else { return } - samplesArray = Array( - UnsafeBufferPointer(start: channelData[0], count: Int(buffer.frameLength))) + let count = Int(buffer.frameLength) + consume(samples: channelData[0], count: count) } + } - // Compute RMS level for live waveform - let rms = 
sqrt(samplesArray.reduce(0) { $0 + $1 * $1 } / Float(max(samplesArray.count, 1))) + /// Consume a chunk of mic samples: compute RMS for the waveform and append + /// to the recording buffer — without ever materialising an intermediate + /// `[Float]`. Called on the audio thread. + private func consume(samples: UnsafePointer, count: Int) { + guard count > 0 else { return } + + // RMS via Accelerate (vectorised). Replaces a scalar reduce loop that + // ran on every tap callback. + var meanSquare: Float = 0 + vDSP_measqv(samples, 1, &meanSquare, vDSP_Length(count)) + let rms = sqrt(meanSquare) // Normalize: typical speech RMS is 0.01–0.15, scale aggressively to 0–1 let level = min(rms * 25, 1.0) onLevelUpdate?(level) - // Append to buffer + // Append directly from the unsafe buffer pointer; [Float] has an + // append(contentsOf:) overload that takes any Sequence, including + // UnsafeBufferPointer, so no intermediate Array is allocated. + let ptr = UnsafeBufferPointer(start: samples, count: count) bufferLock.lock() - audioBuffer.append(contentsOf: samplesArray) - bufferLock.unlock() + defer { bufferLock.unlock() } + audioBuffer.append(contentsOf: ptr) } } From 5ab3b07d10f14a29cc4a3ef239276a48075943c6 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:21:22 +0530 Subject: [PATCH 08/17] Rewrite WAVEncoder.encode with preallocated Data + vDSP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The encoder allocated an intermediate [Int16] (~960 KB for a 30 s clip), let Data realloc as it grew from 0, then made one appendLittleEndian call per sample (~480 000 calls). - Allocate final Data once at exact size - Write header in place via storeBytes - Clip + scale + Float→Int16 conversion via vDSP into the data region Produces byte-identical output. Significant peak-memory reduction and encode-time speedup on long recordings. 
AUDIOTYPE-1 --- AudioType/Core/WAVEncoder.swift | 111 ++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 33 deletions(-) diff --git a/AudioType/Core/WAVEncoder.swift b/AudioType/Core/WAVEncoder.swift index ea1210a..d20dacc 100644 --- a/AudioType/Core/WAVEncoder.swift +++ b/AudioType/Core/WAVEncoder.swift @@ -1,3 +1,4 @@ +import Accelerate import Foundation import os.log @@ -25,7 +26,7 @@ struct WhisperAPIConfig { /// and response parsing are all handled here. class WhisperAPIEngine: TranscriptionEngine { - /// Provider configuration — subclasses must override. + /// Provider configuration - subclasses must override. var config: WhisperAPIConfig { fatalError("Subclasses must override config") } @@ -178,45 +179,89 @@ enum WhisperAPIError: Error, LocalizedError { enum WAVEncoder { /// Encode Float32 PCM samples into WAV in memory (16-bit PCM, mono). + /// + /// The previous implementation allocated an intermediate `[Int16]` + /// (~960 KB for a 30 s clip), let `Data` realloc as it grew, and + /// did 480 000 individual `appendLittleEndian` calls. This version: + /// + /// - Allocates the final `Data` once at exact size (44-byte header + 2N). + /// - Writes the header in place. + /// - Uses Accelerate to clip, scale and convert Float → Int16, writing + /// the converted samples directly into the data region. 
static func encode(samples: [Float], sampleRate: Int) -> Data { - var data = Data() - - let int16Samples = samples.map { sample -> Int16 in - let clamped = max(-1.0, min(1.0, sample)) - return Int16(clamped * Float(Int16.max)) - } - let numChannels: UInt16 = 1 let bitsPerSample: UInt16 = 16 let byteRate = UInt32(sampleRate) * UInt32(numChannels) * UInt32(bitsPerSample / 8) let blockAlign = numChannels * (bitsPerSample / 8) - let dataSize = UInt32(int16Samples.count * 2) - let fileSize = 36 + dataSize - - // RIFF header - data.append(contentsOf: "RIFF".utf8) - data.appendLittleEndian(fileSize) - data.append(contentsOf: "WAVE".utf8) - - // fmt chunk - data.append(contentsOf: "fmt ".utf8) - data.appendLittleEndian(UInt32(16)) - data.appendLittleEndian(UInt16(1)) // PCM - data.appendLittleEndian(numChannels) - data.appendLittleEndian(UInt32(sampleRate)) - data.appendLittleEndian(byteRate) - data.appendLittleEndian(blockAlign) - data.appendLittleEndian(bitsPerSample) - - // data chunk - data.append(contentsOf: "data".utf8) - data.appendLittleEndian(dataSize) - - for sample in int16Samples { - data.appendLittleEndian(sample) + let dataSize = UInt32(samples.count * 2) + let fileSize: UInt32 = 36 + dataSize + let totalSize = 44 + samples.count * 2 + + var data = Data(count: totalSize) + data.withUnsafeMutableBytes { (raw: UnsafeMutableRawBufferPointer) -> Void in + guard let base = raw.baseAddress else { return } + + // --- Header --------------------------------------------------------- + func writeASCII(_ string: String, at offset: Int) { + for (i, byte) in string.utf8.enumerated() { + base.storeBytes(of: byte, toByteOffset: offset + i, as: UInt8.self) + } + } + func writeLE(_ value: T, at offset: Int) { + base.storeBytes(of: value.littleEndian, toByteOffset: offset, as: T.self) + } + + writeASCII("RIFF", at: 0) + writeLE(fileSize, at: 4) + writeASCII("WAVE", at: 8) + + writeASCII("fmt ", at: 12) + writeLE(UInt32(16), at: 16) + writeLE(UInt16(1), at: 20) // PCM + 
+ writeLE(numChannels, at: 22) + writeLE(UInt32(sampleRate), at: 24) + writeLE(byteRate, at: 28) + writeLE(blockAlign, at: 32) + writeLE(bitsPerSample, at: 34) + + writeASCII("data", at: 36) + writeLE(dataSize, at: 40) + + // --- PCM data ------------------------------------------------------- + // Clip to [-1, 1], scale by Int16.max, convert to Int16 — all via + // Accelerate; the final conversion writes straight into the destination. + guard !samples.isEmpty else { return } + + let dst = base.advanced(by: 44).assumingMemoryBound(to: Int16.self) + let n = vDSP_Length(samples.count) + + samples.withUnsafeBufferPointer { src in + guard let srcBase = src.baseAddress else { return } + + // Scratch buffer for clip+scale; reuse src memory would mutate the + // caller's input, so allocate a transient float buffer. + let scratch = UnsafeMutablePointer.allocate(capacity: samples.count) + defer { scratch.deallocate() } + + // Clip into scratch. + var lo: Float = -1.0 + var hi: Float = 1.0 + vDSP_vclip(srcBase, 1, &lo, &hi, scratch, 1, n) + + // Scale by Int16.max in place. + var scale = Float(Int16.max) + vDSP_vsmul(scratch, 1, &scale, scratch, 1, n) + + // Convert Float → Int16, truncating toward zero, directly into dst. + vDSP_vfix16(scratch, 1, dst, 1, n) + + // WAV is little-endian. On Apple silicon and Intel, host order is + // already LE so no byte-swap needed. Guard with a debug assert + // for any future big-endian Apple platform (none exist today). + assert(1.littleEndian == 1, "WAVEncoder assumes little-endian host") + } } - return data } From 854ab2d2661044fcc4e5835de94ff4bf9054a896 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:21:57 +0530 Subject: [PATCH 09/17] Switch transcribe upload to URLSession.upload(for:from:) Setting URLRequest.httpBody and calling URLSession.shared.data(for:) typically holds the body in two places (the request and URLSession's internal copy). For ~2 MB WAV bodies that's wasted memory. 
upload(for: from:) takes the body once and forwards it. - buildRequest now returns (URLRequest, Data) instead of mutating httpBody - transcribe now uses upload(for:from:) AUDIOTYPE-1 --- AudioType/Core/WAVEncoder.swift | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/AudioType/Core/WAVEncoder.swift b/AudioType/Core/WAVEncoder.swift index d20dacc..970f775 100644 --- a/AudioType/Core/WAVEncoder.swift +++ b/AudioType/Core/WAVEncoder.swift @@ -88,7 +88,7 @@ class WhisperAPIEngine: TranscriptionEngine { throw WhisperAPIError.invalidURL } - let request = WAVEncoder.buildRequest( + let (request, body) = WAVEncoder.buildRequest( url: url, apiKey: apiKey, wavData: wavData, @@ -98,7 +98,10 @@ class WhisperAPIEngine: TranscriptionEngine { let (data, response): (Data, URLResponse) do { - (data, response) = try await URLSession.shared.data(for: request) + // upload(for:from:) keeps a single copy of the body; setting + // request.httpBody and calling data(for:) tends to keep the body + // resident in two places. With ~2 MB WAV bodies this matters. + (data, response) = try await URLSession.shared.upload(for: request, from: body) } catch { throw WhisperAPIError.networkError(error.localizedDescription) } @@ -267,6 +270,9 @@ enum WAVEncoder { /// Build a multipart/form-data request for an OpenAI-compatible /// `/v1/audio/transcriptions` endpoint. + /// + /// Returns the request and body separately so callers can pass the body + /// to `URLSession.upload(for:from:)` instead of setting `httpBody`. 
static func buildRequest( url: URL, apiKey: String, @@ -274,7 +280,7 @@ enum WAVEncoder { model: String, languageCode: String?, timeoutInterval: TimeInterval = 30 - ) -> URLRequest { + ) -> (URLRequest, Data) { let boundary = UUID().uuidString var request = URLRequest(url: url) @@ -311,8 +317,7 @@ enum WAVEncoder { ) body.append(Data("--\(boundary)--\r\n".utf8)) - request.httpBody = body - return request + return (request, body) } } From 22e62b5e7e2bd7dbcc9afe8d4ea296727a13afca Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:22:48 +0530 Subject: [PATCH 10/17] Compile single regex for TextPostProcessor replacements The processor was running ~85 case-insensitive replacingOccurrences calls on the full transcription, each O(n), and rebuilding a merged dictionary on every call. Dictionary iteration order is also undefined, so identical inputs could produce different outputs across runs. - Compile a single NSRegularExpression with alternation, longest-first - Cache the compiled regex + lookup; rebuild only on catalog changes - Apply replacements in one match-and-stitch pass - Deterministic output ordering as a side benefit Substring (not word-bounded) matching is preserved to match prior behavior. AUDIOTYPE-1 --- AudioType/Core/TextPostProcessor.swift | 97 +++++++++++++++++++++----- 1 file changed, 80 insertions(+), 17 deletions(-) diff --git a/AudioType/Core/TextPostProcessor.swift b/AudioType/Core/TextPostProcessor.swift index 706fd25..6aec6c4 100644 --- a/AudioType/Core/TextPostProcessor.swift +++ b/AudioType/Core/TextPostProcessor.swift @@ -106,41 +106,37 @@ class TextPostProcessor { // User-defined custom replacements private var customReplacements: [String: String] = [:] + // Cached compiled regex + lookup table. Rebuilt only when the catalog + // changes (custom replacements added/removed). The previous code rebuilt + // a merged dictionary and ran ~85 case-insensitive String scans on every + // single transcription. 
+ private var cachedRegex: NSRegularExpression? + private var cachedLookup: [String: String] = [:] + private let regexLock = NSLock() + private init() { loadCustomReplacements() + rebuildRegex() } /// Process transcribed text with corrections func process(_ text: String) -> String { - var result = text - - // Apply word replacements (case-insensitive) - let allReplacements = wordReplacements.merging(customReplacements) { _, custom in custom } - - for (pattern, replacement) in allReplacements { - result = result.replacingOccurrences( - of: pattern, - with: replacement, - options: .caseInsensitive - ) - } - - // Capitalize first letter of sentences - result = capitalizeSentences(result) - - return result + let result = applyReplacements(text) + return capitalizeSentences(result) } /// Add a custom word replacement func addCustomReplacement(from: String, to: String) { customReplacements[from.lowercased()] = to saveCustomReplacements() + rebuildRegex() } /// Remove a custom replacement func removeCustomReplacement(from: String) { customReplacements.removeValue(forKey: from.lowercased()) saveCustomReplacements() + rebuildRegex() } /// Get all custom replacements @@ -150,6 +146,73 @@ class TextPostProcessor { // MARK: - Private + /// Rebuild the compiled regex from the current built-in + custom catalogs. + /// Custom replacements override built-ins on key collision. + private func rebuildRegex() { + regexLock.lock() + defer { regexLock.unlock() } + + let merged = wordReplacements.merging(customReplacements) { _, custom in custom } + cachedLookup = [:] + cachedLookup.reserveCapacity(merged.count) + for (key, value) in merged { + cachedLookup[key.lowercased()] = value + } + + // Sort keys longest-first so e.g. "rest api" wins over "api". This also + // gives us a deterministic order independent of dictionary hashing, + // which the old implementation lacked. 
+ let keys = merged.keys.sorted { $0.count > $1.count } + let pattern = keys.map { NSRegularExpression.escapedPattern(for: $0) } + .joined(separator: "|") + + cachedRegex = try? NSRegularExpression( + pattern: pattern, + options: [.caseInsensitive] + ) + } + + /// Apply replacements in a single regex pass. + private func applyReplacements(_ text: String) -> String { + regexLock.lock() + let regex = cachedRegex + let lookup = cachedLookup + regexLock.unlock() + + guard let regex = regex, !text.isEmpty else { return text } + + let nsText = text as NSString + let range = NSRange(location: 0, length: nsText.length) + let matches = regex.matches(in: text, options: [], range: range) + if matches.isEmpty { return text } + + // Reassemble in one pass, alternating original spans and replacements. + var result = "" + result.reserveCapacity(text.count) + var cursor = 0 + for match in matches { + let r = match.range + if r.location > cursor { + result.append( + nsText.substring(with: NSRange(location: cursor, length: r.location - cursor)) + ) + } + let matched = nsText.substring(with: r).lowercased() + if let replacement = lookup[matched] { + result.append(replacement) + } else { + result.append(nsText.substring(with: r)) + } + cursor = r.location + r.length + } + if cursor < nsText.length { + result.append( + nsText.substring(with: NSRange(location: cursor, length: nsText.length - cursor)) + ) + } + return result + } + private func capitalizeSentences(_ text: String) -> String { var result = "" var capitalizeNext = true From 31c536a2abcc5c43cd2747e3f55847e17038b173 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:23:27 +0530 Subject: [PATCH 11/17] Cache Keychain reads in memory; invalidate on save/delete Every transcription resolved the API key via SecItemCopyMatching, often multiple times across the engine's isAvailable/apiKey accessors. Cache the resolved value (including the absent state) and invalidate on save/delete. 
AUDIOTYPE-1 --- AudioType/Utilities/KeychainHelper.swift | 39 +++++++++++++++++++++--- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/AudioType/Utilities/KeychainHelper.swift b/AudioType/Utilities/KeychainHelper.swift index 5a4b4ec..fb41886 100644 --- a/AudioType/Utilities/KeychainHelper.swift +++ b/AudioType/Utilities/KeychainHelper.swift @@ -12,6 +12,13 @@ enum KeychainHelper { subsystem: "com.audiotype", category: "KeychainHelper" ) + // In-memory cache of resolved values. Keychain reads aren't expensive in + // absolute terms but they were happening on every transcription (often + // multiple times) via the engines' apiKey getters. Cache entries are + // invalidated on save/delete. + private static var cache: [String: String?] = [:] + private static let cacheLock = NSLock() + // MARK: - Public API /// Save a value to the Keychain. Overwrites any existing value for the key. @@ -36,11 +43,23 @@ enum KeychainHelper { logger.error("Failed to save key \(key), status: \(status)") throw KeychainError.saveFailed(status) } + + cacheLock.lock() + cache[key] = value + cacheLock.unlock() + logger.info("Saved value for key: \(key)") } /// Retrieve a value from the Keychain. static func get(key: String) -> String? { + cacheLock.lock() + if let cached = cache[key] { + cacheLock.unlock() + return cached + } + cacheLock.unlock() + let query: [String: Any] = [ kSecClass as String: kSecClassGenericPassword, kSecAttrService as String: service, @@ -52,13 +71,20 @@ enum KeychainHelper { var result: AnyObject? let status = SecItemCopyMatching(query as CFDictionary, &result) - guard status == errSecSuccess, + let value: String? + if status == errSecSuccess, let data = result as? 
Data, - let value = String(data: data, encoding: .utf8) - else { - return nil + let decoded = String(data: data, encoding: .utf8) + { + value = decoded + } else { + value = nil } + cacheLock.lock() + cache[key] = value + cacheLock.unlock() + return value } @@ -72,6 +98,11 @@ enum KeychainHelper { ] let status = SecItemDelete(query as CFDictionary) + + cacheLock.lock() + cache[key] = .some(nil) // remember "absent" too, to avoid re-querying + cacheLock.unlock() + if status == errSecSuccess || status == errSecItemNotFound { return true } From c9774d0db06c5029d2a5b4ac4b1ca60b767d8436 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:24:04 +0530 Subject: [PATCH 12/17] Hold transcription Task and cancel on next recording A new Task.detached was spawned per recording without holding the handle. If the user fired the hotkey while a previous transcription was still in-flight (e.g. slow network), both would race and stale text could land in the user's new focus. - Hold the task in transcriptionTask - Cancel any pending task before starting a new recording AUDIOTYPE-1 --- AudioType/App/TranscriptionManager.swift | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/AudioType/App/TranscriptionManager.swift b/AudioType/App/TranscriptionManager.swift index 5948c4c..10b44db 100644 --- a/AudioType/App/TranscriptionManager.swift +++ b/AudioType/App/TranscriptionManager.swift @@ -32,6 +32,11 @@ class TranscriptionManager: ObservableObject { private var hotKeyManager: HotKeyManager? private var textInserter: TextInserter? + /// Active transcription task. Held so a new recording can cancel any + /// in-flight transcription from a previous one (e.g. user re-triggers + /// the hotkey while the network call is still pending). + private var transcriptionTask: Task? 
+ private let logger = Logger(subsystem: "com.audiotype", category: "TranscriptionManager") private init() {} @@ -60,7 +65,7 @@ class TranscriptionManager: ObservableObject { if !EngineResolver.anyEngineAvailable { logger.warning("No transcription engine available") - setState(.error("No engine available — add a cloud API key or enable Apple Speech")) + setState(.error("No engine available - add a cloud API key or enable Apple Speech")) } else { logger.info("Transcription engine ready: \(engine.displayName)") } @@ -85,7 +90,7 @@ class TranscriptionManager: ObservableObject { audioRecorder = nil } - /// Called when the user saves an API key or changes engine preference — re-evaluate. + /// Called when the user saves an API key or changes engine preference - re-evaluate. func onEngineConfigChanged() { let engine = EngineResolver.resolve() activeEngineName = engine.displayName @@ -93,7 +98,7 @@ class TranscriptionManager: ObservableObject { setState(.idle) logger.info("Engine config changed, active engine: \(engine.displayName)") } else { - setState(.error("No engine available — add a cloud API key or enable Apple Speech")) + setState(.error("No engine available - add a cloud API key or enable Apple Speech")) } } @@ -118,10 +123,15 @@ class TranscriptionManager: ObservableObject { } guard EngineResolver.anyEngineAvailable else { - setState(.error("No engine available — add a cloud API key or enable Apple Speech")) + setState(.error("No engine available - add a cloud API key or enable Apple Speech")) return } + // Cancel any still-pending transcription from a previous recording so + // we don't insert stale text into the user's new context. 
+ transcriptionTask?.cancel() + transcriptionTask = nil + do { try audioRecorder?.startRecording() setState(.recording) @@ -147,8 +157,9 @@ class TranscriptionManager: ObservableObject { logger.info("Recording stopped, captured \(samples.count) samples") setState(.processing) - // Transcribe in background - Task.detached { [weak self] in + // Transcribe in background. Hold the task so the next recording can + // cancel it if it's still pending. + transcriptionTask = Task.detached { [weak self] in await self?.transcribeAndInsert(samples: samples) } } From 59589ebbef134a8bd0bbe0f433fc1c9c6af3e530 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:24:36 +0530 Subject: [PATCH 13/17] Resolve transcription engine once per recording MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EngineResolver.resolve() was called twice — once via anyEngineAvailable at startRecording and once at transcribeAndInsert. Each call instantiated a fresh engine and (for cloud engines) hit the Keychain. Resolve once at recording start and reuse for the matching transcription. This also ensures the engine identity can't flip mid-recording if the user edits settings during capture. AUDIOTYPE-1 --- AudioType/App/TranscriptionManager.swift | 27 +++++++++++++++++------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/AudioType/App/TranscriptionManager.swift b/AudioType/App/TranscriptionManager.swift index 10b44db..17dbcdb 100644 --- a/AudioType/App/TranscriptionManager.swift +++ b/AudioType/App/TranscriptionManager.swift @@ -116,6 +116,12 @@ class TranscriptionManager: ObservableObject { } } + /// Engine resolved at recording start and reused for the matching + /// transcription. Keeps Keychain / availability checks out of the + /// post-stop hot path and ensures the engine identity doesn't change + /// mid-recording if the user edits settings. + private var activeEngine: TranscriptionEngine? 
+ private func startRecording() { guard state == .idle else { logger.warning("Cannot start recording: not in idle state") @@ -132,10 +138,15 @@ class TranscriptionManager: ObservableObject { transcriptionTask?.cancel() transcriptionTask = nil + // Resolve the engine once, up front. transcribeAndInsert will reuse it. + let engine = EngineResolver.resolve() + activeEngine = engine + activeEngineName = engine.displayName + do { try audioRecorder?.startRecording() setState(.recording) - logger.info("Recording started") + logger.info("Recording started with engine: \(engine.displayName)") } catch { logger.error("Failed to start recording: \(error.localizedDescription)") setState(.error("Failed to start recording")) @@ -154,22 +165,22 @@ class TranscriptionManager: ObservableObject { return } + // Take the engine resolved at startRecording. Falls back to a fresh + // resolution defensively if somehow nil. + let engine = activeEngine ?? EngineResolver.resolve() + activeEngine = nil + logger.info("Recording stopped, captured \(samples.count) samples") setState(.processing) // Transcribe in background. Hold the task so the next recording can // cancel it if it's still pending. 
transcriptionTask = Task.detached { [weak self] in - await self?.transcribeAndInsert(samples: samples) + await self?.transcribeAndInsert(samples: samples, engine: engine) } } - private func transcribeAndInsert(samples: [Float]) async { - let engine = EngineResolver.resolve() - - await MainActor.run { - self.activeEngineName = engine.displayName - } + private func transcribeAndInsert(samples: [Float], engine: TranscriptionEngine) async { let startTime = CFAbsoluteTimeGetCurrent() From 0e855f86156c5fbd8eb12fabd9c054d3605f0707 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:25:28 +0530 Subject: [PATCH 14/17] Retain self for event-tap lifetime; invalidate port on stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous code passed self unretained as the tap's refcon. If self were ever released while a callback was in-flight on another thread, takeUnretainedValue would dereference freed memory. - Retain self with passRetained on startListening; release on stopListening - Invalidate the CFMachPort before tearing down the run loop source so no further callbacks can fire while we clean up - Release the retain after the tap is dead so any in-flight callback still sees a live self This makes deinit unreachable while listening, which is the correct trade-off — cleanup must go through stopListening explicitly (which TranscriptionManager.cleanup already does). AUDIOTYPE-1 --- AudioType/Core/HotKeyManager.swift | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/AudioType/Core/HotKeyManager.swift b/AudioType/Core/HotKeyManager.swift index b6c396c..6c4c146 100644 --- a/AudioType/Core/HotKeyManager.swift +++ b/AudioType/Core/HotKeyManager.swift @@ -14,6 +14,12 @@ class HotKeyManager { private let callback: (HotKeyEvent) -> Void private var isRecording = false + // Retained pointer to self that the event-tap callback uses as refcon. 
+ // Holding self retained for the lifetime of the tap means the tap + // callback is always safe to call back into self, even if the owner + // releases its reference. We balance the retain in stopListening. + private var refconRetained: Unmanaged? + // Track fn key state private var fnKeyWasPressed = false @@ -29,6 +35,10 @@ class HotKeyManager { // Use CGEventTap for fn key detection let eventMask: CGEventMask = (1 << CGEventType.flagsChanged.rawValue) + // Retain self for the duration of the tap. Released in stopListening. + let retained = Unmanaged.passRetained(self) + refconRetained = retained + guard let tap = CGEvent.tapCreate( tap: .cgSessionEventTap, @@ -37,15 +47,16 @@ class HotKeyManager { eventsOfInterest: eventMask, callback: { proxy, type, event, refcon in // The event is owned by the system; pass it back unretained. - // Using passRetained here added a retain/release pair per event - // (i.e. on every modifier-key change system-wide). guard let refcon = refcon else { return Unmanaged.passUnretained(event) } let manager = Unmanaged.fromOpaque(refcon).takeUnretainedValue() return manager.handleEvent(proxy: proxy, type: type, event: event) }, - userInfo: Unmanaged.passUnretained(self).toOpaque() + userInfo: retained.toOpaque() ) else { + // Tap creation failed — release the retain we just took. + retained.release() + refconRetained = nil logger.error("Failed to create event tap. Accessibility permission may be required.") return } @@ -64,6 +75,9 @@ class HotKeyManager { func stopListening() { if let tap = eventTap { CGEvent.tapEnable(tap: tap, enable: false) + // Invalidating the mach port stops further callbacks before we drop + // the run loop source. + CFMachPortInvalidate(tap) } if let source = runLoopSource { @@ -75,6 +89,12 @@ class HotKeyManager { isRecording = false fnKeyWasPressed = false + // Balance the retain taken in startListening. 
Done last so any + // callback already in-flight against the now-disabled tap still sees + // a live self via its own takeUnretainedValue. + refconRetained?.release() + refconRetained = nil + logger.info("Hotkey listener stopped") } From 3430ab251c9ca598889e8e0c3f65ba6c2f081511 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:32:08 +0530 Subject: [PATCH 15/17] Fix SwiftLint violations introduced by perf work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - KeychainHelper: opening_brace — keep brace on the same line as the multi-line if-let condition - WAVEncoder: redundant_void_return — drop explicit -> Void on the withUnsafeMutableBytes closure AUDIOTYPE-1 --- AudioType/Core/WAVEncoder.swift | 2 +- AudioType/Utilities/KeychainHelper.swift | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/AudioType/Core/WAVEncoder.swift b/AudioType/Core/WAVEncoder.swift index 970f775..4596ae8 100644 --- a/AudioType/Core/WAVEncoder.swift +++ b/AudioType/Core/WAVEncoder.swift @@ -202,7 +202,7 @@ enum WAVEncoder { let totalSize = 44 + samples.count * 2 var data = Data(count: totalSize) - data.withUnsafeMutableBytes { (raw: UnsafeMutableRawBufferPointer) -> Void in + data.withUnsafeMutableBytes { (raw: UnsafeMutableRawBufferPointer) in guard let base = raw.baseAddress else { return } // --- Header --------------------------------------------------------- diff --git a/AudioType/Utilities/KeychainHelper.swift b/AudioType/Utilities/KeychainHelper.swift index fb41886..956e924 100644 --- a/AudioType/Utilities/KeychainHelper.swift +++ b/AudioType/Utilities/KeychainHelper.swift @@ -74,8 +74,7 @@ enum KeychainHelper { let value: String? if status == errSecSuccess, let data = result as? 
Data, - let decoded = String(data: data, encoding: .utf8) - { + let decoded = String(data: data, encoding: .utf8) { value = decoded } else { value = nil From 89aa5dcf43466beeeba8b2f3b3daa3f8dac9552a Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:37:58 +0530 Subject: [PATCH 16/17] Mark AppDelegate @MainActor to silence Sendable warning --- AudioType/App/AudioTypeApp.swift | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/AudioType/App/AudioTypeApp.swift b/AudioType/App/AudioTypeApp.swift index fd20ece..3054019 100644 --- a/AudioType/App/AudioTypeApp.swift +++ b/AudioType/App/AudioTypeApp.swift @@ -12,6 +12,7 @@ struct AudioTypeApp: App { } } +@MainActor class AppDelegate: NSObject, NSApplicationDelegate { private var statusItem: NSStatusItem! private var menuBarController: MenuBarController! @@ -49,8 +50,8 @@ class AppDelegate: NSObject, NSApplicationDelegate { // Show onboarding if permissions are missing or no engine is usable if !micPermission || !accessibilityPermission || !EngineResolver.anyEngineAvailable { - DispatchQueue.main.async { - self.showOnboarding() + await MainActor.run { + showOnboarding() } } else { // All set — start listening for hotkey From 6252cc4d0695a76ae340b6feb5477832dcaaf15d Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Sat, 2 May 2026 11:37:58 +0530 Subject: [PATCH 17/17] Stop auto-closing onboarding; let user click Get Started --- AudioType/UI/OnboardingView.swift | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/AudioType/UI/OnboardingView.swift b/AudioType/UI/OnboardingView.swift index 8f862fb..8155343 100644 --- a/AudioType/UI/OnboardingView.swift +++ b/AudioType/UI/OnboardingView.swift @@ -9,7 +9,6 @@ struct OnboardingView: View { @State private var anyCloudKeyConfigured = GroqEngine.isConfigured || OpenAIEngine.isConfigured @State private var apiKeyText = "" @State private var apiKeySaveError: String? 
- @State private var hasAutoCompleted = false let timer = Timer.publish(every: 0.5, on: .main, in: .common).autoconnect() @@ -77,7 +76,7 @@ struct OnboardingView: View { .font(.caption) .foregroundColor(.secondary) } - Text("Cloud transcription — faster & more accurate") + Text("Cloud transcription - faster & more accurate") .font(.caption) .foregroundColor(.secondary) } @@ -165,17 +164,13 @@ struct OnboardingView: View { checkPermissions() } .onReceive(timer) { _ in - // Continuously check permissions + // Continuously refresh permission state so the UI reflects changes made + // in System Settings. The user closes the window themselves via the + // "Get Started" button once everything is ready. microphoneGranted = AVCaptureDevice.authorizationStatus(for: .audio) == .authorized accessibilityGranted = Permissions.checkAccessibility() speechRecognitionGranted = Permissions.isSpeechRecognitionAuthorized anyCloudKeyConfigured = GroqEngine.isConfigured || OpenAIEngine.isConfigured - - // Auto-complete when all required permissions are ready and at least one engine works - if canContinue && !hasAutoCompleted { - hasAutoCompleted = true - onComplete() - } } }