diff --git a/AudioType/App/AudioTypeApp.swift b/AudioType/App/AudioTypeApp.swift
index fd20ece..3054019 100644
--- a/AudioType/App/AudioTypeApp.swift
+++ b/AudioType/App/AudioTypeApp.swift
@@ -12,6 +12,7 @@ struct AudioTypeApp: App {
   }
 }
 
+@MainActor
 class AppDelegate: NSObject, NSApplicationDelegate {
   private var statusItem: NSStatusItem!
   private var menuBarController: MenuBarController!
@@ -49,8 +50,8 @@ class AppDelegate: NSObject, NSApplicationDelegate {
 
     // Show onboarding if permissions are missing or no engine is usable
     if !micPermission || !accessibilityPermission || !EngineResolver.anyEngineAvailable {
-      DispatchQueue.main.async {
-        self.showOnboarding()
+      await MainActor.run {
+        showOnboarding()
       }
     } else {
       // All set — start listening for hotkey
diff --git a/AudioType/App/MenuBarController.swift b/AudioType/App/MenuBarController.swift
index e324448..57f99f4 100644
--- a/AudioType/App/MenuBarController.swift
+++ b/AudioType/App/MenuBarController.swift
@@ -17,18 +17,45 @@ extension NSImage {
   }
 }
 
-/// Shared observable for live audio level — drives the recording waveform.
+/// Shared observable for live audio level - drives the recording waveform.
+/// Also carries the overlay text so the hosting view doesn't have to be
+/// rebuilt on every state change.
 class AudioLevelMonitor: ObservableObject {
   static let shared = AudioLevelMonitor()
   @Published var level: Float = 0.0
+  @Published var overlayText: String = "Recording..."
 }
 
 class MenuBarController: NSObject, NSWindowDelegate {
   private weak var statusItem: NSStatusItem?
   private var transcriptionManager: TranscriptionManager
   private var recordingWindow: NSWindow?
+  private var recordingHostingView: NSHostingView<AnyView>?
   private var settingsWindow: NSWindow?
 
+  // Pre-rendered status-bar icons, built once. Calling NSImage.tinted on
+  // every state change re-rasterizes the symbol via lockFocus/unlockFocus
+  // and was a steady source of bitmap allocations.
+  private lazy var idleIcon: NSImage? = {
+    let img = NSImage(
+      systemSymbolName: "waveform.circle.fill", accessibilityDescription: "Ready")
+    img?.isTemplate = true
+    return img
+  }()
+  private lazy var recordingIcon: NSImage? = {
+    NSImage(systemSymbolName: "waveform.circle.fill", accessibilityDescription: "Recording")?
+      .tinted(with: AudioTypeTheme.nsRecordingRed)
+  }()
+  private lazy var processingIcon: NSImage? = {
+    NSImage(systemSymbolName: "ellipsis.circle.fill", accessibilityDescription: "Processing")?
+      .tinted(with: AudioTypeTheme.nsAmber)
+  }()
+  private lazy var errorIcon: NSImage? = {
+    NSImage(
+      systemSymbolName: "exclamationmark.triangle.fill", accessibilityDescription: "Error")?
+      .tinted(with: .systemRed)
+  }()
+
   init(transcriptionManager: TranscriptionManager) {
     self.transcriptionManager = transcriptionManager
     super.init()
@@ -50,6 +77,10 @@ class MenuBarController: NSObject, NSWindowDelegate {
     )
   }
 
+  deinit {
+    NotificationCenter.default.removeObserver(self)
+  }
+
   func setupStatusItem(_ statusItem: NSStatusItem) {
     self.statusItem = statusItem
 
@@ -106,38 +137,24 @@ class MenuBarController: NSObject, NSWindowDelegate {
 
     switch state {
     case .idle:
-      let img = NSImage(
-        systemSymbolName: "waveform.circle.fill", accessibilityDescription: "Ready")
-      img?.isTemplate = true
-      button.image = img
+      button.image = idleIcon
       AudioLevelMonitor.shared.level = 0
       hideRecordingIndicator()
       updateStatusMenuItem("Ready")
 
     case .recording:
-      // Tinted coral/red — non-template so the color shows through
-      if let base = NSImage(
-        systemSymbolName: "waveform.circle.fill", accessibilityDescription: "Recording") {
-        button.image = base.tinted(with: AudioTypeTheme.nsRecordingRed)
-      }
+      button.image = recordingIcon
       showRecordingIndicator()
       updateStatusMenuItem("Recording...")
 
     case .processing:
-      // Tinted amber — "I'm thinking"
-      if let base = NSImage(
-        systemSymbolName: "ellipsis.circle.fill", accessibilityDescription: "Processing") {
-        button.image = base.tinted(with: AudioTypeTheme.nsAmber)
-      }
+      button.image = processingIcon
       AudioLevelMonitor.shared.level = 0
       updateRecordingIndicator(text: "Processing...")
       updateStatusMenuItem("Processing...")
 
     case .error(let message):
-      let img = NSImage(
-        systemSymbolName: "exclamationmark.triangle.fill", accessibilityDescription: "Error")
-      img?.isTemplate = false
-      button.image = img?.tinted(with: .systemRed)
+      button.image = errorIcon
       hideRecordingIndicator()
       updateStatusMenuItem("Error: \(message)")
     }
@@ -175,22 +192,27 @@ class MenuBarController: NSObject, NSWindowDelegate {
       recordingWindow = window
     }
 
-    let hostingView = NSHostingView(
-      rootView: RecordingOverlay(text: "Recording...")
-        .environmentObject(AudioLevelMonitor.shared))
-    hostingView.frame = NSRect(x: 0, y: 0, width: 180, height: 50)
-    recordingWindow?.contentView = hostingView
+    // Build the hosting view once; subsequent updates just mutate the
+    // observable state. Re-creating NSHostingView on every state change
+    // was leaking the SwiftUI graph and Metal layers.
+    if recordingHostingView == nil {
+      let hosting = NSHostingView(
+        rootView: AnyView(
+          RecordingOverlay()
+            .environmentObject(AudioLevelMonitor.shared)
+        )
+      )
+      hosting.frame = NSRect(x: 0, y: 0, width: 180, height: 50)
+      recordingHostingView = hosting
+      recordingWindow?.contentView = hosting
+    }
+
+    AudioLevelMonitor.shared.overlayText = "Recording..."
     recordingWindow?.orderFront(nil)
   }
 
   private func updateRecordingIndicator(text: String) {
-    if let window = recordingWindow {
-      let hostingView = NSHostingView(
-        rootView: RecordingOverlay(text: text)
-          .environmentObject(AudioLevelMonitor.shared))
-      hostingView.frame = NSRect(x: 0, y: 0, width: 180, height: 50)
-      window.contentView = hostingView
-    }
+    AudioLevelMonitor.shared.overlayText = text
   }
 
   private func hideRecordingIndicator() {
diff --git a/AudioType/App/TranscriptionManager.swift b/AudioType/App/TranscriptionManager.swift
index 5948c4c..17dbcdb 100644
--- a/AudioType/App/TranscriptionManager.swift
+++ b/AudioType/App/TranscriptionManager.swift
@@ -32,6 +32,11 @@ class TranscriptionManager: ObservableObject {
   private var hotKeyManager: HotKeyManager?
   private var textInserter: TextInserter?
 
+  /// Active transcription task. Held so a new recording can cancel any
+  /// in-flight transcription from a previous one (e.g. user re-triggers
+  /// the hotkey while the network call is still pending).
+  private var transcriptionTask: Task<Void, Never>?
+
   private let logger = Logger(subsystem: "com.audiotype", category: "TranscriptionManager")
 
   private init() {}
@@ -60,7 +65,7 @@ class TranscriptionManager: ObservableObject {
 
     if !EngineResolver.anyEngineAvailable {
       logger.warning("No transcription engine available")
-      setState(.error("No engine available — add a cloud API key or enable Apple Speech"))
+      setState(.error("No engine available - add a cloud API key or enable Apple Speech"))
     } else {
       logger.info("Transcription engine ready: \(engine.displayName)")
     }
@@ -85,7 +90,7 @@ class TranscriptionManager: ObservableObject {
     audioRecorder = nil
   }
 
-  /// Called when the user saves an API key or changes engine preference — re-evaluate.
+  /// Called when the user saves an API key or changes engine preference - re-evaluate.
   func onEngineConfigChanged() {
     let engine = EngineResolver.resolve()
     activeEngineName = engine.displayName
@@ -93,7 +98,7 @@ class TranscriptionManager: ObservableObject {
       setState(.idle)
       logger.info("Engine config changed, active engine: \(engine.displayName)")
     } else {
-      setState(.error("No engine available — add a cloud API key or enable Apple Speech"))
+      setState(.error("No engine available - add a cloud API key or enable Apple Speech"))
     }
   }
 
@@ -111,6 +116,12 @@ class TranscriptionManager: ObservableObject {
     }
   }
 
+  /// Engine resolved at recording start and reused for the matching
+  /// transcription. Keeps Keychain / availability checks out of the
+  /// post-stop hot path and ensures the engine identity doesn't change
+  /// mid-recording if the user edits settings.
+  private var activeEngine: TranscriptionEngine?
+
   private func startRecording() {
     guard state == .idle else {
       logger.warning("Cannot start recording: not in idle state")
@@ -118,14 +129,24 @@ class TranscriptionManager: ObservableObject {
     }
 
     guard EngineResolver.anyEngineAvailable else {
-      setState(.error("No engine available — add a cloud API key or enable Apple Speech"))
+      setState(.error("No engine available - add a cloud API key or enable Apple Speech"))
       return
     }
 
+    // Cancel any still-pending transcription from a previous recording so
+    // we don't insert stale text into the user's new context.
+    transcriptionTask?.cancel()
+    transcriptionTask = nil
+
+    // Resolve the engine once, up front. transcribeAndInsert will reuse it.
+    let engine = EngineResolver.resolve()
+    activeEngine = engine
+    activeEngineName = engine.displayName
+
     do {
       try audioRecorder?.startRecording()
       setState(.recording)
-      logger.info("Recording started")
+      logger.info("Recording started with engine: \(engine.displayName)")
     } catch {
       logger.error("Failed to start recording: \(error.localizedDescription)")
       setState(.error("Failed to start recording"))
@@ -144,21 +165,22 @@ class TranscriptionManager: ObservableObject {
       return
     }
 
+    // Take the engine resolved at startRecording. Falls back to a fresh
+    // resolution defensively if somehow nil.
+    let engine = activeEngine ?? EngineResolver.resolve()
+    activeEngine = nil
+
     logger.info("Recording stopped, captured \(samples.count) samples")
     setState(.processing)
 
-    // Transcribe in background
-    Task.detached { [weak self] in
-      await self?.transcribeAndInsert(samples: samples)
+    // Transcribe in background. Hold the task so the next recording can
+    // cancel it if it's still pending.
+    transcriptionTask = Task.detached { [weak self] in
+      await self?.transcribeAndInsert(samples: samples, engine: engine)
     }
   }
 
-  private func transcribeAndInsert(samples: [Float]) async {
-    let engine = EngineResolver.resolve()
-
-    await MainActor.run {
-      self.activeEngineName = engine.displayName
-    }
+  private func transcribeAndInsert(samples: [Float], engine: TranscriptionEngine) async {
 
     let startTime = CFAbsoluteTimeGetCurrent()
 
diff --git a/AudioType/Core/AudioRecorder.swift b/AudioType/Core/AudioRecorder.swift
index 452148c..42ab4e5 100644
--- a/AudioType/Core/AudioRecorder.swift
+++ b/AudioType/Core/AudioRecorder.swift
@@ -1,8 +1,12 @@
 import AVFoundation
+import Accelerate
 import os.log
 
 class AudioRecorder {
-  private let audioEngine = AVAudioEngine()
+  // Lazily created on startRecording and torn down on stopRecording so the
+  // audio HAL doesn't stay warm between recordings (big idle-energy win for
+  // a menu-bar app).
+  private var audioEngine: AVAudioEngine?
   private var audioBuffer: [Float] = []
   private let bufferLock = NSLock()
   private var isRecording = false
@@ -16,8 +20,8 @@ class AudioRecorder {
   private let targetSampleRate: Double = 16000
 
   init() {
-    // Pre-allocate buffer for ~30 seconds of audio at 16kHz
-    audioBuffer.reserveCapacity(Int(targetSampleRate * 30))
+    // Buffer is allocated on each startRecording so the recorder has zero
+    // footprint when idle.
   }
 
   func startRecording() throws {
@@ -26,12 +30,19 @@ class AudioRecorder {
       return
     }
 
-    // Clear previous buffer
-    bufferLock.lock()
-    audioBuffer.removeAll(keepingCapacity: true)
-    bufferLock.unlock()
+    // Drop the buffer entirely (don't preserve capacity — see issue 1.4).
+    do {
+      bufferLock.lock()
+      defer { bufferLock.unlock() }
+      audioBuffer = []
+      audioBuffer.reserveCapacity(Int(targetSampleRate * 30))
+    }
 
-    let inputNode = audioEngine.inputNode
+    // Lazily create the audio engine on each recording.
+    let engine = AVAudioEngine()
+    audioEngine = engine
+
+    let inputNode = engine.inputNode
     let inputFormat = inputNode.outputFormat(forBus: 0)
 
     logger.info("Input format: \(inputFormat.sampleRate)Hz, \(inputFormat.channelCount) channels")
@@ -66,8 +77,8 @@ class AudioRecorder {
     }
 
     // Start audio engine
-    audioEngine.prepare()
-    try audioEngine.start()
+    engine.prepare()
+    try engine.start()
 
     isRecording = true
     logger.info("Recording started")
@@ -79,16 +90,25 @@ class AudioRecorder {
       return nil
     }
 
-    // Stop and remove tap
-    audioEngine.inputNode.removeTap(onBus: 0)
-    audioEngine.stop()
+    // Stop and tear down the engine so the audio HAL releases its resources.
+    if let engine = audioEngine {
+      engine.inputNode.removeTap(onBus: 0)
+      engine.stop()
+    }
+    audioEngine = nil
 
     isRecording = false
 
-    // Return captured samples
-    bufferLock.lock()
-    let samples = audioBuffer
-    bufferLock.unlock()
+    // Move the buffer out of the recorder (zero-copy via COW transfer) and
+    // leave the recorder with a fresh empty array so it doesn't keep the
+    // recording's high-water capacity in memory.
+    let samples: [Float]
+    do {
+      bufferLock.lock()
+      defer { bufferLock.unlock() }
+      samples = audioBuffer
+      audioBuffer = []
+    }
 
     logger.info(
       "Recording stopped, captured \(samples.count) samples (\(Double(samples.count) / self.targetSampleRate, format: .fixed(precision: 2))s)"
@@ -100,10 +120,7 @@ class AudioRecorder {
   private func processAudioBuffer(
     _ buffer: AVAudioPCMBuffer, converter: AVAudioConverter?, targetFormat: AVAudioFormat
   ) {
-    var samplesArray: [Float]
-
     if let converter = converter {
-      // Need to convert to target format
       let frameCount = AVAudioFrameCount(
         Double(buffer.frameLength) * targetSampleRate / buffer.format.sampleRate
       )
@@ -129,25 +146,37 @@ class AudioRecorder {
       }
 
       guard let channelData = convertedBuffer.floatChannelData else { return }
-      samplesArray = Array(
-        UnsafeBufferPointer(start: channelData[0], count: Int(convertedBuffer.frameLength)))
+      let count = Int(convertedBuffer.frameLength)
+      consume(samples: channelData[0], count: count)
     } else {
-      // Already in correct format
       guard let channelData = buffer.floatChannelData else { return }
-      samplesArray = Array(
-        UnsafeBufferPointer(start: channelData[0], count: Int(buffer.frameLength)))
+      let count = Int(buffer.frameLength)
+      consume(samples: channelData[0], count: count)
     }
+  }
 
-    // Compute RMS level for live waveform
-    let rms = sqrt(samplesArray.reduce(0) { $0 + $1 * $1 } / Float(max(samplesArray.count, 1)))
+  /// Consume a chunk of mic samples: compute RMS for the waveform and append
+  /// to the recording buffer — without ever materialising an intermediate
+  /// `[Float]`. Called on the audio thread.
+  private func consume(samples: UnsafePointer<Float>, count: Int) {
+    guard count > 0 else { return }
+
+    // RMS via Accelerate (vectorised). Replaces a scalar reduce loop that
+    // ran on every tap callback.
+    var meanSquare: Float = 0
+    vDSP_measqv(samples, 1, &meanSquare, vDSP_Length(count))
+    let rms = sqrt(meanSquare)
     // Normalize: typical speech RMS is 0.01–0.15, scale aggressively to 0–1
     let level = min(rms * 25, 1.0)
     onLevelUpdate?(level)
 
-    // Append to buffer
+    // Append directly from the unsafe buffer pointer; [Float] has an
+    // append(contentsOf:) overload that takes any Sequence, including
+    // UnsafeBufferPointer, so no intermediate Array is allocated.
+    let ptr = UnsafeBufferPointer(start: samples, count: count)
     bufferLock.lock()
-    audioBuffer.append(contentsOf: samplesArray)
-    bufferLock.unlock()
+    defer { bufferLock.unlock() }
+    audioBuffer.append(contentsOf: ptr)
   }
 }
 
diff --git a/AudioType/Core/HotKeyManager.swift b/AudioType/Core/HotKeyManager.swift
index 075b053..6c4c146 100644
--- a/AudioType/Core/HotKeyManager.swift
+++ b/AudioType/Core/HotKeyManager.swift
@@ -14,6 +14,12 @@ class HotKeyManager {
   private let callback: (HotKeyEvent) -> Void
   private var isRecording = false
 
+  // Retained pointer to self that the event-tap callback uses as refcon.
+  // Holding self retained for the lifetime of the tap means the tap
+  // callback is always safe to call back into self, even if the owner
+  // releases its reference. We balance the retain in stopListening.
+  private var refconRetained: Unmanaged<HotKeyManager>?
+
   // Track fn key state
   private var fnKeyWasPressed = false
 
@@ -29,6 +35,10 @@ class HotKeyManager {
     // Use CGEventTap for fn key detection
     let eventMask: CGEventMask = (1 << CGEventType.flagsChanged.rawValue)
 
+    // Retain self for the duration of the tap. Released in stopListening.
+    let retained = Unmanaged.passRetained(self)
+    refconRetained = retained
+
     guard
       let tap = CGEvent.tapCreate(
         tap: .cgSessionEventTap,
@@ -36,13 +46,17 @@ class HotKeyManager {
         options: .defaultTap,
         eventsOfInterest: eventMask,
         callback: { proxy, type, event, refcon in
-          guard let refcon = refcon else { return Unmanaged.passRetained(event) }
+          // The event is owned by the system; pass it back unretained.
+          guard let refcon = refcon else { return Unmanaged.passUnretained(event) }
           let manager = Unmanaged<HotKeyManager>.fromOpaque(refcon).takeUnretainedValue()
           return manager.handleEvent(proxy: proxy, type: type, event: event)
         },
-        userInfo: Unmanaged.passUnretained(self).toOpaque()
+        userInfo: retained.toOpaque()
       )
     else {
+      // Tap creation failed — release the retain we just took.
+      retained.release()
+      refconRetained = nil
       logger.error("Failed to create event tap. Accessibility permission may be required.")
       return
     }
@@ -61,6 +75,9 @@ class HotKeyManager {
   func stopListening() {
     if let tap = eventTap {
       CGEvent.tapEnable(tap: tap, enable: false)
+      // Invalidating the mach port stops further callbacks before we drop
+      // the run loop source.
+      CFMachPortInvalidate(tap)
     }
 
     if let source = runLoopSource {
@@ -72,6 +89,12 @@ class HotKeyManager {
     isRecording = false
     fnKeyWasPressed = false
 
+    // Balance the retain taken in startListening. Done last so any
+    // callback already in-flight against the now-disabled tap still sees
+    // a live self via its own takeUnretainedValue.
+    refconRetained?.release()
+    refconRetained = nil
+
     logger.info("Hotkey listener stopped")
   }
 
@@ -85,7 +108,7 @@ class HotKeyManager {
       if let tap = eventTap {
         CGEvent.tapEnable(tap: tap, enable: true)
       }
-      return Unmanaged.passRetained(event)
+      return Unmanaged.passUnretained(event)
     }
 
     let flags = event.flags
@@ -120,7 +143,7 @@ class HotKeyManager {
       }
     }
 
-    return Unmanaged.passRetained(event)
+    return Unmanaged.passUnretained(event)
   }
 
   deinit {
diff --git a/AudioType/Core/TextInserter.swift b/AudioType/Core/TextInserter.swift
index 34186c6..84e9061 100644
--- a/AudioType/Core/TextInserter.swift
+++ b/AudioType/Core/TextInserter.swift
@@ -6,24 +6,41 @@ import os.log
 class TextInserter {
   private let logger = Logger(subsystem: "com.audiotype", category: "TextInserter")
 
+  /// Above this length we paste via clipboard instead of synthesising one
+  /// keystroke per character. Per-char synthesis costs ~1 ms each plus a
+  /// fresh CGEventSource per char — for long dictations that's the dominant
+  /// post-recording latency the user feels.
+  private static let clipboardPasteThreshold = 30
+
   func insertText(_ text: String) {
     guard !text.isEmpty else { return }
 
     logger.info("Inserting text: \(text.prefix(50))...")
 
-    // Use CGEvent to simulate keyboard input
-    for char in text {
-      insertCharacter(char)
-      // Small delay between characters for reliability
-      usleep(1000)  // 1ms
+    if text.count > Self.clipboardPasteThreshold {
+      insertTextViaClipboard(text)
+    } else {
+      insertTextViaKeystrokes(text)
     }
 
     logger.info("Text insertion complete")
   }
 
-  private func insertCharacter(_ char: Character) {
+  /// Per-character keystroke synthesis. Used for short strings where
+  /// clipboard paste's clipboard-restore quirks aren't worth it.
+  private func insertTextViaKeystrokes(_ text: String) {
+    // Cache the event source once for the whole insertion — creating one
+    // per character was a measurable hot path.
     let source = CGEventSource(stateID: .hidSystemState)
 
+    for char in text {
+      insertCharacter(char, source: source)
+      // Tiny delay so target apps don't drop events under load.
+      usleep(1000)  // 1ms
+    }
+  }
+
+  private func insertCharacter(_ char: Character, source: CGEventSource?) {
     // Create key down event
     guard let keyDown = CGEvent(keyboardEventSource: source, virtualKey: 0, keyDown: true) else {
       logger.error("Failed to create keyDown event")
diff --git a/AudioType/Core/TextPostProcessor.swift b/AudioType/Core/TextPostProcessor.swift
index 706fd25..6aec6c4 100644
--- a/AudioType/Core/TextPostProcessor.swift
+++ b/AudioType/Core/TextPostProcessor.swift
@@ -106,41 +106,37 @@ class TextPostProcessor {
   // User-defined custom replacements
   private var customReplacements: [String: String] = [:]
 
+  // Cached compiled regex + lookup table. Rebuilt only when the catalog
+  // changes (custom replacements added/removed). The previous code rebuilt
+  // a merged dictionary and ran ~85 case-insensitive String scans on every
+  // single transcription.
+  private var cachedRegex: NSRegularExpression?
+  private var cachedLookup: [String: String] = [:]
+  private let regexLock = NSLock()
+
   private init() {
     loadCustomReplacements()
+    rebuildRegex()
   }
 
   /// Process transcribed text with corrections
   func process(_ text: String) -> String {
-    var result = text
-
-    // Apply word replacements (case-insensitive)
-    let allReplacements = wordReplacements.merging(customReplacements) { _, custom in custom }
-
-    for (pattern, replacement) in allReplacements {
-      result = result.replacingOccurrences(
-        of: pattern,
-        with: replacement,
-        options: .caseInsensitive
-      )
-    }
-
-    // Capitalize first letter of sentences
-    result = capitalizeSentences(result)
-
-    return result
+    let result = applyReplacements(text)
+    return capitalizeSentences(result)
   }
 
   /// Add a custom word replacement
   func addCustomReplacement(from: String, to: String) {
     customReplacements[from.lowercased()] = to
     saveCustomReplacements()
+    rebuildRegex()
   }
 
   /// Remove a custom replacement
   func removeCustomReplacement(from: String) {
     customReplacements.removeValue(forKey: from.lowercased())
     saveCustomReplacements()
+    rebuildRegex()
   }
 
   /// Get all custom replacements
@@ -150,6 +146,73 @@ class TextPostProcessor {
 
   // MARK: - Private
 
+  /// Rebuild the compiled regex from the current built-in + custom catalogs.
+  /// Custom replacements override built-ins on key collision, compared
+  /// case-insensitively because matching is case-insensitive too.
+  private func rebuildRegex() {
+    regexLock.lock()
+    defer { regexLock.unlock() }
+
+    // Fold keys to lowercase: built-ins first, then custom on top. Going
+    // through one merged dictionary would let hash order pick the winner
+    // when two keys differ only by case.
+    var lookup: [String: String] = [:]
+    lookup.reserveCapacity(wordReplacements.count + customReplacements.count)
+    for (key, value) in wordReplacements { lookup[key.lowercased()] = value }
+    for (key, value) in customReplacements { lookup[key.lowercased()] = value }
+    cachedLookup = lookup
+
+    // Longest-first so e.g. "rest api" wins over "api"; ties break
+    // alphabetically so the pattern never depends on dictionary hashing.
+    let keys = lookup.keys.sorted { ($0.count, $1) > ($1.count, $0) }
+    let pattern = keys.map { NSRegularExpression.escapedPattern(for: $0) }
+      .joined(separator: "|")
+
+    // A failed compile leaves cachedRegex nil, which disables replacements.
+    cachedRegex = try? NSRegularExpression(pattern: pattern, options: [.caseInsensitive])
+  }
+
+  /// Apply replacements in a single regex pass.
+  private func applyReplacements(_ text: String) -> String {
+    regexLock.lock()
+    let regex = cachedRegex
+    let lookup = cachedLookup
+    regexLock.unlock()
+
+    guard let regex = regex, !text.isEmpty else { return text }
+
+    let nsText = text as NSString
+    let range = NSRange(location: 0, length: nsText.length)
+    let matches = regex.matches(in: text, options: [], range: range)
+    if matches.isEmpty { return text }
+
+    // Reassemble in one pass, alternating original spans and replacements.
+    var result = ""
+    result.reserveCapacity(text.count)
+    var cursor = 0
+    for match in matches {
+      let r = match.range
+      if r.location > cursor {
+        result.append(
+          nsText.substring(with: NSRange(location: cursor, length: r.location - cursor))
+        )
+      }
+      let matched = nsText.substring(with: r).lowercased()
+      if let replacement = lookup[matched] {
+        result.append(replacement)
+      } else {
+        result.append(nsText.substring(with: r))
+      }
+      cursor = r.location + r.length
+    }
+    if cursor < nsText.length {
+      result.append(
+        nsText.substring(with: NSRange(location: cursor, length: nsText.length - cursor))
+      )
+    }
+    return result
+  }
+
   private func capitalizeSentences(_ text: String) -> String {
     var result = ""
     var capitalizeNext = true
diff --git a/AudioType/Core/WAVEncoder.swift b/AudioType/Core/WAVEncoder.swift
index ea1210a..4596ae8 100644
--- a/AudioType/Core/WAVEncoder.swift
+++ b/AudioType/Core/WAVEncoder.swift
@@ -1,3 +1,4 @@
+import Accelerate
 import Foundation
 import os.log
 
@@ -25,7 +26,7 @@ struct WhisperAPIConfig {
 /// and response parsing are all handled here.
 class WhisperAPIEngine: TranscriptionEngine {
 
-  /// Provider configuration — subclasses must override.
+  /// Provider configuration - subclasses must override.
   var config: WhisperAPIConfig {
     fatalError("Subclasses must override config")
   }
@@ -87,7 +88,7 @@ class WhisperAPIEngine: TranscriptionEngine {
       throw WhisperAPIError.invalidURL
     }
 
-    let request = WAVEncoder.buildRequest(
+    let (request, body) = WAVEncoder.buildRequest(
       url: url,
       apiKey: apiKey,
       wavData: wavData,
@@ -97,7 +98,10 @@ class WhisperAPIEngine: TranscriptionEngine {
 
     let (data, response): (Data, URLResponse)
     do {
-      (data, response) = try await URLSession.shared.data(for: request)
+      // upload(for:from:) keeps a single copy of the body; setting
+      // request.httpBody and calling data(for:) tends to keep the body
+      // resident in two places. With ~2 MB WAV bodies this matters.
+      (data, response) = try await URLSession.shared.upload(for: request, from: body)
     } catch {
       throw WhisperAPIError.networkError(error.localizedDescription)
     }
@@ -178,50 +182,97 @@ enum WhisperAPIError: Error, LocalizedError {
 enum WAVEncoder {
 
   /// Encode Float32 PCM samples into WAV in memory (16-bit PCM, mono).
+  ///
+  /// The previous implementation allocated an intermediate `[Int16]`
+  /// (~960 KB for a 30 s clip), let `Data` realloc as it grew, and
+  /// did 480 000 individual `appendLittleEndian` calls. This version:
+  ///
+  /// - Allocates the final `Data` once at exact size (44-byte header + 2N).
+  /// - Writes the header in place.
+  /// - Uses Accelerate to clip Float → Int16 directly into the data
+  ///   region in a single pass.
   static func encode(samples: [Float], sampleRate: Int) -> Data {
-    var data = Data()
-
-    let int16Samples = samples.map { sample -> Int16 in
-      let clamped = max(-1.0, min(1.0, sample))
-      return Int16(clamped * Float(Int16.max))
-    }
-
     let numChannels: UInt16 = 1
     let bitsPerSample: UInt16 = 16
     let byteRate = UInt32(sampleRate)
       * UInt32(numChannels) * UInt32(bitsPerSample / 8)
     let blockAlign = numChannels * (bitsPerSample / 8)
-    let dataSize = UInt32(int16Samples.count * 2)
-    let fileSize = 36 + dataSize
-
-    // RIFF header
-    data.append(contentsOf: "RIFF".utf8)
-    data.appendLittleEndian(fileSize)
-    data.append(contentsOf: "WAVE".utf8)
-
-    // fmt chunk
-    data.append(contentsOf: "fmt ".utf8)
-    data.appendLittleEndian(UInt32(16))
-    data.appendLittleEndian(UInt16(1))  // PCM
-    data.appendLittleEndian(numChannels)
-    data.appendLittleEndian(UInt32(sampleRate))
-    data.appendLittleEndian(byteRate)
-    data.appendLittleEndian(blockAlign)
-    data.appendLittleEndian(bitsPerSample)
-
-    // data chunk
-    data.append(contentsOf: "data".utf8)
-    data.appendLittleEndian(dataSize)
-
-    for sample in int16Samples {
-      data.appendLittleEndian(sample)
+    let dataSize = UInt32(samples.count * 2)
+    let fileSize: UInt32 = 36 + dataSize
+    let totalSize = 44 + samples.count * 2
+
+    var data = Data(count: totalSize)
+    data.withUnsafeMutableBytes { (raw: UnsafeMutableRawBufferPointer) in
+      guard let base = raw.baseAddress else { return }
+
+      // --- Header ---------------------------------------------------------
+      func writeASCII(_ string: String, at offset: Int) {
+        for (i, byte) in string.utf8.enumerated() {
+          base.storeBytes(of: byte, toByteOffset: offset + i, as: UInt8.self)
+        }
+      }
+      func writeLE<T: FixedWidthInteger>(_ value: T, at offset: Int) {
+        base.storeBytes(of: value.littleEndian, toByteOffset: offset, as: T.self)
+      }
+
+      writeASCII("RIFF", at: 0)
+      writeLE(fileSize, at: 4)
+      writeASCII("WAVE", at: 8)
+
+      writeASCII("fmt ", at: 12)
+      writeLE(UInt32(16), at: 16)
+      writeLE(UInt16(1), at: 20)  // PCM
+      writeLE(numChannels, at: 22)
+      writeLE(UInt32(sampleRate), at: 24)
+      writeLE(byteRate, at: 28)
+      writeLE(blockAlign, at: 32)
+      writeLE(bitsPerSample, at: 34)
+
+      writeASCII("data", at: 36)
+      writeLE(dataSize, at: 40)
+
+      // --- PCM data -------------------------------------------------------
+      // Clip to [-1, 1], scale by Int16.max, convert to Int16 — all via
+      // Accelerate, all into the destination region in one pass.
+      guard !samples.isEmpty else { return }
+
+      let dst = base.advanced(by: 44).assumingMemoryBound(to: Int16.self)
+      let n = vDSP_Length(samples.count)
+
+      samples.withUnsafeBufferPointer { src in
+        guard let srcBase = src.baseAddress else { return }
+
+        // Scratch buffer for clip+scale; reusing src memory would mutate
+        // the caller's input, so allocate a transient float buffer.
+        let scratch = UnsafeMutablePointer<Float>.allocate(capacity: samples.count)
+        defer { scratch.deallocate() }
+
+        // Clip into scratch.
+        var lo: Float = -1.0
+        var hi: Float = 1.0
+        vDSP_vclip(srcBase, 1, &lo, &hi, scratch, 1, n)
+
+        // Scale by Int16.max in place.
+        var scale = Float(Int16.max)
+        vDSP_vsmul(scratch, 1, &scale, scratch, 1, n)
+
+        // Convert Float → Int16 (truncating toward zero) directly into dst.
+        vDSP_vfix16(scratch, 1, dst, 1, n)
+
+        // WAV is little-endian. On Apple silicon and Intel, host order is
+        // already LE so no byte-swap needed. Guard with a debug-only assert
+        // for any future big-endian Apple platform (none exist today).
+        assert(1.littleEndian == 1, "WAVEncoder assumes little-endian host")
+      }
     }
-
     return data
   }
 
   /// Build a multipart/form-data request for an OpenAI-compatible
   /// `/v1/audio/transcriptions` endpoint.
+  ///
+  /// Returns the request and body separately so callers can pass the body
+  /// to `URLSession.upload(for:from:)` instead of setting `httpBody`.
   static func buildRequest(
     url: URL,
     apiKey: String,
@@ -229,7 +280,7 @@ enum WAVEncoder {
     model: String,
     languageCode: String?,
     timeoutInterval: TimeInterval = 30
-  ) -> URLRequest {
+  ) -> (URLRequest, Data) {
     let boundary = UUID().uuidString
 
     var request = URLRequest(url: url)
@@ -266,8 +317,7 @@ enum WAVEncoder {
     )
     body.append(Data("--\(boundary)--\r\n".utf8))
 
-    request.httpBody = body
-    return request
+    return (request, body)
   }
 }
 
diff --git a/AudioType/UI/OnboardingView.swift b/AudioType/UI/OnboardingView.swift
index 8f862fb..8155343 100644
--- a/AudioType/UI/OnboardingView.swift
+++ b/AudioType/UI/OnboardingView.swift
@@ -9,7 +9,6 @@ struct OnboardingView: View {
   @State private var anyCloudKeyConfigured = GroqEngine.isConfigured || OpenAIEngine.isConfigured
   @State private var apiKeyText = ""
   @State private var apiKeySaveError: String?
-  @State private var hasAutoCompleted = false
 
   let timer = Timer.publish(every: 0.5, on: .main, in: .common).autoconnect()
 
@@ -77,7 +76,7 @@ struct OnboardingView: View {
                   .font(.caption)
                   .foregroundColor(.secondary)
               }
-              Text("Cloud transcription — faster & more accurate")
+              Text("Cloud transcription - faster & more accurate")
                 .font(.caption)
                 .foregroundColor(.secondary)
             }
@@ -165,17 +164,13 @@ struct OnboardingView: View {
       checkPermissions()
     }
     .onReceive(timer) { _ in
-      // Continuously check permissions
+      // Continuously refresh permission state so the UI reflects changes made
+      // in System Settings. The user closes the window themselves via the
+      // "Get Started" button once everything is ready.
       microphoneGranted = AVCaptureDevice.authorizationStatus(for: .audio) == .authorized
       accessibilityGranted = Permissions.checkAccessibility()
       speechRecognitionGranted = Permissions.isSpeechRecognitionAuthorized
       anyCloudKeyConfigured = GroqEngine.isConfigured || OpenAIEngine.isConfigured
-
-      // Auto-complete when all required permissions are ready and at least one engine works
-      if canContinue && !hasAutoCompleted {
-        hasAutoCompleted = true
-        onComplete()
-      }
     }
   }
 
diff --git a/AudioType/UI/RecordingOverlay.swift b/AudioType/UI/RecordingOverlay.swift
index 39ceb5a..cc7b99e 100644
--- a/AudioType/UI/RecordingOverlay.swift
+++ b/AudioType/UI/RecordingOverlay.swift
@@ -1,11 +1,10 @@
 import SwiftUI
 
 struct RecordingOverlay: View {
-  let text: String
   @EnvironmentObject var levelMonitor: AudioLevelMonitor
 
   private var isRecording: Bool {
-    text == "Recording..."
+    levelMonitor.overlayText == "Recording..."
   }
 
   var body: some View {
diff --git a/AudioType/Utilities/KeychainHelper.swift b/AudioType/Utilities/KeychainHelper.swift
index 5a4b4ec..956e924 100644
--- a/AudioType/Utilities/KeychainHelper.swift
+++ b/AudioType/Utilities/KeychainHelper.swift
@@ -12,6 +12,13 @@ enum KeychainHelper {
     subsystem: "com.audiotype", category: "KeychainHelper"
   )
 
+  // In-memory cache of resolved values. Keychain reads aren't expensive in
+  // absolute terms but they were happening on every transcription (often
+  // multiple times) via the engines' apiKey getters. Entries are refreshed
+  // on save/delete; a cached nil means the lookup produced no value.
+  private static var cache: [String: String?] = [:]
+  private static let cacheLock = NSLock()
+
   // MARK: - Public API
 
   /// Save a value to the Keychain. Overwrites any existing value for the key.
@@ -36,11 +43,23 @@ enum KeychainHelper {
       logger.error("Failed to save key \(key), status: \(status)")
       throw KeychainError.saveFailed(status)
     }
+
+    cacheLock.lock()
+    cache[key] = value
+    cacheLock.unlock()
+
     logger.info("Saved value for key: \(key)")
   }
 
   /// Retrieve a value from the Keychain.
   static func get(key: String) -> String? {
+    cacheLock.lock()
+    if let cached = cache[key] {
+      cacheLock.unlock()
+      return cached
+    }
+    cacheLock.unlock()
+
     let query: [String: Any] = [
       kSecClass as String: kSecClassGenericPassword,
       kSecAttrService as String: service,
@@ -52,13 +71,19 @@ enum KeychainHelper {
     var result: AnyObject?
     let status = SecItemCopyMatching(query as CFDictionary, &result)
 
-    guard status == errSecSuccess,
+    let value: String?
+    if status == errSecSuccess,
       let data = result as? Data,
-      let value = String(data: data, encoding: .utf8)
-    else {
-      return nil
+      let decoded = String(data: data, encoding: .utf8) {
+      value = decoded
+    } else {
+      value = nil
     }
 
+    // Cache only definitive results; a transient failure (e.g. locked keychain) must stay retryable.
+    cacheLock.lock()
+    if status == errSecSuccess || status == errSecItemNotFound { cache[key] = value }
+    cacheLock.unlock()
     return value
   }
 
@@ -72,6 +97,11 @@ enum KeychainHelper {
     ]
 
     let status = SecItemDelete(query as CFDictionary)
+    // Cache "absent" only for a confirmed delete; otherwise forget the entry so get() re-queries.
+    let removed = status == errSecSuccess || status == errSecItemNotFound
+    cacheLock.lock()
+    if removed { cache[key] = .some(nil) } else { cache.removeValue(forKey: key) }
+    cacheLock.unlock()
     if status == errSecSuccess || status == errSecItemNotFound {
       return true
     }