Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
1acea31
Tear down AVAudioEngine when idle and free buffer on stop
PatelUtkarsh May 2, 2026
cdc7460
Use clipboard paste for long inserts; cache CGEventSource
PatelUtkarsh May 2, 2026
5f86698
Cache RecordingOverlay hosting view; drive text via observable
PatelUtkarsh May 2, 2026
f9deded
Remove notification observers in MenuBarController deinit
PatelUtkarsh May 2, 2026
635811d
Cache tinted status-bar icons instead of re-rendering per state
PatelUtkarsh May 2, 2026
b85526b
Return CGEvent unretained from event tap callback
PatelUtkarsh May 2, 2026
59aa202
Vectorise RMS and stop allocating per audio tap
PatelUtkarsh May 2, 2026
5ab3b07
Rewrite WAVEncoder.encode with preallocated Data + vDSP
PatelUtkarsh May 2, 2026
854ab2d
Switch transcribe upload to URLSession.upload(for:from:)
PatelUtkarsh May 2, 2026
22e62b5
Compile single regex for TextPostProcessor replacements
PatelUtkarsh May 2, 2026
31c536a
Cache Keychain reads in memory; invalidate on save/delete
PatelUtkarsh May 2, 2026
c9774d0
Hold transcription Task and cancel on next recording
PatelUtkarsh May 2, 2026
59589eb
Resolve transcription engine once per recording
PatelUtkarsh May 2, 2026
0e855f8
Retain self for event-tap lifetime; invalidate port on stop
PatelUtkarsh May 2, 2026
3430ab2
Fix SwiftLint violations introduced by perf work
PatelUtkarsh May 2, 2026
89aa5dc
Mark AppDelegate @MainActor to silence Sendable warning
PatelUtkarsh May 2, 2026
6252cc4
Stop auto-closing onboarding; let user click Get Started
PatelUtkarsh May 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions AudioType/App/AudioTypeApp.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ struct AudioTypeApp: App {
}
}

@MainActor
class AppDelegate: NSObject, NSApplicationDelegate {
private var statusItem: NSStatusItem!
private var menuBarController: MenuBarController!
Expand Down Expand Up @@ -49,8 +50,8 @@ class AppDelegate: NSObject, NSApplicationDelegate {

// Show onboarding if permissions are missing or no engine is usable
if !micPermission || !accessibilityPermission || !EngineResolver.anyEngineAvailable {
DispatchQueue.main.async {
self.showOnboarding()
await MainActor.run {
showOnboarding()
}
} else {
// All set — start listening for hotkey
Expand Down
84 changes: 53 additions & 31 deletions AudioType/App/MenuBarController.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,45 @@ extension NSImage {
}
}

/// Shared observable for live audio level — drives the recording waveform.
/// Shared observable for live audio level - drives the recording waveform.
/// Also carries the overlay text so the hosting view doesn't have to be
/// rebuilt on every state change.
class AudioLevelMonitor: ObservableObject {
/// Single shared instance. The menu-bar controller writes to it and injects
/// it into the recording overlay as an environment object.
static let shared = AudioLevelMonitor()
/// Most recent audio level. Starts at 0 and is reset to 0 by the controller
/// when the state becomes idle or processing.
/// NOTE(review): value range and the writer during recording are not
/// visible here - presumably the audio tap; confirm against the recorder.
@Published var level: Float = 0.0
/// Caption for the recording overlay. Defaults to "Recording..." and is
/// reassigned by the controller instead of rebuilding the hosting view.
@Published var overlayText: String = "Recording..."
}

class MenuBarController: NSObject, NSWindowDelegate {
private weak var statusItem: NSStatusItem?
private var transcriptionManager: TranscriptionManager
private var recordingWindow: NSWindow?
private var recordingHostingView: NSHostingView<AnyView>?
private var settingsWindow: NSWindow?

// Pre-rendered status-bar icons, built once. Calling NSImage.tinted on
// every state change re-rasterizes the symbol via lockFocus/unlockFocus
// and was a steady source of bitmap allocations.
//
// Each property is `lazy`, so its image is created on first access and the
// same instance is reused afterwards. Each is optional and ends up nil when
// the SF Symbol lookup returns nil.

// Idle state: the waveform symbol marked as a template image, so it is not
// given an explicit tint here.
private lazy var idleIcon: NSImage? = {
let img = NSImage(
systemSymbolName: "waveform.circle.fill", accessibilityDescription: "Ready")
img?.isTemplate = true
return img
}()
// Recording state: the same waveform symbol, tinted with the theme's
// recording red.
private lazy var recordingIcon: NSImage? = {
NSImage(systemSymbolName: "waveform.circle.fill", accessibilityDescription: "Recording")?
.tinted(with: AudioTypeTheme.nsRecordingRed)
}()
// Processing state: ellipsis symbol tinted with the theme's amber.
private lazy var processingIcon: NSImage? = {
NSImage(systemSymbolName: "ellipsis.circle.fill", accessibilityDescription: "Processing")?
.tinted(with: AudioTypeTheme.nsAmber)
}()
// Error state: warning-triangle symbol tinted with the system red color.
private lazy var errorIcon: NSImage? = {
NSImage(
systemSymbolName: "exclamationmark.triangle.fill", accessibilityDescription: "Error")?
.tinted(with: .systemRed)
}()

init(transcriptionManager: TranscriptionManager) {
self.transcriptionManager = transcriptionManager
super.init()
Expand All @@ -50,6 +77,10 @@ class MenuBarController: NSObject, NSWindowDelegate {
)
}

// Unregister this controller from the default notification center when it
// is deallocated, removing every observation registered with `self` as the
// observer.
// NOTE(review): the matching addObserver calls are in init and only partly
// visible here - confirm they all use NotificationCenter.default.
deinit {
NotificationCenter.default.removeObserver(self)
}

func setupStatusItem(_ statusItem: NSStatusItem) {
self.statusItem = statusItem

Expand Down Expand Up @@ -106,38 +137,24 @@ class MenuBarController: NSObject, NSWindowDelegate {

switch state {
case .idle:
let img = NSImage(
systemSymbolName: "waveform.circle.fill", accessibilityDescription: "Ready")
img?.isTemplate = true
button.image = img
button.image = idleIcon
AudioLevelMonitor.shared.level = 0
hideRecordingIndicator()
updateStatusMenuItem("Ready")

case .recording:
// Tinted coral/red — non-template so the color shows through
if let base = NSImage(
systemSymbolName: "waveform.circle.fill", accessibilityDescription: "Recording") {
button.image = base.tinted(with: AudioTypeTheme.nsRecordingRed)
}
button.image = recordingIcon
showRecordingIndicator()
updateStatusMenuItem("Recording...")

case .processing:
// Tinted amber — "I'm thinking"
if let base = NSImage(
systemSymbolName: "ellipsis.circle.fill", accessibilityDescription: "Processing") {
button.image = base.tinted(with: AudioTypeTheme.nsAmber)
}
button.image = processingIcon
AudioLevelMonitor.shared.level = 0
updateRecordingIndicator(text: "Processing...")
updateStatusMenuItem("Processing...")

case .error(let message):
let img = NSImage(
systemSymbolName: "exclamationmark.triangle.fill", accessibilityDescription: "Error")
img?.isTemplate = false
button.image = img?.tinted(with: .systemRed)
button.image = errorIcon
hideRecordingIndicator()
updateStatusMenuItem("Error: \(message)")
}
Expand Down Expand Up @@ -175,22 +192,27 @@ class MenuBarController: NSObject, NSWindowDelegate {
recordingWindow = window
}

let hostingView = NSHostingView(
rootView: RecordingOverlay(text: "Recording...")
.environmentObject(AudioLevelMonitor.shared))
hostingView.frame = NSRect(x: 0, y: 0, width: 180, height: 50)
recordingWindow?.contentView = hostingView
// Build the hosting view once; subsequent updates just mutate the
// observable state. Re-creating NSHostingView on every state change
// was leaking the SwiftUI graph and Metal layers.
if recordingHostingView == nil {
let hosting = NSHostingView(
rootView: AnyView(
RecordingOverlay()
.environmentObject(AudioLevelMonitor.shared)
)
)
hosting.frame = NSRect(x: 0, y: 0, width: 180, height: 50)
recordingHostingView = hosting
recordingWindow?.contentView = hosting
}

AudioLevelMonitor.shared.overlayText = "Recording..."
recordingWindow?.orderFront(nil)
}

private func updateRecordingIndicator(text: String) {
if let window = recordingWindow {
let hostingView = NSHostingView(
rootView: RecordingOverlay(text: text)
.environmentObject(AudioLevelMonitor.shared))
hostingView.frame = NSRect(x: 0, y: 0, width: 180, height: 50)
window.contentView = hostingView
}
AudioLevelMonitor.shared.overlayText = text
}

private func hideRecordingIndicator() {
Expand Down
50 changes: 36 additions & 14 deletions AudioType/App/TranscriptionManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ class TranscriptionManager: ObservableObject {
private var hotKeyManager: HotKeyManager?
private var textInserter: TextInserter?

/// Active transcription task. Held so a new recording can cancel any
/// in-flight transcription from a previous one (e.g. user re-triggers
/// the hotkey while the network call is still pending).
private var transcriptionTask: Task<Void, Never>?

private let logger = Logger(subsystem: "com.audiotype", category: "TranscriptionManager")

private init() {}
Expand Down Expand Up @@ -60,7 +65,7 @@ class TranscriptionManager: ObservableObject {

if !EngineResolver.anyEngineAvailable {
logger.warning("No transcription engine available")
setState(.error("No engine available add a cloud API key or enable Apple Speech"))
setState(.error("No engine available - add a cloud API key or enable Apple Speech"))
} else {
logger.info("Transcription engine ready: \(engine.displayName)")
}
Expand All @@ -85,15 +90,15 @@ class TranscriptionManager: ObservableObject {
audioRecorder = nil
}

/// Called when the user saves an API key or changes engine preference re-evaluate.
/// Called when the user saves an API key or changes engine preference - re-evaluate.
func onEngineConfigChanged() {
let engine = EngineResolver.resolve()
activeEngineName = engine.displayName
if EngineResolver.anyEngineAvailable {
setState(.idle)
logger.info("Engine config changed, active engine: \(engine.displayName)")
} else {
setState(.error("No engine available add a cloud API key or enable Apple Speech"))
setState(.error("No engine available - add a cloud API key or enable Apple Speech"))
}
}

Expand All @@ -111,21 +116,37 @@ class TranscriptionManager: ObservableObject {
}
}

/// Engine resolved at recording start and reused for the matching
/// transcription. Keeps Keychain / availability checks out of the
/// post-stop hot path and ensures the engine identity doesn't change
/// mid-recording if the user edits settings.
private var activeEngine: TranscriptionEngine?

private func startRecording() {
guard state == .idle else {
logger.warning("Cannot start recording: not in idle state")
return
}

guard EngineResolver.anyEngineAvailable else {
setState(.error("No engine available add a cloud API key or enable Apple Speech"))
setState(.error("No engine available - add a cloud API key or enable Apple Speech"))
return
}

// Cancel any still-pending transcription from a previous recording so
// we don't insert stale text into the user's new context.
transcriptionTask?.cancel()
transcriptionTask = nil

// Resolve the engine once, up front. transcribeAndInsert will reuse it.
let engine = EngineResolver.resolve()
activeEngine = engine
activeEngineName = engine.displayName

do {
try audioRecorder?.startRecording()
setState(.recording)
logger.info("Recording started")
logger.info("Recording started with engine: \(engine.displayName)")
} catch {
logger.error("Failed to start recording: \(error.localizedDescription)")
setState(.error("Failed to start recording"))
Expand All @@ -144,21 +165,22 @@ class TranscriptionManager: ObservableObject {
return
}

// Take the engine resolved at startRecording. Falls back to a fresh
// resolution defensively if somehow nil.
let engine = activeEngine ?? EngineResolver.resolve()
activeEngine = nil

logger.info("Recording stopped, captured \(samples.count) samples")
setState(.processing)

// Transcribe in background
Task.detached { [weak self] in
await self?.transcribeAndInsert(samples: samples)
// Transcribe in background. Hold the task so the next recording can
// cancel it if it's still pending.
transcriptionTask = Task.detached { [weak self] in
await self?.transcribeAndInsert(samples: samples, engine: engine)
}
}

private func transcribeAndInsert(samples: [Float]) async {
let engine = EngineResolver.resolve()

await MainActor.run {
self.activeEngineName = engine.displayName
}
private func transcribeAndInsert(samples: [Float], engine: TranscriptionEngine) async {

let startTime = CFAbsoluteTimeGetCurrent()

Expand Down
Loading
Loading