Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
"remark-gfm": "^4.0.1",
"sonner": "^2.0.7",
"tailwind-merge": "^3.3.1",
"zod": "^4.1.12",
"zod": "^4.4.3",
"zustand": "^5.0.8"
},
"devDependencies": {
Expand Down
42 changes: 28 additions & 14 deletions frontend/src/hooks/useSTT.ts
Original file line number Diff line number Diff line change
Expand Up @@ -186,25 +186,38 @@ export function useSTT(userId = 'default') {
}, [])

useEffect(() => {
if (!isEnabled || !isExternalProvider) {
return
}

if (!audioRecorder.current) {
audioRecorder.current = new AudioRecorder()
}

if (!recorderConfiguredRef.current) {
setupAudioRecorder(audioRecorder.current)
recorderConfiguredRef.current = true
}

return () => {
if (audioRecorder.current) {
audioRecorder.current.abort()
audioRecorder.current?.dispose()
audioRecorder.current = null
recorderConfiguredRef.current = false
}
}, [setupAudioRecorder])

// Keep the recorder's resources in sync with the STT mode: pre-warm it while the
// external provider is enabled, otherwise hand the microphone stream back.
useEffect(() => {
  const recorder = audioRecorder.current
  if (!recorder) return
  if (isEnabled && isExternalProvider) {
    // Fire-and-forget, but never let a failed warm-up escape as an unhandled
    // promise rejection; log it with context instead.
    // NOTE(review): presumably start() can still initialise on demand after a
    // failed prepare() — confirm against AudioRecorder.
    recorder.prepare().catch((err: unknown) => {
      console.warn('[useSTT] Failed to prepare audio recorder', err)
    })
  } else {
    recorder.releaseStream()
  }
}, [isEnabled, isExternalProvider])

useEffect(() => {
if (!isEnabled || !isExternalProvider) return
const handleVisibility = () => {
if (document.visibilityState === 'hidden') {
audioRecorder.current?.releaseStream()
}
}
}, [isEnabled, isExternalProvider, setupAudioRecorder])
document.addEventListener('visibilitychange', handleVisibility)
return () => document.removeEventListener('visibilitychange', handleVisibility)
}, [isEnabled, isExternalProvider])

const clearStartupTimeout = useCallback(() => {
if (startupTimeoutRef.current) {
Expand Down Expand Up @@ -250,8 +263,9 @@ export function useSTT(userId = 'default') {

if (isExternalProvider) {
if (!audioRecorder.current) {
audioRecorder.current = new AudioRecorder()
setupAudioRecorder(audioRecorder.current)
setIsError(true)
setError('Recorder not initialized')
return false
}

try {
Expand Down Expand Up @@ -321,7 +335,7 @@ export function useSTT(userId = 'default') {
return false
}
}
}, [isSupported, isEnabled, isExternalProvider, config.language, setupAudioRecorder, clearStartupTimeout, abortAndResetOnTimeout])
}, [isSupported, isEnabled, isExternalProvider, config.language, clearStartupTimeout, abortAndResetOnTimeout])

const stopRecording = useCallback(() => {
if (isExternalProvider && audioRecorder.current) {
Expand Down
225 changes: 221 additions & 4 deletions frontend/src/lib/audioRecorder.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { describe, it, expect } from 'vitest'
import { describe, it, expect, beforeEach, afterEach, vi, beforeAll } from 'vitest'
import { AudioRecorder, downsampleAndConvert, encodeWavFromInt16 } from './audioRecorder'

describe('downsampleAndConvert', () => {
Expand Down Expand Up @@ -51,6 +51,19 @@ describe('downsampleAndConvert', () => {
})

describe('encodeWavFromInt16', () => {
// Polyfill Blob.prototype.arrayBuffer via FileReader for test environments
// whose Blob implementation does not provide it.
beforeAll(() => {
  // Nothing to do when the runtime already implements arrayBuffer().
  if (typeof Blob.prototype.arrayBuffer === 'function') return

  Blob.prototype.arrayBuffer = async function arrayBuffer() {
    return new Promise<ArrayBuffer>((resolve, reject) => {
      const fileReader = new FileReader()
      fileReader.onload = () => {
        resolve(fileReader.result as ArrayBuffer)
      }
      fileReader.onerror = reject
      // `this` is the Blob the polyfilled method was invoked on.
      fileReader.readAsArrayBuffer(this)
    })
  }
})

it('should create a Blob with audio/wav type', () => {
const samples = new Int16Array(1000)
const blob = encodeWavFromInt16(samples, 16000, 1)
Expand Down Expand Up @@ -113,12 +126,11 @@ describe('encodeWavFromInt16', () => {
expect(data).toBe('data')
})

it('should have correct file size for 1000 samples', async () => {
it('should have correct file size for 1000 samples', () => {
const samples = new Int16Array(1000)
const blob = encodeWavFromInt16(samples, 16000, 1)
const arrayBuffer = await blob.arrayBuffer()

expect(arrayBuffer.byteLength).toBe(44 + 1000 * 2)
expect(blob.size).toBe(44 + 1000 * 2)
})

it('should handle different sample rates', async () => {
Expand Down Expand Up @@ -150,3 +162,208 @@ describe('AudioRecorder.isSupported', () => {
}).not.toThrow()
})
})

/**
 * Lifecycle tests for AudioRecorder (prepare / start / stop / releaseStream / dispose).
 *
 * The Web Audio and media-capture globals are replaced with in-memory fakes before
 * every test and restored to their exact previous state afterwards, including the
 * case where the environment did not define them at all.
 */
describe('AudioRecorder lifecycle', () => {
  class FakeAudioWorklet {
    addModule = vi.fn(async (_url: string) => undefined)
  }

  class FakeAudioWorkletNode {
    port = {
      onmessage: null as ((e: MessageEvent<Int16Array>) => void) | null,
      postMessage: vi.fn(),
    }
    disconnect = vi.fn()
  }

  class FakeAudioContext {
    state: 'suspended' | 'running' | 'closed' = 'suspended'
    sampleRate = 16000
    audioWorklet = new FakeAudioWorklet()
    resume = vi.fn(async () => {
      this.state = 'running'
    })
    suspend = vi.fn(async () => {
      this.state = 'suspended'
    })
    close = vi.fn(async () => {
      this.state = 'closed'
    })
    createMediaStreamSource = vi.fn(() => ({
      connect: vi.fn(),
      disconnect: vi.fn(),
    }))
  }

  class FakeMediaStreamTrack {
    enabled = true
    stop = vi.fn()
  }

  type FakeMediaStream = { getTracks: () => FakeMediaStreamTrack[] }

  /** Private AudioRecorder fields these tests inspect. */
  type RecorderInternals = {
    audioContext: FakeAudioContext
    mediaStream: FakeMediaStream | null
  }

  /** Single place where the recorder is cast to reach its private state. */
  const internalsOf = (recorder: AudioRecorder): RecorderInternals =>
    recorder as unknown as RecorderInternals

  let getUserMediaMock: ReturnType<typeof vi.fn>

  // Undo callbacks registered by overrideGlobal(); drained in afterEach.
  const restoreGlobals: Array<() => void> = []

  /**
   * Replaces `target[key]` with `value` and records how to undo it.
   *
   * The previous own-property descriptor is snapshotted so the override can be
   * reverted exactly: redefined when the property existed, deleted when it did
   * not (which also re-exposes any inherited accessor). Restoring only "truthy"
   * originals would leave the fakes installed for later tests in environments
   * that lack these APIs.
   */
  const overrideGlobal = (target: object, key: string, value: unknown): void => {
    const previous = Object.getOwnPropertyDescriptor(target, key)
    Object.defineProperty(target, key, {
      value,
      writable: true,
      configurable: true,
    })
    restoreGlobals.push(() => {
      if (previous) {
        Object.defineProperty(target, key, previous)
      } else {
        Reflect.deleteProperty(target, key)
      }
    })
  }

  beforeEach(() => {
    vi.clearAllMocks()

    // One stream with two tracks, shared by every getUserMedia() call in a test.
    const tracks = [new FakeMediaStreamTrack(), new FakeMediaStreamTrack()]
    const fakeStream: FakeMediaStream = { getTracks: () => tracks }
    getUserMediaMock = vi.fn(async () => fakeStream)

    overrideGlobal(navigator, 'mediaDevices', { getUserMedia: getUserMediaMock })
    overrideGlobal(globalThis, 'AudioContext', FakeAudioContext)
    overrideGlobal(globalThis, 'AudioWorkletNode', FakeAudioWorkletNode)
  })

  afterEach(() => {
    // Undo in reverse order of installation.
    while (restoreGlobals.length > 0) {
      restoreGlobals.pop()?.()
    }
  })

  it('prepare() creates a suspended AudioContext and loads the worklet without calling getUserMedia', async () => {
    const recorder = new AudioRecorder()
    await recorder.prepare()

    const ctx = internalsOf(recorder).audioContext
    expect(ctx).toBeDefined()
    expect(ctx.audioWorklet.addModule).toHaveBeenCalledTimes(1)
    expect(getUserMediaMock).not.toHaveBeenCalled()
    expect(ctx.state).toBe('suspended')
  })

  it('prepare() is idempotent across multiple calls (single addModule)', async () => {
    const recorder = new AudioRecorder()
    await recorder.prepare()
    await recorder.prepare()

    const ctx = internalsOf(recorder).audioContext
    expect(ctx.audioWorklet.addModule).toHaveBeenCalledTimes(1)
  })

  it('start() after prepare() resumes context and calls getUserMedia exactly once', async () => {
    const recorder = new AudioRecorder()
    await recorder.prepare()
    await recorder.start()

    const ctx = internalsOf(recorder).audioContext
    expect(ctx.resume).toHaveBeenCalledTimes(1)
    expect(getUserMediaMock).toHaveBeenCalledTimes(1)
    expect(ctx.state).toBe('running')
  })

  it('stop() suspends context instead of closing it', async () => {
    const recorder = new AudioRecorder()
    await recorder.prepare()
    await recorder.start()
    await recorder.stop()

    const ctx = internalsOf(recorder).audioContext
    // Check the context survived stop() before inspecting its mocks.
    expect(ctx).toBeDefined()
    expect(ctx.suspend).toHaveBeenCalled()
    expect(ctx.close).not.toHaveBeenCalled()
  })

  it('start() after stop() reuses the same AudioContext and MediaStream', async () => {
    const recorder = new AudioRecorder()
    await recorder.prepare()
    await recorder.start()
    await recorder.stop()

    const ctx = internalsOf(recorder).audioContext
    const addModuleCount = ctx.audioWorklet.addModule.mock.calls.length
    const getUserMediaCount = getUserMediaMock.mock.calls.length

    await recorder.start()

    expect(ctx.audioWorklet.addModule).toHaveBeenCalledTimes(addModuleCount)
    expect(getUserMediaMock).toHaveBeenCalledTimes(getUserMediaCount)
  })

  it('stop() sets all MediaStream tracks enabled = false', async () => {
    const recorder = new AudioRecorder()
    await recorder.prepare()
    await recorder.start()
    await recorder.stop()

    const tracks = internalsOf(recorder).mediaStream?.getTracks() ?? []
    expect(tracks).toHaveLength(2)
    for (const track of tracks) {
      expect(track.enabled).toBe(false)
    }
  })

  it('releaseStream() stops all tracks and nulls stream but keeps AudioContext suspended', async () => {
    const recorder = new AudioRecorder()
    await recorder.prepare()
    await recorder.start()
    await recorder.stop()

    const ctx = internalsOf(recorder).audioContext
    const initialContextState = ctx.state
    // Grab the tracks first: the stream reference is dropped by releaseStream().
    const tracks = internalsOf(recorder).mediaStream?.getTracks() ?? []

    recorder.releaseStream()

    expect(tracks).toHaveLength(2)
    for (const track of tracks) {
      expect(track.stop).toHaveBeenCalled()
    }
    expect(internalsOf(recorder).mediaStream).toBeNull()
    expect(ctx.state).toBe(initialContextState)

    // With the stream released, the next start() has to request the microphone again.
    await recorder.start()
    expect(getUserMediaMock).toHaveBeenCalledTimes(2)
  })

  it('dispose() closes context and stops tracks', async () => {
    const recorder = new AudioRecorder()
    await recorder.prepare()
    await recorder.start()
    await recorder.stop()

    const ctxBefore = internalsOf(recorder).audioContext
    const tracks = internalsOf(recorder).mediaStream?.getTracks() ?? []

    recorder.dispose()

    expect(ctxBefore.close).toHaveBeenCalled()
    expect(ctxBefore.state).toBe('closed')
    expect(tracks).toHaveLength(2)
    for (const track of tracks) {
      expect(track.stop).toHaveBeenCalled()
    }
  })

  it('start() after dispose() rebuilds context and re-acquires getUserMedia', async () => {
    const recorder = new AudioRecorder()
    await recorder.prepare()
    await recorder.start()
    await recorder.stop()

    const ctxBefore = internalsOf(recorder).audioContext
    const getUserMediaCountBefore = getUserMediaMock.mock.calls.length

    recorder.dispose()

    await recorder.start()

    const ctxAfter = internalsOf(recorder).audioContext
    expect(ctxAfter).not.toBe(ctxBefore)
    expect(ctxAfter.audioWorklet.addModule).toHaveBeenCalledTimes(1)
    expect(getUserMediaMock).toHaveBeenCalledTimes(getUserMediaCountBefore + 1)
  })
})
Loading