From f7a7d21cd9f978397168d64bb0d7d7f0545dc5e8 Mon Sep 17 00:00:00 2001 From: Pranav Joshi Date: Thu, 27 Nov 2025 16:57:21 +0530 Subject: [PATCH 1/8] post voice traffic only to socket --- src/directLine.mock.ts | 12 ++++ src/directLine.test.ts | 148 +++++++++++++++++++++++++++++++++++++++++ src/directLine.ts | 67 +++++++++++++++++-- 3 files changed, 220 insertions(+), 7 deletions(-) diff --git a/src/directLine.mock.ts b/src/directLine.mock.ts index 152fba1af..540452ad7 100644 --- a/src/directLine.mock.ts +++ b/src/directLine.mock.ts @@ -11,6 +11,18 @@ const notImplemented = (): never => { throw new Error('not implemented') }; export const mockActivity = (text: string): DirectLineExport.Activity => ({ type: 'message', from: { id: 'sender' }, text }); +export const mockVoiceActivity = (): DirectLineExport.Activity => ({ + type: 'event', + from: { id: 'sender' }, + name: 'voiceLiveEvent', + value: { + voiceLiveEvent: { + type: 'type', + delta: 'base64AudioChunk' + } + } +}); + // MOCK DirectLine Server (shared state used by Observable.ajax and WebSocket mocks) interface ActivitySocket { diff --git a/src/directLine.test.ts b/src/directLine.test.ts index f280e081c..3a70ab667 100644 --- a/src/directLine.test.ts +++ b/src/directLine.test.ts @@ -243,4 +243,152 @@ describe('MockSuite', () => { expect(actualError.status).toStrictEqual(429); expect(endTime - startTime).toStrictEqual(10); }); + + test('VoiceActivityWebSocket', () => { + const voiceActivity = DirectLineMock.mockVoiceActivity(); + directline = new DirectLineExport.DirectLine({ ...services, webSocket: true }); + + const actual: Array = []; + subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); + + let postActivityCompleted = false; + let postActivityError: any; + + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + yield directline.postActivity(voiceActivity) + .do(() => postActivityCompleted = true) + .catch(error => { + postActivityError = error; + return Observable.empty(); + }); + yield Observable.timer(200, scheduler); + }; + + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + scheduler.flush(); + + // Assert that voice activity was sent successfully without errors + expect(postActivityCompleted).toBe(true); + expect(postActivityError).toBeUndefined(); + }); + + test('VoiceActivityWithoutWebSocket', () => { + const voiceActivity = DirectLineMock.mockVoiceActivity(); + directline = new DirectLineExport.DirectLine({ ...services, webSocket: false }); + + let actualError: any; + + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + yield directline.postActivity(voiceActivity).catch(error => { + actualError = error; + return Observable.empty(); + }); + }; + + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + scheduler.flush(); + + expect(actualError.message).toContain('Voice activities require WebSocket to be enabled'); + }); + + test('VoiceVsTextActivityRouting', () => { + const voiceActivity = DirectLineMock.mockVoiceActivity(); + const textActivity = DirectLineMock.mockActivity('hello'); + + directline = new DirectLineExport.DirectLine({ ...services, webSocket: true }); + + const actual: Array = []; + subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); + + let voiceCompleted = false; + let textCompleted = false; + let voiceError: any; + let textError: any; + + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + + // Send text activity (should go through HTTP/Ajax) + yield directline.postActivity(textActivity) + .do(() => textCompleted = true) + .catch(error => { + textError = error; + return Observable.empty(); + }); + + yield Observable.timer(100, scheduler); + + // Send voice activity (should go through WebSocket) + yield directline.postActivity(voiceActivity) + .do(() => voiceCompleted = true) + .catch(error => { + voiceError = error; + return Observable.empty(); + }); + + yield Observable.timer(200, scheduler); + }; + + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + scheduler.flush(); + + // Both should complete successfully but through different paths + expect(textCompleted).toBe(true); + expect(voiceCompleted).toBe(true); + expect(textError).toBeUndefined(); + expect(voiceError).toBeUndefined(); + + // Text activity should echo back, voice activity should not + expect(actual).toContainEqual(textActivity); + expect(actual).not.toContainEqual(voiceActivity); + }); + + test('InvalidVoiceActivityStructures', () => { + const invalidStructures: DirectLineExport.Activity[] = [ + { type: 'event', from: { id: 'user' }, value: null } as any, + { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: null } } as any, + { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: {} } }, + { type: 'event', from: { id: 'user' }, value: { notVoice: { data: 'test' } } } as any + ]; + + directline = new DirectLineExport.DirectLine({ ...services, webSocket: true }); + + const actual: Array = []; + subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); + + let completedCount = 0; + let errorCount = 0; + + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + + // Send each invalid structure - should all go through HTTP path + for (const invalidActivity of invalidStructures) { + yield directline.postActivity(invalidActivity) + .do(() => completedCount++) + .catch(error => { + errorCount++; + return Observable.empty(); + }); + yield Observable.timer(100, scheduler); + } + + yield Observable.timer(200, scheduler); + }; + + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + scheduler.flush(); + + // All invalid structures should complete successfully through HTTP path + expect(completedCount).toBe(invalidStructures.length); + expect(errorCount).toBe(0); + + // All invalid structures should echo back (confirming they went through HTTP, not WebSocket) + expect(actual).toHaveLength(invalidStructures.length); + invalidStructures.forEach(invalidActivity => { + expect(actual).toContainEqual(invalidActivity); + }); + }); }); diff --git a/src/directLine.ts b/src/directLine.ts index 7cc1b15fc..be6ca859a 100644 --- a/src/directLine.ts +++ b/src/directLine.ts @@ -470,6 +470,7 @@ export class DirectLine implements IBotConnection { public referenceGrammarId: string; private timeout = 20 * 1000; private retries: number; + private webSocketConnection: WebSocket | null = null; private localeOnStartConversation: string; private userIdOnStartConversation: string; @@ -765,6 +766,32 @@ export class DirectLine implements IBotConnection { if (activity.type === "message" && activity.attachments && activity.attachments.length > 0) return this.postMessageWithAttachments(activity); + // if it is voice activity, send it through webSocket as voice over http is not supported in ABS. + // ABS limitation - client to server push is not being processed over web socket for text. + // Once it is implemented, we can remove this and send all traffic to the webSocket + if (this.isVoiceEventActivity(activity)) { + if (!this.webSocket) { + return Observable.throw(new Error('Voice activities require WebSocket to be enabled'), this.services.scheduler); + } + return this.checkConnection(true) + .flatMap(_ => + Observable.create((subscriber: Subscriber) => { + const envelope = { activities: [activity] }; + try { + if (!this.webSocketConnection || this.webSocketConnection.readyState !== WebSocket.OPEN) { + throw new Error('WebSocket connection not ready for voice activities'); + } + this.webSocketConnection.send(JSON.stringify(envelope)); + subscriber.next(envelope); + subscriber.complete(); + } catch (e) { + subscriber.error(e); + } + }) + ) + .catch(error => this.catchExpiredToken(error)); + } + // If we're not connected to the bot, get connected // Will throw an error if we are not connected konsole.log("postActivity", activity); @@ -786,6 +813,32 @@ export class DirectLine implements IBotConnection { .catch(error => this.catchExpiredToken(error)); } + // Until activity protocol changes for multi-modal output are ratified, this method + // identifies voice event activities using the given activity example below as payload + // to send voice chunks over activity protocol. The activity structure shown serves as + // the current solution for transmitting voice data: + // { "type": "event", "value": { "voiceLiveEvent": { "type": "response.audio.delta", "delta": "" } } } + private isVoiceEventActivity(activity: Activity) { + if (activity.type !== 'event') { + return false; + } + + if (!activity?.value || typeof activity?.value !== 'object') { + return false; + } + + const vle = activity?.value?.voiceLiveEvent; + if (!vle || typeof vle !== 'object') { + return false; + } + + if (Object.keys(vle).length === 0) { + return false; + } + + return true; + } + private postMessageWithAttachments(message: Message) { const { attachments } = message; // We clean the attachments but making sure every attachment has unique name. @@ -938,11 +991,11 @@ export class DirectLine implements IBotConnection { private observableWebSocket() { return Observable.create((subscriber: Subscriber) => { konsole.log("creating WebSocket", this.streamUrl); - const ws = new this.services.WebSocket(this.streamUrl); + this.webSocketConnection = new this.services.WebSocket(this.streamUrl); let sub: Subscription; let closed: boolean; - ws.onopen = open => { + this.webSocketConnection.onopen = open => { konsole.log("WebSocket open", open); // Chrome is pretty bad at noticing when a WebSocket connection is broken. // If we periodically ping the server with empty messages, it helps Chrome @@ -950,14 +1003,14 @@ export class DirectLine implements IBotConnection { // error, and that give us the opportunity to attempt to reconnect. sub = Observable.interval(this.timeout, this.services.scheduler).subscribe(_ => { try { - ws.send("") + this.webSocketConnection.send("") } catch(e) { konsole.log("Ping error", e); } }); } - ws.onclose = close => { + this.webSocketConnection.onclose = close => { konsole.log("WebSocket close", close); if (sub) sub.unsubscribe(); @@ -967,7 +1020,7 @@ export class DirectLine implements IBotConnection { closed = true; } - ws.onerror = error => { + this.webSocketConnection.onerror = error => { konsole.log("WebSocket error", error); if (sub) sub.unsubscribe(); @@ -977,14 +1030,14 @@ export class DirectLine implements IBotConnection { closed = true; } - ws.onmessage = message => message.data && subscriber.next(JSON.parse(message.data)); + this.webSocketConnection.onmessage = message => message.data && subscriber.next(JSON.parse(message.data)); // This is the 'unsubscribe' method, which is called when this observable is disposed. // When the WebSocket closes itself, we throw an error, and this function is eventually called. // When the observable is closed first (e.g. when tearing down a WebChat instance) then // we need to manually close the WebSocket. return () => { - if (ws.readyState === 0 || ws.readyState === 1) ws.close(); + if (this.webSocketConnection.readyState === 0 || this.webSocketConnection.readyState === 1) this.webSocketConnection.close(); } }) as Observable } From 19d9e7729a11ea7a781844738cac139e10b0e637 Mon Sep 17 00:00:00 2001 From: Pranav Joshi Date: Fri, 26 Dec 2025 12:35:05 +0530 Subject: [PATCH 2/8] comment resolved --- src/directLine.test.ts | 157 +++++++++++++++++++---------------------- src/directLine.ts | 34 ++++----- 2 files changed, 85 insertions(+), 106 deletions(-) diff --git a/src/directLine.test.ts b/src/directLine.test.ts index 3a70ab667..90ecb8dd6 100644 --- a/src/directLine.test.ts +++ b/src/directLine.test.ts @@ -274,121 +274,110 @@ describe('MockSuite', () => { }); test('VoiceActivityWithoutWebSocket', () => { - const voiceActivity = DirectLineMock.mockVoiceActivity(); - directline = new DirectLineExport.DirectLine({ ...services, webSocket: false }); + const voiceActivity = DirectLineMock.mockVoiceActivity(); + directline = new DirectLineExport.DirectLine({ ...services, webSocket: false }); - let actualError: any; + let actualError: any; - const scenario = function* (): IterableIterator> { - yield Observable.timer(200, scheduler); - yield directline.postActivity(voiceActivity).catch(error => { - actualError = error; - return Observable.empty(); - }); - }; + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + yield directline.postActivity(voiceActivity).catch(error => { + actualError = error; + return Observable.empty(); + }); + }; - subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); - scheduler.flush(); + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + scheduler.flush(); - expect(actualError.message).toContain('Voice activities require WebSocket to be enabled'); + expect(actualError.message).toContain('Voice activities require WebSocket to be enabled'); }); test('VoiceVsTextActivityRouting', () => { - const voiceActivity = DirectLineMock.mockVoiceActivity(); - const textActivity = DirectLineMock.mockActivity('hello'); + const voiceActivity = DirectLineMock.mockVoiceActivity(); + const textActivity = DirectLineMock.mockActivity('hello'); - directline = new DirectLineExport.DirectLine({ ...services, webSocket: true }); + directline = new DirectLineExport.DirectLine({ ...services, webSocket: true }); - const actual: Array = []; - subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); + const actual: Array = []; + subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); - let voiceCompleted = false; - let textCompleted = false; - let voiceError: any; - let textError: any; + let voiceCompleted = false; + let textCompleted = false; + let voiceError: any; + let textError: any; - const scenario = function* (): IterableIterator> { - yield Observable.timer(200, scheduler); + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); - // Send text activity (should go through HTTP/Ajax) - yield directline.postActivity(textActivity) - .do(() => textCompleted = true) - .catch(error => { - textError = error; - return Observable.empty(); - }); + // Send text activity (should go through HTTP/Ajax) + yield directline.postActivity(textActivity) + .do(() => textCompleted = true) + .catch(error => { + textError = error; + return Observable.empty(); + }); - yield Observable.timer(100, scheduler); + yield Observable.timer(100, scheduler); - // Send voice activity (should go through WebSocket) - yield directline.postActivity(voiceActivity) - .do(() => voiceCompleted = true) - .catch(error => { - voiceError = error; - return Observable.empty(); - }); + // Send voice activity (should go through WebSocket) + yield directline.postActivity(voiceActivity) + .do(() => voiceCompleted = true) + .catch(error => { + voiceError = error; + return Observable.empty(); + }); - yield Observable.timer(200, scheduler); - }; + yield Observable.timer(200, scheduler); + }; - subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); - scheduler.flush(); + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + scheduler.flush(); - // Both should complete successfully but through different paths - expect(textCompleted).toBe(true); - expect(voiceCompleted).toBe(true); - expect(textError).toBeUndefined(); - expect(voiceError).toBeUndefined(); + // Both should complete successfully but through different paths + expect(textCompleted).toBe(true); + expect(voiceCompleted).toBe(true); + expect(textError).toBeUndefined(); + expect(voiceError).toBeUndefined(); - // Text activity should echo back, voice activity should not - expect(actual).toContainEqual(textActivity); - expect(actual).not.toContainEqual(voiceActivity); + // Text activity should echo back, voice activity should not + expect(actual).toContainEqual(textActivity); + expect(actual).not.toContainEqual(voiceActivity); }); - test('InvalidVoiceActivityStructures', () => { - const invalidStructures: DirectLineExport.Activity[] = [ - { type: 'event', from: { id: 'user' }, value: null } as any, - { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: null } } as any, + test.each([ + { type: 'event', from: { id: 'user' }, value: null }, + { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: null } }, { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: {} } }, - { type: 'event', from: { id: 'user' }, value: { notVoice: { data: 'test' } } } as any - ]; - - directline = new DirectLineExport.DirectLine({ ...services, webSocket: true }); - - const actual: Array = []; - subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); + { type: 'event', from: { id: 'user' }, value: { notVoice: { data: 'test' } } } + ] as DirectLineExport.Activity[])('InvalidVoiceActivityStructure: %p', (invalidActivity) => { + directline = new DirectLineExport.DirectLine({ ...services, webSocket: true }); - let completedCount = 0; - let errorCount = 0; + const actual: Array = []; + subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); - const scenario = function* (): IterableIterator> { - yield Observable.timer(200, scheduler); + let completed = false; + let activityError: any; - // Send each invalid structure - should all go through HTTP path - for (const invalidActivity of invalidStructures) { + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); yield directline.postActivity(invalidActivity) - .do(() => completedCount++) + .do(() => completed = true) .catch(error => { - errorCount++; + activityError = error; return Observable.empty(); }); - yield Observable.timer(100, scheduler); - } - - yield Observable.timer(200, scheduler); - }; + yield Observable.timer(200, scheduler); + }; - subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); - scheduler.flush(); + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + scheduler.flush(); - // All invalid structures should complete successfully through HTTP path - expect(completedCount).toBe(invalidStructures.length); - expect(errorCount).toBe(0); + // Should complete successfully through HTTP path + expect(completed).toBe(true); + expect(activityError).toBeUndefined(); - // All invalid structures should echo back (confirming they went through HTTP, not WebSocket) - expect(actual).toHaveLength(invalidStructures.length); - invalidStructures.forEach(invalidActivity => { + // Should echo back (confirming it went through HTTP, not WebSocket) expect(actual).toContainEqual(invalidActivity); - }); }); }); diff --git a/src/directLine.ts b/src/directLine.ts index be6ca859a..7f3d0d375 100644 --- a/src/directLine.ts +++ b/src/directLine.ts @@ -769,7 +769,7 @@ export class DirectLine implements IBotConnection { // if it is voice activity, send it through webSocket as voice over http is not supported in ABS. // ABS limitation - client to server push is not being processed over web socket for text. // Once it is implemented, we can remove this and send all traffic to the webSocket - if (this.isVoiceEventActivity(activity)) { + if (DirectLine.isVoiceEventActivity(activity)) { if (!this.webSocket) { return Observable.throw(new Error('Voice activities require WebSocket to be enabled'), this.services.scheduler); } @@ -778,8 +778,8 @@ export class DirectLine implements IBotConnection { Observable.create((subscriber: Subscriber) => { const envelope = { activities: [activity] }; try { - if (!this.webSocketConnection || this.webSocketConnection.readyState !== WebSocket.OPEN) { - throw new Error('WebSocket connection not ready for voice activities'); + if (!this.webSocketConnection || this.webSocketConnection.readyState !== WebSocket.OPEN) { + throw new Error('WebSocket connection not ready for voice activities'); } this.webSocketConnection.send(JSON.stringify(envelope)); subscriber.next(envelope); @@ -818,25 +818,15 @@ export class DirectLine implements IBotConnection { // to send voice chunks over activity protocol. The activity structure shown serves as // the current solution for transmitting voice data: // { "type": "event", "value": { "voiceLiveEvent": { "type": "response.audio.delta", "delta": "" } } } - private isVoiceEventActivity(activity: Activity) { - if (activity.type !== 'event') { - return false; - } - - if (!activity?.value || typeof activity?.value !== 'object') { - return false; - } - - const vle = activity?.value?.voiceLiveEvent; - if (!vle || typeof vle !== 'object') { - return false; - } - - if (Object.keys(vle).length === 0) { - return false; - } - - return true; + private static isVoiceEventActivity(activity: Activity) { + return ( + activity.type === 'event' && + activity?.value && + typeof activity?.value === 'object' && + activity?.value?.voiceLiveEvent && + typeof activity?.value?.voiceLiveEvent === 'object' && + Object.keys(activity?.value?.voiceLiveEvent).length > 0 + ); } private postMessageWithAttachments(message: Message) { From 7ba71907be8dedc687b160df983fc8f875910d85 Mon Sep 17 00:00:00 2001 From: Pranav Joshi Date: Wed, 7 Jan 2026 16:21:30 +0000 Subject: [PATCH 3/8] align close to activity spec --- src/directLine.mock.ts | 8 ++++---- src/directLine.test.ts | 4 ++-- src/directLine.ts | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/directLine.mock.ts b/src/directLine.mock.ts index 540452ad7..3ff594124 100644 --- a/src/directLine.mock.ts +++ b/src/directLine.mock.ts @@ -14,11 +14,11 @@ export const mockActivity = (text: string): DirectLineExport.Activity => ({ type export const mockVoiceActivity = (): DirectLineExport.Activity => ({ type: 'event', from: { id: 'sender' }, - name: 'voiceLiveEvent', + name: 'voiceEvent', value: { - voiceLiveEvent: { - type: 'type', - delta: 'base64AudioChunk' + voice: { + contentType: 'type', + contentUrl: 'base64AudioChunk' } } }); diff --git a/src/directLine.test.ts b/src/directLine.test.ts index 90ecb8dd6..bfa647341 100644 --- a/src/directLine.test.ts +++ b/src/directLine.test.ts @@ -347,8 +347,8 @@ describe('MockSuite', () => { test.each([ { type: 'event', from: { id: 'user' }, value: null }, - { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: null } }, - { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: {} } }, + { type: 'event', from: { id: 'user' }, value: { voice: null } }, + { type: 'event', from: { id: 'user' }, value: { voice: {} } }, { type: 'event', from: { id: 'user' }, value: { notVoice: { data: 'test' } } } ] as DirectLineExport.Activity[])('InvalidVoiceActivityStructure: %p', (invalidActivity) => { directline = new DirectLineExport.DirectLine({ ...services, webSocket: true }); diff --git a/src/directLine.ts b/src/directLine.ts index 7f3d0d375..bf45227d5 100644 --- a/src/directLine.ts +++ b/src/directLine.ts @@ -817,15 +817,15 @@ export class DirectLine implements IBotConnection { // identifies voice event activities using the given activity example below as payload // to send voice chunks over activity protocol. The activity structure shown serves as // the current solution for transmitting voice data: - // { "type": "event", "value": { "voiceLiveEvent": { "type": "response.audio.delta", "delta": "" } } } + // { "type": "event", "value": { "voice": { "contentUrl": "" } } } private static isVoiceEventActivity(activity: Activity) { return ( activity.type === 'event' && activity?.value && typeof activity?.value === 'object' && - activity?.value?.voiceLiveEvent && - typeof activity?.value?.voiceLiveEvent === 'object' && - Object.keys(activity?.value?.voiceLiveEvent).length > 0 + activity?.value?.voice && + typeof activity?.value?.voice === 'object' && + Object.keys(activity?.value?.voice).length > 0 ); } From 8a0d8debe447f00e260eabc0002b99fc6693f825 Mon Sep 17 00:00:00 2001 From: Pranav Joshi Date: Tue, 7 Apr 2026 14:46:09 +0000 Subject: [PATCH 4/8] voice mode handling --- src/directLine.mock.ts | 35 ++++ src/directLine.test.ts | 457 ++++++++++++++++++++++++++++++++--------- src/directLine.ts | 228 +++++++++++++++++--- 3 files changed, 591 insertions(+), 129 deletions(-) diff --git a/src/directLine.mock.ts b/src/directLine.mock.ts index 3ff594124..8713a1a5f 100644 --- a/src/directLine.mock.ts +++ b/src/directLine.mock.ts @@ -41,6 +41,7 @@ export interface Conversation { export interface Server { scheduler: TestScheduler; conversation: Conversation; + webSocketUrl?: string; } const tokenPrefix = 'token'; @@ -223,6 +224,7 @@ type EventHandler = (this: WebSocket, ev: E) => any; export const mockWebSocket = (server: Server): WebSocketConstructor => class MockWebSocket implements WebSocket, ActivitySocket { constructor(url: string, protocols?: string | string[]) { + server.webSocketUrl = url; server.scheduler.schedule(() => { this.readyState = WebSocket.CONNECTING; @@ -297,3 +299,36 @@ export const mockServices = (server: Server, scheduler: TestScheduler): DirectLi ajax: mockAjax(server), random: () => 0, }); + +// Helper to inject agent.capabilities event with audio support +export const mockAgentCapabilitiesEvent = (): DirectLineExport.Activity => ({ + type: 'event', + from: { id: 'bot' }, + name: 'agent.capabilities', + value: { + modalities: { + text: {}, + audio: { + fonts: [], + tools: [], + instructions: [] + } + } + } +}); + +// Helper to inject agent.capabilities event into WebSocket +export const injectAgentCapabilities = (server: Server): void => { + const capabilitiesEvent = mockAgentCapabilitiesEvent(); + const activityGroup: DirectLineExport.ActivityGroup = { + activities: [capabilitiesEvent], + watermark: server.conversation.history.length.toString(), + }; + const message = new MessageEvent('type', { data: JSON.stringify(activityGroup) }); + server.conversation.sockets.forEach(s => s.onmessage(message)); +}; + +// Helper to check if WebSocket URL contains multimodal path +export const hasMultimodalUrl = (server: Server): boolean => { + return !!server.webSocketUrl?.includes('/stream/multimodal'); +}; diff --git a/src/directLine.test.ts b/src/directLine.test.ts index 70459b13d..89a589941 100644 --- a/src/directLine.test.ts +++ b/src/directLine.test.ts @@ -391,140 +391,395 @@ describe('MockSuite', () => { }); }); - test('VoiceActivityWebSocket', () => { - const voiceActivity = DirectLineMock.mockVoiceActivity(); - directline = new DirectLineExport.DirectLine({ ...services, webSocket: true }); + describe('VoiceMode', () => { - const actual: Array = []; - subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); + describe('enableVoiceMode: true (explicit)', () => { - let postActivityCompleted = false; - let postActivityError: any; + test('voice mode enabled and uses /stream/multimodal URL', () => { + directline = new DirectLineExport.DirectLine({ ...services, enableVoiceMode: true }); - const scenario = function* (): IterableIterator> { - yield Observable.timer(200, scheduler); - yield directline.postActivity(voiceActivity) - .do(() => postActivityCompleted = true) - .catch(error => { - postActivityError = error; - return Observable.empty(); + // Verify voice mode is enabled synchronously + expect(directline.getIsVoiceModeEnabled()).toBe(true); + + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + }; + + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + subscriptions.push(directline.activity$.subscribe()); + + scheduler.flush(); + + // Verify WebSocket URL contains /stream/multimodal + expect(DirectLineMock.hasMultimodalUrl(server)).toBe(true); + }); + + test('postActivity sends via WebSocket (does not echo back)', () => { + directline = new DirectLineExport.DirectLine({ ...services, enableVoiceMode: true }); + + const textActivity = DirectLineMock.mockActivity('hello-voice-mode'); + + let postCompleted = false; + const actual: Array = []; + + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + yield directline.postActivity(textActivity).do(() => postCompleted = true); + yield Observable.timer(100, scheduler); + }; + + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); + + scheduler.flush(); + + expect(postCompleted).toBe(true); + // WebSocket path: activity does NOT echo back (server doesn't broadcast WS-sent activities) + expect(actual).not.toContainEqual(textActivity); + }); + + test('reconnect after WebSocket close still uses /stream/multimodal URL', () => { + directline = new DirectLineExport.DirectLine({ ...services, enableVoiceMode: true }); + + // First verify initial connection uses multimodal URL + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + }; + + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + subscriptions.push(directline.activity$.subscribe()); + + scheduler.flush(); + + // Verify initial connection uses multimodal + expect(DirectLineMock.hasMultimodalUrl(server)).toBe(true); + + // Simulate WebSocket close (triggers reconnect) + DirectLineMock.injectClose(server); + + // Continue scheduler to allow reconnect + const reconnectScenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + }; + + subscriptions.push(lazyConcat(reconnectScenario()).observeOn(scheduler).subscribe()); + + scheduler.flush(); + + // After reconnect, should still use /stream/multimodal URL + expect(DirectLineMock.hasMultimodalUrl(server)).toBe(true); + expect(directline.getIsVoiceModeEnabled()).toBe(true); + }); + }); + + describe('enableVoiceMode: false (explicit)', () => { + + test('voice mode disabled and uses standard /stream URL', () => { + directline = new DirectLineExport.DirectLine({ ...services, enableVoiceMode: false }); + + // Verify voice mode is disabled + expect(directline.getIsVoiceModeEnabled()).toBe(false); + + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + }; + + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + subscriptions.push(directline.activity$.subscribe()); + + scheduler.flush(); + + // Verify WebSocket URL does NOT contain /stream/multimodal + expect(DirectLineMock.hasMultimodalUrl(server)).toBe(false); + }); + + test('postActivity sends via HTTP (echoes back)', () => { + directline = new DirectLineExport.DirectLine({ ...services, enableVoiceMode: false }); + + const textActivity = DirectLineMock.mockActivity('hello-http'); + + const actual: Array = []; + subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); + + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + yield directline.postActivity(textActivity); + yield Observable.timer(100, scheduler); + }; + + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + + scheduler.flush(); + + // HTTP path: activity echoes back via activity$ (server broadcasts HTTP-posted activities) + expect(actual).toContainEqual(textActivity); + }); + + test('403 post returns retry and still uses standard /stream URL', () => { + services.ajax = DirectLineMock.mockAjax(server, (urlOrRequest) => { + if (typeof urlOrRequest === 'string') { + throw new Error(); + } + + if (urlOrRequest.url && urlOrRequest.url.indexOf('/conversations') > 0 && !/activities/u.test(urlOrRequest.url)) { + const response: Partial = { + response: server.conversation, + status: 201, + xhr: { getResponseHeader: () => 'n/a' } as unknown as XMLHttpRequest + }; + return response as AjaxResponse; + } + + if (urlOrRequest.url && /activities/u.test(urlOrRequest.url)) { + const response: Partial = { + status: 403, + xhr: { getResponseHeader: () => 'n/a' } as unknown as XMLHttpRequest + }; + const error = new Error('Forbidden'); + throw Object.assign(error, response); + } + + throw new Error(); }); - yield Observable.timer(200, scheduler); - }; - subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); - scheduler.flush(); + directline = new DirectLineExport.DirectLine({ ...services, enableVoiceMode: false }); - // Assert that voice activity was sent successfully without errors - expect(postActivityCompleted).toBe(true); - expect(postActivityError).toBeUndefined(); - }); + const retryActivity = DirectLineMock.mockActivity('will-retry-false'); + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + yield directline.postActivity(retryActivity); + }; - test('VoiceActivityWithoutWebSocket', () => { - const voiceActivity = DirectLineMock.mockVoiceActivity(); - directline = new DirectLineExport.DirectLine({ ...services, webSocket: false }); + let postResult: string | undefined; + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe({ + next: v => { postResult = v as string; }, + error: () => {}, + complete: () => {} + })); - let actualError: any; + scheduler.flush(); - const scenario = function* (): IterableIterator> { - yield Observable.timer(200, scheduler); - yield directline.postActivity(voiceActivity).catch(error => { - actualError = error; - return Observable.empty(); + expect(postResult).toStrictEqual('retry'); + expect(DirectLineMock.hasMultimodalUrl(server)).toBe(false); }); - }; + }); - subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); - scheduler.flush(); + describe('enableVoiceMode: undefined (auto-detect)', () => { - expect(actualError.message).toContain('Voice activities require WebSocket to be enabled'); - }); + test('non-iframe: voice mode disabled and uses standard /stream URL', () => { + // Default test environment is not an iframe (window.self === window.top) + directline = new DirectLineExport.DirectLine({ ...services }); - test('VoiceVsTextActivityRouting', () => { - const voiceActivity = DirectLineMock.mockVoiceActivity(); - const textActivity = DirectLineMock.mockActivity('hello'); + // Verify voice mode is disabled (synchronous - no iframe check needed) + expect(directline.getIsVoiceModeEnabled()).toBe(false); - directline = new DirectLineExport.DirectLine({ ...services, webSocket: true }); + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + }; - const actual: Array = []; - subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + subscriptions.push(directline.activity$.subscribe()); - let voiceCompleted = false; - let textCompleted = false; - let voiceError: any; - let textError: any; + scheduler.flush(); - const scenario = function* (): IterableIterator> { - yield Observable.timer(200, scheduler); + // Verify standard /stream URL (not multimodal) + expect(DirectLineMock.hasMultimodalUrl(server)).toBe(false); + }); + + test('non-iframe: 403 post returns retry and still uses standard /stream URL', () => { + services.ajax = DirectLineMock.mockAjax(server, (urlOrRequest) => { + if (typeof urlOrRequest === 'string') { + throw new Error(); + } + + if (urlOrRequest.url && urlOrRequest.url.indexOf('/conversations') > 0 && !/activities/u.test(urlOrRequest.url)) { + const response: Partial = { + response: server.conversation, + status: 201, + xhr: { getResponseHeader: () => 'n/a' } as unknown as XMLHttpRequest + }; + return response as AjaxResponse; + } + + if (urlOrRequest.url && /activities/u.test(urlOrRequest.url)) { + const response: Partial = { + status: 403, + xhr: { getResponseHeader: () => 'n/a' } as unknown as XMLHttpRequest + }; + const error = new Error('Forbidden'); + throw Object.assign(error, response); + } - // Send text activity (should go through HTTP/Ajax) - yield directline.postActivity(textActivity) - .do(() => textCompleted = true) - .catch(error => { - textError = error; - return Observable.empty(); + throw new Error(); }); - yield Observable.timer(100, scheduler); + directline = new DirectLineExport.DirectLine({ ...services }); - // Send voice activity (should go through WebSocket) - yield directline.postActivity(voiceActivity) - .do(() => voiceCompleted = true) - .catch(error => { - voiceError = error; - return Observable.empty(); + const retryActivity = DirectLineMock.mockActivity('will-retry-undefined'); + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + yield directline.postActivity(retryActivity); + }; + + let postResult: string | undefined; + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe({ + next: v => { postResult = v as string; }, + error: () => {}, + complete: () => {} + })); + + scheduler.flush(); + + expect(postResult).toStrictEqual('retry'); + expect(DirectLineMock.hasMultimodalUrl(server)).toBe(false); + }); + + test('iframe WITH microphone permission: voice mode enabled and uses /stream/multimodal URL', async () => { + // Mock iframe detection: window.self !== window.top + const originalSelf = window.self; + Object.defineProperty(window, 'self', { + value: { notTop: true }, + writable: true, + configurable: true }); - yield Observable.timer(200, scheduler); - }; + // Mock permissionsPolicy.allowsFeature('microphone') to return true + const originalPermissionsPolicy = (document as any).permissionsPolicy; + (document as any).permissionsPolicy = { + allowsFeature: (feature: string) => feature === 'microphone' + }; - subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); - scheduler.flush(); + try { + directline = new DirectLineExport.DirectLine({ ...services }); + await Promise.resolve(); - // Both should complete successfully but through different paths - expect(textCompleted).toBe(true); - expect(voiceCompleted).toBe(true); - expect(textError).toBeUndefined(); - expect(voiceError).toBeUndefined(); + const textActivity = DirectLineMock.mockActivity('iframe-with-mic'); + let postCompleted = false; + const actual: Array = []; - // Text activity should echo back, voice activity should not - expect(actual).toContainEqual(textActivity); - expect(actual).not.toContainEqual(voiceActivity); - }); + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + yield directline.postActivity(textActivity).do(() => postCompleted = true); + yield Observable.timer(100, scheduler); + }; - test.each([ - { type: 'event', from: { id: 'user' }, value: null }, - { type: 'event', from: { id: 'user' }, value: { voice: null } }, - { type: 'event', from: { id: 'user' }, value: { voice: {} } }, - { type: 'event', from: { id: 'user' }, value: { notVoice: { data: 'test' } } } - ] as DirectLineExport.Activity[])('InvalidVoiceActivityStructure: %p', (invalidActivity) => { - directline = new DirectLineExport.DirectLine({ ...services, webSocket: true }); + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); + + scheduler.flush(); + + expect(directline.getIsVoiceModeEnabled()).toBe(true); + + // Verify /stream/multimodal URL + expect(DirectLineMock.hasMultimodalUrl(server)).toBe(true); + // Verify WebSocket routing: activity does NOT echo back + expect(postCompleted).toBe(true); + expect(actual).not.toContainEqual(textActivity); + } finally { + Object.defineProperty(window, 'self', { + value: originalSelf, + writable: true, + configurable: true + }); + if (originalPermissionsPolicy) { + (document as any).permissionsPolicy = originalPermissionsPolicy; + } else { + delete (document as any).permissionsPolicy; + } + } + }); - const actual: Array = []; - subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); + test('iframe WITHOUT microphone permission: voice mode disabled', async () => { + // Mock iframe detection: window.self !== window.top + const originalSelf = window.self; + Object.defineProperty(window, 'self', { + value: { notTop: true }, + writable: true, + configurable: true + }); - let completed = false; - let activityError: any; + // Mock permissionsPolicy.allowsFeature('microphone') to return false + const originalPermissionsPolicy = (document as any).permissionsPolicy; + (document as any).permissionsPolicy = { + allowsFeature: (feature: string) => false + }; - const scenario = function* (): IterableIterator> { - yield Observable.timer(200, scheduler); - yield directline.postActivity(invalidActivity) - .do(() => completed = true) - .catch(error => { - activityError = error; - return Observable.empty(); + try { + directline = new DirectLineExport.DirectLine({ ...services }); + + expect(directline.getIsVoiceModeEnabled()).toBe(false); + + const textActivity = DirectLineMock.mockActivity('iframe-no-mic'); + const actual: Array = []; + + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + yield directline.postActivity(textActivity); + yield Observable.timer(100, scheduler); + }; + + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); + subscriptions.push(directline.activity$.subscribe(a => actual.push(a))); + + scheduler.flush(); + + // Verify standard /stream URL (not multimodal) + expect(DirectLineMock.hasMultimodalUrl(server)).toBe(false); + // Verify HTTP routing: activity echoes back + expect(actual).toContainEqual(textActivity); + } finally { + Object.defineProperty(window, 'self', { + value: originalSelf, + writable: true, + configurable: true + }); + if (originalPermissionsPolicy) { + (document as any).permissionsPolicy = originalPermissionsPolicy; + } else { + delete (document as any).permissionsPolicy; + } + } + }); + }); + + describe('Voice Configuration & Events', () => { + + test('getVoiceConfiguration returns undefined initially', () => { + directline = new DirectLineExport.DirectLine({ ...services }); + + expect(directline.getVoiceConfiguration()).toBeUndefined(); + }); + + test('agent.capabilities event sets voiceConfiguration and fires capabilitieschanged', () => { + directline = new DirectLineExport.DirectLine({ ...services }); + + let eventFired = false; + directline.addEventListener('capabilitieschanged', () => { + eventFired = true; }); - yield Observable.timer(200, scheduler); - }; - subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); - scheduler.flush(); + subscriptions.push(directline.activity$.subscribe()); + + const scenario = function* (): IterableIterator> { + yield Observable.timer(200, scheduler); + }; + + subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe()); - // Should complete successfully through HTTP path - expect(completed).toBe(true); - expect(activityError).toBeUndefined(); + scheduler.flush(); - // Should echo back (confirming it went through HTTP, not WebSocket) - expect(actual).toContainEqual(invalidActivity); + // Inject agent.capabilities event + DirectLineMock.injectAgentCapabilities(server); + + // Verify voiceConfiguration is set + const config = directline.getVoiceConfiguration(); + expect(config).toBeDefined(); + expect(config?.sampleRate).toBe(24000); + expect(config?.chunkIntervalMs).toBe(100); + + // Verify capabilitieschanged event fired + expect(eventFired).toBe(true); + }); + }); }); }); diff --git a/src/directLine.ts b/src/directLine.ts index f415ebc17..f1af97f68 100644 --- a/src/directLine.ts +++ b/src/directLine.ts @@ -381,7 +381,14 @@ export interface DirectLineOptions { * If true, every outgoing activity will include deliveryMode: 'stream'. * If false/omitted, deliveryMode is not sent (defaults to 'normal' in ABS). */ - streaming?: boolean + streaming?: boolean, + /** + * Enable voice mode for audio streaming. + * - If true: voice mode enabled, uses /stream/multimodal endpoint, all traffic sent via WebSocket + * - If false: voice mode disabled, uses existing flow as is (/stream endpoint with http post) + * - If undefined: auto-detect for iframes with allow="microphone" attribute + */ + enableVoiceMode?: boolean } export interface Services { @@ -451,6 +458,52 @@ const konsole = { } } +/** + * Checks if the current context is running inside an iframe. + */ +const isInIframe = (): boolean => { + try { + return typeof window !== 'undefined' && window.self !== window.top; + } catch (e) { + // If accessing window.top throws (cross-origin), we're definitely in an iframe + return true; + } +} + +/** + * Checks if the iframe has microphone permission via the allow attribute. + */ +const hasIframeMicrophonePermission = async (): Promise => { + if (typeof window === 'undefined' || typeof document === 'undefined') { + return false; + } + + try { + // Try using the Permissions Policy API (Chrome 88+, Edge 88+) + const doc = document as any; + if (doc.permissionsPolicy && typeof doc.permissionsPolicy.allowsFeature === 'function') { + return doc.permissionsPolicy.allowsFeature('microphone'); + } + + // Fallback to deprecated Feature Policy API (Chrome 60-87, Edge 79-87) + if (doc.featurePolicy && typeof doc.featurePolicy.allowsFeature === 'function') { + return doc.featurePolicy.allowsFeature('microphone'); + } + + // Fallback to Permissions API (broader support: Chrome 43+, Firefox 46+, Safari 16+) + if (typeof navigator !== 'undefined' && navigator.permissions) { + const result = await navigator.permissions.query({ name: 'microphone' as PermissionName }); + // 'granted' or 'prompt' means microphone is allowed by iframe policy + // 'denied' means either user denied or iframe policy blocks it + return result.state !== 'denied'; + } + } catch (e) { + // If permissions check fails, assume microphone is not allowed in iframe + } + + return false; +} + export interface IBotConnection { connectionStatus$: BehaviorSubject, activity$: Observable, @@ -489,6 +542,19 @@ export class DirectLine implements IBotConnection { private tokenRefreshSubscription: Subscription; private streaming: boolean; + // Voice mode: when true, use multimodal stream endpoint and send all traffic via WebSocket + private voiceModeEnabled: boolean = false; + + // Voice configuration default constants + private static readonly VOICE_SAMPLE_RATE = 24000; + private static readonly VOICE_CHUNK_INTERVAL_MS = 100; + + // Voice configuration: set when server supports audio modality, undefined otherwise + private voiceConfiguration: { sampleRate: number; chunkIntervalMs: number } | undefined; + + // EventTarget for dispatching capability change events + private eventTarget = new EventTarget(); + constructor(options: DirectLineOptions & Partial) { this.secret = options.secret; this.token = options.secret || options.token; @@ -498,6 +564,9 @@ export class DirectLine implements IBotConnection { this.streaming = options.streaming; } + // Initialize voice mode detection (sets voiceModeEnabled synchronously for non-iframe cases) + this.initializeVoiceMode(options.enableVoiceMode); + if (options.conversationStartProperties && options.conversationStartProperties.locale) { if (Object.prototype.toString.call(options.conversationStartProperties.locale) === '[object String]') { this.localeOnStartConversation = options.conversationStartProperties.locale; @@ -786,23 +855,20 @@ export class DirectLine implements IBotConnection { if (activity.type === "message" && activity.attachments && activity.attachments.length > 0) return this.postMessageWithAttachments(activity); - // if it is voice activity, send it through webSocket as voice over http is not supported in ABS. - // ABS limitation - client to server push is not being processed over web socket for text. - // Once it is implemented, we can remove this and send all traffic to the webSocket - if (DirectLine.isVoiceEventActivity(activity)) { + // When voice mode is enabled, send ALL traffic (text + voice) via WebSocket + if (this.voiceModeEnabled) { if (!this.webSocket) { - return Observable.throw(new Error('Voice activities require WebSocket to be enabled'), this.services.scheduler); + return Observable.throw(new Error('Voice mode requires WebSocket to be enabled'), this.services.scheduler); } return this.checkConnection(true) .flatMap(_ => Observable.create((subscriber: Subscriber) => { - const envelope = { activities: [activity] }; try { if (!this.webSocketConnection || this.webSocketConnection.readyState !== WebSocket.OPEN) { throw new Error('WebSocket connection not ready for voice activities'); } - this.webSocketConnection.send(JSON.stringify(envelope)); - subscriber.next(envelope); + this.webSocketConnection.send(JSON.stringify(activity)); + subscriber.next(activity); subscriber.complete(); } catch (e) { subscriber.error(e); @@ -833,22 +899,6 @@ export class DirectLine implements IBotConnection { .catch(error => this.catchExpiredToken(error)); } - // Until activity protocol changes for multi-modal output are ratified, this method - // identifies voice event activities using the given activity example below as payload - // to send voice chunks over activity protocol. The activity structure shown serves as - // the current solution for transmitting voice data: - // { "type": "event", "value": { "voice": { "contentUrl": "" } } } - private static isVoiceEventActivity(activity: Activity) { - return ( - activity.type === 'event' && - activity?.value && - typeof activity?.value === 'object' && - activity?.value?.voice && - typeof activity?.value?.voice === 'object' && - Object.keys(activity?.value?.voice).length > 0 - ); - } - private postMessageWithAttachments(message: Message) { const { attachments } = message; // We clean the attachments but making sure every attachment has unique name. @@ -1000,8 +1050,11 @@ export class DirectLine implements IBotConnection { // implementation, I decided roll the below, where the logic is more purposeful. - @billba private observableWebSocket() { return Observable.create((subscriber: Subscriber) => { - konsole.log("creating WebSocket", this.streamUrl); - this.webSocketConnection = new this.services.WebSocket(this.streamUrl); + // Apply multimodal stream URL if voice mode is enabled + const streamUrl = this.getMultimodalStreamUrl(this.streamUrl); + + konsole.log("creating WebSocket", streamUrl); + this.webSocketConnection = new this.services.WebSocket(streamUrl); let sub: Subscription; let closed: boolean; @@ -1040,7 +1093,13 @@ export class DirectLine implements IBotConnection { closed = true; } - this.webSocketConnection.onmessage = message => message.data && subscriber.next(JSON.parse(message.data)); + this.webSocketConnection.onmessage = message => { + if (message.data) { + const data = JSON.parse(message.data); + this.handleIncomingActivity(data); + subscriber.next(data); + } + }; // This is the 'unsubscribe' method, which is called when this observable is disposed. // When the WebSocket closes itself, we throw an error, and this function is eventually called. @@ -1122,6 +1181,46 @@ export class DirectLine implements IBotConnection { this.userIdOnStartConversation = userId; } + /** + * Returns voice configuration from server's agent.capabilities event, or undefined if server doesn't support audio. + * Use this to configure microphone settings. Only available after server confirms audio support. + */ + getVoiceConfiguration() { + return this.voiceConfiguration; + } + + /** + * Returns true if multimodal experience is requested (client-side), false otherwise. + * Does NOT guarantee server supports voice - use getVoiceConfiguration() for that. + * Use this to determine if activities are sent via WebSocket (no echo-back wait needed). + */ + getIsVoiceModeEnabled(): boolean { + return !!this.voiceModeEnabled; + } + + /** + * Returns the current WebSocket stream URL (with /multimodal suffix if voice mode is enabled). + * Useful for debugging and testing. + */ + getStreamUrl(): string | undefined { + return this.streamUrl ? this.getMultimodalStreamUrl(this.streamUrl) : undefined; + } + + /** + * Adds an event listener for adapter events (e.g., 'capabilitieschanged'). + * Used by consumer to subscribe to capability updates. + */ + addEventListener(type: string, listener: EventListenerOrEventListenerObject, options?: boolean | AddEventListenerOptions): void { + this.eventTarget.addEventListener(type, listener, options); + } + + /** + * Removes an event listener for adapter events. + */ + removeEventListener(type: string, listener: EventListenerOrEventListenerObject, options?: boolean | EventListenerOptions): void { + this.eventTarget.removeEventListener(type, listener, options); + } + private parseToken(token: string) { try { const { user } = jwtDecode(token) as { [key: string]: any; }; @@ -1133,4 +1232,77 @@ export class DirectLine implements IBotConnection { } } + /** + * Initialize voice mode. + * - Explicit true/false: set synchronously (no race condition) + * - Undefined: auto-detect for iframes with microphone permission (async, best effort) + */ + private initializeVoiceMode(enableVoiceMode?: boolean): void { + // Explicit true: enable synchronously + if (enableVoiceMode === true) { + this.voiceModeEnabled = true; + this.eventTarget.dispatchEvent(new Event('capabilitieschanged')); + return; + } + + // Explicit false: already false by default, nothing to do + if (enableVoiceMode === false) { + return; + } + + // Undefined: auto-detect for iframe with microphone permission (async) + if (isInIframe()) { + hasIframeMicrophonePermission().then(hasMic => { + if (hasMic) { + this.voiceModeEnabled = true; + this.eventTarget.dispatchEvent(new Event('capabilitieschanged')); + } + }); + } + } + + /** + * Handles incoming activity group to check for agent.capabilities event. + * Sets voice configuration if server supports audio modality. + */ + private handleIncomingActivity(data: any): void { + const activities = data?.activities; + if (!Array.isArray(activities)) { + return; + } + + for (const activity of activities) { + if (activity?.type === 'event' && activity?.name === 'agent.capabilities') { + const modalities = activity?.value?.modalities; + const hasAudio = modalities?.audio && + typeof modalities.audio === 'object' && + Object.keys(modalities.audio).length > 0; + + if (hasAudio) { + this.voiceConfiguration = { + sampleRate: DirectLine.VOICE_SAMPLE_RATE, + chunkIntervalMs: DirectLine.VOICE_CHUNK_INTERVAL_MS + }; + this.eventTarget.dispatchEvent(new Event('capabilitieschanged')); + } + } + } + } + + /** + * Modifies stream URL for voice mode: replaces /stream with /stream/multimodal + */ + private getMultimodalStreamUrl(url: string): string { + if (!this.voiceModeEnabled || !url) { + return url; + } + + // Replace /stream endpoint with /stream/multimodal (if not already multimodal) + if (!url.includes('/stream/multimodal')) { + return url.replace('/stream', '/stream/multimodal'); + } + + return url; + } + } From 68840451e1955b8ad6f001ed5322ea00ec33453a Mon Sep 17 00:00:00 2001 From: Pranav Joshi Date: Tue, 7 Apr 2026 14:56:42 +0000 Subject: [PATCH 5/8] revert get tream url function --- src/directLine.ts | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/directLine.ts b/src/directLine.ts index f1af97f68..d81fdc504 100644 --- a/src/directLine.ts +++ b/src/directLine.ts @@ -1198,14 +1198,6 @@ export class DirectLine implements IBotConnection { return !!this.voiceModeEnabled; } - /** - * Returns the current WebSocket stream URL (with /multimodal suffix if voice mode is enabled). - * Useful for debugging and testing. - */ - getStreamUrl(): string | undefined { - return this.streamUrl ? this.getMultimodalStreamUrl(this.streamUrl) : undefined; - } - /** * Adds an event listener for adapter events (e.g., 'capabilitieschanged'). * Used by consumer to subscribe to capability updates. From c6ae8b49c4dfbcda01bb72363bdc4d1fe5e153cc Mon Sep 17 00:00:00 2001 From: Pranav Joshi Date: Tue, 7 Apr 2026 15:00:19 +0000 Subject: [PATCH 6/8] revert mockVoiceActivity --- src/directLine.mock.ts | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/directLine.mock.ts b/src/directLine.mock.ts index 8713a1a5f..fff353bde 100644 --- a/src/directLine.mock.ts +++ b/src/directLine.mock.ts @@ -11,18 +11,6 @@ const notImplemented = (): never => { throw new Error('not implemented') }; export const mockActivity = (text: string): DirectLineExport.Activity => ({ type: 'message', from: { id: 'sender' }, text }); -export const mockVoiceActivity = (): DirectLineExport.Activity => ({ - type: 'event', - from: { id: 'sender' }, - name: 'voiceEvent', - value: { - voice: { - contentType: 'type', - contentUrl: 'base64AudioChunk' - } - } -}); - // MOCK DirectLine Server (shared state used by Observable.ajax and WebSocket mocks) interface ActivitySocket { From 85c9aeb149846ade12e94d1196cb660036c8082c Mon Sep 17 00:00:00 2001 From: Pranav Joshi Date: Tue, 28 Apr 2026 13:12:46 +0000 Subject: [PATCH 7/8] remove audio length check --- src/directLine.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/directLine.ts b/src/directLine.ts index d81fdc504..6a92362ec 100644 --- a/src/directLine.ts +++ b/src/directLine.ts @@ -1267,8 +1267,7 @@ export class DirectLine implements IBotConnection { if (activity?.type === 'event' && activity?.name === 'agent.capabilities') { const modalities = activity?.value?.modalities; const hasAudio = modalities?.audio && - typeof modalities.audio === 'object' && - Object.keys(modalities.audio).length > 0; + typeof modalities.audio === 'object'; if (hasAudio) { this.voiceConfiguration = { From 0f186a0c15427354979ed38d855e845de6732e42 Mon Sep 17 00:00:00 2001 From: Pranav Joshi Date: Wed, 29 Apr 2026 06:58:02 +0000 Subject: [PATCH 8/8] review comment fixed --- src/directLine.ts | 67 +++++++++-------------------------------- src/iframeMicrophone.ts | 57 +++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 52 deletions(-) create mode 100644 src/iframeMicrophone.ts diff --git a/src/directLine.ts b/src/directLine.ts index 6a92362ec..7c387b90e 100644 --- a/src/directLine.ts +++ b/src/directLine.ts @@ -37,6 +37,8 @@ import { objectExpression } from '@babel/types'; import { DirectLineStreaming } from './directLineStreaming'; export { DirectLineStreaming }; +import { hasIframeMicrophonePermission, isInIframe } from './iframeMicrophone'; + const DIRECT_LINE_VERSION = 'DirectLine/3.0'; declare var process: { @@ -458,52 +460,6 @@ const konsole = { } } -/** - * Checks if the current context is running inside an iframe. - */ -const isInIframe = (): boolean => { - try { - return typeof window !== 'undefined' && window.self !== window.top; - } catch (e) { - // If accessing window.top throws (cross-origin), we're definitely in an iframe - return true; - } -} - -/** - * Checks if the iframe has microphone permission via the allow attribute. - */ -const hasIframeMicrophonePermission = async (): Promise => { - if (typeof window === 'undefined' || typeof document === 'undefined') { - return false; - } - - try { - // Try using the Permissions Policy API (Chrome 88+, Edge 88+) - const doc = document as any; - if (doc.permissionsPolicy && typeof doc.permissionsPolicy.allowsFeature === 'function') { - return doc.permissionsPolicy.allowsFeature('microphone'); - } - - // Fallback to deprecated Feature Policy API (Chrome 60-87, Edge 79-87) - if (doc.featurePolicy && typeof doc.featurePolicy.allowsFeature === 'function') { - return doc.featurePolicy.allowsFeature('microphone'); - } - - // Fallback to Permissions API (broader support: Chrome 43+, Firefox 46+, Safari 16+) - if (typeof navigator !== 'undefined' && navigator.permissions) { - const result = await navigator.permissions.query({ name: 'microphone' as PermissionName }); - // 'granted' or 'prompt' means microphone is allowed by iframe policy - // 'denied' means either user denied or iframe policy blocks it - return result.state !== 'denied'; - } - } catch (e) { - // If permissions check fails, assume microphone is not allowed in iframe - } - - return false; -} - export interface IBotConnection { connectionStatus$: BehaviorSubject, activity$: Observable, @@ -1281,19 +1237,26 @@ export class DirectLine implements IBotConnection { } /** - * Modifies stream URL for voice mode: replaces /stream with /stream/multimodal + * Modifies stream URL for voice mode: appends /multimodal to the /stream path + * while preserving query string, hash, and other URL parts. */ private getMultimodalStreamUrl(url: string): string { if (!this.voiceModeEnabled || !url) { return url; } - // Replace /stream endpoint with /stream/multimodal (if not already multimodal) - if (!url.includes('/stream/multimodal')) { - return url.replace('/stream', '/stream/multimodal'); - } + try { + const parsed = new URL(url); + + if (parsed.pathname.endsWith('/stream')) { + parsed.pathname += '/multimodal'; + } - return url; + return parsed.toString(); + } catch { + // If URL parsing fails (malformed URL), return as-is + return url; + } } } diff --git a/src/iframeMicrophone.ts b/src/iframeMicrophone.ts new file mode 100644 index 000000000..bd73eeb0f --- /dev/null +++ b/src/iframeMicrophone.ts @@ -0,0 +1,57 @@ +/** + * Utilities for detecting iframe context and microphone permission. + * + * Used by DirectLine to auto-detect whether voice mode should be enabled + * when running inside an iframe with `allow="microphone"` attribute. + */ + +/** + * Checks if the current context is running inside an iframe. + */ +export const isInIframe = (): boolean => { + try { + return typeof window !== 'undefined' && window.self !== window.top; + } catch (e) { + // If accessing window.top throws (cross-origin), we're definitely in an iframe + return true; + } +}; + +/** + * Checks if the iframe has microphone permission via the allow attribute. + * + * Tries (in order): + * 1. Permissions Policy API (Chrome 88+, Edge 88+) + * 2. Feature Policy API (Chrome 60-87, Edge 79-87) — deprecated + * 3. Permissions API (Chrome 43+, Firefox 46+, Safari 16+) + */ +export const hasIframeMicrophonePermission = async (): Promise => { + if (typeof window === 'undefined' || typeof document === 'undefined') { + return false; + } + + try { + // Try using the Permissions Policy API (Chrome 88+, Edge 88+) + const doc = document as any; + if (doc.permissionsPolicy && typeof doc.permissionsPolicy.allowsFeature === 'function') { + return doc.permissionsPolicy.allowsFeature('microphone'); + } + + // Fallback to deprecated Feature Policy API (Chrome 60-87, Edge 79-87) + if (doc.featurePolicy && typeof doc.featurePolicy.allowsFeature === 'function') { + return doc.featurePolicy.allowsFeature('microphone'); + } + + // Fallback to Permissions API (broader support: Chrome 43+, Firefox 46+, Safari 16+) + if (typeof navigator !== 'undefined' && navigator.permissions) { + const result = await navigator.permissions.query({ name: 'microphone' as PermissionName }); + // 'granted' or 'prompt' means microphone is allowed by iframe policy + // 'denied' means either user denied or iframe policy blocks it + return result.state !== 'denied'; + } + } catch (e) { + // If permissions check fails, assume microphone is not allowed in iframe + } + + return false; +};