From f7a7d21cd9f978397168d64bb0d7d7f0545dc5e8 Mon Sep 17 00:00:00 2001
From: Pranav Joshi <pranavjoshi@microsoft.com>
Date: Thu, 27 Nov 2025 16:57:21 +0530
Subject: [PATCH 1/8] post voice traffic only to socket

---
 src/directLine.mock.ts |  12 ++++
 src/directLine.test.ts | 148 +++++++++++++++++++++++++++++++++++++++++
 src/directLine.ts      |  67 +++++++++++++++++--
 3 files changed, 220 insertions(+), 7 deletions(-)
diff --git a/src/directLine.mock.ts b/src/directLine.mock.ts
index 152fba1af..540452ad7 100644
--- a/src/directLine.mock.ts
+++ b/src/directLine.mock.ts
@@ -11,6 +11,18 @@ const notImplemented = (): never => { throw new Error('not implemented') };
 
 export const mockActivity = (text: string): DirectLineExport.Activity => ({ type: 'message', from: { id: 'sender' }, text });
 
+export const mockVoiceActivity = (): DirectLineExport.Activity => ({
+    type: 'event',
+    from: { id: 'sender' },
+    name: 'voiceLiveEvent',
+    value: {
+        voiceLiveEvent: {
+            type: 'type',
+            delta: 'base64AudioChunk'
+        }
+    }
+});
+
 // MOCK DirectLine Server (shared state used by Observable.ajax and WebSocket mocks)
 
 interface ActivitySocket {
diff --git a/src/directLine.test.ts b/src/directLine.test.ts
index f280e081c..3a70ab667 100644
--- a/src/directLine.test.ts
+++ b/src/directLine.test.ts
@@ -243,4 +243,152 @@ describe('MockSuite', () => {
         expect(actualError.status).toStrictEqual(429);
         expect(endTime - startTime).toStrictEqual(10);
     });
+
+    test('VoiceActivityWebSocket', () => {
+        const voiceActivity = DirectLineMock.mockVoiceActivity();
+        directline = new DirectLineExport.DirectLine({ ...services, webSocket: true });
+
+        const actual: Array<DirectLineExport.Activity> = [];
+        subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
+
+        let postActivityCompleted = false;
+        let postActivityError: any;
+
+        const scenario = function* (): IterableIterator<Observable<unknown>> {
+            yield Observable.timer(200, scheduler);
+            yield directline.postActivity(voiceActivity)
+                .do(() => postActivityCompleted = true)
+                .catch(error => {
+                    postActivityError = error;
+                    return Observable.empty();
+                });
+            yield Observable.timer(200, scheduler);
+        };
+
+        subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+        scheduler.flush();
+
+        // Assert that voice activity was sent successfully without errors
+        expect(postActivityCompleted).toBe(true);
+        expect(postActivityError).toBeUndefined();
+    });
+
+    test('VoiceActivityWithoutWebSocket', () => {
+    const voiceActivity = DirectLineMock.mockVoiceActivity();
+    directline = new DirectLineExport.DirectLine({ ...services, webSocket: false });
+
+    let actualError: any;
+
+    const scenario = function* (): IterableIterator<Observable<unknown>> {
+        yield Observable.timer(200, scheduler);
+        yield directline.postActivity(voiceActivity).catch(error => {
+            actualError = error;
+            return Observable.empty();
+        });
+    };
+
+    subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+    scheduler.flush();
+
+    expect(actualError.message).toContain('Voice activities require WebSocket to be enabled');
+    });
+
+    test('VoiceVsTextActivityRouting', () => {
+    const voiceActivity = DirectLineMock.mockVoiceActivity();
+    const textActivity = DirectLineMock.mockActivity('hello');
+
+    directline = new DirectLineExport.DirectLine({ ...services, webSocket: true });
+
+    const actual: Array<DirectLineExport.Activity> = [];
+    subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
+
+    let voiceCompleted = false;
+    let textCompleted = false;
+    let voiceError: any;
+    let textError: any;
+
+    const scenario = function* (): IterableIterator<Observable<unknown>> {
+        yield Observable.timer(200, scheduler);
+
+        // Send text activity (should go through HTTP/Ajax)
+        yield directline.postActivity(textActivity)
+            .do(() => textCompleted = true)
+            .catch(error => {
+                textError = error;
+                return Observable.empty();
+            });
+
+        yield Observable.timer(100, scheduler);
+
+        // Send voice activity (should go through WebSocket)
+        yield directline.postActivity(voiceActivity)
+            .do(() => voiceCompleted = true)
+            .catch(error => {
+                voiceError = error;
+                return Observable.empty();
+            });
+
+        yield Observable.timer(200, scheduler);
+    };
+
+    subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+    scheduler.flush();
+
+    // Both should complete successfully but through different paths
+    expect(textCompleted).toBe(true);
+    expect(voiceCompleted).toBe(true);
+    expect(textError).toBeUndefined();
+    expect(voiceError).toBeUndefined();
+
+    // Text activity should echo back, voice activity should not
+    expect(actual).toContainEqual(textActivity);
+    expect(actual).not.toContainEqual(voiceActivity);
+    });
+
+    test('InvalidVoiceActivityStructures', () => {
+    const invalidStructures: DirectLineExport.Activity[] = [
+        { type: 'event', from: { id: 'user' }, value: null } as any,
+        { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: null } } as any,
+        { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: {} } },
+        { type: 'event', from: { id: 'user' }, value: { notVoice: { data: 'test' } } } as any
+    ];
+
+    directline = new DirectLineExport.DirectLine({ ...services, webSocket: true });
+
+    const actual: Array<DirectLineExport.Activity> = [];
+    subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
+
+    let completedCount = 0;
+    let errorCount = 0;
+
+    const scenario = function* (): IterableIterator<Observable<unknown>> {
+        yield Observable.timer(200, scheduler);
+
+        // Send each invalid structure - should all go through HTTP path
+        for (const invalidActivity of invalidStructures) {
+            yield directline.postActivity(invalidActivity)
+                .do(() => completedCount++)
+                .catch(error => {
+                    errorCount++;
+                    return Observable.empty();
+                });
+            yield Observable.timer(100, scheduler);
+        }
+
+        yield Observable.timer(200, scheduler);
+    };
+
+    subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+    scheduler.flush();
+
+    // All invalid structures should complete successfully through HTTP path
+    expect(completedCount).toBe(invalidStructures.length);
+    expect(errorCount).toBe(0);
+
+    // All invalid structures should echo back (confirming they went through HTTP, not WebSocket)
+    expect(actual).toHaveLength(invalidStructures.length);
+    invalidStructures.forEach(invalidActivity => {
+        expect(actual).toContainEqual(invalidActivity);
+        });
+    });
 });
diff --git a/src/directLine.ts b/src/directLine.ts
index 7cc1b15fc..be6ca859a 100644
--- a/src/directLine.ts
+++ b/src/directLine.ts
@@ -470,6 +470,7 @@ export class DirectLine implements IBotConnection {
     public referenceGrammarId: string;
     private timeout = 20 * 1000;
     private retries: number;
+    private webSocketConnection: WebSocket | null = null;
 
     private localeOnStartConversation: string;
     private userIdOnStartConversation: string;
@@ -765,6 +766,32 @@ export class DirectLine implements IBotConnection {
         if (activity.type === "message" && activity.attachments && activity.attachments.length > 0)
             return this.postMessageWithAttachments(activity);
 
+        // if it is voice activity, send it through webSocket as voice over http is not supported in ABS.
+        // ABS limitation - client to server push is not being processed over web socket for text.
+        // Once it is implemented, we can remove this and send all traffic to the webSocket
+        if (this.isVoiceEventActivity(activity)) {
+            if (!this.webSocket) {
+                return Observable.throw(new Error('Voice activities require WebSocket to be enabled'), this.services.scheduler);
+            }
+            return this.checkConnection(true)
+                .flatMap(_ =>
+                    Observable.create((subscriber: Subscriber<any>) => {
+                        const envelope = { activities: [activity] };
+                        try {
+                             if (!this.webSocketConnection || this.webSocketConnection.readyState !== WebSocket.OPEN) {
+                                 throw new Error('WebSocket connection not ready for voice activities');
+                            }
+                            this.webSocketConnection.send(JSON.stringify(envelope));
+                            subscriber.next(envelope);
+                            subscriber.complete();
+                        } catch (e) {
+                            subscriber.error(e);
+                        }
+                })
+            )
+            .catch(error => this.catchExpiredToken(error));
+        }
+
         // If we're not connected to the bot, get connected
         // Will throw an error if we are not connected
         konsole.log("postActivity", activity);
@@ -786,6 +813,32 @@ export class DirectLine implements IBotConnection {
         .catch(error => this.catchExpiredToken(error));
     }
 
+    // Until activity protocol changes for multi-modal output are ratified, this method
+    // identifies voice event activities using the given activity example below as payload
+    // to send voice chunks over activity protocol. The activity structure shown serves as
+    // the current solution for transmitting voice data:
+    // { "type": "event", "value": { "voiceLiveEvent": { "type": "response.audio.delta", "delta": "<base64 chunk>" } } }
+    private isVoiceEventActivity(activity: Activity) {
+        if (activity.type !== 'event') {
+            return false;
+        }
+
+        if (!activity?.value || typeof activity?.value !== 'object') {
+            return false;
+        }
+
+        const vle = activity?.value?.voiceLiveEvent;
+        if (!vle || typeof vle !== 'object') {
+            return false;
+        }
+
+        if (Object.keys(vle).length === 0) {
+            return false;
+        }
+
+        return true;
+    }
+
     private postMessageWithAttachments(message: Message) {
         const { attachments } = message;
         // We clean the attachments but making sure every attachment has unique name.
@@ -938,11 +991,11 @@ export class DirectLine implements IBotConnection {
     private observableWebSocket<T>() {
         return Observable.create((subscriber: Subscriber<T>) => {
             konsole.log("creating WebSocket", this.streamUrl);
-            const ws = new this.services.WebSocket(this.streamUrl);
+            this.webSocketConnection = new this.services.WebSocket(this.streamUrl);
             let sub: Subscription;
             let closed: boolean;
 
-            ws.onopen = open => {
+            this.webSocketConnection.onopen = open => {
                 konsole.log("WebSocket open", open);
                 // Chrome is pretty bad at noticing when a WebSocket connection is broken.
                 // If we periodically ping the server with empty messages, it helps Chrome
@@ -950,14 +1003,14 @@ export class DirectLine implements IBotConnection {
                 // error, and that give us the opportunity to attempt to reconnect.
                 sub = Observable.interval(this.timeout, this.services.scheduler).subscribe(_ => {
                     try {
-                        ws.send("")
+                        this.webSocketConnection.send("")
                     } catch(e) {
                         konsole.log("Ping error", e);
                     }
                 });
             }
 
-            ws.onclose = close => {
+            this.webSocketConnection.onclose = close => {
                 konsole.log("WebSocket close", close);
                 if (sub) sub.unsubscribe();
 
@@ -967,7 +1020,7 @@ export class DirectLine implements IBotConnection {
                 closed = true;
             }
 
-            ws.onerror = error => {
+            this.webSocketConnection.onerror = error => {
                 konsole.log("WebSocket error", error);
                 if (sub) sub.unsubscribe();
 
@@ -977,14 +1030,14 @@ export class DirectLine implements IBotConnection {
                 closed = true;
             }
 
-            ws.onmessage = message => message.data && subscriber.next(JSON.parse(message.data));
+            this.webSocketConnection.onmessage = message => message.data && subscriber.next(JSON.parse(message.data));
 
             // This is the 'unsubscribe' method, which is called when this observable is disposed.
             // When the WebSocket closes itself, we throw an error, and this function is eventually called.
             // When the observable is closed first (e.g. when tearing down a WebChat instance) then
             // we need to manually close the WebSocket.
             return () => {
-                if (ws.readyState === 0 || ws.readyState === 1) ws.close();
+                if (this.webSocketConnection.readyState === 0 || this.webSocketConnection.readyState === 1) this.webSocketConnection.close();
             }
         }) as Observable<T>
     }

From 19d9e7729a11ea7a781844738cac139e10b0e637 Mon Sep 17 00:00:00 2001
From: Pranav Joshi <pranavjoshi@microsoft.com>
Date: Fri, 26 Dec 2025 12:35:05 +0530
Subject: [PATCH 2/8] address review comments on voice routing and tests

---
 src/directLine.test.ts | 157 +++++++++++++++++++----------------------
 src/directLine.ts      |  34 ++++-----
 2 files changed, 85 insertions(+), 106 deletions(-)

diff --git a/src/directLine.test.ts b/src/directLine.test.ts
index 3a70ab667..90ecb8dd6 100644
--- a/src/directLine.test.ts
+++ b/src/directLine.test.ts
@@ -274,121 +274,110 @@ describe('MockSuite', () => {
     });
 
     test('VoiceActivityWithoutWebSocket', () => {
-    const voiceActivity = DirectLineMock.mockVoiceActivity();
-    directline = new DirectLineExport.DirectLine({ ...services, webSocket: false });
+        const voiceActivity = DirectLineMock.mockVoiceActivity();
+        directline = new DirectLineExport.DirectLine({ ...services, webSocket: false });
 
-    let actualError: any;
+        let actualError: any;
 
-    const scenario = function* (): IterableIterator<Observable<unknown>> {
-        yield Observable.timer(200, scheduler);
-        yield directline.postActivity(voiceActivity).catch(error => {
-            actualError = error;
-            return Observable.empty();
-        });
-    };
+        const scenario = function* (): IterableIterator<Observable<unknown>> {
+            yield Observable.timer(200, scheduler);
+            yield directline.postActivity(voiceActivity).catch(error => {
+                actualError = error;
+                return Observable.empty();
+            });
+        };
 
-    subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
-    scheduler.flush();
+        subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+        scheduler.flush();
 
-    expect(actualError.message).toContain('Voice activities require WebSocket to be enabled');
+        expect(actualError.message).toContain('Voice activities require WebSocket to be enabled');
     });
 
     test('VoiceVsTextActivityRouting', () => {
-    const voiceActivity = DirectLineMock.mockVoiceActivity();
-    const textActivity = DirectLineMock.mockActivity('hello');
+        const voiceActivity = DirectLineMock.mockVoiceActivity();
+        const textActivity = DirectLineMock.mockActivity('hello');
 
-    directline = new DirectLineExport.DirectLine({ ...services, webSocket: true });
+        directline = new DirectLineExport.DirectLine({ ...services, webSocket: true });
 
-    const actual: Array<DirectLineExport.Activity> = [];
-    subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
+        const actual: Array<DirectLineExport.Activity> = [];
+        subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
 
-    let voiceCompleted = false;
-    let textCompleted = false;
-    let voiceError: any;
-    let textError: any;
+        let voiceCompleted = false;
+        let textCompleted = false;
+        let voiceError: any;
+        let textError: any;
 
-    const scenario = function* (): IterableIterator<Observable<unknown>> {
-        yield Observable.timer(200, scheduler);
+        const scenario = function* (): IterableIterator<Observable<unknown>> {
+            yield Observable.timer(200, scheduler);
 
-        // Send text activity (should go through HTTP/Ajax)
-        yield directline.postActivity(textActivity)
-            .do(() => textCompleted = true)
-            .catch(error => {
-                textError = error;
-                return Observable.empty();
-            });
+            // Send text activity (should go through HTTP/Ajax)
+            yield directline.postActivity(textActivity)
+                .do(() => textCompleted = true)
+                .catch(error => {
+                    textError = error;
+                    return Observable.empty();
+                });
 
-        yield Observable.timer(100, scheduler);
+            yield Observable.timer(100, scheduler);
 
-        // Send voice activity (should go through WebSocket)
-        yield directline.postActivity(voiceActivity)
-            .do(() => voiceCompleted = true)
-            .catch(error => {
-                voiceError = error;
-                return Observable.empty();
-            });
+            // Send voice activity (should go through WebSocket)
+            yield directline.postActivity(voiceActivity)
+                .do(() => voiceCompleted = true)
+                .catch(error => {
+                    voiceError = error;
+                    return Observable.empty();
+                });
 
-        yield Observable.timer(200, scheduler);
-    };
+            yield Observable.timer(200, scheduler);
+        };
 
-    subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
-    scheduler.flush();
+        subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+        scheduler.flush();
 
-    // Both should complete successfully but through different paths
-    expect(textCompleted).toBe(true);
-    expect(voiceCompleted).toBe(true);
-    expect(textError).toBeUndefined();
-    expect(voiceError).toBeUndefined();
+        // Both should complete successfully but through different paths
+        expect(textCompleted).toBe(true);
+        expect(voiceCompleted).toBe(true);
+        expect(textError).toBeUndefined();
+        expect(voiceError).toBeUndefined();
 
-    // Text activity should echo back, voice activity should not
-    expect(actual).toContainEqual(textActivity);
-    expect(actual).not.toContainEqual(voiceActivity);
+        // Text activity should echo back, voice activity should not
+        expect(actual).toContainEqual(textActivity);
+        expect(actual).not.toContainEqual(voiceActivity);
     });
 
-    test('InvalidVoiceActivityStructures', () => {
-    const invalidStructures: DirectLineExport.Activity[] = [
-        { type: 'event', from: { id: 'user' }, value: null } as any,
-        { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: null } } as any,
+    test.each([
+        { type: 'event', from: { id: 'user' }, value: null },
+        { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: null } },
         { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: {} } },
-        { type: 'event', from: { id: 'user' }, value: { notVoice: { data: 'test' } } } as any
-    ];
-
-    directline = new DirectLineExport.DirectLine({ ...services, webSocket: true });
-
-    const actual: Array<DirectLineExport.Activity> = [];
-    subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
+        { type: 'event', from: { id: 'user' }, value: { notVoice: { data: 'test' } } }
+    ] as DirectLineExport.Activity[])('InvalidVoiceActivityStructure: %p', (invalidActivity) => {
+        directline = new DirectLineExport.DirectLine({ ...services, webSocket: true });
 
-    let completedCount = 0;
-    let errorCount = 0;
+        const actual: Array<DirectLineExport.Activity> = [];
+        subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
 
-    const scenario = function* (): IterableIterator<Observable<unknown>> {
-        yield Observable.timer(200, scheduler);
+        let completed = false;
+        let activityError: any;
 
-        // Send each invalid structure - should all go through HTTP path
-        for (const invalidActivity of invalidStructures) {
+        const scenario = function* (): IterableIterator<Observable<unknown>> {
+            yield Observable.timer(200, scheduler);
             yield directline.postActivity(invalidActivity)
-                .do(() => completedCount++)
+                .do(() => completed = true)
                 .catch(error => {
-                    errorCount++;
+                    activityError = error;
                     return Observable.empty();
                 });
-            yield Observable.timer(100, scheduler);
-        }
-
-        yield Observable.timer(200, scheduler);
-    };
+            yield Observable.timer(200, scheduler);
+        };
 
-    subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
-    scheduler.flush();
+        subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+        scheduler.flush();
 
-    // All invalid structures should complete successfully through HTTP path
-    expect(completedCount).toBe(invalidStructures.length);
-    expect(errorCount).toBe(0);
+        // Should complete successfully through HTTP path
+        expect(completed).toBe(true);
+        expect(activityError).toBeUndefined();
 
-    // All invalid structures should echo back (confirming they went through HTTP, not WebSocket)
-    expect(actual).toHaveLength(invalidStructures.length);
-    invalidStructures.forEach(invalidActivity => {
+        // Should echo back (confirming it went through HTTP, not WebSocket)
         expect(actual).toContainEqual(invalidActivity);
-        });
     });
 });
diff --git a/src/directLine.ts b/src/directLine.ts
index be6ca859a..7f3d0d375 100644
--- a/src/directLine.ts
+++ b/src/directLine.ts
@@ -769,7 +769,7 @@ export class DirectLine implements IBotConnection {
         // if it is voice activity, send it through webSocket as voice over http is not supported in ABS.
         // ABS limitation - client to server push is not being processed over web socket for text.
         // Once it is implemented, we can remove this and send all traffic to the webSocket
-        if (this.isVoiceEventActivity(activity)) {
+        if (DirectLine.isVoiceEventActivity(activity)) {
             if (!this.webSocket) {
                 return Observable.throw(new Error('Voice activities require WebSocket to be enabled'), this.services.scheduler);
             }
@@ -778,8 +778,8 @@ export class DirectLine implements IBotConnection {
                     Observable.create((subscriber: Subscriber<any>) => {
                         const envelope = { activities: [activity] };
                         try {
-                             if (!this.webSocketConnection || this.webSocketConnection.readyState !== WebSocket.OPEN) {
-                                 throw new Error('WebSocket connection not ready for voice activities');
+                            if (!this.webSocketConnection || this.webSocketConnection.readyState !== WebSocket.OPEN) {
+                                throw new Error('WebSocket connection not ready for voice activities');
                             }
                             this.webSocketConnection.send(JSON.stringify(envelope));
                             subscriber.next(envelope);
@@ -818,25 +818,15 @@ export class DirectLine implements IBotConnection {
     // to send voice chunks over activity protocol. The activity structure shown serves as
     // the current solution for transmitting voice data:
     // { "type": "event", "value": { "voiceLiveEvent": { "type": "response.audio.delta", "delta": "<base64 chunk>" } } }
-    private isVoiceEventActivity(activity: Activity) {
-        if (activity.type !== 'event') {
-            return false;
-        }
-
-        if (!activity?.value || typeof activity?.value !== 'object') {
-            return false;
-        }
-
-        const vle = activity?.value?.voiceLiveEvent;
-        if (!vle || typeof vle !== 'object') {
-            return false;
-        }
-
-        if (Object.keys(vle).length === 0) {
-            return false;
-        }
-
-        return true;
+    private static isVoiceEventActivity(activity: Activity) {
+        return (
+            activity.type === 'event' &&
+            activity?.value &&
+            typeof activity?.value === 'object' &&
+            activity?.value?.voiceLiveEvent &&
+            typeof activity?.value?.voiceLiveEvent === 'object' &&
+            Object.keys(activity?.value?.voiceLiveEvent).length > 0
+        );
     }
 
     private postMessageWithAttachments(message: Message) {

From 7ba71907be8dedc687b160df983fc8f875910d85 Mon Sep 17 00:00:00 2001
From: Pranav Joshi <pranavjoshi001@gmail.com>
Date: Wed, 7 Jan 2026 16:21:30 +0000
Subject: [PATCH 3/8] align voice event payload more closely with activity spec

---
 src/directLine.mock.ts | 8 ++++----
 src/directLine.test.ts | 4 ++--
 src/directLine.ts      | 8 ++++----
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/directLine.mock.ts b/src/directLine.mock.ts
index 540452ad7..3ff594124 100644
--- a/src/directLine.mock.ts
+++ b/src/directLine.mock.ts
@@ -14,11 +14,11 @@ export const mockActivity = (text: string): DirectLineExport.Activity => ({ type
 export const mockVoiceActivity = (): DirectLineExport.Activity => ({
     type: 'event',
     from: { id: 'sender' },
-    name: 'voiceLiveEvent',
+    name: 'voiceEvent',
     value: {
-        voiceLiveEvent: {
-            type: 'type',
-            delta: 'base64AudioChunk'
+        voice: {
+            contentType: 'type',
+            contentUrl: 'base64AudioChunk'
         }
     }
 });
diff --git a/src/directLine.test.ts b/src/directLine.test.ts
index 90ecb8dd6..bfa647341 100644
--- a/src/directLine.test.ts
+++ b/src/directLine.test.ts
@@ -347,8 +347,8 @@ describe('MockSuite', () => {
 
     test.each([
         { type: 'event', from: { id: 'user' }, value: null },
-        { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: null } },
-        { type: 'event', from: { id: 'user' }, value: { voiceLiveEvent: {} } },
+        { type: 'event', from: { id: 'user' }, value: { voice: null } },
+        { type: 'event', from: { id: 'user' }, value: { voice: {} } },
         { type: 'event', from: { id: 'user' }, value: { notVoice: { data: 'test' } } }
     ] as DirectLineExport.Activity[])('InvalidVoiceActivityStructure: %p', (invalidActivity) => {
         directline = new DirectLineExport.DirectLine({ ...services, webSocket: true });
diff --git a/src/directLine.ts b/src/directLine.ts
index 7f3d0d375..bf45227d5 100644
--- a/src/directLine.ts
+++ b/src/directLine.ts
@@ -817,15 +817,15 @@ export class DirectLine implements IBotConnection {
     // identifies voice event activities using the given activity example below as payload
     // to send voice chunks over activity protocol. The activity structure shown serves as
     // the current solution for transmitting voice data:
-    // { "type": "event", "value": { "voiceLiveEvent": { "type": "response.audio.delta", "delta": "<base64 chunk>" } } }
+    // { "type": "event", "value": { "voice": { "contentUrl": "<base64 chunk>" } } }
     private static isVoiceEventActivity(activity: Activity) {
         return (
             activity.type === 'event' &&
             activity?.value &&
             typeof activity?.value === 'object' &&
-            activity?.value?.voiceLiveEvent &&
-            typeof activity?.value?.voiceLiveEvent === 'object' &&
-            Object.keys(activity?.value?.voiceLiveEvent).length > 0
+            activity?.value?.voice &&
+            typeof activity?.value?.voice === 'object' &&
+            Object.keys(activity?.value?.voice).length > 0
         );
     }
 

From 8a0d8debe447f00e260eabc0002b99fc6693f825 Mon Sep 17 00:00:00 2001
From: Pranav Joshi <pranavjoshi001@gmail.com>
Date: Tue, 7 Apr 2026 14:46:09 +0000
Subject: [PATCH 4/8] voice mode handling

---
 src/directLine.mock.ts |  35 ++++
 src/directLine.test.ts | 457 ++++++++++++++++++++++++++++++++---------
 src/directLine.ts      | 228 +++++++++++++++++---
 3 files changed, 591 insertions(+), 129 deletions(-)

diff --git a/src/directLine.mock.ts b/src/directLine.mock.ts
index 3ff594124..8713a1a5f 100644
--- a/src/directLine.mock.ts
+++ b/src/directLine.mock.ts
@@ -41,6 +41,7 @@ export interface Conversation {
 export interface Server {
   scheduler: TestScheduler;
   conversation: Conversation;
+  webSocketUrl?: string;
 }
 
 const tokenPrefix = 'token';
@@ -223,6 +224,7 @@ type EventHandler<E extends Event> = (this: WebSocket, ev: E) => any;
 export const mockWebSocket = (server: Server): WebSocketConstructor =>
   class MockWebSocket implements WebSocket, ActivitySocket {
     constructor(url: string, protocols?: string | string[]) {
+      server.webSocketUrl = url;
 
       server.scheduler.schedule(() => {
         this.readyState = WebSocket.CONNECTING;
@@ -297,3 +299,36 @@ export const mockServices = (server: Server, scheduler: TestScheduler): DirectLi
   ajax: mockAjax(server),
   random: () => 0,
 });
+
+// Helper to inject agent.capabilities event with audio support
+export const mockAgentCapabilitiesEvent = (): DirectLineExport.Activity => ({
+  type: 'event',
+  from: { id: 'bot' },
+  name: 'agent.capabilities',
+  value: {
+    modalities: {
+      text: {},
+      audio: {
+        fonts: [],
+        tools: [],
+        instructions: []
+      }
+    }
+  }
+});
+
+// Helper to inject agent.capabilities event into WebSocket
+export const injectAgentCapabilities = (server: Server): void => {
+  const capabilitiesEvent = mockAgentCapabilitiesEvent();
+  const activityGroup: DirectLineExport.ActivityGroup = {
+    activities: [capabilitiesEvent],
+    watermark: server.conversation.history.length.toString(),
+  };
+  const message = new MessageEvent('type', { data: JSON.stringify(activityGroup) });
+  server.conversation.sockets.forEach(s => s.onmessage(message));
+};
+
+// Helper to check if WebSocket URL contains multimodal path
+export const hasMultimodalUrl = (server: Server): boolean => {
+  return !!server.webSocketUrl?.includes('/stream/multimodal');
+};
diff --git a/src/directLine.test.ts b/src/directLine.test.ts
index 70459b13d..89a589941 100644
--- a/src/directLine.test.ts
+++ b/src/directLine.test.ts
@@ -391,140 +391,395 @@ describe('MockSuite', () => {
         });
     });
 
-    test('VoiceActivityWebSocket', () => {
-        const voiceActivity = DirectLineMock.mockVoiceActivity();
-        directline = new DirectLineExport.DirectLine({ ...services, webSocket: true });
+    describe('VoiceMode', () => {
 
-        const actual: Array<DirectLineExport.Activity> = [];
-        subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
+        describe('enableVoiceMode: true (explicit)', () => {
 
-        let postActivityCompleted = false;
-        let postActivityError: any;
+            test('voice mode enabled and uses /stream/multimodal URL', () => {
+                directline = new DirectLineExport.DirectLine({ ...services, enableVoiceMode: true });
 
-        const scenario = function* (): IterableIterator<Observable<unknown>> {
-            yield Observable.timer(200, scheduler);
-            yield directline.postActivity(voiceActivity)
-                .do(() => postActivityCompleted = true)
-                .catch(error => {
-                    postActivityError = error;
-                    return Observable.empty();
+                // Verify voice mode is enabled synchronously
+                expect(directline.getIsVoiceModeEnabled()).toBe(true);
+
+                const scenario = function* (): IterableIterator<Observable<unknown>> {
+                    yield Observable.timer(200, scheduler);
+                };
+
+                subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+                subscriptions.push(directline.activity$.subscribe());
+
+                scheduler.flush();
+
+                // Verify WebSocket URL contains /stream/multimodal
+                expect(DirectLineMock.hasMultimodalUrl(server)).toBe(true);
+            });
+
+            test('postActivity sends via WebSocket (does not echo back)', () => {
+                directline = new DirectLineExport.DirectLine({ ...services, enableVoiceMode: true });
+
+                const textActivity = DirectLineMock.mockActivity('hello-voice-mode');
+
+                let postCompleted = false;
+                const actual: Array<DirectLineExport.Activity> = [];
+
+                const scenario = function* (): IterableIterator<Observable<unknown>> {
+                    yield Observable.timer(200, scheduler);
+                    yield directline.postActivity(textActivity).do(() => postCompleted = true);
+                    yield Observable.timer(100, scheduler);
+                };
+
+                subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+                subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
+
+                scheduler.flush();
+
+                expect(postCompleted).toBe(true);
+                // WebSocket path: activity does NOT echo back (server doesn't broadcast WS-sent activities)
+                expect(actual).not.toContainEqual(textActivity);
+            });
+
+            test('reconnect after WebSocket close still uses /stream/multimodal URL', () => {
+                directline = new DirectLineExport.DirectLine({ ...services, enableVoiceMode: true });
+
+                // First verify initial connection uses multimodal URL
+                const scenario = function* (): IterableIterator<Observable<unknown>> {
+                    yield Observable.timer(200, scheduler);
+                };
+
+                subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+                subscriptions.push(directline.activity$.subscribe());
+
+                scheduler.flush();
+
+                // Verify initial connection uses multimodal
+                expect(DirectLineMock.hasMultimodalUrl(server)).toBe(true);
+
+                // Simulate WebSocket close (triggers reconnect)
+                DirectLineMock.injectClose(server);
+
+                // Continue scheduler to allow reconnect
+                const reconnectScenario = function* (): IterableIterator<Observable<unknown>> {
+                    yield Observable.timer(200, scheduler);
+                };
+
+                subscriptions.push(lazyConcat(reconnectScenario()).observeOn(scheduler).subscribe());
+
+                scheduler.flush();
+
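+                // After reconnect, should still use /stream/multimodal URL (NOTE(review): server.webSocketUrl is not reset before the close, so a stale value would also pass)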
+                expect(DirectLineMock.hasMultimodalUrl(server)).toBe(true);
+                expect(directline.getIsVoiceModeEnabled()).toBe(true);
+            });
+        });
+
+        describe('enableVoiceMode: false (explicit)', () => {
+
+            test('voice mode disabled and uses standard /stream URL', () => {
+                directline = new DirectLineExport.DirectLine({ ...services, enableVoiceMode: false });
+
+                // Verify voice mode is disabled
+                expect(directline.getIsVoiceModeEnabled()).toBe(false);
+
+                const scenario = function* (): IterableIterator<Observable<unknown>> {
+                    yield Observable.timer(200, scheduler);
+                };
+
+                subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+                subscriptions.push(directline.activity$.subscribe());
+
+                scheduler.flush();
+
+                // Verify WebSocket URL does NOT contain /stream/multimodal
+                expect(DirectLineMock.hasMultimodalUrl(server)).toBe(false);
+            });
+
+            test('postActivity sends via HTTP (echoes back)', () => {
+                directline = new DirectLineExport.DirectLine({ ...services, enableVoiceMode: false });
+
+                const textActivity = DirectLineMock.mockActivity('hello-http');
+
+                const actual: Array<DirectLineExport.Activity> = [];
+                subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
+
+                const scenario = function* (): IterableIterator<Observable<unknown>> {
+                    yield Observable.timer(200, scheduler);
+                    yield directline.postActivity(textActivity);
+                    yield Observable.timer(100, scheduler);
+                };
+
+                subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+
+                scheduler.flush();
+
+                // HTTP path: activity echoes back via activity$ (server broadcasts HTTP-posted activities)
+                expect(actual).toContainEqual(textActivity);
+            });
+
+            test('403 post returns retry and still uses standard /stream URL', () => {
+                services.ajax = DirectLineMock.mockAjax(server, (urlOrRequest) => {
+                    if (typeof urlOrRequest === 'string') {
+                        throw new Error();
+                    }
+
+                    if (urlOrRequest.url && urlOrRequest.url.indexOf('/conversations') > 0 && !/activities/u.test(urlOrRequest.url)) {
+                        const response: Partial<AjaxResponse> = {
+                            response: server.conversation,
+                            status: 201,
+                            xhr: { getResponseHeader: () => 'n/a' } as unknown as XMLHttpRequest
+                        };
+                        return response as AjaxResponse;
+                    }
+
+                    if (urlOrRequest.url && /activities/u.test(urlOrRequest.url)) {
+                        const response: Partial<AjaxResponse> = {
+                            status: 403,
+                            xhr: { getResponseHeader: () => 'n/a' } as unknown as XMLHttpRequest
+                        };
+                        const error = new Error('Forbidden');
+                        throw Object.assign(error, response);
+                    }
+
+                    throw new Error();
                 });
-            yield Observable.timer(200, scheduler);
-        };
 
-        subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
-        scheduler.flush();
+                directline = new DirectLineExport.DirectLine({ ...services, enableVoiceMode: false });
 
-        // Assert that voice activity was sent successfully without errors
-        expect(postActivityCompleted).toBe(true);
-        expect(postActivityError).toBeUndefined();
-    });
+                const retryActivity = DirectLineMock.mockActivity('will-retry-false');
+                const scenario = function* (): IterableIterator<Observable<unknown>> {
+                    yield Observable.timer(200, scheduler);
+                    yield directline.postActivity(retryActivity);
+                };
 
-    test('VoiceActivityWithoutWebSocket', () => {
-        const voiceActivity = DirectLineMock.mockVoiceActivity();
-        directline = new DirectLineExport.DirectLine({ ...services, webSocket: false });
+                let postResult: string | undefined;
+                subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe({
+                    next: v => { postResult = v as string; },
+                    error: () => {},
+                    complete: () => {}
+                }));
 
-        let actualError: any;
+                scheduler.flush();
 
-        const scenario = function* (): IterableIterator<Observable<unknown>> {
-            yield Observable.timer(200, scheduler);
-            yield directline.postActivity(voiceActivity).catch(error => {
-                actualError = error;
-                return Observable.empty();
+                expect(postResult).toStrictEqual('retry');
+                expect(DirectLineMock.hasMultimodalUrl(server)).toBe(false);
             });
-        };
+        });
 
-        subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
-        scheduler.flush();
+        describe('enableVoiceMode: undefined (auto-detect)', () => {
 
-        expect(actualError.message).toContain('Voice activities require WebSocket to be enabled');
-    });
+            test('non-iframe: voice mode disabled and uses standard /stream URL', () => {
+                // Default test environment is not an iframe (window.self === window.top)
+                directline = new DirectLineExport.DirectLine({ ...services });
 
-    test('VoiceVsTextActivityRouting', () => {
-        const voiceActivity = DirectLineMock.mockVoiceActivity();
-        const textActivity = DirectLineMock.mockActivity('hello');
+                // Verify voice mode is disabled (synchronous - no iframe check needed)
+                expect(directline.getIsVoiceModeEnabled()).toBe(false);
 
-        directline = new DirectLineExport.DirectLine({ ...services, webSocket: true });
+                const scenario = function* (): IterableIterator<Observable<unknown>> {
+                    yield Observable.timer(200, scheduler);
+                };
 
-        const actual: Array<DirectLineExport.Activity> = [];
-        subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
+                subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+                subscriptions.push(directline.activity$.subscribe());
 
-        let voiceCompleted = false;
-        let textCompleted = false;
-        let voiceError: any;
-        let textError: any;
+                scheduler.flush();
 
-        const scenario = function* (): IterableIterator<Observable<unknown>> {
-            yield Observable.timer(200, scheduler);
+                // Verify standard /stream URL (not multimodal)
+                expect(DirectLineMock.hasMultimodalUrl(server)).toBe(false);
+            });
+
+            test('non-iframe: 403 post returns retry and still uses standard /stream URL', () => {
+                services.ajax = DirectLineMock.mockAjax(server, (urlOrRequest) => {
+                    if (typeof urlOrRequest === 'string') {
+                        throw new Error();
+                    }
+
+                    if (urlOrRequest.url && urlOrRequest.url.indexOf('/conversations') > 0 && !/activities/u.test(urlOrRequest.url)) {
+                        const response: Partial<AjaxResponse> = {
+                            response: server.conversation,
+                            status: 201,
+                            xhr: { getResponseHeader: () => 'n/a' } as unknown as XMLHttpRequest
+                        };
+                        return response as AjaxResponse;
+                    }
+
+                    if (urlOrRequest.url && /activities/u.test(urlOrRequest.url)) {
+                        const response: Partial<AjaxResponse> = {
+                            status: 403,
+                            xhr: { getResponseHeader: () => 'n/a' } as unknown as XMLHttpRequest
+                        };
+                        const error = new Error('Forbidden');
+                        throw Object.assign(error, response);
+                    }
 
-            // Send text activity (should go through HTTP/Ajax)
-            yield directline.postActivity(textActivity)
-                .do(() => textCompleted = true)
-                .catch(error => {
-                    textError = error;
-                    return Observable.empty();
+                    throw new Error();
                 });
 
-            yield Observable.timer(100, scheduler);
+                directline = new DirectLineExport.DirectLine({ ...services });
 
-            // Send voice activity (should go through WebSocket)
-            yield directline.postActivity(voiceActivity)
-                .do(() => voiceCompleted = true)
-                .catch(error => {
-                    voiceError = error;
-                    return Observable.empty();
+                const retryActivity = DirectLineMock.mockActivity('will-retry-undefined');
+                const scenario = function* (): IterableIterator<Observable<unknown>> {
+                    yield Observable.timer(200, scheduler);
+                    yield directline.postActivity(retryActivity);
+                };
+
+                let postResult: string | undefined;
+                subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe({
+                    next: v => { postResult = v as string; },
+                    error: () => {},
+                    complete: () => {}
+                }));
+
+                scheduler.flush();
+
+                expect(postResult).toStrictEqual('retry');
+                expect(DirectLineMock.hasMultimodalUrl(server)).toBe(false);
+            });
+
+            test('iframe WITH microphone permission: voice mode enabled and uses /stream/multimodal URL', async () => {
+                // Mock iframe detection: window.self !== window.top
+                const originalSelf = window.self;
+                Object.defineProperty(window, 'self', {
+                    value: { notTop: true },
+                    writable: true,
+                    configurable: true
                 });
 
-            yield Observable.timer(200, scheduler);
-        };
+                // Mock permissionsPolicy.allowsFeature('microphone') to return true
+                const originalPermissionsPolicy = (document as any).permissionsPolicy;
+                (document as any).permissionsPolicy = {
+                    allowsFeature: (feature: string) => feature === 'microphone'
+                };
 
-        subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
-        scheduler.flush();
+                try {
+                    directline = new DirectLineExport.DirectLine({ ...services });
+                    await Promise.resolve();
 
-        // Both should complete successfully but through different paths
-        expect(textCompleted).toBe(true);
-        expect(voiceCompleted).toBe(true);
-        expect(textError).toBeUndefined();
-        expect(voiceError).toBeUndefined();
+                    const textActivity = DirectLineMock.mockActivity('iframe-with-mic');
+                    let postCompleted = false;
+                    const actual: Array<DirectLineExport.Activity> = [];
 
-        // Text activity should echo back, voice activity should not
-        expect(actual).toContainEqual(textActivity);
-        expect(actual).not.toContainEqual(voiceActivity);
-    });
+                    const scenario = function* (): IterableIterator<Observable<unknown>> {
+                        yield Observable.timer(200, scheduler);
+                        yield directline.postActivity(textActivity).do(() => postCompleted = true);
+                        yield Observable.timer(100, scheduler);
+                    };
 
-    test.each([
-        { type: 'event', from: { id: 'user' }, value: null },
-        { type: 'event', from: { id: 'user' }, value: { voice: null } },
-        { type: 'event', from: { id: 'user' }, value: { voice: {} } },
-        { type: 'event', from: { id: 'user' }, value: { notVoice: { data: 'test' } } }
-    ] as DirectLineExport.Activity[])('InvalidVoiceActivityStructure: %p', (invalidActivity) => {
-        directline = new DirectLineExport.DirectLine({ ...services, webSocket: true });
+                    subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+                    subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
+
+                    scheduler.flush();
+
+                    expect(directline.getIsVoiceModeEnabled()).toBe(true);
+
+                    // Verify /stream/multimodal URL
+                    expect(DirectLineMock.hasMultimodalUrl(server)).toBe(true);
+                    // Verify WebSocket routing: activity does NOT echo back
+                    expect(postCompleted).toBe(true);
+                    expect(actual).not.toContainEqual(textActivity);
+                } finally {
+                    Object.defineProperty(window, 'self', {
+                        value: originalSelf,
+                        writable: true,
+                        configurable: true
+                    });
+                    if (originalPermissionsPolicy) {
+                        (document as any).permissionsPolicy = originalPermissionsPolicy;
+                    } else {
+                        delete (document as any).permissionsPolicy;
+                    }
+                }
+            });
 
-        const actual: Array<DirectLineExport.Activity> = [];
-        subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
+            test('iframe WITHOUT microphone permission: voice mode disabled', async () => {
+                // Mock iframe detection: window.self !== window.top
+                const originalSelf = window.self;
+                Object.defineProperty(window, 'self', {
+                    value: { notTop: true },
+                    writable: true,
+                    configurable: true
+                });
 
-        let completed = false;
-        let activityError: any;
+                // Mock permissionsPolicy.allowsFeature('microphone') to return false
+                const originalPermissionsPolicy = (document as any).permissionsPolicy;
+                (document as any).permissionsPolicy = {
+                    allowsFeature: (feature: string) => false
+                };
 
-        const scenario = function* (): IterableIterator<Observable<unknown>> {
-            yield Observable.timer(200, scheduler);
-            yield directline.postActivity(invalidActivity)
-                .do(() => completed = true)
-                .catch(error => {
-                    activityError = error;
-                    return Observable.empty();
+                try {
+                    directline = new DirectLineExport.DirectLine({ ...services });
+
+                    expect(directline.getIsVoiceModeEnabled()).toBe(false);
+
+                    const textActivity = DirectLineMock.mockActivity('iframe-no-mic');
+                    const actual: Array<DirectLineExport.Activity> = [];
+
+                    const scenario = function* (): IterableIterator<Observable<unknown>> {
+                        yield Observable.timer(200, scheduler);
+                        yield directline.postActivity(textActivity);
+                        yield Observable.timer(100, scheduler);
+                    };
+
+                    subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
+                    subscriptions.push(directline.activity$.subscribe(a => actual.push(a)));
+
+                    scheduler.flush();
+
+                    // Verify standard /stream URL (not multimodal)
+                    expect(DirectLineMock.hasMultimodalUrl(server)).toBe(false);
+                    // Verify HTTP routing: activity echoes back
+                    expect(actual).toContainEqual(textActivity);
+                } finally {
+                    Object.defineProperty(window, 'self', {
+                        value: originalSelf,
+                        writable: true,
+                        configurable: true
+                    });
+                    if (originalPermissionsPolicy) {
+                        (document as any).permissionsPolicy = originalPermissionsPolicy;
+                    } else {
+                        delete (document as any).permissionsPolicy;
+                    }
+                }
+            });
+        });
+
+        describe('Voice Configuration & Events', () => {
+
+            test('getVoiceConfiguration returns undefined initially', () => {
+                directline = new DirectLineExport.DirectLine({ ...services });
+
+                expect(directline.getVoiceConfiguration()).toBeUndefined();
+            });
+
+            test('agent.capabilities event sets voiceConfiguration and fires capabilitieschanged', () => {
+                directline = new DirectLineExport.DirectLine({ ...services });
+
+                let eventFired = false;
+                directline.addEventListener('capabilitieschanged', () => {
+                    eventFired = true;
                 });
-            yield Observable.timer(200, scheduler);
-        };
 
-        subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
-        scheduler.flush();
+                subscriptions.push(directline.activity$.subscribe());
+
+                const scenario = function* (): IterableIterator<Observable<unknown>> {
+                    yield Observable.timer(200, scheduler);
+                };
+
+                subscriptions.push(lazyConcat(scenario()).observeOn(scheduler).subscribe());
 
-        // Should complete successfully through HTTP path
-        expect(completed).toBe(true);
-        expect(activityError).toBeUndefined();
+                scheduler.flush();
 
-        // Should echo back (confirming it went through HTTP, not WebSocket)
-        expect(actual).toContainEqual(invalidActivity);
+                // Inject agent.capabilities event
+                DirectLineMock.injectAgentCapabilities(server);
+
+                // Verify voiceConfiguration is set
+                const config = directline.getVoiceConfiguration();
+                expect(config).toBeDefined();
+                expect(config?.sampleRate).toBe(24000);
+                expect(config?.chunkIntervalMs).toBe(100);
+
+                // Verify capabilitieschanged event fired
+                expect(eventFired).toBe(true);
+            });
+        });
     });
 });
diff --git a/src/directLine.ts b/src/directLine.ts
index f415ebc17..f1af97f68 100644
--- a/src/directLine.ts
+++ b/src/directLine.ts
@@ -381,7 +381,14 @@ export interface DirectLineOptions {
      * If true, every outgoing activity will include deliveryMode: 'stream'.
      * If false/omitted, deliveryMode is not sent (defaults to 'normal' in ABS).
      */
-    streaming?: boolean
+    streaming?: boolean,
+    /**
+     * Enable voice mode for audio streaming.
+     * - If true: voice mode enabled, uses /stream/multimodal endpoint, all traffic sent via WebSocket
+     * - If false: voice mode disabled, uses existing flow as is (/stream endpoint with http post)
+     * - If undefined: auto-detect; enabled only when running inside an iframe that is allowed to use the microphone (e.g. allow="microphone")
+     */
+    enableVoiceMode?: boolean
 }
 
 export interface Services {
@@ -451,6 +458,52 @@ const konsole = {
     }
 }
 
+/**
+ * Checks if the current context is running inside an iframe.
+ */
+const isInIframe = (): boolean => {
+    try {
+        return typeof window !== 'undefined' && window.self !== window.top;
+    } catch (e) {
+        // If accessing window.top throws (cross-origin), we're definitely in an iframe
+        return true;
+    }
+}
+
+/**
+ * Checks whether this document may use the microphone, via Permissions Policy, Feature Policy, or (as a last resort) the Permissions API.
+ */
+const hasIframeMicrophonePermission = async (): Promise<boolean> => {
+    if (typeof window === 'undefined' || typeof document === 'undefined') {
+        return false;
+    }
+
+    try {
+        // Try using the Permissions Policy API (Chrome 88+, Edge 88+)
+        const doc = document as any;
+        if (doc.permissionsPolicy && typeof doc.permissionsPolicy.allowsFeature === 'function') {
+            return doc.permissionsPolicy.allowsFeature('microphone');
+        }
+
+        // Fallback to deprecated Feature Policy API (Chrome 60-87, Edge 79-87)
+        if (doc.featurePolicy && typeof doc.featurePolicy.allowsFeature === 'function') {
+            return doc.featurePolicy.allowsFeature('microphone');
+        }
+
+        // Fallback to Permissions API (broader support: Chrome 43+, Firefox 46+, Safari 16+)
+        if (typeof navigator !== 'undefined' && navigator.permissions) {
+            const result = await navigator.permissions.query({ name: 'microphone' as PermissionName });
+            // 'granted' or 'prompt' means microphone is allowed by iframe policy
+            // 'denied' means either user denied or iframe policy blocks it
+            return result.state !== 'denied';
+        }
+    } catch (e) {
+        // If permissions check fails, assume microphone is not allowed in iframe
+    }
+
+    return false;
+}
+
 export interface IBotConnection {
     connectionStatus$: BehaviorSubject<ConnectionStatus>,
     activity$: Observable<Activity>,
@@ -489,6 +542,19 @@ export class DirectLine implements IBotConnection {
     private tokenRefreshSubscription: Subscription;
     private streaming: boolean;
 
+    // Voice mode: when true, use multimodal stream endpoint and send all traffic via WebSocket
+    private voiceModeEnabled: boolean = false;
+
+    // Voice configuration default constants
+    private static readonly VOICE_SAMPLE_RATE = 24000;
+    private static readonly VOICE_CHUNK_INTERVAL_MS = 100;
+
+    // Voice configuration: set when server supports audio modality, undefined otherwise
+    private voiceConfiguration: { sampleRate: number; chunkIntervalMs: number } | undefined;
+
+    // EventTarget for dispatching capability change events
+    private eventTarget = new EventTarget();
+
     constructor(options: DirectLineOptions & Partial<Services>) {
         this.secret = options.secret;
         this.token = options.secret || options.token;
@@ -498,6 +564,9 @@ export class DirectLine implements IBotConnection {
             this.streaming = options.streaming;
         }
 
+        // Initialize voice mode detection (sets voiceModeEnabled synchronously for non-iframe cases)
+        this.initializeVoiceMode(options.enableVoiceMode);
+
         if (options.conversationStartProperties && options.conversationStartProperties.locale) {
             if (Object.prototype.toString.call(options.conversationStartProperties.locale) === '[object String]') {
                 this.localeOnStartConversation = options.conversationStartProperties.locale;
@@ -786,23 +855,20 @@ export class DirectLine implements IBotConnection {
         if (activity.type === "message" && activity.attachments && activity.attachments.length > 0)
             return this.postMessageWithAttachments(activity);
 
-        // if it is voice activity, send it through webSocket as voice over http is not supported in ABS.
-        // ABS limitation - client to server push is not being processed over web socket for text.
-        // Once it is implemented, we can remove this and send all traffic to the webSocket
-        if (DirectLine.isVoiceEventActivity(activity)) {
+        // When voice mode is enabled, send all remaining traffic (text + voice) via WebSocket; messages with attachments were already routed above
+        if (this.voiceModeEnabled) {
             if (!this.webSocket) {
-                return Observable.throw(new Error('Voice activities require WebSocket to be enabled'), this.services.scheduler);
+                return Observable.throw(new Error('Voice mode requires WebSocket to be enabled'), this.services.scheduler);
             }
             return this.checkConnection(true)
                 .flatMap(_ =>
                     Observable.create((subscriber: Subscriber<any>) => {
-                        const envelope = { activities: [activity] };
                         try {
                             if (!this.webSocketConnection || this.webSocketConnection.readyState !== WebSocket.OPEN) {
                                 throw new Error('WebSocket connection not ready for voice activities');
                             }
-                            this.webSocketConnection.send(JSON.stringify(envelope));
-                            subscriber.next(envelope);
+                            this.webSocketConnection.send(JSON.stringify(activity));
+                            subscriber.next(activity);
                             subscriber.complete();
                         } catch (e) {
                             subscriber.error(e);
@@ -833,22 +899,6 @@ export class DirectLine implements IBotConnection {
         .catch(error => this.catchExpiredToken(error));
     }
 
-    // Until activity protocol changes for multi-modal output are ratified, this method
-    // identifies voice event activities using the given activity example below as payload
-    // to send voice chunks over activity protocol. The activity structure shown serves as
-    // the current solution for transmitting voice data:
-    // { "type": "event", "value": { "voice": { "contentUrl": "<base64 chunk>" } } }
-    private static isVoiceEventActivity(activity: Activity) {
-        return (
-            activity.type === 'event' &&
-            activity?.value &&
-            typeof activity?.value === 'object' &&
-            activity?.value?.voice &&
-            typeof activity?.value?.voice === 'object' &&
-            Object.keys(activity?.value?.voice).length > 0
-        );
-    }
-
     private postMessageWithAttachments(message: Message) {
         const { attachments } = message;
         // We clean the attachments but making sure every attachment has unique name.
@@ -1000,8 +1050,11 @@ export class DirectLine implements IBotConnection {
     // implementation, I decided roll the below, where the logic is more purposeful. - @billba
     private observableWebSocket<T>() {
         return Observable.create((subscriber: Subscriber<T>) => {
-            konsole.log("creating WebSocket", this.streamUrl);
-            this.webSocketConnection = new this.services.WebSocket(this.streamUrl);
+            // Apply multimodal stream URL if voice mode is enabled
+            const streamUrl = this.getMultimodalStreamUrl(this.streamUrl);
+
+            konsole.log("creating WebSocket", streamUrl);
+            this.webSocketConnection = new this.services.WebSocket(streamUrl);
             let sub: Subscription;
             let closed: boolean;
 
@@ -1040,7 +1093,13 @@ export class DirectLine implements IBotConnection {
                 closed = true;
             }
 
-            this.webSocketConnection.onmessage = message => message.data && subscriber.next(JSON.parse(message.data));
+            this.webSocketConnection.onmessage = message => {
+                if (message.data) {
+                    const data = JSON.parse(message.data);
+                    this.handleIncomingActivity(data);
+                    subscriber.next(data);
+                }
+            };
 
             // This is the 'unsubscribe' method, which is called when this observable is disposed.
             // When the WebSocket closes itself, we throw an error, and this function is eventually called.
@@ -1122,6 +1181,46 @@ export class DirectLine implements IBotConnection {
         this.userIdOnStartConversation = userId;
     }
 
+    /**
+     * Returns voice configuration from server's agent.capabilities event, or undefined if server doesn't support audio.
+     * Use this to configure microphone settings. Only available after server confirms audio support.
+     */
+    getVoiceConfiguration() {
+        return this.voiceConfiguration;
+    }
+
+    /**
+     * Returns true if multimodal experience is requested (client-side), false otherwise.
+     * Does NOT guarantee server supports voice - use getVoiceConfiguration() for that.
+     * Use this to determine if activities are sent via WebSocket (no echo-back wait needed).
+     */
+    getIsVoiceModeEnabled(): boolean {
+        return !!this.voiceModeEnabled;
+    }
+
+    /**
+     * Returns the current WebSocket stream URL (with /multimodal suffix if voice mode is enabled).
+     * Useful for debugging and testing.
+     */
+    getStreamUrl(): string | undefined {
+        return this.streamUrl ? this.getMultimodalStreamUrl(this.streamUrl) : undefined;
+    }
+
+     /**
+     * Adds an event listener for adapter events (e.g., 'capabilitieschanged').
+     * Used by consumer to subscribe to capability updates.
+     */
+    addEventListener(type: string, listener: EventListenerOrEventListenerObject, options?: boolean | AddEventListenerOptions): void {
+        this.eventTarget.addEventListener(type, listener, options);
+    }
+
+    /**
+     * Removes an event listener for adapter events.
+     */
+    removeEventListener(type: string, listener: EventListenerOrEventListenerObject, options?: boolean | EventListenerOptions): void {
+        this.eventTarget.removeEventListener(type, listener, options);
+    }
+
     private parseToken(token: string) {
         try {
             const { user } = jwtDecode<JwtPayload>(token) as { [key: string]: any; };
@@ -1133,4 +1232,77 @@ export class DirectLine implements IBotConnection {
         }
     }
 
+    /**
+     * Initialize voice mode.
+     * - Explicit true/false: set synchronously (no race condition)
+     * - Undefined: auto-detect for iframes with microphone permission (async, best effort)
+     */
+    private initializeVoiceMode(enableVoiceMode?: boolean): void {
+        // Explicit true: enable synchronously
+        if (enableVoiceMode === true) {
+            this.voiceModeEnabled = true;
+            this.eventTarget.dispatchEvent(new Event('capabilitieschanged'));
+            return;
+        }
+
+        // Explicit false: already false by default, nothing to do
+        if (enableVoiceMode === false) {
+            return;
+        }
+
+        // Undefined: auto-detect for iframe with microphone permission (async)
+        if (isInIframe()) {
+            hasIframeMicrophonePermission().then(hasMic => {
+                if (hasMic) {
+                    this.voiceModeEnabled = true;
+                    this.eventTarget.dispatchEvent(new Event('capabilitieschanged'));
+                }
+            });
+        }
+    }
+
+    /**
+     * Handles incoming activity group to check for agent.capabilities event.
+     * Sets voice configuration if server supports audio modality.
+     */
+    private handleIncomingActivity(data: any): void {
+        const activities = data?.activities;
+        if (!Array.isArray(activities)) {
+            return;
+        }
+
+        for (const activity of activities) {
+            if (activity?.type === 'event' && activity?.name === 'agent.capabilities') {
+                const modalities = activity?.value?.modalities;
+                const hasAudio = modalities?.audio &&
+                    typeof modalities.audio === 'object' &&
+                    Object.keys(modalities.audio).length > 0;
+
+                if (hasAudio) {
+                    this.voiceConfiguration = {
+                        sampleRate: DirectLine.VOICE_SAMPLE_RATE,
+                        chunkIntervalMs: DirectLine.VOICE_CHUNK_INTERVAL_MS
+                    };
+                    this.eventTarget.dispatchEvent(new Event('capabilitieschanged'));
+                }
+            }
+        }
+    }
+
+    /**
+     * Modifies stream URL for voice mode: replaces /stream with /stream/multimodal
+     */
+    private getMultimodalStreamUrl(url: string): string {
+        if (!this.voiceModeEnabled || !url) {
+            return url;
+        }
+
+        // Replace /stream endpoint with /stream/multimodal (if not already multimodal)
+        if (!url.includes('/stream/multimodal')) {
+            return url.replace('/stream', '/stream/multimodal');
+        }
+
+        return url;
+    }
+
 }

From 68840451e1955b8ad6f001ed5322ea00ec33453a Mon Sep 17 00:00:00 2001
From: Pranav Joshi <pranavjoshi001@gmail.com>
Date: Tue, 7 Apr 2026 14:56:42 +0000
Subject: [PATCH 5/8] revert get stream url function

---
 src/directLine.ts | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/directLine.ts b/src/directLine.ts
index f1af97f68..d81fdc504 100644
--- a/src/directLine.ts
+++ b/src/directLine.ts
@@ -1198,14 +1198,6 @@ export class DirectLine implements IBotConnection {
         return !!this.voiceModeEnabled;
     }
 
-    /**
-     * Returns the current WebSocket stream URL (with /multimodal suffix if voice mode is enabled).
-     * Useful for debugging and testing.
-     */
-    getStreamUrl(): string | undefined {
-        return this.streamUrl ? this.getMultimodalStreamUrl(this.streamUrl) : undefined;
-    }
-
      /**
      * Adds an event listener for adapter events (e.g., 'capabilitieschanged').
      * Used by consumer to subscribe to capability updates.

From c6ae8b49c4dfbcda01bb72363bdc4d1fe5e153cc Mon Sep 17 00:00:00 2001
From: Pranav Joshi <pranavjoshi001@gmail.com>
Date: Tue, 7 Apr 2026 15:00:19 +0000
Subject: [PATCH 6/8] revert mockVoiceActivity

---
 src/directLine.mock.ts | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/src/directLine.mock.ts b/src/directLine.mock.ts
index 8713a1a5f..fff353bde 100644
--- a/src/directLine.mock.ts
+++ b/src/directLine.mock.ts
@@ -11,18 +11,6 @@ const notImplemented = (): never => { throw new Error('not implemented') };
 
 export const mockActivity = (text: string): DirectLineExport.Activity => ({ type: 'message', from: { id: 'sender' }, text });
 
-export const mockVoiceActivity = (): DirectLineExport.Activity => ({
-    type: 'event',
-    from: { id: 'sender' },
-    name: 'voiceEvent',
-    value: {
-        voice: {
-            contentType: 'type',
-            contentUrl: 'base64AudioChunk'
-        }
-    }
-});
-
 // MOCK DirectLine Server (shared state used by Observable.ajax and WebSocket mocks)
 
 interface ActivitySocket {

From 85c9aeb149846ade12e94d1196cb660036c8082c Mon Sep 17 00:00:00 2001
From: Pranav Joshi <pranavjoshi001@gmail.com>
Date: Tue, 28 Apr 2026 13:12:46 +0000
Subject: [PATCH 7/8] remove audio length check

---
 src/directLine.ts | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/directLine.ts b/src/directLine.ts
index d81fdc504..6a92362ec 100644
--- a/src/directLine.ts
+++ b/src/directLine.ts
@@ -1267,8 +1267,7 @@ export class DirectLine implements IBotConnection {
             if (activity?.type === 'event' && activity?.name === 'agent.capabilities') {
                 const modalities = activity?.value?.modalities;
                 const hasAudio = modalities?.audio &&
-                    typeof modalities.audio === 'object' &&
-                    Object.keys(modalities.audio).length > 0;
+                    typeof modalities.audio === 'object';
 
                 if (hasAudio) {
                     this.voiceConfiguration = {

From 0f186a0c15427354979ed38d855e845de6732e42 Mon Sep 17 00:00:00 2001
From: Pranav Joshi <pranavjoshi001@gmail.com>
Date: Wed, 29 Apr 2026 06:58:02 +0000
Subject: [PATCH 8/8] review comment fixed

---
 src/directLine.ts       | 67 +++++++++--------------------------------
 src/iframeMicrophone.ts | 57 +++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 52 deletions(-)
 create mode 100644 src/iframeMicrophone.ts

diff --git a/src/directLine.ts b/src/directLine.ts
index 6a92362ec..7c387b90e 100644
--- a/src/directLine.ts
+++ b/src/directLine.ts
@@ -37,6 +37,8 @@ import { objectExpression } from '@babel/types';
 import { DirectLineStreaming } from './directLineStreaming';
 export { DirectLineStreaming };
 
+import { hasIframeMicrophonePermission, isInIframe } from './iframeMicrophone';
+
 const DIRECT_LINE_VERSION = 'DirectLine/3.0';
 
 declare var process: {
@@ -458,52 +460,6 @@ const konsole = {
     }
 }
 
-/**
- * Checks if the current context is running inside an iframe.
- */
-const isInIframe = (): boolean => {
-    try {
-        return typeof window !== 'undefined' && window.self !== window.top;
-    } catch (e) {
-        // If accessing window.top throws (cross-origin), we're definitely in an iframe
-        return true;
-    }
-}
-
-/**
- * Checks if the iframe has microphone permission via the allow attribute.
- */
-const hasIframeMicrophonePermission = async (): Promise<boolean> => {
-    if (typeof window === 'undefined' || typeof document === 'undefined') {
-        return false;
-    }
-
-    try {
-        // Try using the Permissions Policy API (Chrome 88+, Edge 88+)
-        const doc = document as any;
-        if (doc.permissionsPolicy && typeof doc.permissionsPolicy.allowsFeature === 'function') {
-            return doc.permissionsPolicy.allowsFeature('microphone');
-        }
-
-        // Fallback to deprecated Feature Policy API (Chrome 60-87, Edge 79-87)
-        if (doc.featurePolicy && typeof doc.featurePolicy.allowsFeature === 'function') {
-            return doc.featurePolicy.allowsFeature('microphone');
-        }
-
-        // Fallback to Permissions API (broader support: Chrome 43+, Firefox 46+, Safari 16+)
-        if (typeof navigator !== 'undefined' && navigator.permissions) {
-            const result = await navigator.permissions.query({ name: 'microphone' as PermissionName });
-            // 'granted' or 'prompt' means microphone is allowed by iframe policy
-            // 'denied' means either user denied or iframe policy blocks it
-            return result.state !== 'denied';
-        }
-    } catch (e) {
-        // If permissions check fails, assume microphone is not allowed in iframe
-    }
-
-    return false;
-}
-
 export interface IBotConnection {
     connectionStatus$: BehaviorSubject<ConnectionStatus>,
     activity$: Observable<Activity>,
@@ -1281,19 +1237,26 @@ export class DirectLine implements IBotConnection {
     }
 
     /**
-     * Modifies stream URL for voice mode: replaces /stream with /stream/multimodal
+     * Modifies stream URL for voice mode: appends /multimodal to the /stream path
+     * while preserving query string, hash, and other URL parts.
      */
     private getMultimodalStreamUrl(url: string): string {
         if (!this.voiceModeEnabled || !url) {
             return url;
         }
 
-        // Replace /stream endpoint with /stream/multimodal (if not already multimodal)
-        if (!url.includes('/stream/multimodal')) {
-            return url.replace('/stream', '/stream/multimodal');
-        }
+        try {
+            const parsed = new URL(url);
+
+            if (parsed.pathname.endsWith('/stream')) {
+                parsed.pathname += '/multimodal';
+            }
 
-        return url;
+            return parsed.toString();
+        } catch {
+            // If URL parsing fails (malformed URL), return as-is
+            return url;
+        }
     }
 
 }
diff --git a/src/iframeMicrophone.ts b/src/iframeMicrophone.ts
new file mode 100644
index 000000000..bd73eeb0f
--- /dev/null
+++ b/src/iframeMicrophone.ts
@@ -0,0 +1,57 @@
+/**
+ * Utilities for detecting iframe context and microphone permission.
+ *
+ * Used by DirectLine to auto-detect whether voice mode should be enabled
+ * when running inside an iframe with `allow="microphone"` attribute.
+ */
+
+/**
+ * Checks if the current context is running inside an iframe.
+ */
+export const isInIframe = (): boolean => {
+    try {
+        return typeof window !== 'undefined' && window.self !== window.top;
+    } catch (e) {
+        // If accessing window.top throws (cross-origin), we're definitely in an iframe
+        return true;
+    }
+};
+
+/**
+ * Checks if the iframe has microphone permission via the allow attribute.
+ *
+ * Tries (in order):
+ *   1. Permissions Policy API (Chrome 88+, Edge 88+)
+ *   2. Feature Policy API (Chrome 60-87, Edge 79-87) — deprecated
+ *   3. Permissions API (Chrome 43+, Firefox 46+, Safari 16+)
+ */
+export const hasIframeMicrophonePermission = async (): Promise<boolean> => {
+    if (typeof window === 'undefined' || typeof document === 'undefined') {
+        return false;
+    }
+
+    try {
+        // Try using the Permissions Policy API (Chrome 88+, Edge 88+)
+        const doc = document as any;
+        if (doc.permissionsPolicy && typeof doc.permissionsPolicy.allowsFeature === 'function') {
+            return doc.permissionsPolicy.allowsFeature('microphone');
+        }
+
+        // Fallback to deprecated Feature Policy API (Chrome 60-87, Edge 79-87)
+        if (doc.featurePolicy && typeof doc.featurePolicy.allowsFeature === 'function') {
+            return doc.featurePolicy.allowsFeature('microphone');
+        }
+
+        // Fallback to Permissions API (broader support: Chrome 43+, Firefox 46+, Safari 16+)
+        if (typeof navigator !== 'undefined' && navigator.permissions) {
+            const result = await navigator.permissions.query({ name: 'microphone' as PermissionName });
+            // 'granted' or 'prompt' means microphone is allowed by iframe policy
+            // 'denied' means either user denied or iframe policy blocks it
+            return result.state !== 'denied';
+        }
+    } catch (e) {
+        // If permissions check fails, assume microphone is not allowed in iframe
+    }
+
+    return false;
+};