Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Aevatar.Bootstrap/Aevatar.Bootstrap.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Aevatar.Configuration\Aevatar.Configuration.csproj" />
<ProjectReference Include="..\Aevatar.Foundation.VoicePresence\Aevatar.Foundation.VoicePresence.csproj" />
<ProjectReference Include="..\Aevatar.Foundation.Runtime.Hosting\Aevatar.Foundation.Runtime.Hosting.csproj" />
<ProjectReference Include="..\Aevatar.Hosting\Aevatar.Hosting.csproj" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Aevatar.Configuration;
using Aevatar.Foundation.VoicePresence.Hosting;
using Aevatar.Hosting;
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Http;
Expand Down Expand Up @@ -37,6 +38,8 @@ public sealed class AevatarDefaultHostOptions

public static class WebApplicationBuilderExtensions
{
private const string VoicePresenceWebSocketRoute = "/ws/voice/{actorId}";

public static WebApplicationBuilder AddAevatarDefaultHost(
this WebApplicationBuilder builder,
Action<AevatarDefaultHostOptions>? configureHost = null)
Expand Down Expand Up @@ -99,6 +102,7 @@ public static WebApplication UseAevatarDefaultHost(this WebApplication app)
ArgumentNullException.ThrowIfNull(app);

var options = app.Services.GetRequiredService<AevatarDefaultHostOptions>();
var hasVoicePresenceResolver = app.Services.GetService<IVoicePresenceSessionResolver>() != null;
if (options.EnableCors)
app.UseCors(options.CorsPolicyName);

Expand All @@ -113,7 +117,7 @@ public static WebApplication UseAevatarDefaultHost(this WebApplication app)
// a proper 401/403 instead of an unhandled 500 when no auth scheme is configured.
app.UseAuthorization();

if (options.EnableWebSockets)
if (options.EnableWebSockets || hasVoicePresenceResolver)
app.UseWebSockets();

if (options.MapRootHealthEndpoint)
Expand Down Expand Up @@ -155,6 +159,9 @@ public static WebApplication UseAevatarDefaultHost(this WebApplication app)
if (options.AutoMapCapabilities)
app.MapAevatarCapabilities();

if (hasVoicePresenceResolver)
app.MapVoicePresenceWebSocket(VoicePresenceWebSocketRoute);

return app;
}

Expand Down
96 changes: 96 additions & 0 deletions test/Aevatar.Tools.Cli.Tests/AppPlaygroundHostTests.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
using System.Net;
using System.Net.WebSockets;
using System.Text;
using System.Text.Json;
using Aevatar.Tools.Cli.Hosting;
using FluentAssertions;
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Hosting;
using Microsoft.AspNetCore.Http;

namespace Aevatar.Tools.Cli.Tests;

Expand All @@ -10,6 +15,7 @@ public sealed class AppPlaygroundHostTests : IAsyncDisposable
private CancellationTokenSource? _cts;
private Task? _hostTask;
private string? _baseUrl;
private readonly List<WebApplication> _backgroundApps = [];

private async Task<string> StartHostAsync(int backendPort = 59999)
{
Expand All @@ -27,6 +33,19 @@ private async Task<string> StartHostAsync(int backendPort = 59999)

public async ValueTask DisposeAsync()
{
foreach (var app in _backgroundApps)
{
try
{
await app.StopAsync();
}
catch
{
}

await app.DisposeAsync();
}

if (_cts != null)
{
await _cts.CancelAsync();
Expand Down Expand Up @@ -102,6 +121,59 @@ public async Task AuthLogin_ShouldFallbackToIndexHtml()
response.StatusCode.Should().BeOneOf(HttpStatusCode.OK, HttpStatusCode.NotFound);
}

/// <summary>
/// Starts a stub voice backend and the playground host, connects a WebSocket client to the
/// host's <c>/ws/voice/{actorId}</c> route and checks that a binary frame and a text frame
/// reach the backend, that the route value and query string are preserved, and that the
/// backend's binary reply is delivered to the client.
/// </summary>
[Fact]
public async Task VoiceWebSocketProxy_ShouldForwardBinaryAndTextFrames()
{
    // Bound every socket operation: without a deadline a proxy that drops a frame would
    // leave ReceiveAsync pending forever and hang the whole test run instead of failing.
    using var timeout = new CancellationTokenSource(TimeSpan.FromSeconds(30));

    // Capture the token once so the backend handler never touches the (possibly already
    // disposed) CancellationTokenSource after this test method has returned.
    var ct = timeout.Token;

    byte[]? forwardedAudio = null;
    string? forwardedControl = null;
    string? forwardedActorId = null;
    string? forwardedModule = null;

    var backendPort = GetAvailablePort();
    var backend = await StartVoiceBackendAsync(
        backendPort,
        async (context, socket) =>
        {
            forwardedActorId = context.Request.RouteValues["actorId"]?.ToString();
            forwardedModule = context.Request.Query["module"].ToString();

            // First frame: the binary audio payload sent by the client.
            var audioBuffer = new byte[256];
            var audioResult = await socket.ReceiveAsync(audioBuffer, ct);
            forwardedAudio = audioBuffer[..audioResult.Count].ToArray();

            // Second frame: the JSON control message sent as text.
            var controlBuffer = new byte[1024];
            var controlResult = await socket.ReceiveAsync(controlBuffer, ct);
            forwardedControl = Encoding.UTF8.GetString(controlBuffer, 0, controlResult.Count);

            // Reply with a binary frame so the backend-to-client direction is exercised too.
            await socket.SendAsync(new byte[] { 9, 8, 7 }, WebSocketMessageType.Binary, true, ct);
            await socket.CloseAsync(WebSocketCloseStatus.NormalClosure, "done", ct);
        });
    _backgroundApps.Add(backend);

    var baseUrl = await StartHostAsync(backendPort);
    using var client = new ClientWebSocket();
    await client.ConnectAsync(
        new Uri($"{baseUrl.Replace("http://", "ws://", StringComparison.Ordinal)}/ws/voice/agent-1?module=voice_presence_openai"),
        ct);

    await client.SendAsync(new byte[] { 1, 2, 3 }, WebSocketMessageType.Binary, true, ct);
    await client.SendAsync(
        Encoding.UTF8.GetBytes("{\"drainAcknowledged\":{\"responseId\":7,\"playoutSequence\":\"42\"}}"),
        WebSocketMessageType.Text,
        true,
        ct);

    var receiveBuffer = new byte[64];
    var receiveResult = await client.ReceiveAsync(receiveBuffer, ct);

    // The backend only replies after it has read both frames, so the captured values
    // are populated by the time the reply arrives here.
    receiveResult.MessageType.Should().Be(WebSocketMessageType.Binary);
    receiveBuffer[..receiveResult.Count].Should().Equal(9, 8, 7);
    forwardedActorId.Should().Be("agent-1");
    forwardedModule.Should().Be("voice_presence_openai");
    forwardedAudio.Should().Equal(1, 2, 3);
    forwardedControl.Should().Contain("drainAcknowledged");
}

[Fact]
public async Task FallbackToIndex_ShouldServeHtml()
{
Expand All @@ -115,6 +187,30 @@ public async Task FallbackToIndex_ShouldServeHtml()
response.StatusCode.Should().BeOneOf(HttpStatusCode.OK, HttpStatusCode.NotFound);
}

/// <summary>
/// Builds and starts a minimal web application on <c>http://127.0.0.1:{port}</c> that accepts
/// WebSocket connections on <c>/ws/voice/{actorId}</c> and hands each accepted socket to
/// <paramref name="handleSocketAsync"/>. Non-WebSocket requests receive a 400 response.
/// </summary>
/// <param name="port">Loopback TCP port the backend listens on.</param>
/// <param name="handleSocketAsync">Callback invoked with the request context and the accepted socket.</param>
/// <returns>The started application; the caller is responsible for stopping and disposing it.</returns>
/// <exception cref="ArgumentNullException"><paramref name="handleSocketAsync"/> is <c>null</c>.</exception>
private static async Task<WebApplication> StartVoiceBackendAsync(
    int port,
    Func<HttpContext, WebSocket, Task> handleSocketAsync)
{
    ArgumentNullException.ThrowIfNull(handleSocketAsync);

    var builder = WebApplication.CreateBuilder();
    builder.WebHost.UseUrls($"http://127.0.0.1:{port}");
    var app = builder.Build();

    try
    {
        app.UseWebSockets();
        app.Map("/ws/voice/{actorId}", async (HttpContext context) =>
        {
            if (!context.WebSockets.IsWebSocketRequest)
            {
                context.Response.StatusCode = StatusCodes.Status400BadRequest;
                return;
            }

            using var socket = await context.WebSockets.AcceptWebSocketAsync();
            await handleSocketAsync(context, socket);
        });

        await app.StartAsync();
        return app;
    }
    catch
    {
        // Startup failed (e.g. the port was taken in the meantime): the caller never receives
        // the instance, so release it here instead of leaking the host and its services.
        await app.DisposeAsync();
        throw;
    }
}

private static int GetAvailablePort()
{
using var listener = new System.Net.Sockets.TcpListener(IPAddress.Loopback, 0);
Expand Down
5 changes: 3 additions & 2 deletions test/Aevatar.Tools.Cli.Tests/RootCommandFactoryTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public void Create_ShouldReturnRootCommandWithDescription()
[InlineData("config")]
[InlineData("app")]
[InlineData("chat")]
[InlineData("voice")]
public void Create_ShouldRegisterSubcommand(string commandName)
{
var root = RootCommandFactory.Create();
Expand All @@ -27,10 +28,10 @@ public void Create_ShouldRegisterSubcommand(string commandName)
}

[Fact]
public void Create_ShouldRegisterExactly6Subcommands()
public void Create_ShouldRegisterExactly7Subcommands()
{
var root = RootCommandFactory.Create();

root.Subcommands.Should().HaveCount(6);
root.Subcommands.Should().HaveCount(7);
}
}
33 changes: 33 additions & 0 deletions test/Aevatar.Tools.Cli.Tests/VoiceCommandTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using Aevatar.Tools.Cli.Commands;
using Aevatar.Tools.Cli.Hosting;
using FluentAssertions;

namespace Aevatar.Tools.Cli.Tests;

/// <summary>
/// Tests for the <c>voice</c> CLI command definition and for the voice UI URL builder.
/// </summary>
public sealed class VoiceCommandTests
{
    /// <summary>The command must expose each of the five documented option aliases.</summary>
    [Fact]
    public void Create_ShouldExposeExpectedOptions()
    {
        var exposedOptions = VoiceCommand.Create().Options;

        // Checked in the same order as the options are declared on the command.
        foreach (var alias in new[] { "--agent", "--port", "--url", "--provider", "--voice" })
        {
            exposedOptions.Should().Contain(candidate => candidate.Aliases.Contains(alias));
        }
    }

    /// <summary>The agent name containing a space must be percent-encoded in the query string.</summary>
    [Fact]
    public void BuildUiUrl_ShouldEncodeVoiceParameters()
    {
        const string expectedUrl =
            "http://localhost:6688/voice?agent=robot%20dog&sampleRateHz=24000&provider=openai&voice=alloy";

        var actualUrl = VoiceCommandHandler.BuildUiUrl("http://localhost:6688", "robot dog", "openai", "alloy", 24000);

        actualUrl.Should().Be(expectedUrl);
    }
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
using Aevatar.Bootstrap;
using Aevatar.Bootstrap.Hosting;
using Aevatar.Foundation.VoicePresence.Hosting;
using FluentAssertions;
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Routing;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;

Expand Down Expand Up @@ -34,6 +36,33 @@ public void AddAevatarDefaultHost_WhenConnectorBootstrapDisabled_ShouldNotRegist
descriptor.ImplementationType == typeof(ConnectorBootstrapHostedService));
}

/// <summary>
/// With an <see cref="IVoicePresenceSessionResolver"/> registered, the default host pipeline
/// must expose the <c>/ws/voice/{actorId}</c> route.
/// </summary>
[Fact]
public void UseAevatarDefaultHost_WhenVoicePresenceResolverRegistered_ShouldMapVoiceWebSocketRoute()
{
    var hostBuilder = CreateBuilder();
    hostBuilder.AddAevatarDefaultHost();
    hostBuilder.Services.AddSingleton<IVoicePresenceSessionResolver, NullVoicePresenceSessionResolver>();
    using var host = hostBuilder.Build();

    host.UseAevatarDefaultHost();

    var mappedPatterns = GetRoutePatterns(host);
    mappedPatterns.Should().Contain("/ws/voice/{actorId}");
}

/// <summary>
/// Without a registered voice presence resolver, the default host pipeline must not expose
/// the <c>/ws/voice/{actorId}</c> route.
/// </summary>
[Fact]
public void UseAevatarDefaultHost_WhenVoicePresenceResolverMissing_ShouldNotMapVoiceWebSocketRoute()
{
    var hostBuilder = CreateBuilder();
    hostBuilder.AddAevatarDefaultHost();
    using var host = hostBuilder.Build();

    host.UseAevatarDefaultHost();

    var mappedPatterns = GetRoutePatterns(host);
    mappedPatterns.Should().NotContain("/ws/voice/{actorId}");
}

private static WebApplicationBuilder CreateBuilder()
{
return WebApplication.CreateBuilder(new WebApplicationOptions
Expand All @@ -42,4 +71,20 @@ private static WebApplicationBuilder CreateBuilder()
ApplicationName = typeof(AevatarDefaultHostExtensionsTests).Assembly.FullName,
});
}

/// <summary>
/// Lazily enumerates the raw route pattern text of every <see cref="RouteEndpoint"/> found in
/// the application's endpoint data sources; endpoints of other kinds are ignored.
/// </summary>
private static IEnumerable<string?> GetRoutePatterns(WebApplication app)
{
    IEndpointRouteBuilder routeBuilder = app;

    return from dataSource in routeBuilder.DataSources
           from routeEndpoint in dataSource.Endpoints.OfType<RouteEndpoint>()
           select routeEndpoint.RoutePattern.RawText;
}

/// <summary>
/// Stub resolver used only so that an <see cref="IVoicePresenceSessionResolver"/> is present in
/// the container; it ignores its arguments and always completes with a <c>null</c> session.
/// </summary>
private sealed class NullVoicePresenceSessionResolver : IVoicePresenceSessionResolver
{
    public Task<VoicePresenceSession?> ResolveAsync(VoicePresenceSessionRequest request, CancellationToken ct = default)
    {
        // Both inputs are intentionally unused.
        _ = request;
        _ = ct;

        VoicePresenceSession? noSession = null;
        return Task.FromResult(noSession);
    }
}
}
34 changes: 34 additions & 0 deletions tools/Aevatar.Tools.Cli/Commands/Voice/VoiceCommand.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
using System.CommandLine;
using Aevatar.Tools.Cli.Hosting;

namespace Aevatar.Tools.Cli.Commands;

/// <summary>
/// Factory for the <c>voice</c> CLI command, which forwards its parsed options to
/// <see cref="VoiceCommandHandler.RunAsync"/>.
/// </summary>
internal static class VoiceCommand
{
    /// <summary>
    /// Creates the <c>voice</c> command with a required <c>--agent</c> option, a <c>--port</c>
    /// option defaulting to 6688, and optional <c>--url</c>, <c>--provider</c> and
    /// <c>--voice</c> options.
    /// </summary>
    /// <returns>The configured command, ready to be added to a root command.</returns>
    public static Command Create()
    {
        var agent = new Option<string>("--agent", "Voice-enabled actor ID.") { IsRequired = true };
        var port = new Option<int>("--port", () => 6688, "App port for local UI and health check.");
        var url = new Option<string?>("--url", "Override workflow API base URL for this invocation.");
        var provider = new Option<string?>("--provider", "Preferred voice provider alias (openai|minicpm).");
        var voice = new Option<string?>("--voice", "Preferred voice label shown in the browser UI.");

        var voiceCommand = new Command("voice", "Open the browser-based voice UI for a voice-enabled GAgent.");

        // Registration order matches the handler's parameter order below.
        foreach (var option in new Option[] { agent, port, url, provider, voice })
        {
            voiceCommand.AddOption(option);
        }

        voiceCommand.SetHandler(
            (string agentId, int appPort, string? baseUrl, string? providerAlias, string? voiceLabel) =>
                VoiceCommandHandler.RunAsync(agentId, appPort, baseUrl, providerAlias, voiceLabel, CancellationToken.None),
            agent,
            port,
            url,
            provider,
            voice);

        return voiceCommand;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
const TARGET_FRAME_SAMPLES = 480; // 20 ms at 24 kHz

/**
 * Audio worklet processor that converts the first channel of the first input to
 * signed 16-bit samples and posts them to the main thread in fixed-size frames
 * of TARGET_FRAME_SAMPLES samples each.
 */
class MicEncoder extends AudioWorkletProcessor {
  constructor() {
    super();
    // Staging buffer for the frame currently being assembled.
    this.buffer = new Int16Array(TARGET_FRAME_SAMPLES);
    // Number of samples already written into the staging buffer.
    this.fill = 0;
  }

  /**
   * Appends the incoming samples to the staging buffer and emits every frame
   * that becomes full.
   *
   * @param {Float32Array[][]} inputs Inputs passed by the audio rendering thread.
   * @returns {boolean} Always true, so the processor stays alive.
   */
  process(inputs) {
    const samples = inputs[0]?.[0];

    if (samples) {
      for (const raw of samples) {
        // Clamp to [-1, 1], then scale asymmetrically to the int16 range.
        const clamped = Math.max(-1, Math.min(1, raw));
        const scale = clamped < 0 ? 0x8000 : 0x7fff;
        this.buffer[this.fill] = clamped * scale;
        this.fill += 1;

        if (this.fill === TARGET_FRAME_SAMPLES) {
          // Copy the frame so its backing buffer can be transferred while the
          // staging buffer is reused for the next frame.
          const completed = new Int16Array(this.buffer);
          this.port.postMessage(completed.buffer, [completed.buffer]);
          this.fill = 0;
        }
      }
    }

    return true;
  }
}

registerProcessor('mic-encoder', MicEncoder);
Loading
Loading