From de9e5b06a0fdab469e551dbe1c5a21679b17fa77 Mon Sep 17 00:00:00 2001 From: Christopher Sims Date: Tue, 4 Mar 2025 19:42:47 -0700 Subject: [PATCH 1/8] Big data layer refactor WIP, will detail changes when they are more final. But its working! Kinda. --- app/build.gradle.kts | 3 - .../java/com/critt/interp/ui/MainViewModel.kt | 108 ++++++----- buildSrc/src/main/kotlin/Dependencies.kt | 5 - data/build.gradle.kts | 2 + data/src/main/AndroidManifest.xml | 3 +- .../main/java/com/critt/data/AudioSource.kt | 34 +++- .../com/critt/data/TranslationRepository.kt | 22 +-- .../java/com/critt/data/TranslationSource.kt | 167 ++++++++++++------ .../java/com/critt/data/di/SocketsModule.kt | 12 +- 9 files changed, 212 insertions(+), 144 deletions(-) diff --git a/app/build.gradle.kts b/app/build.gradle.kts index 9496811..4f62234 100644 --- a/app/build.gradle.kts +++ b/app/build.gradle.kts @@ -70,9 +70,6 @@ dependencies { kapt(Dependencies.Hilt.ANDROID_COMPILER) implementation(Dependencies.Hilt.ANDROID) - //lifecycle (currently using this for Flow.asLiveData()) - implementation(Dependencies.Lifecycle.LIVEDATA) - //material theme implementation(Dependencies.Material.MATERIAL) diff --git a/app/src/main/java/com/critt/interp/ui/MainViewModel.kt b/app/src/main/java/com/critt/interp/ui/MainViewModel.kt index 432c5b2..dace875 100644 --- a/app/src/main/java/com/critt/interp/ui/MainViewModel.kt +++ b/app/src/main/java/com/critt/interp/ui/MainViewModel.kt @@ -4,17 +4,17 @@ import androidx.lifecycle.ViewModel import androidx.lifecycle.viewModelScope import com.critt.data.ApiResult import com.critt.data.AudioSource +import com.critt.data.LanguageRepository import com.critt.domain.LanguageData import com.critt.data.SessionManager import com.critt.domain.Speaker -import com.critt.data.TranslationRepository +import com.critt.data.TranslationSource import com.critt.domain.SpeechData import com.critt.domain.defaultLangObject import com.critt.domain.defaultLangSubject import dagger.hilt.android.lifecycle.HiltViewModel import kotlinx.coroutines.CancellationException import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.Job import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.asStateFlow import kotlinx.coroutines.flow.update @@ -24,9 +24,10 @@ import javax.inject.Inject @HiltViewModel class MainViewModel @Inject constructor( - private val translationRepo: TranslationRepository, private val sessionManager: SessionManager, - private val audioSource: AudioSource + private val audioSource: AudioSource, + private val translationSource: TranslationSource, + private val languageRepo: LanguageRepository ) : ViewModel() { // Supported languages state private val _supportedLanguages = @@ -55,12 +56,51 @@ class MainViewModel @Inject constructor( private val _streamingState = MutableStateFlow(AudioStreamingState.Idle) val streamingState = _streamingState.asStateFlow() + // onAudioData lambdas + private var onAudioDataSubject: (ByteArray?) -> Unit = {} + private var onAudioDataObject: (ByteArray?) -> Unit = {} + init { viewModelScope.launch(context = Dispatchers.IO) { - translationRepo.getSupportedLanguages().collect { res -> + languageRepo.getSupportedLanguages().collect { res -> _supportedLanguages.update { res } } } + + viewModelScope.launch { + translationSource.speechDataSubject.collect { + it?.let { + onTextData(it, _translationSubject, builderSubject) + } + } + } + + viewModelScope.launch { + translationSource.speechDataObject.collect { + it?.let { + onTextData(it, _translationObject, builderObject) + } + } + } + } + + override fun onCleared() { + super.onCleared() + // Clean up sockets + translationSource.disconnectAllSockets() + } + + private fun onTextData( + textData: SpeechData, + translationState: MutableStateFlow, + builder: StringBuilder + ) { + if (textData.isFinal) { + builder.append(textData.data) + translationState.update { builder.toString() } + } else { + translationState.update { builder.toString() + textData.data } + } } fun updateSpeaker(subjectSpeaking: Boolean) { @@ -101,16 +141,11 @@ class MainViewModel @Inject constructor( // open socket connection on the "subject" namespace viewModelScope.launch(context = Dispatchers.IO) { try { - translationRepo.connectSubject( + onAudioDataSubject = translationSource.connect( languageSubject = langSubject.value.language, - languageObject = langObject.value.language - ).collect { res -> - onTextData( - textData = res, - translationState = _translationSubject, - builder = builderSubject - ) - } + languageObject = langObject.value.language, + Speaker.SUBJECT + ) } catch (e: Exception) { stopRecordingAndStreaming() _streamingState.update { @@ -124,16 +159,11 @@ class MainViewModel @Inject constructor( // open socket connection on the "object" namespace viewModelScope.launch(context = Dispatchers.IO) { try { - translationRepo.connectObject( + onAudioDataObject = translationSource.connect( languageSubject = langSubject.value.language, - languageObject = langObject.value.language - ).collect { res -> - onTextData( - textData = res, - translationState = _translationObject, - builder = builderObject - ) - } + languageObject = langObject.value.language, + Speaker.OBJECT + ) } catch (e: Exception) { stopRecordingAndStreaming() _streamingState.update { @@ -172,33 +202,19 @@ class MainViewModel @Inject constructor( private fun stopRecordingAndStreaming() { audioSource.stopRecording() - translationRepo.disconnect() + translationSource.disconnectAllSockets() } fun onAudioData(data: ByteArray) { when (speakerCurr.value) { - Speaker.SUBJECT -> translationRepo.onData( - subjectData = data, - objectData = ByteArray(2048) - ) - - Speaker.OBJECT -> translationRepo.onData( - subjectData = ByteArray(2048), - objectData = data - ) - } - } - - private fun onTextData( - textData: SpeechData, - translationState: MutableStateFlow, - builder: StringBuilder - ) { - if (textData.isFinal) { - builder.append(textData.data) - translationState.update { builder.toString() } - } else { - translationState.update { builder.toString() + textData.data } + Speaker.SUBJECT -> { + onAudioDataSubject(data) + onAudioDataObject(ByteArray(2048)) + } + Speaker.OBJECT -> { + onAudioDataSubject(ByteArray(2048)) + onAudioDataObject(data) + } } } } diff --git a/buildSrc/src/main/kotlin/Dependencies.kt b/buildSrc/src/main/kotlin/Dependencies.kt index 7c338f1..cd96c47 100644 --- a/buildSrc/src/main/kotlin/Dependencies.kt +++ b/buildSrc/src/main/kotlin/Dependencies.kt @@ -49,11 +49,6 @@ object Dependencies { const val MATERIAL = "com.google.android.material:material:1.12.0" } - object Lifecycle { - //(currently using this for Flow.asLiveData()) - const val LIVEDATA = "androidx.lifecycle:lifecycle-livedata-ktx:2.8.7" - } - object Testing { const val JUNIT = "junit:junit:4.13.2" const val JUNIT_JUPITER = "org.junit.jupiter:junit-jupiter:5.8.1" diff --git a/data/build.gradle.kts b/data/build.gradle.kts index 546bfc4..125fac5 100644 --- a/data/build.gradle.kts +++ b/data/build.gradle.kts @@ -66,6 +66,8 @@ dependencies { kapt(Dependencies.Hilt.ANDROID_COMPILER) implementation(Dependencies.Hilt.ANDROID) + implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.6.2") + //testing testImplementation(Dependencies.Testing.JUNIT) testImplementation(Dependencies.Testing.JUNIT_JUPITER) diff --git a/data/src/main/AndroidManifest.xml b/data/src/main/AndroidManifest.xml index a5918e6..45e27ee 100644 --- a/data/src/main/AndroidManifest.xml +++ b/data/src/main/AndroidManifest.xml @@ -1,4 +1,5 @@ - + + \ No newline at end of file diff --git a/data/src/main/java/com/critt/data/AudioSource.kt b/data/src/main/java/com/critt/data/AudioSource.kt index da20cdb..1ce2b1f 100644 --- a/data/src/main/java/com/critt/data/AudioSource.kt +++ b/data/src/main/java/com/critt/data/AudioSource.kt @@ -4,6 +4,12 @@ import android.annotation.SuppressLint import android.media.AudioFormat import android.media.AudioRecord import android.media.MediaRecorder +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.Job +import kotlinx.coroutines.cancel +import kotlinx.coroutines.isActive +import kotlinx.coroutines.launch class AudioSource( private val sampleRate: Int = 16000, @@ -11,31 +17,43 @@ class AudioSource( ) { private var recorder: AudioRecord? = null + private val scope = CoroutineScope(Dispatchers.IO) // Use a background thread @SuppressLint("MissingPermission") fun startRecording(onData: (ByteArray) -> Unit) { + // Initialize the AudioRecord object recorder = AudioRecord( MediaRecorder.AudioSource.MIC, sampleRate, - AudioFormat.CHANNEL_CONFIGURATION_MONO, + AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, bufferSize ) + // Start recording recorder?.startRecording() - while (recorder != null) { - val buffer = ByteArray(bufferSize) - recorder?.read(buffer, 0, buffer.size) - onData(buffer) + val buffer = ByteArray(bufferSize) + while (recorder?.recordingState == AudioRecord.RECORDSTATE_RECORDING) { + val bytesRead = recorder?.read(buffer, 0, buffer.size) ?: -1 + if (bytesRead > 0) { + onData(buffer) + } } - - stopRecording() } fun stopRecording() { + // Stop the recording coroutine + + + // Stop and release the AudioRecord object recorder?.stop() recorder?.release() - recorder = null + } + + fun cleanup() { + // Clean up resources + stopRecording() + scope.cancel() // Cancel the coroutine scope } } \ No newline at end of file diff --git a/data/src/main/java/com/critt/data/TranslationRepository.kt b/data/src/main/java/com/critt/data/TranslationRepository.kt index c817c37..a5e9b59 100644 --- a/data/src/main/java/com/critt/data/TranslationRepository.kt +++ b/data/src/main/java/com/critt/data/TranslationRepository.kt @@ -4,29 +4,9 @@ import com.critt.domain.LanguageData import kotlinx.coroutines.flow.Flow import javax.inject.Inject -class TranslationRepository @Inject constructor( - private val translationSource: TranslationSource, +class LanguageRepository @Inject constructor( private val languageSource: LanguageSource ) { - /** returns Flow - * this represents the translation of the subject language to the object language - */ - fun connectObject(languageSubject: String, languageObject: String) = - translationSource.connectObject(languageSubject, languageObject) - - /** returns Flow - * this represents the translation of the object language to the subject language - */ - fun connectSubject(languageSubject: String, languageObject: String) = - translationSource.connectSubject(languageSubject, languageObject) - - fun onData(subjectData: ByteArray?, objectData: ByteArray?) = - translationSource.onData(subjectData, objectData) - - fun disconnect() { - translationSource.disconnect() - } - /** returns Flow>?> * this represents all of the languages we support for voice transcription and translation */ diff --git a/data/src/main/java/com/critt/data/TranslationSource.kt b/data/src/main/java/com/critt/data/TranslationSource.kt index 500c289..9300314 100644 --- a/data/src/main/java/com/critt/data/TranslationSource.kt +++ b/data/src/main/java/com/critt/data/TranslationSource.kt @@ -1,82 +1,139 @@ package com.critt.data -import com.critt.data.BuildConfig +import com.critt.domain.Speaker import com.critt.domain.SpeechData import com.google.gson.Gson +import com.google.gson.GsonBuilder import io.socket.client.IO import io.socket.client.Socket -import kotlinx.coroutines.channels.awaitClose -import kotlinx.coroutines.flow.Flow -import kotlinx.coroutines.flow.callbackFlow -import timber.log.Timber +import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.coroutines.flow.asStateFlow +import kotlinx.coroutines.flow.update +import java.net.URI +import java.util.concurrent.ConcurrentHashMap class TranslationSource( private val sessionManager: SessionManager ) { - private var socketSubject: Socket? = null - private var socketObject: Socket? = null + private val _speechDataSubject = MutableStateFlow(null) + val speechDataSubject = _speechDataSubject.asStateFlow() - fun connectObject(languageSubject: String, languageObject: String): Flow { - /** - * https://socketio.github.io/socket.io-client-java/initialization.html#auth - * */ - val options: IO.Options = IO.Options.builder().setAuth(mapOf("token" to sessionManager.getAuthToken())).build() - socketObject = IO.socket(BuildConfig.API_BASE_URL + "object", options) - return initSocket(socketObject, getTranscriptionConfig(languageObject, languageSubject)) + private val _speechDataObject = MutableStateFlow(null) + val speechDataObject = _speechDataObject.asStateFlow() + + private val openSockets = ConcurrentHashMap() + + fun connect( + languageSubject: String, + languageObject: String, + speaker: Speaker + ): (ByteArray?) -> Unit { + val socket = initSocket(speaker) + val gson: Gson = GsonBuilder().create() + + socket.on(EVENT_TEXT_DATA) { args -> + println("$EVENT_TEXT_DATA received") + + try { + val speechData = gson.fromJson(args[0].toString(), SpeechData::class.java) + println("$EVENT_TEXT_DATA: $speechData") + + when (speaker) { + Speaker.SUBJECT -> _speechDataSubject.update { speechData } + Speaker.OBJECT -> _speechDataObject.update { speechData } + } + } catch (e: Exception) { + // 7. Handle parsing errors + println("Error parsing speechData: ${e.message}") + } + } + + socket.connect() + + socket.emit( + EVENT_START_GOOGLE_CLOUD_STREAM, + gson.toJson(getTranscriptionConfig(languageSubject, languageObject, speaker)) + ) + + return { + onAudioData(speaker, it) + } } - fun connectSubject(languageSubject: String, languageObject: String): Flow { - /** - * https://socketio.github.io/socket.io-client-java/initialization.html#auth - * */ - val options: IO.Options = IO.Options.builder().setAuth(mapOf("token" to sessionManager.getAuthToken())).build() - socketSubject = IO.socket(BuildConfig.API_BASE_URL + "subject", options) - return initSocket(socketSubject, getTranscriptionConfig(languageSubject, languageObject)) + private fun onAudioData(speaker: Speaker, audioData: ByteArray?) { + openSockets[speaker]?.emit(EVENT_AUDIO_DATA, audioData) ?: run { + println("onAudioData():: not emitting data: ${speaker.name} socket is null") + } } - fun onData(subjectData: ByteArray?, objectData: ByteArray?) { - socketSubject?.emit("binaryAudioData", subjectData) - socketObject?.emit("binaryAudioData", objectData) + private fun disconnectSocket(socket: Socket) { + listOf( + { socket.emit(EVENT_END_GOOGLE_CLOUD_STREAM) }, + { socket.off(EVENT_TEXT_DATA) }, + { socket.disconnect() } + ).forEach { action -> + try { + action() + } catch (e: Exception) { + println("Error during socket cleanup: ${e.message}") + } + } + println("Socket disconnected") } - private fun initSocket(socket: Socket?, config: Map): Flow = callbackFlow { // Cold Flow - Timber.d("initSocket() :: config = $config") + fun disconnectAllSockets() { + for (socket in openSockets.values) { + disconnectSocket(socket) + } - socket?.connect() - socket?.emit("startGoogleCloudStream", Gson().toJson(config)) + openSockets.clear() + } - socket?.on("speechData") { args -> - Gson().fromJson(args[0].toString(), SpeechData::class.java).let { - println("Object speechData: $it") - trySend(it) - } + private fun initSocket(speaker: Speaker): Socket { + val socketChannel = when (speaker) { + Speaker.SUBJECT -> CHANNEL_SUBJECT + Speaker.OBJECT -> CHANNEL_OBJECT } - awaitClose { - socket?.emit("endGoogleCloudStream") - socket?.off("speechData") - socket?.disconnect() + val socketUri = URI.create("${BuildConfig.API_BASE_URL}$socketChannel") + + /** + * https://socketio.github.io/socket.io-client-java/initialization.html#auth + * */ + val socketOptions: IO.Options = IO.Options.builder() + .setAuth(mapOf(AUTH_TOKEN_KEY to sessionManager.getAuthToken())) + .build() + + val socket: Socket = IO.socket(socketUri, socketOptions) + + // clean up and remove existing socket if it exists for the speaker + openSockets[speaker]?.let { + disconnectSocket(it) + openSockets.remove(speaker) } + + openSockets[speaker] = socket + return socket } - fun disconnect() { - socketObject?.emit("endGoogleCloudStream") - socketObject?.off("speechData") - socketObject?.disconnect() + companion object { + private const val EVENT_TEXT_DATA = "speechData" + private const val EVENT_AUDIO_DATA = "binaryAudioData" + private const val EVENT_START_GOOGLE_CLOUD_STREAM = "startGoogleCloudStream" + private const val EVENT_END_GOOGLE_CLOUD_STREAM = "endGoogleCloudStream" + private const val CHANNEL_SUBJECT = "subject" + private const val CHANNEL_OBJECT = "object" + private const val AUTH_TOKEN_KEY = "token" - socketSubject?.emit("endGoogleCloudStream") - socketSubject?.off("speechData") - socketSubject?.disconnect() + private fun getTranscriptionConfig(languageSubject: String, languageObject: String, speaker: Speaker) = + mapOf( + "audio" to mapOf( + "encoding" to "LINEAR16", + "sampleRateHertz" to 16000, + "languageCode" to if (speaker == Speaker.SUBJECT) languageSubject else languageObject + ), + "interimResults" to true, + "targetLanguage" to if (speaker == Speaker.SUBJECT) languageObject else languageSubject + ) } - - private fun getTranscriptionConfig(languageSubject: String, languageObject: String) = - mapOf( - "audio" to mapOf( - "encoding" to "LINEAR16", - "sampleRateHertz" to 16000, - "languageCode" to languageSubject - ), - "interimResults" to true, - "targetLanguage" to languageObject - ) } \ No newline at end of file diff --git a/data/src/main/java/com/critt/data/di/SocketsModule.kt b/data/src/main/java/com/critt/data/di/SocketsModule.kt index 7b703ca..ff65faf 100644 --- a/data/src/main/java/com/critt/data/di/SocketsModule.kt +++ b/data/src/main/java/com/critt/data/di/SocketsModule.kt @@ -5,14 +5,16 @@ import com.critt.data.TranslationSource import dagger.Module import dagger.Provides import dagger.hilt.InstallIn -import dagger.hilt.components.SingletonComponent -import javax.inject.Singleton +import dagger.hilt.android.components.ViewModelComponent +import dagger.hilt.android.scopes.ViewModelScoped @Module -@InstallIn(SingletonComponent::class) +@InstallIn(ViewModelComponent::class) object SocketsModule { @Provides - @Singleton - fun provideSocketsService(sessionManager: SessionManager): TranslationSource = + @ViewModelScoped + fun provideSocketsService( + sessionManager: SessionManager + ): TranslationSource = TranslationSource(sessionManager) } \ No newline at end of file From 95cc09b480cccb3d69c5c02c35805d82ae266922 Mon Sep 17 00:00:00 2001 From: Christopher Sims Date: Wed, 5 Mar 2025 12:56:17 -0700 Subject: [PATCH 2/8] Some state hoisting and comments --- .../java/com/critt/interp/ui/MainActivity.kt | 47 +++++++++---------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/app/src/main/java/com/critt/interp/ui/MainActivity.kt b/app/src/main/java/com/critt/interp/ui/MainActivity.kt index 828bdce..bb109e3 100644 --- a/app/src/main/java/com/critt/interp/ui/MainActivity.kt +++ b/app/src/main/java/com/critt/interp/ui/MainActivity.kt @@ -27,8 +27,6 @@ import androidx.compose.ui.unit.dp import androidx.core.content.ContextCompat import androidx.lifecycle.viewmodel.compose.viewModel import com.critt.data.ApiResult -import com.critt.domain.LanguageData -import com.critt.domain.Speaker import com.critt.interp.ui.components.DropdownSelector import com.critt.ui_common.theme.InterpTheme import dagger.hilt.android.AndroidEntryPoint @@ -48,14 +46,13 @@ class MainActivity : ComponentActivity() { @Composable fun TranslationGroup( translationText: String = "", - speaker: Speaker, - langSubject: LanguageData, - langObject: LanguageData, + textFromLanguage: String, + textToLanguage: String, interactionSource: MutableInteractionSource? = null ) { InterpTheme { Column(modifier = Modifier.padding(16.dp)) { - LanguageDisplay(speaker, langSubject, langObject) + LanguageDisplay(textFromLanguage, textToLanguage) Spacer(modifier = Modifier.height(12.dp)) OutputCard(translationText, interactionSource) } @@ -63,24 +60,18 @@ class MainActivity : ComponentActivity() { } @Composable - fun LanguageDisplay(speaker: Speaker, langSubject: LanguageData, langObject: LanguageData) { + fun LanguageDisplay(textFromLanguage: String, textToLanguage: String) { InterpTheme { Row { Text( - when (speaker) { - Speaker.SUBJECT -> langObject.name - Speaker.OBJECT -> langSubject.name - }, + text = textFromLanguage, color = MaterialTheme.colorScheme.onBackground ) Spacer(modifier = Modifier.width(8.dp)) Text("⇌", color = MaterialTheme.colorScheme.onBackground) Spacer(modifier = Modifier.width(8.dp)) Text( - when (speaker) { - Speaker.SUBJECT -> langSubject.name - Speaker.OBJECT -> langObject.name - }, + text = textToLanguage, color = MaterialTheme.colorScheme.onBackground ) } @@ -141,22 +132,28 @@ class MainActivity : ComponentActivity() { val streamingState by viewModel.streamingState.collectAsState() /** Local state -> LaunchedEffect -> ViewModel StateFlow */ - // Language selector for Subject speaker + // Language selector for Subject Speaker (the user) var uiSelectedLangSubject by remember { mutableStateOf(langSubject) } LaunchedEffect(uiSelectedLangSubject) { viewModel.selectLangSubject(uiSelectedLangSubject) } - // Language selector for Object speaker + + // Language selector for Object Speaker (to whom the user is talking) var uiSelectedLangObject by remember { mutableStateOf(langObject) } LaunchedEffect(uiSelectedLangObject) { viewModel.selectLangObject(uiSelectedLangObject) } - // Interaction source (pressing down on the lower OutputCard) for current Speaker + + // Interaction source applied to the lower OutputCard + // Allows the OutputCard facing the user to be used as an "I'm talking, now they are talking" toggle + // The user holds it down while talking + // This is how we know which language to translate the audio data into val interactionSource = remember { MutableInteractionSource() } val isPressed by interactionSource.collectIsPressedAsState() LaunchedEffect(isPressed) { viewModel.updateSpeaker(subjectSpeaking = isPressed) } + // Streaming state toggle (FAB) var toggleSideEffect by remember { mutableStateOf<(() -> Unit)?>(null) } LaunchedEffect(toggleSideEffect, hasRecordingPermission) { @@ -180,18 +177,16 @@ class MainActivity : ComponentActivity() { .rotate(180F) ) { TranslationGroup( - translationText = translationObject, - speaker = Speaker.OBJECT, - langSubject = langSubject, - langObject = langObject, + translationText = translationSubject, + textFromLanguage = langSubject.name, + textToLanguage = langObject.name, ) } Box(modifier = Modifier.weight(.40F)) { TranslationGroup( - translationText = translationSubject, - speaker = Speaker.SUBJECT, - langSubject = langSubject, - langObject = langObject, + translationText = translationObject, + textFromLanguage = langObject.name, + textToLanguage = langSubject.name, interactionSource = interactionSource ) } From 5a29a0023ce452febb9cceaa4d4bf79f1f69945b Mon Sep 17 00:00:00 2001 From: Christopher Sims Date: Wed, 5 Mar 2025 13:05:51 -0700 Subject: [PATCH 3/8] println -> Timber. Removed unused SessionManager dependency from MainViewModel. Comments. --- .../main/java/com/critt/interp/ui/MainViewModel.kt | 3 --- data/src/main/java/com/critt/data/AudioSource.kt | 6 +++--- .../main/java/com/critt/data/TranslationSource.kt | 13 +++++++------ 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/app/src/main/java/com/critt/interp/ui/MainViewModel.kt b/app/src/main/java/com/critt/interp/ui/MainViewModel.kt index dace875..33f2529 100644 --- a/app/src/main/java/com/critt/interp/ui/MainViewModel.kt +++ b/app/src/main/java/com/critt/interp/ui/MainViewModel.kt @@ -6,7 +6,6 @@ import com.critt.data.ApiResult import com.critt.data.AudioSource import com.critt.data.LanguageRepository import com.critt.domain.LanguageData -import com.critt.data.SessionManager import com.critt.domain.Speaker import com.critt.data.TranslationSource import com.critt.domain.SpeechData @@ -19,12 +18,10 @@ import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.asStateFlow import kotlinx.coroutines.flow.update import kotlinx.coroutines.launch -import timber.log.Timber import javax.inject.Inject @HiltViewModel class MainViewModel @Inject constructor( - private val sessionManager: SessionManager, private val audioSource: AudioSource, private val translationSource: TranslationSource, private val languageRepo: LanguageRepository diff --git a/data/src/main/java/com/critt/data/AudioSource.kt b/data/src/main/java/com/critt/data/AudioSource.kt index 1ce2b1f..91c521e 100644 --- a/data/src/main/java/com/critt/data/AudioSource.kt +++ b/data/src/main/java/com/critt/data/AudioSource.kt @@ -6,11 +6,11 @@ import android.media.AudioRecord import android.media.MediaRecorder import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.Job import kotlinx.coroutines.cancel -import kotlinx.coroutines.isActive -import kotlinx.coroutines.launch + +// TODO: This whole thing is a hacky POC +// TODO: Redesign it from the ground up class AudioSource( private val sampleRate: Int = 16000, private val bufferSize: Int = 2048 diff --git a/data/src/main/java/com/critt/data/TranslationSource.kt b/data/src/main/java/com/critt/data/TranslationSource.kt index 9300314..de2771f 100644 --- a/data/src/main/java/com/critt/data/TranslationSource.kt +++ b/data/src/main/java/com/critt/data/TranslationSource.kt @@ -9,6 +9,7 @@ import io.socket.client.Socket import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.asStateFlow import kotlinx.coroutines.flow.update +import timber.log.Timber import java.net.URI import java.util.concurrent.ConcurrentHashMap @@ -32,11 +33,11 @@ class TranslationSource( val gson: Gson = GsonBuilder().create() socket.on(EVENT_TEXT_DATA) { args -> - println("$EVENT_TEXT_DATA received") + Timber.d("$EVENT_TEXT_DATA received") try { val speechData = gson.fromJson(args[0].toString(), SpeechData::class.java) - println("$EVENT_TEXT_DATA: $speechData") + Timber.d("$EVENT_TEXT_DATA: $speechData") when (speaker) { Speaker.SUBJECT -> _speechDataSubject.update { speechData } @@ -44,7 +45,7 @@ class TranslationSource( } } catch (e: Exception) { // 7. Handle parsing errors - println("Error parsing speechData: ${e.message}") + Timber.e("Error parsing speechData: ${e.message}") } } @@ -62,7 +63,7 @@ class TranslationSource( private fun onAudioData(speaker: Speaker, audioData: ByteArray?) { openSockets[speaker]?.emit(EVENT_AUDIO_DATA, audioData) ?: run { - println("onAudioData():: not emitting data: ${speaker.name} socket is null") + Timber.i("onAudioData():: not emitting data: ${speaker.name} socket is null") } } @@ -75,10 +76,10 @@ class TranslationSource( try { action() } catch (e: Exception) { - println("Error during socket cleanup: ${e.message}") + Timber.e("Error during socket cleanup: ${e.message}") } } - println("Socket disconnected") + Timber.d("Socket disconnected") } fun disconnectAllSockets() { From eae2e73a8c1c535225d06ad07c8f8bd479cdec65 Mon Sep 17 00:00:00 2001 From: Christopher Sims Date: Wed, 5 Mar 2025 18:00:28 -0700 Subject: [PATCH 4/8] Complete redesign of AudioSource. Added capitalization to StringBuilders, with locale. --- .../java/com/critt/interp/ui/MainViewModel.kt | 42 ++++-- .../main/java/com/critt/data/AudioSource.kt | 137 +++++++++++++----- 2 files changed, 130 insertions(+), 49 deletions(-) diff --git a/app/src/main/java/com/critt/interp/ui/MainViewModel.kt b/app/src/main/java/com/critt/interp/ui/MainViewModel.kt index 33f2529..74cb8c5 100644 --- a/app/src/main/java/com/critt/interp/ui/MainViewModel.kt +++ b/app/src/main/java/com/critt/interp/ui/MainViewModel.kt @@ -12,12 +12,13 @@ import com.critt.domain.SpeechData import com.critt.domain.defaultLangObject import com.critt.domain.defaultLangSubject import dagger.hilt.android.lifecycle.HiltViewModel -import kotlinx.coroutines.CancellationException import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.asStateFlow import kotlinx.coroutines.flow.update import kotlinx.coroutines.launch +import timber.log.Timber +import java.util.Locale import javax.inject.Inject @HiltViewModel @@ -67,7 +68,12 @@ class MainViewModel @Inject constructor( viewModelScope.launch { translationSource.speechDataSubject.collect { it?.let { - onTextData(it, _translationSubject, builderSubject) + onTextData( + it, + _translationSubject, + builderSubject, + Locale(langObject.value.language) + ) } } } @@ -75,7 +81,12 @@ class MainViewModel @Inject constructor( viewModelScope.launch { translationSource.speechDataObject.collect { it?.let { - onTextData(it, _translationObject, builderObject) + onTextData( + it, + _translationObject, + builderObject, + Locale(langSubject.value.language) + ) } } } @@ -90,11 +101,15 @@ class MainViewModel @Inject constructor( private fun onTextData( textData: SpeechData, translationState: MutableStateFlow, - builder: StringBuilder + builder: StringBuilder, + locale: Locale ) { if (textData.isFinal) { builder.append(textData.data) - translationState.update { builder.toString() } + translationState.update { + builder.toString() + .replaceFirstChar { if (it.isLowerCase()) it.titlecase(locale) else it.toString() } + } } else { translationState.update { builder.toString() + textData.data } } @@ -144,6 +159,7 @@ class MainViewModel @Inject constructor( Speaker.SUBJECT ) } catch (e: Exception) { + Timber.e(e.message) stopRecordingAndStreaming() _streamingState.update { AudioStreamingState.Error( @@ -162,6 +178,7 @@ class MainViewModel @Inject constructor( Speaker.OBJECT ) } catch (e: Exception) { + Timber.e(e.message) stopRecordingAndStreaming() _streamingState.update { AudioStreamingState.Error( @@ -174,24 +191,16 @@ class MainViewModel @Inject constructor( // start recording viewModelScope.launch(context = Dispatchers.IO) { try { - audioSource.startRecording(onData = ::onAudioData) + audioSource.startRecording(viewModelScope, onData = ::onAudioData) } catch (e: Exception) { + Timber.e(e.message) + stopRecordingAndStreaming() _streamingState.update { AudioStreamingState.Error( e.message ?: "audioSource job: unknown error" ) } } - }.invokeOnCompletion { cause -> - stopRecordingAndStreaming() - when (cause) { - null, is CancellationException -> _streamingState.update { AudioStreamingState.Idle } - else -> _streamingState.update { - AudioStreamingState.Error( - cause.message ?: "audioSource job completion handler: unknown error" - ) - } - } } _streamingState.update { AudioStreamingState.Streaming } @@ -208,6 +217,7 @@ class MainViewModel @Inject constructor( onAudioDataSubject(data) onAudioDataObject(ByteArray(2048)) } + Speaker.OBJECT -> { onAudioDataSubject(ByteArray(2048)) onAudioDataObject(data) diff --git a/data/src/main/java/com/critt/data/AudioSource.kt b/data/src/main/java/com/critt/data/AudioSource.kt index 91c521e..5a54d4c 100644 --- a/data/src/main/java/com/critt/data/AudioSource.kt +++ b/data/src/main/java/com/critt/data/AudioSource.kt @@ -1,59 +1,130 @@ package com.critt.data +import android.Manifest import android.annotation.SuppressLint +import android.content.Context +import android.content.pm.PackageManager import android.media.AudioFormat import android.media.AudioRecord import android.media.MediaRecorder +import android.os.Process +import androidx.core.content.ContextCompat import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.cancel +import kotlinx.coroutines.Job +import kotlinx.coroutines.NonCancellable +import kotlinx.coroutines.isActive +import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext - -// TODO: This whole thing is a hacky POC -// TODO: Redesign it from the ground up +/** + * A robust and idiomatic audio source for recording audio from the microphone. + * + * This class handles audio recording, permission checks, and provides a more + * structured way to manage the recording process. + * + * @param context The application context. + * @param sampleRate The sample rate for audio recording (default: 16000 Hz). + * @param channelConfig The channel configuration (default: mono). + * @param audioFormat The audio format (default: PCM 16-bit). + */ class AudioSource( private val sampleRate: Int = 16000, - private val bufferSize: Int = 2048 + private val channelConfig: Int = AudioFormat.CHANNEL_IN_MONO, + private val audioFormat: Int = AudioFormat.ENCODING_PCM_16BIT ) { private var recorder: AudioRecord? = null - private val scope = CoroutineScope(Dispatchers.IO) // Use a background thread + private var recordingJob: Job? = null + /** + * Starts audio recording. + * + * @param onData Callback function to receive audio data as a ByteArray. + * @throws IllegalStateException If the audio recording is already started. + */ @SuppressLint("MissingPermission") - fun startRecording(onData: (ByteArray) -> Unit) { - // Initialize the AudioRecord object + fun startRecording(scope: CoroutineScope, onData: (ByteArray) -> Unit) { + if (recordingJob?.isActive == true) { + throw IllegalStateException("Audio recording is already started.") + } + + val minBufferSize = AudioRecord.getMinBufferSize(sampleRate, channelConfig, audioFormat) + if (minBufferSize == AudioRecord.ERROR_BAD_VALUE || minBufferSize == AudioRecord.ERROR) { + throw IllegalStateException("Invalid audio parameters.") + } + + val bufferSize = minBufferSize.coerceAtLeast(2048) // Ensure a minimum buffer size + recorder = AudioRecord( - MediaRecorder.AudioSource.MIC, - sampleRate, - AudioFormat.CHANNEL_IN_MONO, - AudioFormat.ENCODING_PCM_16BIT, - bufferSize - ) - - // Start recording - recorder?.startRecording() - - val buffer = ByteArray(bufferSize) - while (recorder?.recordingState == AudioRecord.RECORDSTATE_RECORDING) { - val bytesRead = recorder?.read(buffer, 0, buffer.size) ?: -1 - if (bytesRead > 0) { - onData(buffer) + /* audioSource = */ MediaRecorder.AudioSource.MIC, + /* sampleRateInHz = */ sampleRate, + /* channelConfig = */ channelConfig, + /* audioFormat = */ audioFormat, + /* bufferSizeInBytes = */ bufferSize + ).apply { + if (state != AudioRecord.STATE_INITIALIZED) { + throw IllegalStateException("AudioRecord initialization failed.") } } - } - fun stopRecording() { - // Stop the recording coroutine + recordingJob = scope.launch(Dispatchers.IO) { + Process.setThreadPriority(Process.THREAD_PRIORITY_URGENT_AUDIO) + recorder?.startRecording() + + val buffer = ByteArray(bufferSize) + while (isActive) { + val bytesRead = recorder?.read(buffer, 0, buffer.size) ?: -1 + when { + bytesRead > 0 -> { + val data = buffer.copyOf(bytesRead) + onData(data) + } + bytesRead == AudioRecord.ERROR_INVALID_OPERATION -> { + println("Error: Invalid operation during audio read.") + break + } - // Stop and release the AudioRecord object - recorder?.stop() - recorder?.release() + bytesRead == AudioRecord.ERROR_BAD_VALUE -> { + println("Error: Bad value during audio read.") + break + } + } + } + + stopRecordingInternal() + } + } + + /** + * Stops audio recording. + */ + fun stopRecording() { + recordingJob?.cancel() } - fun cleanup() { - // Clean up resources - stopRecording() - scope.cancel() // Cancel the coroutine scope + private suspend fun stopRecordingInternal() { + withContext(NonCancellable) { + var exception: Throwable? = null + recorder?.apply { + if (recordingState == AudioRecord.RECORDSTATE_RECORDING) { + runCatching { stop() } + .exceptionOrNull() + ?.also { + exception = it + println("Error stopping AudioRecord: ${it.message}") + } + } + runCatching { release() } + .exceptionOrNull() + ?.also { + exception = exception ?: it + println("Error releasing AudioRecord: ${it.message}") + } + } + recorder = null + exception?.let { throw it } + } } } \ No newline at end of file From 1c87aeb60935e06010d95979f3027226dd977d32 Mon Sep 17 00:00:00 2001 From: Christopher Sims Date: Wed, 5 Mar 2025 18:01:05 -0700 Subject: [PATCH 5/8] Complete redesign of AudioSource. Added capitalization to StringBuilders, with locale. --- data/src/main/java/com/critt/data/AudioSource.kt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/data/src/main/java/com/critt/data/AudioSource.kt b/data/src/main/java/com/critt/data/AudioSource.kt index 5a54d4c..4dc783a 100644 --- a/data/src/main/java/com/critt/data/AudioSource.kt +++ b/data/src/main/java/com/critt/data/AudioSource.kt @@ -1,14 +1,10 @@ package com.critt.data -import android.Manifest import android.annotation.SuppressLint -import android.content.Context -import android.content.pm.PackageManager import android.media.AudioFormat import android.media.AudioRecord import android.media.MediaRecorder import android.os.Process -import androidx.core.content.ContextCompat import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Job From 98f31e9a7d06799a720e550a575838e5fc377851 Mon Sep 17 00:00:00 2001 From: Christopher Sims Date: Wed, 5 Mar 2025 18:04:01 -0700 Subject: [PATCH 6/8] Removed uses-permission tag from library manifest --- data/src/main/AndroidManifest.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/data/src/main/AndroidManifest.xml b/data/src/main/AndroidManifest.xml index 45e27ee..a5918e6 100644 --- a/data/src/main/AndroidManifest.xml +++ b/data/src/main/AndroidManifest.xml @@ -1,5 +1,4 @@ - - + \ No newline at end of file From 00323dd7b472b6c60764f491d089d67e4b457eb8 Mon Sep 17 00:00:00 2001 From: Christopher Sims Date: Fri, 7 Mar 2025 10:45:57 -0700 Subject: [PATCH 7/8] Added javadocs to MainViewModel --- .../java/com/critt/interp/ui/MainViewModel.kt | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/app/src/main/java/com/critt/interp/ui/MainViewModel.kt b/app/src/main/java/com/critt/interp/ui/MainViewModel.kt index 74cb8c5..4e9408b 100644 --- a/app/src/main/java/com/critt/interp/ui/MainViewModel.kt +++ b/app/src/main/java/com/critt/interp/ui/MainViewModel.kt @@ -58,6 +58,12 @@ class MainViewModel @Inject constructor( private var onAudioDataSubject: (ByteArray?) -> Unit = {} private var onAudioDataObject: (ByteArray?) -> Unit = {} + /** + * Initializes the [MainViewModel]. + * + * It fetches the supported languages from the [LanguageRepository] and + * sets up coroutines to collect translated text from the [TranslationSource]. + */ init { viewModelScope.launch(context = Dispatchers.IO) { languageRepo.getSupportedLanguages().collect { res -> @@ -92,12 +98,25 @@ class MainViewModel @Inject constructor( } } + /** + * Cleans up resources when the ViewModel is cleared. + * + * It disconnects all sockets from the [translationSource]. + */ override fun onCleared() { super.onCleared() // Clean up sockets translationSource.disconnectAllSockets() } + /** + * Processes incoming text data and updates the corresponding translation state. + * + * @param textData The incoming speech data. + * @param translationState The state flow to update with the translated text. + * @param builder The StringBuilder to accumulate the translated text. + * @param locale The locale to use for capitalization. + */ private fun onTextData( textData: SpeechData, translationState: MutableStateFlow, @@ -115,6 +134,11 @@ class MainViewModel @Inject constructor( } } + /** + * Updates the current speaker. + * + * @param subjectSpeaking True if the SUBJECT speaker is speaking, false otherwise. + */ fun updateSpeaker(subjectSpeaking: Boolean) { when (subjectSpeaking) { true -> _speakerCurr.update { Speaker.SUBJECT } @@ -122,14 +146,29 @@ class MainViewModel @Inject constructor( } } + /** + * Selects a language for the SUBJECT speaker. + * + * @param lang The selected language. + */ fun selectLangSubject(lang: LanguageData) { _langSubject.update { lang } } + /** + * Selects a language for the OBJECT speaker. + * + * @param lang The selected language. + */ fun selectLangObject(lang: LanguageData) { _langObject.update { lang } } + /** + * Toggles the audio streaming state. + * + * It starts or stops recording and streaming based on the current state. + */ fun toggleStreaming() { when (_streamingState.value) { is AudioStreamingState.Idle -> startRecordingAndStreaming() @@ -145,6 +184,12 @@ class MainViewModel @Inject constructor( } } + /** + * Starts recording and streaming audio. + * + * It clears the translation output StringBuilders, opens socket connections, + * and starts the audio recording process. + */ private fun startRecordingAndStreaming() { // clear the translation output StringBuilders builderSubject.clear() @@ -206,11 +251,40 @@ class MainViewModel @Inject constructor( _streamingState.update { AudioStreamingState.Streaming } } + /** + * Stops the audio recording and disconnects all sockets. + * + * This function is responsible for cleaning up resources related to audio + * streaming and translation. It stops the audio recording process and + * disconnects all active socket connections. + * + * This function should be called when audio streaming is no longer needed, + * or when an error occurs that requires stopping the streaming process. + */ private fun stopRecordingAndStreaming() { audioSource.stopRecording() translationSource.disconnectAllSockets() } + /** + * Processes incoming audio data and routes it to the appropriate speaker's + * audio data handler. + * + * This function is called when new audio data is available from the + * [AudioSource]. It determines the current speaker based on the + * [speakerCurr] state and then calls the corresponding `onAudioData` + * lambda function to handle the data. + * + * If the current speaker is [Speaker.SUBJECT], the data is sent to + * [onAudioDataSubject], and an empty byte array is sent to + * [onAudioDataObject]. + * + * If the current speaker is [Speaker.OBJECT], the data is sent to + * [onAudioDataObject], and an empty byte array is sent to + * [onAudioDataSubject]. + * + * @param data The incoming audio data as a ByteArray. + */ fun onAudioData(data: ByteArray) { when (speakerCurr.value) { Speaker.SUBJECT -> { @@ -226,8 +300,37 @@ class MainViewModel @Inject constructor( } } +/** + * Represents the different states of audio streaming. + * + * This sealed class is used to represent the possible states of the audio + * streaming process within the application. It can be in one of three states: + * [Idle], [Streaming], or [Error]. + */ sealed class AudioStreamingState { + /** + * Represents the idle state of audio streaming. + * + * This state indicates that audio streaming is not currently active. + * No audio is being recorded or streamed. + */ object Idle : AudioStreamingState() + + /** + * Represents the streaming state of audio streaming. + * + * This state indicates that audio is currently being recorded and streamed. + */ object Streaming : AudioStreamingState() + + /** + * Represents the error state of audio streaming. + * + * This state indicates that an error has occurred during the audio + * streaming process. The [message] property contains a description of + * the error. + * + * @property message A description of the error that occurred. + */ data class Error(val message: String) : AudioStreamingState() } \ No newline at end of file From ce19dff5a5ac5bf61ad4649dd08bd8ab4c436cc3 Mon Sep 17 00:00:00 2001 From: Christopher Sims Date: Fri, 7 Mar 2025 10:55:40 -0700 Subject: [PATCH 8/8] Added javadocs to TranslationSource and replaced some literals with constants. Removed unused dep. --- data/build.gradle.kts | 2 - .../java/com/critt/data/TranslationSource.kt | 73 ++++++++++++++++++- 2 files changed, 71 insertions(+), 4 deletions(-) diff --git a/data/build.gradle.kts b/data/build.gradle.kts index 125fac5..546bfc4 100644 --- a/data/build.gradle.kts +++ b/data/build.gradle.kts @@ -66,8 +66,6 @@ dependencies { kapt(Dependencies.Hilt.ANDROID_COMPILER) implementation(Dependencies.Hilt.ANDROID) - implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.6.2") - //testing testImplementation(Dependencies.Testing.JUNIT) testImplementation(Dependencies.Testing.JUNIT_JUPITER) diff --git a/data/src/main/java/com/critt/data/TranslationSource.kt b/data/src/main/java/com/critt/data/TranslationSource.kt index de2771f..1786086 100644 --- a/data/src/main/java/com/critt/data/TranslationSource.kt +++ b/data/src/main/java/com/critt/data/TranslationSource.kt @@ -16,14 +16,37 @@ import java.util.concurrent.ConcurrentHashMap class TranslationSource( private val sessionManager: SessionManager ) { + + // StateFLow for SpeechData: SUBJECT private val _speechDataSubject = MutableStateFlow(null) val speechDataSubject = _speechDataSubject.asStateFlow() + // StateFLow for SpeechData: OBJECT private val _speechDataObject = MutableStateFlow(null) val speechDataObject = _speechDataObject.asStateFlow() + /** + * A map of open sockets, keyed by [Speaker]. + * It stores the active socket connections for each speaker. + */ private val openSockets = ConcurrentHashMap() + /** + * Connects to the translation service and sets up listeners for speech data. + * + * This function initializes a Socket.IO connection for the specified + * speaker and sets up a listener for the [EVENT_TEXT_DATA] event. When + * speech data is received, it's parsed and the corresponding state flow + * ([_speechDataSubject] or [_speechDataObject]) is updated. + * + * It also emits a [EVENT_START_GOOGLE_CLOUD_STREAM] event to start the + * Google Cloud Speech-to-Text stream. + * + * @param languageSubject The language code for the SUBJECT speaker. + * @param languageObject The language code for the OBJECT speaker. + * @param speaker The speaker for whom to establish the connection. + * @return A lambda function that accepts a ByteArray of audio data and sends it to the server. + */ fun connect( languageSubject: String, languageObject: String, @@ -61,12 +84,36 @@ class TranslationSource( } } + /** + * Sends audio data to the server for translation. + * + * This function emits the [EVENT_AUDIO_DATA] event with the provided + * audio data to the server. It uses the socket associated with the + * specified speaker. + * + * If the socket for the speaker is not open, it logs an informational + * message and does nothing. + * + * @param speaker The speaker for whom to send the audio data. + * @param audioData The audio data to send as a ByteArray. + */ private fun onAudioData(speaker: Speaker, audioData: ByteArray?) { openSockets[speaker]?.emit(EVENT_AUDIO_DATA, audioData) ?: run { Timber.i("onAudioData():: not emitting data: ${speaker.name} socket is null") } } + /** + * Disconnects a single socket and cleans up its resources. + * + * This function emits the [EVENT_END_GOOGLE_CLOUD_STREAM] event, removes + * the listener for [EVENT_TEXT_DATA], and disconnects the socket. + * + * It handles potential exceptions during the cleanup process and logs + * any errors that occur. + * + * @param socket The socket to disconnect. + */ private fun disconnectSocket(socket: Socket) { listOf( { socket.emit(EVENT_END_GOOGLE_CLOUD_STREAM) }, @@ -82,6 +129,13 @@ class TranslationSource( Timber.d("Socket disconnected") } + /** + * Disconnects all open sockets and clears the [openSockets] map. + * + * This function iterates through all the sockets in the [openSockets] map + * and calls [disconnectSocket] on each one. It then clears the map to + * release the references to the sockets. + */ fun disconnectAllSockets() { for (socket in openSockets.values) { disconnectSocket(socket) @@ -90,6 +144,19 @@ class TranslationSource( openSockets.clear() } + /** + * Initializes a Socket.IO connection for the specified speaker. + * + * This function creates a new Socket.IO connection to the server, + * authenticates the connection using the session manager's auth token, + * and stores the socket in the [openSockets] map. + * + * If a socket already exists for the speaker, it's disconnected and + * removed from the map before creating the new socket. + * + * @param speaker The speaker for whom to initialize the socket. + * @return The initialized Socket.IO socket. + */ private fun initSocket(speaker: Speaker): Socket { val socketChannel = when (speaker) { Speaker.SUBJECT -> CHANNEL_SUBJECT @@ -125,12 +192,14 @@ class TranslationSource( private const val CHANNEL_SUBJECT = "subject" private const val CHANNEL_OBJECT = "object" private const val AUTH_TOKEN_KEY = "token" + private const val CONFIG_SAMPLE_RATE_HERTZ = 16000 + private const val CONFIG_ENCODING = "LINEAR16" private fun getTranscriptionConfig(languageSubject: String, languageObject: String, speaker: Speaker) = mapOf( "audio" to mapOf( - "encoding" to "LINEAR16", - "sampleRateHertz" to 16000, + "encoding" to CONFIG_ENCODING, + "sampleRateHertz" to CONFIG_SAMPLE_RATE_HERTZ, "languageCode" to if (speaker == Speaker.SUBJECT) languageSubject else languageObject ), "interimResults" to true,