diff --git a/app/build.gradle.kts b/app/build.gradle.kts index 9496811..4f62234 100644 --- a/app/build.gradle.kts +++ b/app/build.gradle.kts @@ -70,9 +70,6 @@ dependencies { kapt(Dependencies.Hilt.ANDROID_COMPILER) implementation(Dependencies.Hilt.ANDROID) - //lifecycle (currently using this for Flow.asLiveData()) - implementation(Dependencies.Lifecycle.LIVEDATA) - //material theme implementation(Dependencies.Material.MATERIAL) diff --git a/app/src/main/java/com/critt/interp/ui/MainActivity.kt b/app/src/main/java/com/critt/interp/ui/MainActivity.kt index 828bdce..bb109e3 100644 --- a/app/src/main/java/com/critt/interp/ui/MainActivity.kt +++ b/app/src/main/java/com/critt/interp/ui/MainActivity.kt @@ -27,8 +27,6 @@ import androidx.compose.ui.unit.dp import androidx.core.content.ContextCompat import androidx.lifecycle.viewmodel.compose.viewModel import com.critt.data.ApiResult -import com.critt.domain.LanguageData -import com.critt.domain.Speaker import com.critt.interp.ui.components.DropdownSelector import com.critt.ui_common.theme.InterpTheme import dagger.hilt.android.AndroidEntryPoint @@ -48,14 +46,13 @@ class MainActivity : ComponentActivity() { @Composable fun TranslationGroup( translationText: String = "", - speaker: Speaker, - langSubject: LanguageData, - langObject: LanguageData, + textFromLanguage: String, + textToLanguage: String, interactionSource: MutableInteractionSource? = null ) { InterpTheme { Column(modifier = Modifier.padding(16.dp)) { - LanguageDisplay(speaker, langSubject, langObject) + LanguageDisplay(textFromLanguage, textToLanguage) Spacer(modifier = Modifier.height(12.dp)) OutputCard(translationText, interactionSource) } @@ -63,24 +60,18 @@ class MainActivity : ComponentActivity() { } @Composable - fun LanguageDisplay(speaker: Speaker, langSubject: LanguageData, langObject: LanguageData) { + fun LanguageDisplay(textFromLanguage: String, textToLanguage: String) { InterpTheme { Row { Text( - when (speaker) { - Speaker.SUBJECT -> langObject.name - Speaker.OBJECT -> langSubject.name - }, + text = textFromLanguage, color = MaterialTheme.colorScheme.onBackground ) Spacer(modifier = Modifier.width(8.dp)) Text("⇌", color = MaterialTheme.colorScheme.onBackground) Spacer(modifier = Modifier.width(8.dp)) Text( - when (speaker) { - Speaker.SUBJECT -> langSubject.name - Speaker.OBJECT -> langObject.name - }, + text = textToLanguage, color = MaterialTheme.colorScheme.onBackground ) } @@ -141,22 +132,28 @@ class MainActivity : ComponentActivity() { val streamingState by viewModel.streamingState.collectAsState() /** Local state -> LaunchedEffect -> ViewModel StateFlow */ - // Language selector for Subject speaker + // Language selector for Subject Speaker (the user) var uiSelectedLangSubject by remember { mutableStateOf(langSubject) } LaunchedEffect(uiSelectedLangSubject) { viewModel.selectLangSubject(uiSelectedLangSubject) } - // Language selector for Object speaker + + // Language selector for Object Speaker (to whom the user is talking) var uiSelectedLangObject by remember { mutableStateOf(langObject) } LaunchedEffect(uiSelectedLangObject) { viewModel.selectLangObject(uiSelectedLangObject) } - // Interaction source (pressing down on the lower OutputCard) for current Speaker + + // Interaction source applied to the lower OutputCard + // Allows the OutputCard facing the user to be used as an "I'm talking, now they are talking" toggle + // The user holds it down while talking + // This is how we know which language to translate the audio data into val interactionSource = remember { MutableInteractionSource() } val isPressed by interactionSource.collectIsPressedAsState() LaunchedEffect(isPressed) { viewModel.updateSpeaker(subjectSpeaking = isPressed) } + // Streaming state toggle (FAB) var toggleSideEffect by remember { mutableStateOf<(() -> Unit)?>(null) } LaunchedEffect(toggleSideEffect, hasRecordingPermission) { @@ -180,18 +177,16 @@ class MainActivity : ComponentActivity() { .rotate(180F) ) { TranslationGroup( - translationText = translationObject, - speaker = Speaker.OBJECT, - langSubject = langSubject, - langObject = langObject, + translationText = translationSubject, + textFromLanguage = langSubject.name, + textToLanguage = langObject.name, ) } Box(modifier = Modifier.weight(.40F)) { TranslationGroup( - translationText = translationSubject, - speaker = Speaker.SUBJECT, - langSubject = langSubject, - langObject = langObject, + translationText = translationObject, + textFromLanguage = langObject.name, + textToLanguage = langSubject.name, interactionSource = interactionSource ) } diff --git a/app/src/main/java/com/critt/interp/ui/MainViewModel.kt b/app/src/main/java/com/critt/interp/ui/MainViewModel.kt index 432c5b2..4e9408b 100644 --- a/app/src/main/java/com/critt/interp/ui/MainViewModel.kt +++ b/app/src/main/java/com/critt/interp/ui/MainViewModel.kt @@ -4,29 +4,28 @@ import androidx.lifecycle.ViewModel import androidx.lifecycle.viewModelScope import com.critt.data.ApiResult import com.critt.data.AudioSource +import com.critt.data.LanguageRepository import com.critt.domain.LanguageData -import com.critt.data.SessionManager import com.critt.domain.Speaker -import com.critt.data.TranslationRepository +import com.critt.data.TranslationSource import com.critt.domain.SpeechData import com.critt.domain.defaultLangObject import com.critt.domain.defaultLangSubject import dagger.hilt.android.lifecycle.HiltViewModel -import kotlinx.coroutines.CancellationException import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.Job import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.asStateFlow import kotlinx.coroutines.flow.update import kotlinx.coroutines.launch import timber.log.Timber +import java.util.Locale import javax.inject.Inject @HiltViewModel class MainViewModel @Inject constructor( - private val translationRepo: TranslationRepository, - private val sessionManager: SessionManager, - private val audioSource: AudioSource + private val audioSource: AudioSource, + private val translationSource: TranslationSource, + private val languageRepo: LanguageRepository ) : ViewModel() { // Supported languages state private val _supportedLanguages = @@ -55,14 +54,91 @@ class MainViewModel @Inject constructor( private val _streamingState = MutableStateFlow(AudioStreamingState.Idle) val streamingState = _streamingState.asStateFlow() + // onAudioData lambdas + private var onAudioDataSubject: (ByteArray?) -> Unit = {} + private var onAudioDataObject: (ByteArray?) -> Unit = {} + + /** + * Initializes the [MainViewModel]. + * + * It fetches the supported languages from the [LanguageRepository] and + * sets up coroutines to collect translated text from the [TranslationSource]. + */ init { viewModelScope.launch(context = Dispatchers.IO) { - translationRepo.getSupportedLanguages().collect { res -> + languageRepo.getSupportedLanguages().collect { res -> _supportedLanguages.update { res } } } + + viewModelScope.launch { + translationSource.speechDataSubject.collect { + it?.let { + onTextData( + it, + _translationSubject, + builderSubject, + Locale(langObject.value.language) + ) + } + } + } + + viewModelScope.launch { + translationSource.speechDataObject.collect { + it?.let { + onTextData( + it, + _translationObject, + builderObject, + Locale(langSubject.value.language) + ) + } + } + } + } + + /** + * Cleans up resources when the ViewModel is cleared. + * + * It disconnects all sockets from the [translationSource]. + */ + override fun onCleared() { + super.onCleared() + // Clean up sockets + translationSource.disconnectAllSockets() + } + + /** + * Processes incoming text data and updates the corresponding translation state. + * + * @param textData The incoming speech data. + * @param translationState The state flow to update with the translated text. + * @param builder The StringBuilder to accumulate the translated text. + * @param locale The locale to use for capitalization. + */ + private fun onTextData( + textData: SpeechData, + translationState: MutableStateFlow, + builder: StringBuilder, + locale: Locale + ) { + if (textData.isFinal) { + builder.append(textData.data) + translationState.update { + builder.toString() + .replaceFirstChar { if (it.isLowerCase()) it.titlecase(locale) else it.toString() } + } + } else { + translationState.update { builder.toString() + textData.data } + } } + /** + * Updates the current speaker. + * + * @param subjectSpeaking True if the SUBJECT speaker is speaking, false otherwise. + */ fun updateSpeaker(subjectSpeaking: Boolean) { when (subjectSpeaking) { true -> _speakerCurr.update { Speaker.SUBJECT } @@ -70,14 +146,29 @@ class MainViewModel @Inject constructor( } } + /** + * Selects a language for the SUBJECT speaker. + * + * @param lang The selected language. + */ fun selectLangSubject(lang: LanguageData) { _langSubject.update { lang } } + /** + * Selects a language for the OBJECT speaker. + * + * @param lang The selected language. + */ fun selectLangObject(lang: LanguageData) { _langObject.update { lang } } + /** + * Toggles the audio streaming state. + * + * It starts or stops recording and streaming based on the current state. + */ fun toggleStreaming() { when (_streamingState.value) { is AudioStreamingState.Idle -> startRecordingAndStreaming() @@ -93,6 +184,12 @@ class MainViewModel @Inject constructor( } } + /** + * Starts recording and streaming audio. + * + * It clears the translation output StringBuilders, opens socket connections, + * and starts the audio recording process. + */ private fun startRecordingAndStreaming() { // clear the translation output StringBuilders builderSubject.clear() @@ -101,17 +198,13 @@ class MainViewModel @Inject constructor( // open socket connection on the "subject" namespace viewModelScope.launch(context = Dispatchers.IO) { try { - translationRepo.connectSubject( + onAudioDataSubject = translationSource.connect( languageSubject = langSubject.value.language, - languageObject = langObject.value.language - ).collect { res -> - onTextData( - textData = res, - translationState = _translationSubject, - builder = builderSubject - ) - } + languageObject = langObject.value.language, + Speaker.SUBJECT + ) } catch (e: Exception) { + Timber.e(e.message) stopRecordingAndStreaming() _streamingState.update { AudioStreamingState.Error( @@ -124,17 +217,13 @@ class MainViewModel @Inject constructor( // open socket connection on the "object" namespace viewModelScope.launch(context = Dispatchers.IO) { try { - translationRepo.connectObject( + onAudioDataObject = translationSource.connect( languageSubject = langSubject.value.language, - languageObject = langObject.value.language - ).collect { res -> - onTextData( - textData = res, - translationState = _translationObject, - builder = builderObject - ) - } + languageObject = langObject.value.language, + Speaker.OBJECT + ) } catch (e: Exception) { + Timber.e(e.message) stopRecordingAndStreaming() _streamingState.update { AudioStreamingState.Error( @@ -147,64 +236,101 @@ class MainViewModel @Inject constructor( // start recording viewModelScope.launch(context = Dispatchers.IO) { try { - audioSource.startRecording(onData = ::onAudioData) + audioSource.startRecording(viewModelScope, onData = ::onAudioData) } catch (e: Exception) { + Timber.e(e.message) + stopRecordingAndStreaming() _streamingState.update { AudioStreamingState.Error( e.message ?: "audioSource job: unknown error" ) } } - }.invokeOnCompletion { cause -> - stopRecordingAndStreaming() - when (cause) { - null, is CancellationException -> _streamingState.update { AudioStreamingState.Idle } - else -> _streamingState.update { - AudioStreamingState.Error( - cause.message ?: "audioSource job completion handler: unknown error" - ) - } - } } _streamingState.update { AudioStreamingState.Streaming } } + /** + * Stops the audio recording and disconnects all sockets. + * + * This function is responsible for cleaning up resources related to audio + * streaming and translation. It stops the audio recording process and + * disconnects all active socket connections. + * + * This function should be called when audio streaming is no longer needed, + * or when an error occurs that requires stopping the streaming process. + */ private fun stopRecordingAndStreaming() { audioSource.stopRecording() - translationRepo.disconnect() + translationSource.disconnectAllSockets() } + /** + * Processes incoming audio data and routes it to the appropriate speaker's + * audio data handler. + * + * This function is called when new audio data is available from the + * [AudioSource]. It determines the current speaker based on the + * [speakerCurr] state and then calls the corresponding `onAudioData` + * lambda function to handle the data. + * + * If the current speaker is [Speaker.SUBJECT], the data is sent to + * [onAudioDataSubject], and an empty byte array is sent to + * [onAudioDataObject]. + * + * If the current speaker is [Speaker.OBJECT], the data is sent to + * [onAudioDataObject], and an empty byte array is sent to + * [onAudioDataSubject]. + * + * @param data The incoming audio data as a ByteArray. + */ fun onAudioData(data: ByteArray) { when (speakerCurr.value) { - Speaker.SUBJECT -> translationRepo.onData( - subjectData = data, - objectData = ByteArray(2048) - ) - - Speaker.OBJECT -> translationRepo.onData( - subjectData = ByteArray(2048), - objectData = data - ) - } - } + Speaker.SUBJECT -> { + onAudioDataSubject(data) + onAudioDataObject(ByteArray(2048)) + } - private fun onTextData( - textData: SpeechData, - translationState: MutableStateFlow, - builder: StringBuilder - ) { - if (textData.isFinal) { - builder.append(textData.data) - translationState.update { builder.toString() } - } else { - translationState.update { builder.toString() + textData.data } + Speaker.OBJECT -> { + onAudioDataSubject(ByteArray(2048)) + onAudioDataObject(data) + } } } } +/** + * Represents the different states of audio streaming. + * + * This sealed class is used to represent the possible states of the audio + * streaming process within the application. It can be in one of three states: + * [Idle], [Streaming], or [Error]. + */ sealed class AudioStreamingState { + /** + * Represents the idle state of audio streaming. + * + * This state indicates that audio streaming is not currently active. + * No audio is being recorded or streamed. + */ object Idle : AudioStreamingState() + + /** + * Represents the streaming state of audio streaming. + * + * This state indicates that audio is currently being recorded and streamed. + */ object Streaming : AudioStreamingState() + + /** + * Represents the error state of audio streaming. + * + * This state indicates that an error has occurred during the audio + * streaming process. The [message] property contains a description of + * the error. + * + * @property message A description of the error that occurred. + */ data class Error(val message: String) : AudioStreamingState() } \ No newline at end of file diff --git a/buildSrc/src/main/kotlin/Dependencies.kt b/buildSrc/src/main/kotlin/Dependencies.kt index 7c338f1..cd96c47 100644 --- a/buildSrc/src/main/kotlin/Dependencies.kt +++ b/buildSrc/src/main/kotlin/Dependencies.kt @@ -49,11 +49,6 @@ object Dependencies { const val MATERIAL = "com.google.android.material:material:1.12.0" } - object Lifecycle { - //(currently using this for Flow.asLiveData()) - const val LIVEDATA = "androidx.lifecycle:lifecycle-livedata-ktx:2.8.7" - } - object Testing { const val JUNIT = "junit:junit:4.13.2" const val JUNIT_JUPITER = "org.junit.jupiter:junit-jupiter:5.8.1" diff --git a/data/src/main/java/com/critt/data/AudioSource.kt b/data/src/main/java/com/critt/data/AudioSource.kt index da20cdb..4dc783a 100644 --- a/data/src/main/java/com/critt/data/AudioSource.kt +++ b/data/src/main/java/com/critt/data/AudioSource.kt @@ -4,38 +4,123 @@ import android.annotation.SuppressLint import android.media.AudioFormat import android.media.AudioRecord import android.media.MediaRecorder +import android.os.Process +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.Job +import kotlinx.coroutines.NonCancellable +import kotlinx.coroutines.isActive +import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext +/** + * A robust and idiomatic audio source for recording audio from the microphone. + * + * This class handles audio recording, permission checks, and provides a more + * structured way to manage the recording process. + * + * @param context The application context. + * @param sampleRate The sample rate for audio recording (default: 16000 Hz). + * @param channelConfig The channel configuration (default: mono). + * @param audioFormat The audio format (default: PCM 16-bit). + */ class AudioSource( private val sampleRate: Int = 16000, - private val bufferSize: Int = 2048 + private val channelConfig: Int = AudioFormat.CHANNEL_IN_MONO, + private val audioFormat: Int = AudioFormat.ENCODING_PCM_16BIT ) { private var recorder: AudioRecord? = null + private var recordingJob: Job? = null + /** + * Starts audio recording. + * + * @param onData Callback function to receive audio data as a ByteArray. + * @throws IllegalStateException If the audio recording is already started. + */ @SuppressLint("MissingPermission") - fun startRecording(onData: (ByteArray) -> Unit) { + fun startRecording(scope: CoroutineScope, onData: (ByteArray) -> Unit) { + if (recordingJob?.isActive == true) { + throw IllegalStateException("Audio recording is already started.") + } + + val minBufferSize = AudioRecord.getMinBufferSize(sampleRate, channelConfig, audioFormat) + if (minBufferSize == AudioRecord.ERROR_BAD_VALUE || minBufferSize == AudioRecord.ERROR) { + throw IllegalStateException("Invalid audio parameters.") + } + + val bufferSize = minBufferSize.coerceAtLeast(2048) // Ensure a minimum buffer size + recorder = AudioRecord( - MediaRecorder.AudioSource.MIC, - sampleRate, - AudioFormat.CHANNEL_CONFIGURATION_MONO, - AudioFormat.ENCODING_PCM_16BIT, - bufferSize - ) + /* audioSource = */ MediaRecorder.AudioSource.MIC, + /* sampleRateInHz = */ sampleRate, + /* channelConfig = */ channelConfig, + /* audioFormat = */ audioFormat, + /* bufferSizeInBytes = */ bufferSize + ).apply { + if (state != AudioRecord.STATE_INITIALIZED) { + throw IllegalStateException("AudioRecord initialization failed.") + } + } - recorder?.startRecording() + recordingJob = scope.launch(Dispatchers.IO) { + Process.setThreadPriority(Process.THREAD_PRIORITY_URGENT_AUDIO) + recorder?.startRecording() - while (recorder != null) { val buffer = ByteArray(bufferSize) - recorder?.read(buffer, 0, buffer.size) - onData(buffer) - } + while (isActive) { + val bytesRead = recorder?.read(buffer, 0, buffer.size) ?: -1 + when { + bytesRead > 0 -> { + val data = buffer.copyOf(bytesRead) + onData(data) + } - stopRecording() + bytesRead == AudioRecord.ERROR_INVALID_OPERATION -> { + println("Error: Invalid operation during audio read.") + break + } + + bytesRead == AudioRecord.ERROR_BAD_VALUE -> { + println("Error: Bad value during audio read.") + break + } + } + } + + stopRecordingInternal() + } } + /** + * Stops audio recording. + */ fun stopRecording() { - recorder?.stop() - recorder?.release() - recorder = null + recordingJob?.cancel() + } + + private suspend fun stopRecordingInternal() { + withContext(NonCancellable) { + var exception: Throwable? = null + recorder?.apply { + if (recordingState == AudioRecord.RECORDSTATE_RECORDING) { + runCatching { stop() } + .exceptionOrNull() + ?.also { + exception = it + println("Error stopping AudioRecord: ${it.message}") + } + } + runCatching { release() } + .exceptionOrNull() + ?.also { + exception = exception ?: it + println("Error releasing AudioRecord: ${it.message}") + } + } + recorder = null + exception?.let { throw it } + } } } \ No newline at end of file diff --git a/data/src/main/java/com/critt/data/TranslationRepository.kt b/data/src/main/java/com/critt/data/TranslationRepository.kt index c817c37..a5e9b59 100644 --- a/data/src/main/java/com/critt/data/TranslationRepository.kt +++ b/data/src/main/java/com/critt/data/TranslationRepository.kt @@ -4,29 +4,9 @@ import com.critt.domain.LanguageData import kotlinx.coroutines.flow.Flow import javax.inject.Inject -class TranslationRepository @Inject constructor( - private val translationSource: TranslationSource, +class LanguageRepository @Inject constructor( private val languageSource: LanguageSource ) { - /** returns Flow - * this represents the translation of the subject language to the object language - */ - fun connectObject(languageSubject: String, languageObject: String) = - translationSource.connectObject(languageSubject, languageObject) - - /** returns Flow - * this represents the translation of the object language to the subject language - */ - fun connectSubject(languageSubject: String, languageObject: String) = - translationSource.connectSubject(languageSubject, languageObject) - - fun onData(subjectData: ByteArray?, objectData: ByteArray?) = - translationSource.onData(subjectData, objectData) - - fun disconnect() { - translationSource.disconnect() - } - /** returns Flow>?> * this represents all of the languages we support for voice transcription and translation */ diff --git a/data/src/main/java/com/critt/data/TranslationSource.kt b/data/src/main/java/com/critt/data/TranslationSource.kt index 500c289..1786086 100644 --- a/data/src/main/java/com/critt/data/TranslationSource.kt +++ b/data/src/main/java/com/critt/data/TranslationSource.kt @@ -1,82 +1,209 @@ package com.critt.data -import com.critt.data.BuildConfig +import com.critt.domain.Speaker import com.critt.domain.SpeechData import com.google.gson.Gson +import com.google.gson.GsonBuilder import io.socket.client.IO import io.socket.client.Socket -import kotlinx.coroutines.channels.awaitClose -import kotlinx.coroutines.flow.Flow -import kotlinx.coroutines.flow.callbackFlow +import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.coroutines.flow.asStateFlow +import kotlinx.coroutines.flow.update import timber.log.Timber +import java.net.URI +import java.util.concurrent.ConcurrentHashMap class TranslationSource( private val sessionManager: SessionManager ) { - private var socketSubject: Socket? = null - private var socketObject: Socket? = null - fun connectObject(languageSubject: String, languageObject: String): Flow { - /** - * https://socketio.github.io/socket.io-client-java/initialization.html#auth - * */ - val options: IO.Options = IO.Options.builder().setAuth(mapOf("token" to sessionManager.getAuthToken())).build() - socketObject = IO.socket(BuildConfig.API_BASE_URL + "object", options) - return initSocket(socketObject, getTranscriptionConfig(languageObject, languageSubject)) + // StateFLow for SpeechData: SUBJECT + private val _speechDataSubject = MutableStateFlow(null) + val speechDataSubject = _speechDataSubject.asStateFlow() + + // StateFLow for SpeechData: OBJECT + private val _speechDataObject = MutableStateFlow(null) + val speechDataObject = _speechDataObject.asStateFlow() + + /** + * A map of open sockets, keyed by [Speaker]. + * It stores the active socket connections for each speaker. + */ + private val openSockets = ConcurrentHashMap() + + /** + * Connects to the translation service and sets up listeners for speech data. + * + * This function initializes a Socket.IO connection for the specified + * speaker and sets up a listener for the [EVENT_TEXT_DATA] event. When + * speech data is received, it's parsed and the corresponding state flow + * ([_speechDataSubject] or [_speechDataObject]) is updated. + * + * It also emits a [EVENT_START_GOOGLE_CLOUD_STREAM] event to start the + * Google Cloud Speech-to-Text stream. + * + * @param languageSubject The language code for the SUBJECT speaker. + * @param languageObject The language code for the OBJECT speaker. + * @param speaker The speaker for whom to establish the connection. + * @return A lambda function that accepts a ByteArray of audio data and sends it to the server. + */ + fun connect( + languageSubject: String, + languageObject: String, + speaker: Speaker + ): (ByteArray?) -> Unit { + val socket = initSocket(speaker) + val gson: Gson = GsonBuilder().create() + + socket.on(EVENT_TEXT_DATA) { args -> + Timber.d("$EVENT_TEXT_DATA received") + + try { + val speechData = gson.fromJson(args[0].toString(), SpeechData::class.java) + Timber.d("$EVENT_TEXT_DATA: $speechData") + + when (speaker) { + Speaker.SUBJECT -> _speechDataSubject.update { speechData } + Speaker.OBJECT -> _speechDataObject.update { speechData } + } + } catch (e: Exception) { + // 7. Handle parsing errors + Timber.e("Error parsing speechData: ${e.message}") + } + } + + socket.connect() + + socket.emit( + EVENT_START_GOOGLE_CLOUD_STREAM, + gson.toJson(getTranscriptionConfig(languageSubject, languageObject, speaker)) + ) + + return { + onAudioData(speaker, it) + } } - fun connectSubject(languageSubject: String, languageObject: String): Flow { - /** - * https://socketio.github.io/socket.io-client-java/initialization.html#auth - * */ - val options: IO.Options = IO.Options.builder().setAuth(mapOf("token" to sessionManager.getAuthToken())).build() - socketSubject = IO.socket(BuildConfig.API_BASE_URL + "subject", options) - return initSocket(socketSubject, getTranscriptionConfig(languageSubject, languageObject)) + /** + * Sends audio data to the server for translation. + * + * This function emits the [EVENT_AUDIO_DATA] event with the provided + * audio data to the server. It uses the socket associated with the + * specified speaker. + * + * If the socket for the speaker is not open, it logs an informational + * message and does nothing. + * + * @param speaker The speaker for whom to send the audio data. + * @param audioData The audio data to send as a ByteArray. + */ + private fun onAudioData(speaker: Speaker, audioData: ByteArray?) { + openSockets[speaker]?.emit(EVENT_AUDIO_DATA, audioData) ?: run { + Timber.i("onAudioData():: not emitting data: ${speaker.name} socket is null") + } } - fun onData(subjectData: ByteArray?, objectData: ByteArray?) { - socketSubject?.emit("binaryAudioData", subjectData) - socketObject?.emit("binaryAudioData", objectData) + /** + * Disconnects a single socket and cleans up its resources. + * + * This function emits the [EVENT_END_GOOGLE_CLOUD_STREAM] event, removes + * the listener for [EVENT_TEXT_DATA], and disconnects the socket. + * + * It handles potential exceptions during the cleanup process and logs + * any errors that occur. + * + * @param socket The socket to disconnect. + */ + private fun disconnectSocket(socket: Socket) { + listOf( + { socket.emit(EVENT_END_GOOGLE_CLOUD_STREAM) }, + { socket.off(EVENT_TEXT_DATA) }, + { socket.disconnect() } + ).forEach { action -> + try { + action() + } catch (e: Exception) { + Timber.e("Error during socket cleanup: ${e.message}") + } + } + Timber.d("Socket disconnected") } - private fun initSocket(socket: Socket?, config: Map): Flow = callbackFlow { // Cold Flow - Timber.d("initSocket() :: config = $config") + /** + * Disconnects all open sockets and clears the [openSockets] map. + * + * This function iterates through all the sockets in the [openSockets] map + * and calls [disconnectSocket] on each one. It then clears the map to + * release the references to the sockets. + */ + fun disconnectAllSockets() { + for (socket in openSockets.values) { + disconnectSocket(socket) + } - socket?.connect() - socket?.emit("startGoogleCloudStream", Gson().toJson(config)) + openSockets.clear() + } - socket?.on("speechData") { args -> - Gson().fromJson(args[0].toString(), SpeechData::class.java).let { - println("Object speechData: $it") - trySend(it) - } + /** + * Initializes a Socket.IO connection for the specified speaker. + * + * This function creates a new Socket.IO connection to the server, + * authenticates the connection using the session manager's auth token, + * and stores the socket in the [openSockets] map. + * + * If a socket already exists for the speaker, it's disconnected and + * removed from the map before creating the new socket. + * + * @param speaker The speaker for whom to initialize the socket. + * @return The initialized Socket.IO socket. + */ + private fun initSocket(speaker: Speaker): Socket { + val socketChannel = when (speaker) { + Speaker.SUBJECT -> CHANNEL_SUBJECT + Speaker.OBJECT -> CHANNEL_OBJECT } - awaitClose { - socket?.emit("endGoogleCloudStream") - socket?.off("speechData") - socket?.disconnect() + val socketUri = URI.create("${BuildConfig.API_BASE_URL}$socketChannel") + + /** + * https://socketio.github.io/socket.io-client-java/initialization.html#auth + * */ + val socketOptions: IO.Options = IO.Options.builder() + .setAuth(mapOf(AUTH_TOKEN_KEY to sessionManager.getAuthToken())) + .build() + + val socket: Socket = IO.socket(socketUri, socketOptions) + + // clean up and remove existing socket if it exists for the speaker + openSockets[speaker]?.let { + disconnectSocket(it) + openSockets.remove(speaker) } + + openSockets[speaker] = socket + return socket } - fun disconnect() { - socketObject?.emit("endGoogleCloudStream") - socketObject?.off("speechData") - socketObject?.disconnect() + companion object { + private const val EVENT_TEXT_DATA = "speechData" + private const val EVENT_AUDIO_DATA = "binaryAudioData" + private const val EVENT_START_GOOGLE_CLOUD_STREAM = "startGoogleCloudStream" + private const val EVENT_END_GOOGLE_CLOUD_STREAM = "endGoogleCloudStream" + private const val CHANNEL_SUBJECT = "subject" + private const val CHANNEL_OBJECT = "object" + private const val AUTH_TOKEN_KEY = "token" + private const val CONFIG_SAMPLE_RATE_HERTZ = 16000 + private const val CONFIG_ENCODING = "LINEAR16" - socketSubject?.emit("endGoogleCloudStream") - socketSubject?.off("speechData") - socketSubject?.disconnect() + private fun getTranscriptionConfig(languageSubject: String, languageObject: String, speaker: Speaker) = + mapOf( + "audio" to mapOf( + "encoding" to CONFIG_ENCODING, + "sampleRateHertz" to CONFIG_SAMPLE_RATE_HERTZ, + "languageCode" to if (speaker == Speaker.SUBJECT) languageSubject else languageObject + ), + "interimResults" to true, + "targetLanguage" to if (speaker == Speaker.SUBJECT) languageObject else languageSubject + ) } - - private fun getTranscriptionConfig(languageSubject: String, languageObject: String) = - mapOf( - "audio" to mapOf( - "encoding" to "LINEAR16", - "sampleRateHertz" to 16000, - "languageCode" to languageSubject - ), - "interimResults" to true, - "targetLanguage" to languageObject - ) } \ No newline at end of file diff --git a/data/src/main/java/com/critt/data/di/SocketsModule.kt b/data/src/main/java/com/critt/data/di/SocketsModule.kt index 7b703ca..ff65faf 100644 --- a/data/src/main/java/com/critt/data/di/SocketsModule.kt +++ b/data/src/main/java/com/critt/data/di/SocketsModule.kt @@ -5,14 +5,16 @@ import com.critt.data.TranslationSource import dagger.Module import dagger.Provides import dagger.hilt.InstallIn -import dagger.hilt.components.SingletonComponent -import javax.inject.Singleton +import dagger.hilt.android.components.ViewModelComponent +import dagger.hilt.android.scopes.ViewModelScoped @Module -@InstallIn(SingletonComponent::class) +@InstallIn(ViewModelComponent::class) object SocketsModule { @Provides - @Singleton - fun provideSocketsService(sessionManager: SessionManager): TranslationSource = + @ViewModelScoped + fun provideSocketsService( + sessionManager: SessionManager + ): TranslationSource = TranslationSource(sessionManager) } \ No newline at end of file