Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
e1a6f03
feat(tests): add eou feou test for voice transcription
sam-s10s Feb 4, 2026
9b1730c
feat: update and add new audio samples for tests
sam-s10s Feb 4, 2026
0873abb
Squashed commit of the following:
sam-s10s Feb 18, 2026
b6cb0e6
feat: add audio timestamp to ForceEndOfUtterance message
sam-s10s Feb 18, 2026
1567b28
enhance: add tracking of audio bytes sent and timestamps
sam-s10s Feb 18, 2026
f708c65
feat: add timestamp to force_end_of_utterance method
sam-s10s Feb 18, 2026
e9ea4d8
Merge branch 'main' into fix/feou
sam-s10s Feb 18, 2026
35318b2
revert change not meant for this MR
sam-s10s Feb 18, 2026
656ab23
test: add new test for audio timestamp accuracy
sam-s10s Feb 18, 2026
ba2d382
fix: adjust tolerance to zero in timestamp test
sam-s10s Feb 18, 2026
38432c1
refactor: update test for varied audio formats
sam-s10s Feb 18, 2026
21c09c5
updated test URL
sam-s10s Feb 18, 2026
f1a94de
enhance: add custom listeners for improved test logging
sam-s10s Feb 18, 2026
5a5245e
feat: enable additional audio test cases in test_17_eou_feou
sam-s10s Feb 22, 2026
33e693f
refactor: enhance end of turn penalty logic
sam-s10s Feb 23, 2026
301bcf4
refactor: enhance end of turn penalty logic
sam-s10s Feb 23, 2026
d9de589
Add Penalty when Smart Turn hasn't been run (#86)
LArmstrongDev Feb 25, 2026
3375c3d
Merge branch 'fix/smart-turn' of https://github.com/speechmatics/spee…
sam-s10s Feb 25, 2026
7a52b3f
test: add `test_no_feou_fix` for FEOU disabled
sam-s10s Mar 2, 2026
1443b33
feat: integrate config validation and improve presets
sam-s10s Mar 2, 2026
386f37b
fix: enforce use of forced end of utterance
sam-s10s Mar 3, 2026
155fceb
refactor: simplify EOU and VAD logic, improve readability
sam-s10s Mar 3, 2026
0b28473
refactor: remove forced end-of-utterance config from tests
sam-s10s Mar 3, 2026
31aa3ac
remove: Delete outdated conditional validation for 'use_forced_eou' i…
sam-s10s Mar 3, 2026
ca0f22f
fix: handle forced EOU more securely in turn management
sam-s10s Mar 3, 2026
95dda05
manually set FEOU to be disabled for the tests.
sam-s10s Mar 3, 2026
5ecc473
remove `ws_headers` as part of a different PR
sam-s10s Mar 3, 2026
e30cc5e
fix: correct logic for end of utterance handling
sam-s10s Mar 3, 2026
cd7de39
refactor: extract config setup and ensure client disconnect
sam-s10s Mar 3, 2026
7f03cc5
chore: pin speechmatics-rt dependency version for voice
sam-s10s Mar 5, 2026
81815ee
fix: support 8kHz audio in VAD and smart turn
sam-s10s Mar 10, 2026
0e56620
fix: only predict end of turn when speech ended
sam-s10s Mar 11, 2026
4182979
test: re-enable speaker focus test cases
sam-s10s Mar 11, 2026
5583174
test: use env var for RT URL and fix assertions
sam-s10s Mar 11, 2026
18b56f9
fix: remove unused turn extend delay and dead code
sam-s10s Mar 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 40 additions & 3 deletions sdk/rt/speechmatics/rt/_async_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ def __init__(
self.on(ServerMessageType.WARNING, self._on_warning)
self.on(ServerMessageType.AUDIO_ADDED, self._on_audio_added)

# Audio format is set when start_session is called with an explicit format.
# Deliberately None until then to avoid silently using incorrect defaults.
self._audio_format: Optional[AudioFormat] = None

self._logger.debug("AsyncClient initialized (request_id=%s)", self._session.request_id)

async def start_session(
Expand Down Expand Up @@ -133,7 +137,10 @@ async def start_session(
... await client.start_session()
... await client.send_audio(frame)
"""
await self._start_recognition_session(

# _start_recognition_session resolves defaults (e.g. AudioFormat() if None),
# so we capture the resolved format to keep _audio_format in sync.
_, self._audio_format = await self._start_recognition_session(
transcription_config=transcription_config,
audio_format=audio_format,
translation_config=translation_config,
Expand Down Expand Up @@ -161,16 +168,27 @@ async def stop_session(self) -> None:
await self._session_done_evt.wait() # Wait for end of transcript event to indicate we can stop listening
await self.close()

async def force_end_of_utterance(self) -> None:
async def force_end_of_utterance(self, timestamp: Optional[float] = None) -> float:
"""
This method sends a ForceEndOfUtterance message to the server to signal
the end of an utterance. Forcing end of utterance will cause the final
transcript to be sent to the client early.

Takes an optional timestamp parameter to specify a marker for the engine
to use for timing of the end of the utterance. If not provided, the timestamp
will be calculated based on the cumulative audio sent to the server.

Args:
timestamp: Optional timestamp for the request.

Returns:
The timestamp that was used for the request.

Raises:
ConnectionError: If the WebSocket connection fails.
TranscriptionError: If the server reports an error during teardown.
TimeoutError: If the connection or teardown times out.
ValueError: If the audio format does not have an encoding set.

Examples:
Basic streaming:
Expand All @@ -179,7 +197,26 @@ async def force_end_of_utterance(self) -> None:
... await client.send_audio(frame)
... await client.force_end_of_utterance()
"""
await self.send_message({"message": ClientMessageType.FORCE_END_OF_UTTERANCE})
if timestamp is None:
timestamp = self.audio_seconds_sent

await self.send_message({"message": ClientMessageType.FORCE_END_OF_UTTERANCE, "timestamp": timestamp})

return timestamp

@property
def audio_seconds_sent(self) -> float:
    """Total duration, in seconds, of audio pushed to the server so far.

    Derived from the cumulative byte count and the negotiated audio format.

    Raises:
        ValueError: If called before start_session has set the audio format,
            or if the audio format does not have an encoding set.
    """
    fmt = self._audio_format
    # Refuse to guess: until start_session resolves the real format, any
    # duration would be computed from wrong defaults (e.g. 44100 Hz).
    if fmt is None:
        raise ValueError("audio_seconds_sent is not available before start_session is called with an audio format")
    bytes_per_second = fmt.sample_rate * fmt.bytes_per_sample
    return self._audio_bytes_sent / bytes_per_second

async def transcribe(
self,
Expand Down
7 changes: 7 additions & 0 deletions sdk/rt/speechmatics/rt/_base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def __init__(self, transport: Transport) -> None:
self._recv_task: Optional[asyncio.Task[None]] = None
self._closed_evt = asyncio.Event()
self._eos_sent = False
self._audio_bytes_sent = 0
self._seq_no = 0

self._logger = get_logger("speechmatics.rt.base_client")
Expand Down Expand Up @@ -122,11 +123,17 @@ async def send_audio(self, payload: bytes) -> None:

try:
await self._transport.send_message(payload)
self._audio_bytes_sent += len(payload)
self._seq_no += 1
except Exception:
self._closed_evt.set()
raise

@property
def audio_bytes_sent(self) -> int:
    """Running total of raw audio bytes successfully handed to the transport."""
    return self._audio_bytes_sent

async def send_message(self, message: dict[str, Any]) -> None:
"""
Send a message through the WebSocket.
Expand Down
23 changes: 23 additions & 0 deletions sdk/rt/speechmatics/rt/_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,29 @@ class AudioFormat:
sample_rate: int = 44100
chunk_size: int = 4096

_BYTES_PER_SAMPLE = {
AudioEncoding.PCM_F32LE: 4,
AudioEncoding.PCM_S16LE: 2,
AudioEncoding.MULAW: 1,
}

@property
def bytes_per_sample(self) -> int:
    """Number of bytes per audio sample based on encoding.

    Returns:
        The sample width in bytes (1 for mu-law, 2 for PCM s16le,
        4 for PCM f32le).

    Raises:
        ValueError: If encoding is None (file type) or unrecognized.
    """
    if self.encoding is None:
        raise ValueError(
            "Cannot determine bytes per sample for file-type audio format. "
            "Set an explicit encoding on AudioFormat."
        )
    # dict.get avoids raising a KeyError only to translate it into the
    # ValueError callers are documented to catch; the original try/except
    # re-raised without `from`, leaving a confusing chained traceback (B904).
    width = self._BYTES_PER_SAMPLE.get(self.encoding)
    if width is None:
        raise ValueError(f"Unknown encoding: {self.encoding}")
    return width

def to_dict(self) -> dict[str, Any]:
"""
Convert audio format to dictionary.
Expand Down
2 changes: 1 addition & 1 deletion sdk/voice/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ authors = [{ name = "Speechmatics", email = "support@speechmatics.com" }]
license = "MIT"
requires-python = ">=3.9"
dependencies = [
"speechmatics-rt>=0.5.3",
"speechmatics-rt==0.5.3",
"pydantic>=2.10.6,<3",
"numpy>=1.26.4,<3"
]
Expand Down
Loading
Loading