Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ https://github.com/user-attachments/assets/4c223e85-2916-494f-b7b1-766ce1bdc991
- **Requires a Wayland session** (GNOME, KDE Plasma Wayland, Sway, Hyprland)

- **Waybar** (optional, for status bar)
- **gtk4** (optional, for visualizer)
- **gtk4 + PyCairo** (optional, for visualizer)
- **NVIDIA GPU** (optional, for CUDA acceleration)
- **AMD/Intel GPU / APU** (optional, for Vulkan acceleration)

Expand Down
7 changes: 6 additions & 1 deletion docs/CONFIGURATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -638,13 +638,16 @@ Two modes available:
{
"transcription_backend": "realtime-ws",
"websocket_provider": "openai",
"websocket_model": "gpt-realtime-mini-2025-12-15",
"websocket_model": "gpt-realtime-whisper",
"realtime_mode": "transcribe", // "transcribe" or "converse"
"realtime_transcription_delay": "low", // "minimal", "low", "medium", "high", or "xhigh"
"realtime_timeout": 30, // Advanced: seconds to wait after stop for final transcript
"realtime_buffer_max_seconds": 5 // Advanced: max unsent audio backlog (seconds) before dropping old chunks
}
```

With OpenAI `gpt-realtime-whisper`, the mic OSD can show a live partial transcript while recording. Only the completed final transcript is pasted after you stop.

#### Google Gemini

Realtime streaming transcription via Google's Gemini Live API.
Expand Down Expand Up @@ -701,6 +704,8 @@ Visual feedback that will auto-match Omarchy themes.
}
```

When OpenAI `gpt-realtime-whisper` live preview is enabled, partial transcript text is written to a permission-restricted runtime IPC file under `$XDG_RUNTIME_DIR/hyprwhspr/` so the OSD daemon can render it. The file has mode `0600`, is cleared on normal hide/shutdown, and is scrubbed on service startup. If the machine loses power or the service is killed, the last partial transcript may remain on disk until the next service startup or runtime-directory cleanup.

### Audio feedback

Optional sound notifications:
Expand Down
33 changes: 32 additions & 1 deletion lib/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
from paths import (
RECORDING_STATUS_FILE, RECORDING_CONTROL_FILE, AUDIO_LEVEL_FILE, RECOVERY_REQUESTED_FILE,
RECOVERY_RESULT_FILE, MIC_ZERO_VOLUME_FILE, LOCK_FILE, LONGFORM_STATE_FILE, LONGFORM_SEGMENTS_DIR,
MODEL_UNLOADED_FILE, SOCKET_FILE
MODEL_UNLOADED_FILE, SOCKET_FILE, TRANSCRIPT_PREVIEW_FILE
)
from backend_utils import normalize_backend
from segment_manager import SegmentManager
Expand Down Expand Up @@ -223,6 +223,9 @@ def __init__(self):
import traceback
traceback.print_exc()

if hasattr(self.whisper_manager, 'set_realtime_partial_callback'):
self.whisper_manager.set_realtime_partial_callback(self._set_mic_osd_preview_text)

# Set up global shortcuts (needed for headless operation)
self._setup_global_shortcuts()

Expand Down Expand Up @@ -1170,6 +1173,8 @@ def _start_recording(self, language_override=None):
print("Recording started", flush=True)

try:
self._clear_mic_osd_preview_text()

# Clear zero-volume signal file when starting a new recording
# This allows waybar to recover immediately on successful start
self._clear_zero_volume_signal()
Expand Down Expand Up @@ -1366,6 +1371,10 @@ def _cleanup_recording_state(self):
"""Best-effort cleanup after any recording ends. Safe to call multiple times."""
self._notify_capture_subscriber("", final=True)

try:
self._clear_mic_osd_preview_text()
except Exception:
pass
try:
self._hide_mic_osd()
except Exception:
Expand Down Expand Up @@ -1436,6 +1445,7 @@ def _stop_recording(self):
print("Recording stopped", flush=True)

try:
self._clear_mic_osd_preview_text()

# Set visualizer to processing state (keep it visible during transcription)
self._set_visualizer_state('processing')
Expand Down Expand Up @@ -1547,6 +1557,7 @@ def _process_audio(self, audio_data):
print(f"[ERROR] Error processing audio: {e}", flush=True)
finally:
self._notify_capture_subscriber("", final=True)
self._clear_mic_osd_preview_text()
self.is_processing = False
# Show success/error state and hide OSD after delay
self._show_result_and_hide(success)
Expand Down Expand Up @@ -1683,6 +1694,7 @@ def _reset_stale_state(self):
RECOVERY_REQUESTED_FILE,
RECOVERY_RESULT_FILE,
MODEL_UNLOADED_FILE,
TRANSCRIPT_PREVIEW_FILE,
]
for f in stale_files:
try:
Expand All @@ -1708,6 +1720,7 @@ def _show_mic_osd(self):
with self._cancel_pending_hide_lock:
self._cancel_pending_hide = True
if self._mic_osd_runner and self._mic_osd_runner.is_available():
self._mic_osd_runner.clear_preview_text()
self._mic_osd_runner.set_state('recording')
self._mic_osd_runner.show()

Expand All @@ -1718,6 +1731,24 @@ def _hide_mic_osd(self):
try:
runner.hide()
runner.clear_state()
runner.clear_preview_text()
except Exception:
pass

def _set_mic_osd_preview_text(self, text: str):
"""Update live transcript preview text in the mic OSD."""
runner = getattr(self, '_mic_osd_runner', None)
if runner:
try:
runner.set_preview_text(text)
except Exception:
pass

def _clear_mic_osd_preview_text(self):
runner = getattr(self, '_mic_osd_runner', None)
if runner:
try:
runner.clear_preview_text()
except Exception:
pass

Expand Down
57 changes: 51 additions & 6 deletions lib/mic_osd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,37 @@ def is_gnome():
desktop = os.environ.get('XDG_CURRENT_DESKTOP', '').lower()
return 'gnome' in desktop

from .window import OSDWindow, load_css
from .audio import AudioMonitor
from .visualizations import VISUALIZATIONS
from .theme import ThemeWatcher
_MIC_OSD_IMPORT_ERROR = None
try:
from .window import OSDWindow, load_css
from .audio import AudioMonitor
from .visualizations import VISUALIZATIONS
from .theme import ThemeWatcher
except ImportError as e:
_MIC_OSD_IMPORT_ERROR = e
OSDWindow = None
AudioMonitor = None
VISUALIZATIONS = {}
ThemeWatcher = None

# Import paths with fallback for daemon context
try:
from ..src.paths import RECORDING_STATUS_FILE, VISUALIZER_STATE_FILE
from ..src.paths import RECORDING_STATUS_FILE, VISUALIZER_STATE_FILE, TRANSCRIPT_PREVIEW_FILE
except ImportError:
try:
from src.paths import RECORDING_STATUS_FILE, VISUALIZER_STATE_FILE
from src.paths import RECORDING_STATUS_FILE, VISUALIZER_STATE_FILE, TRANSCRIPT_PREVIEW_FILE
except ImportError:
# Fallback: construct paths manually if imports fail
home = Path.home()
xdg_config = Path(os.environ.get('XDG_CONFIG_HOME', home / '.config'))
xdg_runtime = os.environ.get('XDG_RUNTIME_DIR')
if xdg_runtime:
runtime_dir = Path(xdg_runtime) / 'hyprwhspr'
else:
runtime_dir = Path(os.environ.get('TMPDIR', '/tmp')) / f"hyprwhspr-{os.getuid()}"
RECORDING_STATUS_FILE = xdg_config / 'hyprwhspr' / 'recording_status'
VISUALIZER_STATE_FILE = xdg_config / 'hyprwhspr' / 'visualizer_state'
TRANSCRIPT_PREVIEW_FILE = runtime_dir / 'transcript_preview'


class MicOSD:
Expand All @@ -54,6 +68,7 @@ def __init__(self, visualization="waveform", width=400, height=68, daemon=False)
self._auto_hide_timeout_id = None
self._state_poll_timer_id = None
self._last_visualizer_state = None
self._last_preview_text = None
self.daemon = daemon
self.visible = False
self.theme_watcher = None
Expand Down Expand Up @@ -210,6 +225,8 @@ def _show(self):

def _hide(self):
"""Hide the OSD and stop audio monitoring."""
self._clear_preview_file()

if not self.visible:
return

Expand All @@ -221,6 +238,9 @@ def _hide(self):

try:
self.visible = False
if hasattr(self.window, 'set_preview_text'):
self.window.set_preview_text("")
self._last_preview_text = None
self.window.set_visible(False)

# Stop update timer
Expand Down Expand Up @@ -277,6 +297,13 @@ def _hide(self):
except Exception:
pass
self.audio_monitor = None

def _clear_preview_file(self):
    """Remove the transcript preview file if it exists.

    Best-effort: every failure (including a concurrent removal between
    the existence check and the unlink) is silently ignored, so this is
    safe to call repeatedly.
    """
    try:
        preview_path = TRANSCRIPT_PREVIEW_FILE
        if not preview_path.exists():
            return
        preview_path.unlink()
    except Exception:
        pass

def _update(self):
"""Update visualization with current audio data."""
Expand All @@ -294,15 +321,27 @@ def _poll_state_file(self):
state = f.read().strip()
if state and state != self._last_visualizer_state:
self._last_visualizer_state = state
if self.window and hasattr(self.window, 'set_visualizer_state'):
self.window.set_visualizer_state(state)
# Update visualization state if it has the set_state method
if hasattr(self.visualization, 'set_state'):
self.visualization.set_state(state)
else:
# No state file means default to recording state
if self._last_visualizer_state != 'recording':
self._last_visualizer_state = 'recording'
if self.window and hasattr(self.window, 'set_visualizer_state'):
self.window.set_visualizer_state('recording')
if hasattr(self.visualization, 'set_state'):
self.visualization.set_state('recording')

preview = ""
if TRANSCRIPT_PREVIEW_FILE.exists():
preview = TRANSCRIPT_PREVIEW_FILE.read_text(encoding='utf-8').rstrip('\r\n')
if preview != self._last_preview_text:
self._last_preview_text = preview
if self.window and hasattr(self.window, 'set_preview_text'):
self.window.set_preview_text(preview)
except Exception:
pass # Ignore file read errors
return True # Continue polling
Expand Down Expand Up @@ -358,6 +397,8 @@ def stop(self):

def _cleanup(self):
"""Clean up resources."""
self._clear_preview_file()

if self.update_timer_id:
GLib.source_remove(self.update_timer_id)
self.update_timer_id = None
Expand Down Expand Up @@ -439,6 +480,10 @@ def main():
help="Run as daemon (start hidden, show on SIGUSR1, hide on SIGUSR2)"
)
args = parser.parse_args()

if _MIC_OSD_IMPORT_ERROR is not None:
print(f"[MIC-OSD] Unavailable: {_MIC_OSD_IMPORT_ERROR}", file=sys.stderr, flush=True)
return 1

# Set up signal handlers
signal.signal(signal.SIGTERM, _signal_handler)
Expand Down
Loading
Loading