From ebdbd87bda4f932e899f66ecf5b76fd7aeab8013 Mon Sep 17 00:00:00 2001 From: DCHA <426225+daocha@users.noreply.github.com> Date: Tue, 31 Mar 2026 22:27:10 +0800 Subject: [PATCH 1/5] Add Speech-To-Text Feature and fix queued messages behavior + misc bugs (#36) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ** Add Speech-To-Text support: - Add local Whisper speech-to-text support for Telegram voice/audio messages, including startup prerequisite checks, shared STT installer flow, env configuration, and transcript dispatch into the normal message pipeline. - Add dependency installation script and environment detection - Add OpenAI-Whisper and required dependencies in the setup script and server startup script - Server startup now checks the dependencies when STT config is enabled - Document the Whisper/STT flow, update localized user-facing strings, and add regression tests for speech-to-text, queue ordering, reply behavior, and installer prerequisite checks. - Add test cases: ** Runtime issue fix: harden queue, reply threading, and startup consistency: - Fix pending-action and queue drain ordering, busy/queue race handling, reply threading for working/final output, and ensure install.sh launches with the same Python interpreter used for installation. - Fix queue messages during pending session setup - Fix Queue text and voice transcripts while session prerequisites are unresolved, then drain the queue after session creation completes. Add regression coverage for pending new-session text/voice cases and clean up localized README diff wording. 
** Bug fix: Fix 1 — Double HTML escaping in bold text Fix 2 — callback_data 64-byte limit for branch source buttons Fix 3 — Queue delimiter injection corrupts queued messages --------- Co-authored-by: DCHA Agent <259406208+dcha-agent@users.noreply.github.com> --- README.de.md | 144 ++-- README.fr.md | 198 +++-- README.ja.md | 252 +++--- README.ko.md | 252 +++--- README.md | 91 ++- README.nl.md | 194 +++-- README.th.md | 248 +++--- README.vi.md | 272 ++++--- README.zh-CN.md | 252 +++--- README.zh-HK.md | 268 ++++--- README.zh-TW.md | 268 ++++--- install-stt.sh | 37 + install.sh | 3 - pyproject.toml | 1 + src/coding_agent_telegram/bot.py | 38 +- src/coding_agent_telegram/cli.py | 11 + src/coding_agent_telegram/config.py | 14 + .../resources/.env.example | 16 + .../resources/locales/de.json | 8 +- .../resources/locales/en.json | 10 +- .../resources/locales/fr.json | 8 +- .../resources/locales/ja.json | 8 +- .../resources/locales/ko.json | 8 +- .../resources/locales/nl.json | 8 +- .../resources/locales/th.json | 8 +- .../resources/locales/vi.json | 8 +- .../resources/locales/zh-CN.json | 8 +- .../resources/locales/zh-HK.json | 10 +- .../resources/locales/zh-TW.json | 10 +- src/coding_agent_telegram/router/base.py | 25 + .../router/message_commands.py | 221 +++++- .../router/project_commands.py | 26 +- .../router/queue_processing.py | 135 +++- .../router/session_branch_resolution.py | 8 +- .../router/session_common.py | 8 + .../router/session_lifecycle_commands.py | 98 ++- .../router/session_provider_commands.py | 2 +- src/coding_agent_telegram/session_runtime.py | 68 +- src/coding_agent_telegram/speech_to_text.py | 140 ++++ src/coding_agent_telegram/stt_setup.py | 306 +++++++ src/coding_agent_telegram/telegram_sender.py | 75 +- startup.sh | 87 +- tests/test_command_router.py | 745 +++++++++++++++++- tests/test_config.py | 34 + tests/test_speech_to_text.py | 105 +++ tests/test_stt_setup.py | 99 +++ tests/test_telegram_sender.py | 30 +- 47 files changed, 3761 
insertions(+), 1104 deletions(-) create mode 100755 install-stt.sh create mode 100644 src/coding_agent_telegram/speech_to_text.py create mode 100644 src/coding_agent_telegram/stt_setup.py create mode 100644 tests/test_speech_to_text.py create mode 100644 tests/test_stt_setup.py diff --git a/README.de.md b/README.de.md index 49fb922..2049d43 100644 --- a/README.de.md +++ b/README.de.md @@ -38,7 +38,7 @@ - ✅ Telegram zum Steuern von Codex / Copilot CLI verwenden - ✅ Antworten und geänderte Dateien bequem in Code-Blöcken prüfen - ✅ Folgefragen während eines laufenden Agentenlaufs in die Queue stellen - - ✅ Unterstützt Text- und Bildeingaben + - ✅ Akzeptiert ✏️ Text-, 🌄 Bild- und 🎙️ Sprachnachrichten ## 🔁 Nahtlos zwischen Geräten und Sessions wechseln @@ -49,7 +49,7 @@ ## 🛠️ Typischer lokaler Ablauf ```bash - coding-agent-telegram # or run ./startup.sh + coding-agent-telegram # oder ./startup.sh ausführen ``` ##### In Telegram: @@ -99,6 +99,7 @@ Vor dem Start des Servers brauchst du: - Lokal installiertes Codex CLI und/oder Copilot CLI - [Codex CLI Installation](https://developers.openai.com/codex/cli) - [Copilot CLI Installation](https://github.com/features/copilot/cli) +- [Optional] Whisper, ffmpeg @@ -108,35 +109,61 @@ Openclaw bietet dir sehr umfassende Funktionen und hat mit Pi-Agent bereits eine ## 🚀 Schnellstart -### Option A: Einzeiliges Bootstrap-Skript +### Variante A: Einzeiliges Bootstrap-Skript ```bash curl -fsSL https://raw.githubusercontent.com/daocha/coding-agent-telegram/main/install.sh | bash ``` -### Option B: Installation über PyPI mit `pip` +### Variante B: Installation über PyPI mit `pip` ```bash pip install coding-agent-telegram coding-agent-telegram ``` -### Option C: Aus einem geklonten Repository starten +### Variante C: Aus einem geklonten Repository starten ```bash git clone https://github.com/daocha/coding-agent-telegram cd coding-agent-telegram ./startup.sh ``` -### Bot-Server starten +### 🌐 Bot-Server starten ##### Beim ersten Start legt 
die App die Env-Datei an und sagt dir, welche Felder du ausfüllen musst. ##### Nach dem Bearbeiten der Env-Datei starte erneut: ```bash -# if you follow Option A or Option B, then run +# wenn du Variante A oder Variante B verwendest, dann ausführen coding-agent-telegram -# if you follow Option C, then run this again +# wenn du Variante C verwendest, dann dies erneut ausführen ./startup.sh ``` +## 🎙️ [Optional] Sprach-zu-Text-Funktion: lokale OpenAI-Whisper-Voraussetzungen vorbereiten + +Damit aktivierst du optional lokale Whisper-basierte Sprach-zu-Text-Unterstützung für Telegram-Sprachnotizen. Audiodateien sind auf maximal `20 MB` begrenzt. + +```bash +# wenn du per pip oder per Einzeiler install.sh installiert hast +coding-agent-telegram-stt-install + +# wenn du aus einem geklonten Repository startest +./install-stt.sh +``` + +Empfohlene Env-Einstellungen: + +```text +ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT=true +OPENAI_WHISPER_MODEL=base +OPENAI_WHISPER_TIMEOUT_SECONDS=120 +``` + +Hinweise: + +- Whisper lädt das ausgewählte Modell beim ersten Aufruf automatisch nach `~/.cache/whisper` herunter. +- Wenn du `OPENAI_WHISPER_MODEL=turbo` wählst, ist es wahrscheinlicher, dass die erste Sprachnachricht das Zeitlimit erreicht, während `large-v3-turbo.pt` noch heruntergeladen wird. +- Nach der Transkription einer Sprachnachricht sendet der Bot das erkannte Transkript zuerst zurück an Telegram und gibt es danach an den Agenten weiter. So lassen sich Erkennungsfehler leichter prüfen. + ## 🔑 Telegram-Einrichtung ### Bot-Token holen @@ -175,61 +202,62 @@ Der Bot akzeptiert derzeit: - Textnachrichten - Fotos +- Sprachnachrichten und Audiodateien, wenn `ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT=true` gesetzt ist und die lokalen Whisper-Voraussetzungen installiert sind - Codex und Copilot unterstützen aktuell nur Text und Bilder, kein Video. ## 🤖 Telegram-Befehle
/provider |
+ /provider |
Provider für neue Sessions wählen. Die Auswahl wird pro Bot und Chat gespeichert, bis du sie änderst. | |
/project <project_folder> |
+ /project <project_folder> |
Aktuellen Projektordner setzen. Falls der Ordner nicht existiert, erstellt die App ihn und markiert ihn als vertrauenswürdig. Wenn er bereits existiert und noch nicht vertraut ist, fragt die App nach einer Bestätigung. | |
/branch <new_branch> |
+ /branch <new_branch> |
Eine branch für das aktuelle Projekt vorbereiten oder wechseln. Wenn die branch bereits existiert, nutzt der Bot sie als Quellkandidaten. Andernfalls verwendet er die Standard-branch des Repositorys als Quellkandidaten. | |
/branch <origin_branch> <new_branch> |
- Eine branch mit ` |
+ /branch <origin_branch> <new_branch> |
+ Eine branch mit <origin_branch> als Quellkandidaten vorbereiten oder wechseln. Für beide Formen bietet der Bot anschließend nur die Quelloptionen an, die tatsächlich existieren: local/<branch> und origin/<branch>. Wenn nur eine davon existiert, wird nur diese angezeigt. Wenn keine existiert, meldet der Bot, dass die branch-Quelle fehlt. |
/current |
+ /current |
Die aktive Session für den aktuellen Bot und Chat anzeigen. | |
/new [session_name] |
+ /new [session_name] |
Eine neue Session für das aktuelle Projekt erstellen. Wenn du keinen Namen angibst, verwendet der Bot die echte Session-ID. Fehlen Provider, Projekt oder branch, führt dich der Bot durch den fehlenden Schritt. | |
/switch |
+ /switch |
Die neuesten Sessions anzeigen, zuerst die neuesten. Die Liste enthält sowohl vom Bot verwaltete Sessions als auch lokale Codex/Copilot CLI-Sessions für das aktuelle Projekt. | |
/switch page <number> |
+ /switch page <number> |
Eine andere Seite der gespeicherten Sessions anzeigen. | |
/switch <session_id> |
+ /switch <session_id> |
Zu einer bestimmten Session per ID wechseln. Wenn du eine lokale CLI-Session auswählst, importiert der Bot sie und setzt dort fort. | |
/compact |
+ /compact |
Aus der aktiven Session eine neue kompakte Session erzeugen und dorthin wechseln. | |
/commit <git commands> |
- Geprüfte `git commit`-bezogene Befehle im Projekt der aktiven Session ausführen. Nur verfügbar, wenn `ENABLE_COMMIT_COMMAND=true`. Schreibende Git-Befehle erfordern ein vertrauenswürdiges Projekt. | +/commit <git commands> |
+ Geprüfte git commit-bezogene Befehle im Projekt der aktiven Session ausführen. Nur verfügbar, wenn ENABLE_COMMIT_COMMAND=true. Schreibende Git-Befehle erfordern ein vertrauenswürdiges Projekt. |
/push |
- `origin |
+ /push |
+ origin <branch> für die aktuelle aktive Session pushen. Der Bot fragt vor dem Push nach einer Bestätigung. |
/abort |
+ /abort |
Den aktuellen Agentenlauf für das aktuelle Projekt abbrechen. Wenn Fragen in der Queue warten, fragt der Bot, ob sie weiter verarbeitet werden sollen. |
WORKSPACE_ROOT |
+ WORKSPACE_ROOT |
Übergeordneter Ordner, der deine Projektverzeichnisse enthält. |
TELEGRAM_BOT_TOKENS |
+ TELEGRAM_BOT_TOKENS |
Kommagetrennte Telegram-Bot-Tokens. |
ALLOWED_CHAT_IDS |
+ ALLOWED_CHAT_IDS |
Kommagetrennte Telegram-Chat-IDs privater Chats, die den Bot verwenden dürfen. |
APP_LOCALE |
+ APP_LOCALE |
UI-Sprache für gemeinsame Bot-Meldungen und Befehlsbeschreibungen. Unterstützte Werte: en, de, fr, ja, ko, nl, th, vi, zh-CN, zh-HK, zh-TW. |
CODEX_BIN |
+ CODEX_BIN |
Befehl zum Starten von Codex CLI. Standard: codex. |
COPILOT_BIN |
+ COPILOT_BIN |
Befehl zum Starten von Copilot CLI. Standard: copilot. |
CODEX_MODEL |
+ CODEX_MODEL |
Optionale Model-Überschreibung für Codex. Leer lassen, um das Standardmodell von Codex CLI zu verwenden. Beispiel: gpt-5.4 OpenAI Codex/OpenAI Modelle |
COPILOT_MODEL |
+ COPILOT_MODEL |
Optionale Model-Überschreibung für Copilot. Leer lassen, um das Standardmodell von Copilot CLI zu verwenden. Beispiele: gpt-5.4, claude-sonnet-4.6 Von GitHub Copilot unterstützte Modelle |
CODEX_APPROVAL_POLICY |
+ CODEX_APPROVAL_POLICY |
An Codex übergebener Freigabemodus. Standard: never. |
CODEX_SANDBOX_MODE |
+ CODEX_SANDBOX_MODE |
An Codex übergebener Sandbox-Modus. Standard: workspace-write. |
CODEX_SKIP_GIT_REPO_CHECK |
+ CODEX_SKIP_GIT_REPO_CHECK |
Wenn aktiviert, werden Codex-Prüfungen für vertrauenswürdige Repositories immer übersprungen. |
ENABLE_COMMIT_COMMAND |
+ ENABLE_COMMIT_COMMAND |
Den Telegram-Befehl /commit aktivieren. Standard: false. |
AGENT_HARD_TIMEOUT_SECONDS |
+ AGENT_HARD_TIMEOUT_SECONDS |
Hartes Zeitlimit für einen einzelnen Agentenlauf. Standard: 0 (deaktiviert). |
SNAPSHOT_TEXT_FILE_MAX_BYTES |
+ SNAPSHOT_TEXT_FILE_MAX_BYTES |
Maximale Dateigröße, die der Bot als Text liest, wenn er Vorher/Nachher-Snapshots für Run-Diffs erstellt. Standard: 200000. |
MAX_TELEGRAM_MESSAGE_LENGTH |
+ MAX_TELEGRAM_MESSAGE_LENGTH |
Maximale Nachrichtengröße, bevor die App Antworten aufteilt. Standard: 3000. |
ENABLE_SENSITIVE_DIFF_FILTER |
+ ENABLE_SENSITIVE_DIFF_FILTER |
Diffs für sensible Dateien ausblenden. Standard: true. |
ENABLE_SECRET_SCRUB_FILTER |
+ ENABLE_SECRET_SCRUB_FILTER |
Tokens, Schlüssel, .env-Werte, Zertifikate und ähnliche geheime Ausgaben vor dem Senden an Telegram unkenntlich machen. Standard: true (dringend empfohlen). |
SNAPSHOT_INCLUDE_PATH_GLOBS |
+ SNAPSHOT_INCLUDE_PATH_GLOBS |
Passende Pfade in Diffs immer einschließen. Beispiel: .github/*,.profile.test,.profile.prod |
SNAPSHOT_EXCLUDE_PATH_GLOBS |
+ SNAPSHOT_EXCLUDE_PATH_GLOBS |
Zusätzliche Diff-Ausschlüsse zusätzlich zu den Standardwerten hinzufügen. Beispiel: .*,personal/*,sensitive*.txt Hinweis: .* erfasst versteckte Pfade, auch Dateien in versteckten Verzeichnissen. |
ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT |
+ Standard: false. Wenn true, werden Sprachnachrichten und Audiodateien erkannt. Das System prüft die erforderlichen Binärdateien oder Bibliotheken und fordert zur Installation auf, falls etwas fehlt. |
+
OPENAI_WHISPER_MODEL |
+ Modell für Whisper STT. Standard: base. Verfügbare Modelle: tiny ca. 72 MB, base ca. 139 MB, large-v3-turbo ca. 1.5 GB. Modelle werden bei der ersten Sprachnachricht automatisch heruntergeladen. Empfehlung: base für den allgemeinen Gebrauch. Wenn du bessere Genauigkeit und Qualität willst, kannst du turbo ausprobieren. |
+
OPENAI_WHISPER_TIMEOUT_SECONDS |
+ Standard: 120. Timeout für den STT-Prozess. Normalerweise ist die Verarbeitung schnell genug. Wenn du jedoch turbo wählst, kann der erste Sprachaufruf während des Modelldownloads je nach Internetgeschwindigkeit das Timeout überschreiten. |
+
~/.coding-agent-telegram/state.json.bak |
- Backup-Datei für den Status. | +Sicherungsdatei für den Status. |
~/.coding-agent-telegram/logs |
@@ -474,8 +522,8 @@ Der Bot behandelt Projekt und branch als zusammengehörig.
Wenn du eine branch erstellst oder wechselst, führt dich der Bot explizit durch die Quelle:
-- `local/
/provider |
- Choisir le provider pour les nouvelles sessions. Le choix est stocké par bot et par chat jusqu’à modification. | +/provider |
+ Choisir le fournisseur pour les nouvelles sessions. Le choix est stocké par bot et par chat jusqu’à modification. |
/project <project_folder> |
+ /project <project_folder> |
Définir le dossier de projet courant. Si le dossier n’existe pas, l’app le crée et le marque trusted. S’il existe déjà mais reste untrusted, l’app vous demande une confirmation. | |
/branch <new_branch> |
+ /branch <new_branch> |
Préparer ou changer une branch pour le projet courant. Si la branch existe déjà, le bot la traite comme source candidate. Sinon il utilise la branch par défaut du dépôt. | |
/branch <origin_branch> <new_branch> |
- Préparer ou changer une branch en utilisant ` |
+ /branch <origin_branch> <new_branch> |
+ Préparer ou changer une branch en utilisant <origin_branch> comme source candidate. Pour les deux formes, le bot ne propose ensuite que les sources réellement disponibles : local/<branch> et origin/<branch>. Si une seule existe, seule celle-ci est affichée. Si aucune n’existe, le bot signale que la source de branch est introuvable. |
/current |
+ /current |
Afficher la session active pour le bot et le chat courants. | |
/new [session_name] |
- Créer une nouvelle session pour le projet courant. Si vous omettez le nom, le bot utilise la vraie session ID. Si provider, projet ou branch manque, le bot vous guide. | +/new [session_name] |
+ Créer une nouvelle session pour le projet courant. Si vous omettez le nom, le bot utilise le véritable ID de session. Si fournisseur, projet ou branch manque, le bot vous guide. |
/switch |
+ /switch |
Afficher les sessions les plus récentes, de la plus récente à la plus ancienne. La liste inclut les sessions gérées par le bot et les sessions locales Codex/Copilot CLI du projet courant. | |
/switch page <number> |
+ /switch page <number> |
Afficher une autre page des sessions enregistrées. | |
/switch <session_id> |
+ /switch <session_id> |
Basculer vers une session précise via son ID. Si vous choisissez une session CLI locale, le bot l’importe et reprend à partir d’elle. | |
/compact |
+ /compact |
Créer une nouvelle session compactée à partir de la session active et basculer dessus. | |
/commit <git commands> |
- Exécuter des commandes liées à `git commit` validées dans le projet de la session active. Disponible uniquement si `ENABLE_COMMIT_COMMAND=true`. Les commandes Git mutantes exigent un projet trusted. | +/commit <git commands> |
+ Exécuter des commandes liées à git commit validées dans le projet de la session active. Disponible uniquement si ENABLE_COMMIT_COMMAND=true. Les commandes Git mutantes exigent un projet trusted. |
/push |
- Pousser `origin |
+ /push |
+ Pousser origin <branch> pour la session active courante. Le bot demande une confirmation avant le push. |
/abort |
+ /abort |
Annuler l’exécution d’agent en cours pour le projet courant. Si des questions attendent dans la file, le bot demande si elles doivent continuer. |
WORKSPACE_ROOT |
+ WORKSPACE_ROOT |
Dossier parent qui contient vos répertoires de projet. |
TELEGRAM_BOT_TOKENS |
+ TELEGRAM_BOT_TOKENS |
Liste de tokens de bot Telegram séparés par des virgules. |
ALLOWED_CHAT_IDS |
+ ALLOWED_CHAT_IDS |
Liste d’IDs de chat privés Telegram autorisés, séparés par des virgules. |
APP_LOCALE |
+ APP_LOCALE |
Langue de l’interface pour les messages partagés du bot et les descriptions de commandes. Valeurs prises en charge : en, de, fr, ja, ko, nl, th, vi, zh-CN, zh-HK, zh-TW. |
|
CODEX_BIN |
+ CODEX_BIN |
Commande utilisée pour lancer Codex CLI. Valeur par défaut : codex. |
|
COPILOT_BIN |
+ COPILOT_BIN |
Commande utilisée pour lancer Copilot CLI. Valeur par défaut : copilot. |
|
CODEX_MODEL |
+ CODEX_MODEL |
Remplacement optionnel du modèle Codex. Laissez vide pour utiliser le modèle par défaut de Codex CLI. Exemple : gpt-5.4 Modèles OpenAI Codex/OpenAI |
|
COPILOT_MODEL |
+ COPILOT_MODEL |
Remplacement optionnel du modèle Copilot. Laissez vide pour utiliser le modèle par défaut de Copilot CLI. Exemples : gpt-5.4, claude-sonnet-4.6 Modèles pris en charge par GitHub Copilot |
|
CODEX_APPROVAL_POLICY |
+ CODEX_APPROVAL_POLICY |
Mode d’approbation transmis à Codex. Défaut : never. |
|
CODEX_SANDBOX_MODE |
+ CODEX_SANDBOX_MODE |
Mode sandbox transmis à Codex. Défaut : workspace-write. |
|
CODEX_SKIP_GIT_REPO_CHECK |
+ CODEX_SKIP_GIT_REPO_CHECK |
Si activé, contourne toujours les vérifications de dépôt trusted de Codex. | |
ENABLE_COMMIT_COMMAND |
+ ENABLE_COMMIT_COMMAND |
Active la commande Telegram /commit. Défaut : false. |
|
AGENT_HARD_TIMEOUT_SECONDS |
+ AGENT_HARD_TIMEOUT_SECONDS |
Timeout dur pour une exécution d’agent. Défaut : 0 (désactivé). |
|
SNAPSHOT_TEXT_FILE_MAX_BYTES |
- Taille maximale de fichier que le bot lira en texte pour construire le snapshot avant/après des diffs. Défaut : 200000. |
+ SNAPSHOT_TEXT_FILE_MAX_BYTES |
+ Taille maximale de fichier que le bot lira en texte pour construire l’instantané avant/après des diffs. Défaut : 200000. |
MAX_TELEGRAM_MESSAGE_LENGTH |
+ MAX_TELEGRAM_MESSAGE_LENGTH |
Taille maximale d’un message avant découpage de la réponse. Défaut : 3000. |
|
ENABLE_SENSITIVE_DIFF_FILTER |
+ ENABLE_SENSITIVE_DIFF_FILTER |
Masquer les diffs des fichiers sensibles. Défaut : true. |
|
ENABLE_SECRET_SCRUB_FILTER |
+ ENABLE_SECRET_SCRUB_FILTER |
Masquer tokens, clés, valeurs .env, certificats et sorties similaires avant envoi vers Telegram. Défaut : true (fortement recommandé). |
|
SNAPSHOT_INCLUDE_PATH_GLOBS |
+ SNAPSHOT_INCLUDE_PATH_GLOBS |
Toujours inclure les chemins correspondants dans les diffs. Exemple : .github/*,.profile.test,.profile.prod |
|
SNAPSHOT_EXCLUDE_PATH_GLOBS |
- Ajouter des exclusions de diff supplémentaires au-dessus des valeurs par défaut du package. Exemple : .*,personal/*,sensitive*.txt Remarque : .* inclut les chemins cachés, y compris les fichiers dans les dossiers cachés. |
+ SNAPSHOT_EXCLUDE_PATH_GLOBS |
+ Ajouter des exclusions de diff supplémentaires au-dessus des valeurs par défaut du paquet. Exemple : .*,personal/*,sensitive*.txt Remarque : .* inclut les chemins cachés, y compris les fichiers dans les dossiers cachés. |
+
ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT |
+ Valeur par défaut : false. Si activé, la reconnaissance des messages vocaux et des fichiers audio est disponible. Le système vérifie les binaires ou bibliothèques requis et invite l’utilisateur à les installer si nécessaire. |
+
OPENAI_WHISPER_MODEL |
+ Modèle utilisé pour la STT Whisper. Valeur par défaut : base. Modèles disponibles : tiny environ 72 MB, base environ 139 MB, large-v3-turbo environ 1.5 GB. Les modèles sont téléchargés automatiquement lors de votre premier message vocal. Recommandé : base pour un usage général. Si vous souhaitez une meilleure précision et qualité, vous pouvez essayer turbo. |
+
OPENAI_WHISPER_TIMEOUT_SECONDS |
+ Valeur par défaut : 120. Délai d’expiration du processus STT. En général, le traitement est assez rapide. Mais si vous choisissez turbo, le premier message vocal peut dépasser ce délai pendant le téléchargement du modèle selon la vitesse de votre connexion. |
~/.coding-agent-telegram/state.json |
- Hauptdatei für den Session-Status. | +Fichier principal de l’état des sessions. |
~/.coding-agent-telegram/state.json.bak |
- Backup-Datei für den Status. | +Fichier de sauvegarde de l’état. |
~/.coding-agent-telegram/logs |
- Log-Verzeichnis. | +Répertoire des logs. |
local/<branch> : utiliser la branch locale comme source
+- origin/<branch> : mettre à jour depuis la branch distante puis basculer
Si le bot détecte que la branch stockée dans la session ne correspond pas à la branch courante du dépôt, il ne continue pas à l'aveugle. Il vous demande quelle branch utiliser :
@@ -485,7 +539,7 @@ Si votre branch source préférée est introuvable, le bot propose des sources d
- `/commit` peut être désactivé complètement avec `ENABLE_COMMIT_COMMAND`
- les opérations `/commit` qui modifient des fichiers ne sont autorisées que pour les projets trusted
-## 🪵 Logs
+## 🪵 Journaux
Les logs sont écrits **à la fois sur stdout et dans un fichier rotatif** sous :
@@ -518,14 +572,14 @@ Les logs sont écrits **à la fois sur stdout et dans un fichier rotatif** sous
point d'entrée local pour le bootstrap et le démarrage
- `src/coding_agent_telegram/resources/.env.example`
- modèle d'environnement canonique utilisé à la fois par le démarrage depuis le dépôt et par les installations du package
+ modèle d'environnement canonique utilisé à la fois par le démarrage depuis le dépôt et par les installations du paquet
- `pyproject.toml`
configuration du packaging et des dépendances
## 📦 Versionnement des releases
-Les versions du package sont dérivées des tags Git.
+Les versions du paquet sont dérivées des tags Git.
- TestPyPI/test : `v2026.3.26.dev1`
- préversion PyPI : `v2026.3.26rc1`
diff --git a/README.ja.md b/README.ja.md
index a196d37..30b183e 100644
--- a/README.ja.md
+++ b/README.ja.md
@@ -38,7 +38,7 @@
- ✅ Telegram で Codex / Copilot CLI を操作できる
- ✅ エージェントの回答や変更ファイルをコードブロックで確認しやすい
- ✅ エージェント実行中でも追加入力をキューに積める
- - ✅ テキストと画像入力に対応
+ - ✅ ✏️ テキスト、🌄 画像、🎙️ 音声メッセージに対応
## 🔁 デバイス/セッションをシームレスに切り替え
@@ -49,7 +49,7 @@
## 🛠️ 典型的なローカルフロー
```bash
- coding-agent-telegram # or run ./startup.sh
+ coding-agent-telegram # または ./startup.sh を実行
```
##### Telegram では:
@@ -99,6 +99,7 @@ curl -fsSL https://raw.githubusercontent.com/daocha/coding-agent-telegram/main/i
- ローカルにインストール済みの Codex CLI または Copilot CLI
- [Codex CLI インストール](https://developers.openai.com/codex/cli)
- [Copilot CLI インストール](https://github.com/features/copilot/cli)
+- [任意] Whisper、ffmpeg
@@ -108,35 +109,61 @@ Openclaw は非常に多機能で、Pi-Agent という統合 agent loop も備
## 🚀 クイックスタート
-### Option A: ワンライナーのブートストラップスクリプト
+### 方法A: ワンライナーのブートストラップスクリプト
```bash
curl -fsSL https://raw.githubusercontent.com/daocha/coding-agent-telegram/main/install.sh | bash
```
-### Option B: `pip` で PyPI からインストール
+### 方法B: `pip` で PyPI からインストール
```bash
pip install coding-agent-telegram
coding-agent-telegram
```
-### Option C: クローンしたリポジトリから実行
+### 方法C: クローンしたリポジトリから実行
```bash
git clone https://github.com/daocha/coding-agent-telegram
cd coding-agent-telegram
./startup.sh
```
-### Bot サーバーを起動
+### 🌐 Bot サーバーを起動
##### 初回起動時にアプリが env ファイルを作成し、入力すべき項目を案内します。
##### env ファイルを更新したら、次を再実行してください:
```bash
-# if you follow Option A or Option B, then run
+# 方法A または 方法B に従う場合は、次を実行
coding-agent-telegram
-# if you follow Option C, then run this again
+# 方法C に従う場合は、これをもう一度実行
./startup.sh
```
+## 🎙️ [任意] 音声文字起こし機能: ローカル OpenAI-Whisper の前提条件を準備
+
+これにより、Telegram のボイスノートに対するローカル Whisper ベースの音声文字起こしを任意で有効にできます。音声ファイルは最大 `20 MB` に制限されます。
+
+```bash
+# pip または one-liner install.sh でインストールした場合
+coding-agent-telegram-stt-install
+
+# クローンしたリポジトリから使う場合
+./install-stt.sh
+```
+
+推奨される env 設定:
+
+```text
+ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT=true
+OPENAI_WHISPER_MODEL=base
+OPENAI_WHISPER_TIMEOUT_SECONDS=120
+```
+
+メモ:
+
+- Whisper は選択したモデルを初回利用時に `~/.cache/whisper` へ自動ダウンロードします。
+- `OPENAI_WHISPER_MODEL=turbo` を選ぶと、`large-v3-turbo.pt` のダウンロード中に最初の音声文字起こしがタイムアウトしやすくなります。
+- 音声メッセージを文字起こしした後、ボットはまず認識したテキストを Telegram に返し、その後でエージェントへ渡します。これにより認識ミスを確認しやすくなります。
+
## 🔑 Telegram セットアップ
### Bot Token を取得
@@ -171,60 +198,67 @@ https://api.telegram.org/bot/provider |
- 新しい session 用の provider を選択します。選択は変更するまで bot と chat ごとに保存されます。 | +/provider |
+ 新しいセッション用のプロバイダーを選択します。選択は変更するまで bot と chat ごとに保存されます。 |
/project <project_folder> |
+ /project <project_folder> |
現在のプロジェクトフォルダを設定します。フォルダが存在しない場合は作成して trusted として扱います。既存で untrusted の場合は明示的に trust を確認します。 | |
/branch <new_branch> |
+ /branch <new_branch> |
現在のプロジェクトで branch を準備または切り替えます。branch が既に存在する場合はその branch を source candidate として扱います。存在しない場合は repository の default branch を source candidate に使います。 | |
/branch <origin_branch> <new_branch> |
- ` |
+ /branch <origin_branch> <new_branch> |
+ <origin_branch> を source candidate として branch を準備または切り替えます。どちらの形式でも bot は実在する source choice のみを提示します: local/<branch> と origin/<branch>。片方だけ存在する場合はその選択肢だけが表示され、どちらも無い場合は branch source が無いと通知します。 |
/current |
- 現在の bot と chat の active session を表示します。 | +/current |
+ 現在の bot と chat の アクティブなセッション を表示します。 |
/new [session_name] |
- 現在のプロジェクトに新しい session を作成します。名前を省略すると実際の session ID を使います。provider、project、branch が不足している場合は bot が不足分を案内します。 | +/new [session_name] |
+ 現在のプロジェクトに新しいセッションを作成します。名前を省略すると実際のセッション ID を使います。プロバイダー、プロジェクト、branch が不足している場合は bot が不足分を案内します。 |
/switch |
- 最新の session を新しい順で表示します。現在のプロジェクトに対する bot-managed session とローカルの Codex/Copilot CLI session の両方を含みます。 | +/switch |
+ 最新のセッションを新しい順で表示します。現在のプロジェクトに対する bot 管理セッションとローカルの Codex/Copilot CLI セッションの両方を含みます。 |
/switch page <number> |
- 保存済み session の別ページを表示します。 | +/switch page <number> |
+ 保存済みセッションの別ページを表示します。 |
/switch <session_id> |
- ID を指定して特定の session に切り替えます。ローカル CLI session を選ぶと bot がそれを取り込み、そこから続行します。 | +/switch <session_id> |
+ ID を指定して特定のセッションに切り替えます。ローカル CLI セッションを選ぶと bot がそれを取り込み、そこから続行します。 |
/compact |
- アクティブな session から新しい compact 済み session を作成し、そこへ切り替えます。 | +/compact |
+ アクティブなセッションから新しい compact 済みセッションを作成し、そこへ切り替えます。 |
/commit <git commands> |
- active session の project 内で、検証済みの `git commit` 関連コマンドを実行します。`ENABLE_COMMIT_COMMAND=true` のときだけ利用できます。変更を伴う Git コマンドには trusted project が必要です。 | +/commit <git commands> |
+ アクティブなセッション の project 内で、検証済みの git commit 関連コマンドを実行します。ENABLE_COMMIT_COMMAND=true のときだけ利用できます。変更を伴う Git コマンドには trusted project が必要です。 |
/push |
- 現在の active session に対して `origin |
+ /push |
+ 現在の アクティブなセッション に対して origin <branch> を push します。push 前に bot が確認します。 |
/abort |
- 現在のプロジェクトで実行中の agent run を中断します。queued questions がある場合は続行するか確認します。 | +/abort |
+ 現在のプロジェクトで実行中の エージェント実行 を中断します。キューされた質問 がある場合は続行するか確認します。 |
WORKSPACE_ROOT |
+ WORKSPACE_ROOT |
プロジェクトディレクトリを含む親フォルダです。 |
TELEGRAM_BOT_TOKENS |
+ TELEGRAM_BOT_TOKENS |
カンマ区切りの Telegram bot token です。 |
ALLOWED_CHAT_IDS |
+ ALLOWED_CHAT_IDS |
この bot の利用を許可する Telegram プライベート chat ID をカンマ区切りで指定します。 |
APP_LOCALE |
+ APP_LOCALE |
共有 bot メッセージとコマンド説明の UI 言語です。対応値: en, de, fr, ja, ko, nl, th, vi, zh-CN, zh-HK, zh-TW. |
|
CODEX_BIN |
+ CODEX_BIN |
Codex CLI を起動するコマンドです。既定値: codex. |
|
COPILOT_BIN |
+ COPILOT_BIN |
Copilot CLI を起動するコマンドです。既定値: copilot. |
|
CODEX_MODEL |
+ CODEX_MODEL |
Codex モデルの任意上書きです。空欄なら Codex CLI の既定モデルを使います。例: gpt-5.4 OpenAI Codex/OpenAI models |
|
COPILOT_MODEL |
+ COPILOT_MODEL |
Copilot モデルの任意上書きです。空欄なら Copilot CLI の既定モデルを使います。例: gpt-5.4, claude-sonnet-4.6 GitHub Copilot supported models |
|
CODEX_APPROVAL_POLICY |
+ CODEX_APPROVAL_POLICY |
Codex に渡す approval mode。既定: never. |
|
CODEX_SANDBOX_MODE |
+ CODEX_SANDBOX_MODE |
Codex に渡す sandbox mode。既定: workspace-write. |
|
CODEX_SKIP_GIT_REPO_CHECK |
+ CODEX_SKIP_GIT_REPO_CHECK |
有効にすると Codex の trusted-repo check を常にスキップします。 | |
ENABLE_COMMIT_COMMAND |
+ ENABLE_COMMIT_COMMAND |
Telegram の /commit コマンドを有効にします。既定: false. |
|
AGENT_HARD_TIMEOUT_SECONDS |
- 単一の agent run に対するハードタイムアウト。既定: 0(無効)。 |
+ AGENT_HARD_TIMEOUT_SECONDS |
+ 単一の エージェント実行 に対するハードタイムアウト。既定: 0(無効)。 |
SNAPSHOT_TEXT_FILE_MAX_BYTES |
- 実行ごとの diff 用に before/after snapshot を作る際、bot がテキストとして読む最大ファイルサイズです。既定: 200000. |
+ SNAPSHOT_TEXT_FILE_MAX_BYTES |
+ 実行ごとの diff 用に 実行前後のスナップショット を作る際、bot がテキストとして読む最大ファイルサイズです。既定: 200000. |
MAX_TELEGRAM_MESSAGE_LENGTH |
+ MAX_TELEGRAM_MESSAGE_LENGTH |
応答を分割する前に使う最大メッセージサイズ。既定: 3000. |
|
ENABLE_SENSITIVE_DIFF_FILTER |
+ ENABLE_SENSITIVE_DIFF_FILTER |
機密ファイルの diff を隠します。既定: true. |
|
ENABLE_SECRET_SCRUB_FILTER |
+ ENABLE_SECRET_SCRUB_FILTER |
tokens、keys、.env 値、certificates などの秘密らしい出力を Telegram 送信前にマスクします。既定: true(強く推奨)。 |
|
SNAPSHOT_INCLUDE_PATH_GLOBS |
+ SNAPSHOT_INCLUDE_PATH_GLOBS |
一致するパスを diff に強制的に含めます。例: .github/*,.profile.test,.profile.prod |
|
SNAPSHOT_EXCLUDE_PATH_GLOBS |
- パッケージ既定値に加えて diff 除外を追加します。例: .*,personal/*,sensitive*.txt 注: .* は hidden directory 内のファイルも含む hidden path に一致します。 |
+ SNAPSHOT_EXCLUDE_PATH_GLOBS |
+ パッケージ既定値に加えて diff 除外を追加します。例: .*,personal/*,sensitive*.txt 注: .* は 隠しディレクトリ内のファイルも含む隠しパス に一致します。 |
+
ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT |
+ デフォルト: false。true の場合、音声メッセージと音声ファイルの認識を有効にします。必要なバイナリやライブラリを起動時に確認し、不足していればインストールを案内します。 |
+
OPENAI_WHISPER_MODEL |
+ Whisper STT で使うモデルです。デフォルト: base。利用可能なモデル: tiny 約 72 MB、base 約 139 MB、large-v3-turbo 約 1.5 GB。モデルは最初の音声メッセージ時に自動でダウンロードされます。一般用途では base を推奨します。より高い精度や品質が必要なら turbo を試してください。 |
+
OPENAI_WHISPER_TIMEOUT_SECONDS |
+ デフォルト: 120。STT プロセスのタイムアウトです。通常は十分高速ですが、turbo を選ぶと最初の音声メッセージでモデルをダウンロードする間に、回線速度によってはタイムアウトすることがあります。 |
~/.coding-agent-telegram/state.json |
- Hauptdatei für den Session-Status. | +セッション状態のメインファイル。 |
~/.coding-agent-telegram/state.json.bak |
- Backup-Datei für den Status. | +状態のバックアップファイル。 |
~/.coding-agent-telegram/logs |
- Log-Verzeichnis. | +ログディレクトリ。 |
local/<branch>: ローカル branch を source に使う
+- origin/<branch>: remote branch から更新してから切り替える
-保存済み session の branch と現在の repository branch が一致しない場合、bot はそのまま続行しません。どちらの branch を使うか確認します:
+保存済みセッションの branch と現在の repository branch が一致しない場合、bot はそのまま続行しません。どちらの branch を使うか確認します:
-- 保存済み session の branch を使う
+- 保存済みセッションの branch を使う
- 現在の repository branch を使う
希望する source branch が存在しない場合は、生の Git error にせず、default branch と current branch を元に fallback source を提案します。
@@ -481,28 +533,28 @@ branch を作成または切り替えるとき、bot は source を明示的に
- 既存 folder は `CODEX_SKIP_GIT_REPO_CHECK` に従います
- `/project /provider |
- 새 session용 provider 를 선택합니다. 선택 내용은 바꿀 때까지 bot/chat 단위로 저장됩니다. | +/provider |
+ 새 세션용 제공자를 선택합니다. 선택 내용은 바꿀 때까지 bot/chat 단위로 저장됩니다. |
/project <project_folder> |
- 현재 project folder를 설정합니다. 폴더가 없으면 앱이 만들고 trusted 로 표시합니다. 이미 존재하지만 아직 untrusted 이면 trust 확인을 요청합니다. | +/project <project_folder> |
+ 현재 프로젝트 폴더를 설정합니다. 폴더가 없으면 앱이 만들고 trusted 로 표시합니다. 이미 존재하지만 아직 untrusted 이면 trust 확인을 요청합니다. |
/branch <new_branch> |
+ /branch <new_branch> |
현재 project에서 branch 를 준비하거나 전환합니다. branch 가 이미 있으면 source candidate 로 취급하고, 없으면 repository 의 default branch 를 source candidate 로 사용합니다. | |
/branch <origin_branch> <new_branch> |
- ` |
+ /branch <origin_branch> <new_branch> |
+ <origin_branch> 를 source candidate 로 사용해 branch 를 준비하거나 전환합니다. 두 형식 모두 bot 은 실제로 존재하는 source choice 만 보여줍니다: local/<branch>, origin/<branch>. 하나만 있으면 그것만 보이고, 둘 다 없으면 branch source 가 없다고 알립니다. |
/current |
- 현재 bot/chat 의 active session 을 보여줍니다. | +/current |
+ 현재 bot/chat 의 활성 세션 을 보여줍니다. |
/new [session_name] |
- 현재 project에 새 session을 만듭니다. 이름을 생략하면 실제 session ID를 사용합니다. provider, project, branch 가 없으면 bot 이 필요한 단계를 안내합니다. | +/new [session_name] |
+ 현재 프로젝트에 새 세션을 만듭니다. 이름을 생략하면 실제 세션 ID를 사용합니다. 제공자, 프로젝트, branch 가 없으면 bot 이 필요한 단계를 안내합니다. |
/switch |
- 가장 최근 session 을 최신순으로 보여줍니다. 현재 project 의 bot-managed session 과 로컬 Codex/Copilot CLI session 이 함께 표시됩니다. | +/switch |
+ 가장 최근 세션을 최신순으로 보여줍니다. 현재 프로젝트의 bot 관리 세션과 로컬 Codex/Copilot CLI 세션이 함께 표시됩니다. |
/switch page <number> |
- 저장된 session 의 다른 페이지를 보여줍니다. | +/switch page <number> |
+ 저장된 세션의 다른 페이지를 보여줍니다. |
/switch <session_id> |
- ID 로 특정 session 으로 전환합니다. 로컬 CLI session 을 선택하면 bot 이 state 에 가져와 이어서 진행합니다. | +/switch <session_id> |
+ ID 로 특정 세션으로 전환합니다. 로컬 CLI 세션을 선택하면 bot 이 상태에 가져와 이어서 진행합니다. |
/compact |
- 활성 session 에서 새 compact session 을 만들고 그쪽으로 전환합니다. | +/compact |
+ 활성 세션에서 새 compact 세션을 만들고 그쪽으로 전환합니다. |
/commit <git commands> |
- active session project 안에서 검증된 `git commit` 관련 명령을 실행합니다. `ENABLE_COMMIT_COMMAND=true` 일 때만 사용할 수 있습니다. 변경성 Git 명령은 trusted project 가 필요합니다. | +/commit <git commands> |
+ 활성 세션 project 안에서 검증된 git commit 관련 명령을 실행합니다. ENABLE_COMMIT_COMMAND=true 일 때만 사용할 수 있습니다. 변경성 Git 명령은 trusted project 가 필요합니다. |
/push |
- 현재 active session 에 대해 `origin |
+ /push |
+ 현재 활성 세션 에 대해 origin <branch> 를 push 합니다. push 전에 bot 이 확인합니다. |
/abort |
- 현재 project 의 agent run 을 중단합니다. 대기 중인 queued question 이 있으면 계속할지 묻습니다. | +/abort |
+ 현재 project 의 에이전트 실행 을 중단합니다. 대기 중인 queued question 이 있으면 계속할지 묻습니다. |
WORKSPACE_ROOT |
+ WORKSPACE_ROOT |
프로젝트 디렉터리를 담는 상위 폴더입니다. |
TELEGRAM_BOT_TOKENS |
+ TELEGRAM_BOT_TOKENS |
쉼표로 구분된 Telegram bot token 목록입니다. |
ALLOWED_CHAT_IDS |
+ ALLOWED_CHAT_IDS |
이 bot 사용을 허용할 Telegram 개인 chat ID 목록입니다. |
APP_LOCALE |
+ APP_LOCALE |
공용 bot 메시지와 명령 설명에 사용할 UI locale 입니다. 지원 값: en, de, fr, ja, ko, nl, th, vi, zh-CN, zh-HK, zh-TW. |
|
CODEX_BIN |
+ CODEX_BIN |
Codex CLI 를 실행할 명령입니다. 기본값: codex. |
|
COPILOT_BIN |
+ COPILOT_BIN |
Copilot CLI 를 실행할 명령입니다. 기본값: copilot. |
|
CODEX_MODEL |
+ CODEX_MODEL |
선택적 Codex model override 입니다. 비워 두면 Codex CLI 기본 model 을 사용합니다. 예: gpt-5.4 OpenAI Codex/OpenAI models |
|
COPILOT_MODEL |
+ COPILOT_MODEL |
선택적 Copilot model override 입니다. 비워 두면 Copilot CLI 기본 model 을 사용합니다. 예: gpt-5.4, claude-sonnet-4.6 GitHub Copilot supported models |
|
CODEX_APPROVAL_POLICY |
+ CODEX_APPROVAL_POLICY |
Codex 에 전달할 approval mode 입니다. 기본값: never. |
|
CODEX_SANDBOX_MODE |
+ CODEX_SANDBOX_MODE |
Codex 에 전달할 sandbox mode 입니다. 기본값: workspace-write. |
|
CODEX_SKIP_GIT_REPO_CHECK |
+ CODEX_SKIP_GIT_REPO_CHECK |
활성화하면 Codex trusted-repo check 를 항상 건너뜁니다. | |
ENABLE_COMMIT_COMMAND |
+ ENABLE_COMMIT_COMMAND |
Telegram /commit 명령을 활성화합니다. 기본값: false. |
|
AGENT_HARD_TIMEOUT_SECONDS |
- 단일 agent run 의 하드 타임아웃입니다. 기본값: 0 (비활성화). |
+ AGENT_HARD_TIMEOUT_SECONDS |
+ 단일 에이전트 실행 의 하드 타임아웃입니다. 기본값: 0 (비활성화). |
SNAPSHOT_TEXT_FILE_MAX_BYTES |
+ SNAPSHOT_TEXT_FILE_MAX_BYTES |
실행별 diff 스냅샷을 만들 때 bot 이 텍스트로 읽을 최대 파일 크기입니다. 기본값: 200000. |
|
MAX_TELEGRAM_MESSAGE_LENGTH |
+ MAX_TELEGRAM_MESSAGE_LENGTH |
응답을 분할하기 전 최대 메시지 크기입니다. 기본값: 3000. |
|
ENABLE_SENSITIVE_DIFF_FILTER |
+ ENABLE_SENSITIVE_DIFF_FILTER |
민감한 파일의 diff 를 숨깁니다. 기본값: true. |
|
ENABLE_SECRET_SCRUB_FILTER |
+ ENABLE_SECRET_SCRUB_FILTER |
tokens, keys, .env 값, certificates 등 비밀스러운 출력을 Telegram 으로 보내기 전에 마스킹합니다. 기본값: true (강력 권장). |
|
SNAPSHOT_INCLUDE_PATH_GLOBS |
+ SNAPSHOT_INCLUDE_PATH_GLOBS |
일치하는 경로를 diff 에 강제로 포함합니다. 예: .github/*,.profile.test,.profile.prod |
|
SNAPSHOT_EXCLUDE_PATH_GLOBS |
- 패키지 기본값 위에 추가 diff 제외 규칙을 더합니다. 예: .*,personal/*,sensitive*.txt 참고: .* 는 hidden directory 안 파일을 포함한 hidden path 에도 매칭됩니다. |
+ SNAPSHOT_EXCLUDE_PATH_GLOBS |
+ 패키지 기본값 위에 추가 diff 제외 규칙을 더합니다. 예: .*,personal/*,sensitive*.txt 참고: .* 는 숨김 디렉터리 안 파일을 포함한 숨김 경로 에도 매칭됩니다. |
+
ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT |
+ 기본값: false. true이면 음성 메시지와 오디오 파일 인식을 활성화합니다. 시스템은 필요한 바이너리나 라이브러리를 확인하고, 누락된 경우 설치를 안내합니다. |
+
OPENAI_WHISPER_MODEL |
+ Whisper STT에 사용할 모델입니다. 기본값: base. 사용 가능한 모델: tiny 약 72 MB, base 약 139 MB, large-v3-turbo 약 1.5 GB. 모델은 첫 음성 메시지 전송 시 자동으로 다운로드됩니다. 일반적인 사용에는 base를 권장합니다. 더 나은 정확도와 품질이 필요하면 turbo를 시도할 수 있습니다. |
+
OPENAI_WHISPER_TIMEOUT_SECONDS |
+ 기본값: 120. STT 프로세스 제한 시간입니다. 보통은 충분히 빠르지만 turbo를 선택하면 첫 음성 메시지에서 모델 다운로드로 인해 인터넷 속도에 따라 제한 시간을 초과할 수 있습니다. |
~/.coding-agent-telegram/state.json |
- Hauptdatei für den Session-Status. | +세션 상태의 기본 파일입니다. |
~/.coding-agent-telegram/state.json.bak |
- Backup-Datei für den Status. | +상태 백업 파일입니다. |
~/.coding-agent-telegram/logs |
- Log-Verzeichnis. | +로그 디렉터리입니다. |
local/<branch>: local branch 를 source 로 사용
+- origin/<branch>: remote branch 에서 먼저 업데이트한 뒤 전환
-저장된 session branch 와 현재 repository branch 가 다르면 bot 은 그대로 진행하지 않습니다. 어떤 branch 를 쓸지 물어봅니다:
+저장된 세션 branch 와 현재 repository branch 가 다르면 bot 은 그대로 진행하지 않습니다. 어떤 branch 를 쓸지 물어봅니다:
-- 저장된 session branch 사용
+- 저장된 세션 branch 사용
- 현재 repository branch 사용
원하는 source branch 가 없으면 raw Git error 대신 default branch 와 current branch 를 기반으로 fallback source 를 제안합니다.
@@ -481,28 +533,28 @@ branch 를 만들거나 바꿀 때 bot 은 source 를 명시적으로 안내합
- 기존 folder 는 `CODEX_SKIP_GIT_REPO_CHECK` 를 따릅니다
- `/project /provider |
+ /provider |
Choose the provider for new sessions. The selection is stored per bot and chat until you change it. |
/project <project_folder> |
+ /project <project_folder> |
Set the current project folder. If the folder does not exist, the app creates it and marks it trusted. If it already exists and is still untrusted, the app asks you to trust it explicitly. |
/branch <new_branch> |
+ /branch <new_branch> |
Prepare or switch a branch for the current project. If the branch already exists, the bot treats that branch as the source candidate. Otherwise it uses the repository default branch as the source candidate. |
/branch <origin_branch> <new_branch> |
+ /branch <origin_branch> <new_branch> |
Prepare or switch a branch using <origin_branch> as the source candidate. For both forms, the bot then offers the source choices that actually exist: local/<branch> origin/<branch> If only one of those exists, only that option is shown. If neither exists, the bot tells you the branch source is missing. |
/current |
+ /current |
Show the active session for the current bot and chat. |
/new [session_name] |
+ /new [session_name] |
Create a new session for the current project. If you omit the name, the bot uses the real session ID. If provider, project, or branch is missing, the bot guides you through the missing step. |
/switch |
+ /switch |
Show the latest sessions, newest first. The list includes both bot-managed sessions and local Codex/Copilot CLI sessions for the current project. |
/switch page <number> |
+ /switch page <number> |
Show another page of stored sessions. |
/switch <session_id> |
+ /switch <session_id> |
Switch to a specific session by ID. If you choose a local CLI session, the bot imports it and continues from there. |
/compact |
+ /compact |
Create a fresh compacted session from the active session and switch to it. |
/commit <git commands> |
+ /commit <git commands> |
Run validated git commit-related commands inside the active session project. Available only when ENABLE_COMMIT_COMMAND=true. Mutating git commands require a trusted project. |
/push |
+ /push |
Push origin <branch> for the current active session. The bot asks for confirmation before pushing. |
/abort |
+ /abort |
Abort the current agent run for the current project. If queued questions are waiting, the bot asks whether to continue them. |
WORKSPACE_ROOT |
+ WORKSPACE_ROOT |
Parent folder that contains your project directories. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
APP_LOCALE |
+ APP_LOCALE |
UI locale for shared bot messages and command descriptions. Supported values: en, de, fr, ja, ko, nl, th, vi, zh-CN, zh-HK, zh-TW. |
ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT |
+ Default: false. If true, voice messages and audio files are transcribed. The system checks for the required binaries and libraries on startup and prompts you to install any that are missing. |
+
OPENAI_WHISPER_MODEL |
+ Model for the Whisper STT. Default: base. Available models: tiny about 72 MB, base about 139 MB, large-v3-turbo about 1.5 GB. Models will be automatically downloaded on your first voice message. Recommended: base for general usage. If you want better accuracy and quality, you can try turbo.
+ |
+
OPENAI_WHISPER_TIMEOUT_SECONDS |
+ Default: 120. Timeout for the STT process. Usually the STT processing is fast enough. |
+
/provider |
- Kies de provider voor nieuwe sessies. De keuze wordt per bot en chat bewaard totdat je die wijzigt. | +/provider |
+ Kies de aanbieder voor nieuwe sessies. Die keuze wordt per bot en chat bewaard totdat je die wijzigt. |
/project <project_folder> |
+ /project <project_folder> |
Stel de huidige projectmap in. Bestaat de map niet, dan maakt de app die aan en markeert hem trusted. Bestaat hij al maar is hij nog untrusted, dan vraagt de app expliciet om trust. | |
/branch <new_branch> |
+ /branch <new_branch> |
Bereid een branch voor of wissel ernaar voor het huidige project. Als de branch al bestaat, behandelt de bot die als source candidate. Anders gebruikt hij de standaard-branch van de repository als source candidate. | |
/branch <origin_branch> <new_branch> |
- Bereid een branch voor of wissel ernaar met ` |
+ /branch <origin_branch> <new_branch> |
+ Bereid een branch voor of wissel ernaar met <origin_branch> als source candidate. Voor beide vormen biedt de bot daarna alleen de source choices aan die echt bestaan: local/<branch> en origin/<branch>. Als er maar één bestaat, zie je alleen die. Als geen van beide bestaat, meldt de bot dat de branch-source ontbreekt. |
/current |
+ /current |
Toon de actieve sessie voor de huidige bot en chat. | |
/new [session_name] |
- Maak een nieuwe sessie voor het huidige project. Als je geen naam opgeeft, gebruikt de bot de echte session ID. Als provider, project of branch ontbreekt, begeleidt de bot je door de ontbrekende stap. | +/new [session_name] |
+ Maak een nieuwe sessie voor het huidige project. Als je geen naam opgeeft, gebruikt de bot de echte sessie-ID. Als aanbieder, project of branch ontbreekt, begeleidt de bot je door de ontbrekende stap. |
/switch |
+ /switch |
Toon de nieuwste sessies, nieuwste eerst. De lijst bevat zowel bot-managed sessies als lokale Codex/Copilot CLI-sessies voor het huidige project. | |
/switch page <number> |
+ /switch page <number> |
Toon een andere pagina met opgeslagen sessies. | |
/switch <session_id> |
+ /switch <session_id> |
Schakel naar een specifieke sessie via ID. Kies je een lokale CLI-sessie, dan importeert de bot die en gaat daar verder. | |
/compact |
- Maak vanuit de actieve session een nieuwe compacte session en schakel daarheen over. | +/compact |
+ Maak vanuit de actieve sessie een nieuwe compacte sessie en schakel daarheen over. |
/commit <git commands> |
- Voer gevalideerde `git commit`-gerelateerde commando’s uit binnen het project van de actieve sessie. Alleen beschikbaar als `ENABLE_COMMIT_COMMAND=true`. Muterende Git-commando’s vereisen een trusted project. | +/commit <git commands> |
+ Voer gevalideerde git commit-gerelateerde commando’s uit binnen het project van de actieve sessie. Alleen beschikbaar als ENABLE_COMMIT_COMMAND=true. Muterende Git-commando’s vereisen een trusted project. |
/push |
- Push `origin |
+ /push |
+ Push origin <branch> voor de huidige actieve sessie. De bot vraagt om bevestiging voordat hij pusht. |
/abort |
+ /abort |
Breek de huidige agent-run voor het huidige project af. Als er vragen in de wachtrij staan, vraagt de bot of die verder verwerkt moeten worden. |
WORKSPACE_ROOT |
+ WORKSPACE_ROOT |
Bovenliggende map die je projectmappen bevat. |
TELEGRAM_BOT_TOKENS |
+ TELEGRAM_BOT_TOKENS |
Door komma's gescheiden Telegram bot tokens. |
ALLOWED_CHAT_IDS |
+ ALLOWED_CHAT_IDS |
Door komma's gescheiden Telegram chat-ID's van privéchats die de bot mogen gebruiken. |
APP_LOCALE |
+ APP_LOCALE |
UI-locale voor gedeelde botmeldingen en commandobeschrijvingen. Ondersteunde waarden: en, de, fr, ja, ko, nl, th, vi, zh-CN, zh-HK, zh-TW. |
|
CODEX_BIN |
+ CODEX_BIN |
Commando om Codex CLI te starten. Standaard: codex. |
|
COPILOT_BIN |
+ COPILOT_BIN |
Commando om Copilot CLI te starten. Standaard: copilot. |
|
CODEX_MODEL |
+ CODEX_MODEL |
Optionele Codex-modeloverride. Laat leeg om het standaardmodel van Codex CLI te gebruiken. Voorbeeld: gpt-5.4 OpenAI Codex/OpenAI-modellen |
|
COPILOT_MODEL |
+ COPILOT_MODEL |
Optionele Copilot-modeloverride. Laat leeg om het standaardmodel van Copilot CLI te gebruiken. Voorbeelden: gpt-5.4, claude-sonnet-4.6 Ondersteunde GitHub Copilot-modellen |
|
CODEX_APPROVAL_POLICY |
+ CODEX_APPROVAL_POLICY |
Goedkeuringsmodus die aan Codex wordt doorgegeven. Standaard: never. |
|
CODEX_SANDBOX_MODE |
+ CODEX_SANDBOX_MODE |
Sandboxmodus die aan Codex wordt doorgegeven. Standaard: workspace-write. |
|
CODEX_SKIP_GIT_REPO_CHECK |
+ CODEX_SKIP_GIT_REPO_CHECK |
Als dit is ingeschakeld, worden trusted-repo-checks van Codex altijd overgeslagen. | |
ENABLE_COMMIT_COMMAND |
+ ENABLE_COMMIT_COMMAND |
Schakelt het Telegram-commando /commit in. Standaard: false. |
|
AGENT_HARD_TIMEOUT_SECONDS |
+ AGENT_HARD_TIMEOUT_SECONDS |
Harde timeout voor één agent-run. Standaard: 0 (uitgeschakeld). |
|
SNAPSHOT_TEXT_FILE_MAX_BYTES |
- Maximale bestandsgrootte die de bot als tekst leest voor de before/after-snapshot voor per-run diffs. Standaard: 200000. |
+ SNAPSHOT_TEXT_FILE_MAX_BYTES |
+ Maximale bestandsgrootte die de bot als tekst leest voor de voor/na-momentopname voor per-run diffs. Standaard: 200000. |
MAX_TELEGRAM_MESSAGE_LENGTH |
+ MAX_TELEGRAM_MESSAGE_LENGTH |
Maximale berichtgrootte voordat de app antwoorden splitst. Standaard: 3000. |
|
ENABLE_SENSITIVE_DIFF_FILTER |
+ ENABLE_SENSITIVE_DIFF_FILTER |
Verberg diffs voor gevoelige bestanden. Standaard: true. |
|
ENABLE_SECRET_SCRUB_FILTER |
+ ENABLE_SECRET_SCRUB_FILTER |
Maskeer tokens, sleutels, .env-waarden, certificaten en vergelijkbare geheime uitvoer voordat die naar Telegram wordt gestuurd. Standaard: true (sterk aanbevolen). |
|
SNAPSHOT_INCLUDE_PATH_GLOBS |
+ SNAPSHOT_INCLUDE_PATH_GLOBS |
Forceer dat overeenkomende paden in diffs worden opgenomen. Voorbeeld: .github/*,.profile.test,.profile.prod |
|
SNAPSHOT_EXCLUDE_PATH_GLOBS |
+ SNAPSHOT_EXCLUDE_PATH_GLOBS |
Voeg extra diff-exclusies toe boven op de pakketstandaard. Voorbeeld: .*,personal/*,sensitive*.txt Opmerking: .* matcht verborgen paden, inclusief bestanden in verborgen mappen. |
ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT |
+ Standaard: false. Als dit op true staat, worden spraakberichten en audiobestanden herkend. Het systeem controleert de vereiste binaries of bibliotheken en vraagt de gebruiker om ze te installeren als ze ontbreken. |
+
OPENAI_WHISPER_MODEL |
+ Model voor Whisper STT. Standaard: base. Beschikbare modellen: tiny ongeveer 72 MB, base ongeveer 139 MB, large-v3-turbo ongeveer 1.5 GB. Modellen worden automatisch gedownload bij je eerste spraakbericht. Aanbevolen: base voor algemeen gebruik. Als je betere nauwkeurigheid en kwaliteit wilt, kun je turbo proberen. |
+
OPENAI_WHISPER_TIMEOUT_SECONDS |
+ Standaard: 120. Time-out voor het STT-proces. Meestal is de verwerking snel genoeg. Maar als je turbo kiest, kan het eerste spraakbericht door het downloaden van het model de time-out overschrijden, afhankelijk van je internetsnelheid. |
+
~/.coding-agent-telegram/state.json |
- Hauptdatei für den Session-Status. | +Hoofdbestand voor de sessiestatus. |
~/.coding-agent-telegram/state.json.bak |
- Backup-Datei für den Status. | +Back-upbestand voor de status. |
~/.coding-agent-telegram/logs |
- Log-Verzeichnis. | +Logmap. |
local/<branch> betekent de lokale branch als bron gebruiken
+- origin/<branch> betekent eerst vanaf de remote branch verversen en daarna wisselen
Als de bot ziet dat de in de sessie opgeslagen branch niet overeenkomt met de huidige repository-branch, gaat hij niet blind verder. Hij vraagt welke branch gebruikt moet worden:
@@ -485,7 +539,7 @@ Als je voorkeursbron-branch ontbreekt, biedt de bot fallback-bronnen aan op basi
- `/commit` kan volledig worden uitgeschakeld met `ENABLE_COMMIT_COMMAND`
- muterende `/commit`-bewerkingen zijn alleen toegestaan voor trusted projecten
-## 🪵 Logs
+## 🪵 Logboeken
Logs worden **zowel naar stdout als naar een roterend logbestand** geschreven onder:
@@ -521,11 +575,11 @@ Logs worden **zowel naar stdout als naar een roterend logbestand** geschreven on
canonieke omgevingssjabloon gebruikt door zowel repo-start als package-installaties
- `pyproject.toml`
- packaging- en dependencyconfiguratie
+ verpakkings- en dependencyconfiguratie
-## 📦 Release-versiebeheer
+## 📦 Uitgaveversiebeheer
-Packageversies worden afgeleid van Git-tags.
+Pakketversies worden afgeleid van Git-tags.
- TestPyPI/testen: `v2026.3.26.dev1`
- PyPI-prerelease: `v2026.3.26rc1`
diff --git a/README.th.md b/README.th.md
index 7ff78a6..6f70406 100644
--- a/README.th.md
+++ b/README.th.md
@@ -38,7 +38,7 @@
- ✅ ใช้ Telegram เพื่อควบคุม Codex / Copilot CLI
- ✅ ตรวจคำตอบและไฟล์ที่ถูกแก้ได้ง่ายใน code block
- ✅ ส่งคำถามต่อคิวไว้ได้ระหว่างที่ agent กำลังทำงาน
- - ✅ รองรับข้อความและรูปภาพ
+ - ✅ รองรับ ✏️ ข้อความ, 🌄 รูปภาพ และ 🎙️ ข้อความเสียง
## 🔁 สลับอุปกรณ์และเซสชันได้ลื่นไหล
@@ -49,7 +49,7 @@
## 🛠️ ตัวอย่าง flow การใช้งานบนเครื่อง
```bash
- coding-agent-telegram # or run ./startup.sh
+ coding-agent-telegram # หรือรัน ./startup.sh
```
##### ใน Telegram:
@@ -99,6 +99,7 @@ curl -fsSL https://raw.githubusercontent.com/daocha/coding-agent-telegram/main/i
- ติดตั้ง Codex CLI และ/หรือ Copilot CLI ไว้ในเครื่องแล้ว
- [ติดตั้ง Codex CLI](https://developers.openai.com/codex/cli)
- [ติดตั้ง Copilot CLI](https://github.com/features/copilot/cli)
+- [ทางเลือก] Whisper, ffmpeg
/provider |
- เลือก provider สำหรับ session ใหม่ โดยค่าที่เลือกจะถูกเก็บแยกตาม bot และ chat จนกว่าคุณจะเปลี่ยน | +/provider |
+ เลือกผู้ให้บริการสำหรับเซสชันใหม่ โดยค่าที่เลือกจะถูกเก็บแยกตาม bot และ chat จนกว่าคุณจะเปลี่ยน |
/project <project_folder> |
+ /project <project_folder> |
ตั้งค่าโฟลเดอร์ project ปัจจุบัน หากโฟลเดอร์ยังไม่มี แอปจะสร้างและทำเครื่องหมายว่า trusted หากมีอยู่แล้วแต่ยัง untrusted แอปจะถามยืนยัน trust ก่อน | |
/branch <new_branch> |
+ /branch <new_branch> |
เตรียมหรือสลับ branch สำหรับ project ปัจจุบัน หาก branch มีอยู่แล้ว บอตจะถือ branch นั้นเป็น source candidate หากยังไม่มี บอตจะใช้ default branch ของ repository เป็น source candidate | |
/branch <origin_branch> <new_branch> |
- เตรียมหรือสลับ branch โดยใช้ ` |
+ /branch <origin_branch> <new_branch> |
+ เตรียมหรือสลับ branch โดยใช้ <origin_branch> เป็น source candidate สำหรับทั้งสองรูปแบบ บอตจะแสดงเฉพาะ source choices ที่มีอยู่จริงเท่านั้น: local/<branch> และ origin/<branch> หากมีเพียงตัวเดียวก็จะแสดงเพียงตัวนั้น หากไม่มีเลย บอตจะแจ้งว่าไม่พบ branch source |
/current |
- แสดง active session ของ bot และ chat ปัจจุบัน | +/current |
+ แสดง เซสชันที่ใช้งานอยู่ ของ bot และ chat ปัจจุบัน |
/new [session_name] |
- สร้าง session ใหม่สำหรับ project ปัจจุบัน หากไม่ระบุชื่อ บอตจะใช้ session ID จริง หากยังไม่มี provider, project หรือ branch บอตจะพาคุณไปยังขั้นตอนที่ขาดอยู่ | +/new [session_name] |
+ สร้างเซสชันใหม่สำหรับ project ปัจจุบัน หากไม่ระบุชื่อ บอตจะใช้รหัสเซสชันจริง หากยังไม่มีผู้ให้บริการ, project หรือ branch บอตจะพาคุณไปยังขั้นตอนที่ขาดอยู่ |
/switch |
- แสดง session ล่าสุด โดยเรียงจากใหม่ไปเก่า รายการนี้รวมทั้ง bot-managed sessions และ local Codex/Copilot CLI sessions ของ project ปัจจุบัน | +/switch |
+ แสดงเซสชันล่าสุด โดยเรียงจากใหม่ไปเก่า รายการนี้รวมทั้งเซสชันที่ bot ดูแลและ local Codex/Copilot CLI เซสชันของ project ปัจจุบัน |
/switch page <number> |
- แสดงหน้าถัดไปของ sessions ที่จัดเก็บไว้ | +/switch page <number> |
+ แสดงหน้าถัดไปของเซสชันที่จัดเก็บไว้ |
/switch <session_id> |
- สลับไปยัง session ที่ระบุด้วย ID หากเลือก local CLI session บอตจะ import เข้าสู่ state แล้วทำงานต่อจากตรงนั้น | +/switch <session_id> |
+ สลับไปยังเซสชันที่ระบุด้วย ID หากเลือก local CLI เซสชัน บอตจะ import เข้าสู่ state แล้วทำงานต่อจากตรงนั้น |
/compact |
- สร้าง session แบบย่อใหม่จาก session ที่กำลังใช้งาน แล้วสลับไปที่ session นั้น | +/compact |
+ สร้างเซสชันแบบย่อใหม่จากเซสชันที่กำลังใช้งาน แล้วสลับไปที่เซสชันนั้น |
/commit <git commands> |
- รันคำสั่งที่เกี่ยวข้องกับ `git commit` ซึ่งผ่านการตรวจสอบแล้วภายใน project ของ active session ใช้ได้เมื่อ `ENABLE_COMMIT_COMMAND=true` เท่านั้น คำสั่ง Git ที่มีการแก้ไขต้องใช้ project ที่ trusted | +/commit <git commands> |
+ รันคำสั่งที่เกี่ยวข้องกับ git commit ซึ่งผ่านการตรวจสอบแล้วภายใน project ของ เซสชันที่ใช้งานอยู่ ใช้ได้เมื่อ ENABLE_COMMIT_COMMAND=true เท่านั้น คำสั่ง Git ที่มีการแก้ไขต้องใช้ project ที่ trusted |
/push |
- push `origin |
+ /push |
+ push origin <branch> สำหรับ เซสชันที่ใช้งานอยู่ ปัจจุบัน โดยบอตจะขอการยืนยันก่อน push |
/abort |
- ยกเลิก agent run ปัจจุบันของ project นี้ หากมี queued questions รออยู่ บอตจะถามว่าจะให้ประมวลผลต่อหรือไม่ | +/abort |
+ ยกเลิก การรันของเอเจนต์ ปัจจุบันของ project นี้ หากมี คำถามที่เข้าคิว รออยู่ บอตจะถามว่าจะให้ประมวลผลต่อหรือไม่ |
WORKSPACE_ROOT |
+ WORKSPACE_ROOT |
โฟลเดอร์หลักที่เก็บโฟลเดอร์โปรเจกต์ของคุณ |
TELEGRAM_BOT_TOKENS |
+ TELEGRAM_BOT_TOKENS |
Telegram bot tokens แบบคั่นด้วย comma |
ALLOWED_CHAT_IDS |
+ ALLOWED_CHAT_IDS |
Telegram private chat IDs แบบคั่นด้วย comma ที่ได้รับอนุญาตให้ใช้บอต |
APP_LOCALE |
+ APP_LOCALE |
ภาษา UI สำหรับข้อความของบอตและคำอธิบายคำสั่งที่ใช้ร่วมกัน ค่าที่รองรับ: en, de, fr, ja, ko, nl, th, vi, zh-CN, zh-HK, zh-TW |
|
CODEX_BIN |
+ CODEX_BIN |
คำสั่งที่ใช้เรียก Codex CLI ค่าเริ่มต้น: codex |
|
COPILOT_BIN |
+ COPILOT_BIN |
คำสั่งที่ใช้เรียก Copilot CLI ค่าเริ่มต้น: copilot |
|
CODEX_MODEL |
+ CODEX_MODEL |
กำหนด model ของ Codex เพิ่มเติมได้แบบ optional หากปล่อยว่างจะใช้ model เริ่มต้นของ Codex CLI ตัวอย่าง: gpt-5.4 OpenAI Codex/OpenAI models |
|
COPILOT_MODEL |
+ COPILOT_MODEL |
กำหนด model ของ Copilot เพิ่มเติมได้แบบ optional หากปล่อยว่างจะใช้ model เริ่มต้นของ Copilot CLI ตัวอย่าง: gpt-5.4, claude-sonnet-4.6 GitHub Copilot supported models |
|
CODEX_APPROVAL_POLICY |
+ CODEX_APPROVAL_POLICY |
โหมด approval ที่ส่งให้ Codex ค่าเริ่มต้น: never |
|
CODEX_SANDBOX_MODE |
+ CODEX_SANDBOX_MODE |
โหมด sandbox ที่ส่งให้ Codex ค่าเริ่มต้น: workspace-write |
|
CODEX_SKIP_GIT_REPO_CHECK |
+ CODEX_SKIP_GIT_REPO_CHECK |
หากเปิดไว้ จะข้ามการตรวจ trusted-repo ของ Codex เสมอ | |
ENABLE_COMMIT_COMMAND |
+ ENABLE_COMMIT_COMMAND |
เปิดใช้งานคำสั่ง Telegram /commit ค่าเริ่มต้น: false |
|
AGENT_HARD_TIMEOUT_SECONDS |
- ฮาร์ดไทม์เอาต์สำหรับ agent run หนึ่งครั้ง ค่าเริ่มต้น: 0 (ปิดใช้งาน) |
+ AGENT_HARD_TIMEOUT_SECONDS |
+ ฮาร์ดไทม์เอาต์สำหรับ การรันของเอเจนต์ หนึ่งครั้ง ค่าเริ่มต้น: 0 (ปิดใช้งาน) |
SNAPSHOT_TEXT_FILE_MAX_BYTES |
- ขนาดไฟล์สูงสุดที่บอตจะอ่านเป็นข้อความเพื่อสร้าง before/after snapshot สำหรับ diff ของแต่ละ run ค่าเริ่มต้น: 200000 |
+ SNAPSHOT_TEXT_FILE_MAX_BYTES |
+ ขนาดไฟล์สูงสุดที่บอตจะอ่านเป็นข้อความเพื่อสร้าง สแนปช็อตก่อนและหลังการรัน สำหรับ diff ของแต่ละ run ค่าเริ่มต้น: 200000 |
MAX_TELEGRAM_MESSAGE_LENGTH |
+ MAX_TELEGRAM_MESSAGE_LENGTH |
ขนาดข้อความสูงสุดก่อนที่แอปจะแบ่งการตอบกลับ ค่าเริ่มต้น: 3000 |
|
ENABLE_SENSITIVE_DIFF_FILTER |
+ ENABLE_SENSITIVE_DIFF_FILTER |
ซ่อน diff สำหรับไฟล์ที่มีข้อมูลอ่อนไหว ค่าเริ่มต้น: true |
|
ENABLE_SECRET_SCRUB_FILTER |
+ ENABLE_SECRET_SCRUB_FILTER |
ปิดบัง tokens, keys, ค่า .env, certificates และข้อมูลลักษณะคล้ายความลับก่อนส่งไปยัง Telegram ค่าเริ่มต้น: true (แนะนำอย่างยิ่ง) |
|
SNAPSHOT_INCLUDE_PATH_GLOBS |
+ SNAPSHOT_INCLUDE_PATH_GLOBS |
บังคับรวม path ที่ตรงเงื่อนไขเข้าใน diff ตัวอย่าง: .github/*,.profile.test,.profile.prod |
|
SNAPSHOT_EXCLUDE_PATH_GLOBS |
- เพิ่มกฎยกเว้น diff เพิ่มเติมทับบนค่าเริ่มต้นของแพ็กเกจ ตัวอย่าง: .*,personal/*,sensitive*.txt หมายเหตุ: .* จะตรงกับ path ที่ซ่อนอยู่ รวมถึงไฟล์ใน hidden directory |
+ SNAPSHOT_EXCLUDE_PATH_GLOBS |
+ เพิ่มกฎยกเว้น diff เพิ่มเติมทับบนค่าเริ่มต้นของแพ็กเกจ ตัวอย่าง: .*,personal/*,sensitive*.txt หมายเหตุ: .* จะตรงกับ path ที่ซ่อนอยู่ รวมถึงไฟล์ใน ไดเรกทอรีที่ซ่อนอยู่ |
+
ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT |
+ ค่าเริ่มต้น: false หากเป็น true จะเปิดใช้การรู้จำข้อความเสียงและไฟล์เสียง ระบบจะตรวจสอบไบนารีหรือไลบรารีที่จำเป็น และแจ้งให้ผู้ใช้ติดตั้งหากยังขาดอยู่ |
+
OPENAI_WHISPER_MODEL |
+ โมเดลสำหรับ Whisper STT ค่าเริ่มต้น: base โมเดลที่ใช้ได้: tiny ประมาณ 72 MB, base ประมาณ 139 MB, large-v3-turbo ประมาณ 1.5 GB โมเดลจะถูกดาวน์โหลดอัตโนมัติเมื่อคุณส่งข้อความเสียงครั้งแรก แนะนำให้ใช้ base สำหรับการใช้งานทั่วไป หากต้องการความแม่นยำและคุณภาพที่ดีขึ้นสามารถลอง turbo ได้ |
+
OPENAI_WHISPER_TIMEOUT_SECONDS |
+ ค่าเริ่มต้น: 120 ระยะหมดเวลาของกระบวนการ STT โดยทั่วไปการประมวลผลเร็วพออยู่แล้ว แต่หากเลือก turbo การส่งข้อความเสียงครั้งแรกอาจใช้เวลานานเกินกำหนดระหว่างดาวน์โหลดโมเดล ขึ้นอยู่กับความเร็วอินเทอร์เน็ตของคุณ |
~/.coding-agent-telegram/state.json |
- Hauptdatei für den Session-Status. | +ไฟล์สถานะเซสชันหลัก |
~/.coding-agent-telegram/state.json.bak |
- Backup-Datei für den Status. | +ไฟล์สำรองของสถานะ |
~/.coding-agent-telegram/logs |
- Log-Verzeichnis. | +ไดเรกทอรีบันทึก |
local/<branch> คือใช้ local branch เป็นต้นทาง
+- origin/<branch> คืออัปเดตจาก remote branch ก่อน แล้วค่อยสลับ
-ถ้าบอตพบว่า branch ที่เก็บไว้ใน session ไม่ตรงกับ branch ปัจจุบันของ repository บอตจะไม่ทำต่อแบบเดาสุ่ม แต่จะถามว่าต้องการใช้ branch ใด:
+ถ้าบอตพบว่า branch ที่เก็บไว้ในเซสชันไม่ตรงกับ branch ปัจจุบันของ repository บอตจะไม่ทำต่อแบบเดาสุ่ม แต่จะถามว่าต้องการใช้ branch ใด:
-- ใช้ branch ที่เก็บไว้ใน session
+- ใช้ branch ที่เก็บไว้ในเซสชัน
- ใช้ branch ปัจจุบันของ repository
หาก source branch ที่คุณต้องการหายไป บอตจะเสนอ fallback source ตาม default branch และ current branch แทนที่จะปล่อยให้คุณเจอ Git error ตรง ๆ
@@ -485,7 +543,7 @@ _ในแต่ละ agent run บอตจะสร้าง before/after sna
- สามารถปิด `/commit` ได้ทั้งหมดด้วย `ENABLE_COMMIT_COMMAND`
- การทำ `/commit` ที่มีการแก้ไขจริงจะอนุญาตเฉพาะกับ trusted project เท่านั้น
-## 🪵 Logs
+## 🪵 บันทึก
log จะถูกเขียน **ทั้งไปที่ stdout และไฟล์ log แบบหมุนเวียน** ใต้ path นี้:
@@ -498,11 +556,11 @@ log จะถูกเขียน **ทั้งไปที่ stdout แล
- การเริ่มต้น bot และเริ่ม polling
- การเลือก project
-- การสร้าง session
-- การสลับ session
-- การรายงาน active session
+- การสร้างเซสชัน
+- การสลับเซสชัน
+- การรายงาน เซสชันที่ใช้งานอยู่
- การรันงานแบบปกติ (รวม audit log line ที่มี prompt แบบตัดทอน)
-- การแทนที่ session หลัง resume ล้มเหลว
+- การแทนที่เซสชันหลัง resume ล้มเหลว
- warnings และ runtime errors
@@ -521,7 +579,7 @@ log จะถูกเขียน **ทั้งไปที่ stdout แล
template สภาพแวดล้อมหลักที่ใช้ทั้งตอนเริ่มจาก repo และตอนติดตั้งเป็น package
- `pyproject.toml`
- การตั้งค่า packaging และ dependencies
+ การตั้งค่า แพ็กเกจจิง และ dependencies
## 📦 การกำหนดเวอร์ชัน release
@@ -535,4 +593,4 @@ log จะถูกเขียน **ทั้งไปที่ stdout แล
- โปรเจกต์นี้ออกแบบมาสำหรับผู้ใช้ที่รัน agents แบบ local บนเครื่องของตนเอง
- Telegram bot เป็น control surface ไม่ใช่ execution environment
-- หากคุณรันหลาย bot ก็ยังจัดการทั้งหมดได้ด้วย server process เดียว
+- หากคุณรันหลาย bot ก็ยังจัดการทั้งหมดได้ด้วย เซิร์ฟเวอร์โพรเซส เดียว
diff --git a/README.vi.md b/README.vi.md
index 96e6e04..f8324e1 100644
--- a/README.vi.md
+++ b/README.vi.md
@@ -36,9 +36,9 @@
- ✅ Nhẹ: không cần framework nặng, minh bạch hoàn toàn
- ✅ Nhiều bot: nhiều cuộc chat, nhiều phiên
- ✅ Dùng Telegram để điều khiển Codex / Copilot CLI
- - ✅ Dễ xem câu trả lời và các file đã thay đổi trong code block
+ - ✅ Dễ xem câu trả lời và các tệp đã thay đổi trong code block
- ✅ Có thể xếp hàng câu hỏi tiếp theo khi agent đang làm việc
- - ✅ Hỗ trợ đầu vào văn bản và hình ảnh
+ - ✅ Chấp nhận tin nhắn ✏️ văn bản, 🌄 hình ảnh và 🎙️ thoại
## 🔁 Chuyển thiết bị/phiên liền mạch
@@ -49,7 +49,7 @@
## 🛠️ Luồng làm việc cục bộ điển hình
```bash
- coding-agent-telegram # or run ./startup.sh
+ coding-agent-telegram # hoặc chạy ./startup.sh
```
##### Trong Telegram:
@@ -80,7 +80,7 @@ curl -fsSL https://raw.githubusercontent.com/daocha/coding-agent-telegram/main/i
- Danh sách trắng cho chat riêng qua `ALLOWED_CHAT_IDS`
- Chỉ cho phép một agent hoạt động trên mỗi project để giảm xung đột ghi
-- Ẩn diff của các file nhạy cảm
+- Ẩn diff của các tệp nhạy cảm
- API keys, tokens, giá trị `.env`, certificates, SSH keys và các đầu ra mang tính bí mật sẽ được che trước khi gửi lại Telegram
- Dữ liệu runtime của app nằm dưới `~/.coding-agent-telegram`
- Các thư mục có sẵn có thể yêu cầu xác nhận trust trước khi chạy Git operation có thay đổi
@@ -99,6 +99,7 @@ Trước khi khởi động server, hãy chuẩn bị:
- Codex CLI và/hoặc Copilot CLI đã được cài cục bộ
- [Cài Codex CLI](https://developers.openai.com/codex/cli)
- [Cài Copilot CLI](https://github.com/features/copilot/cli)
+- [Tùy chọn] Whisper, ffmpeg
@@ -108,35 +109,61 @@ Openclaw cung cấp bộ tính năng rất đầy đủ và đã có sẵn agent
## 🚀 Bắt đầu nhanh
-### Option A: Script bootstrap một dòng
+### Cách A: Script bootstrap một dòng
```bash
curl -fsSL https://raw.githubusercontent.com/daocha/coding-agent-telegram/main/install.sh | bash
```
-### Option B: Cài từ PyPI bằng `pip`
+### Cách B: Cài từ PyPI bằng `pip`
```bash
pip install coding-agent-telegram
coding-agent-telegram
```
-### Option C: Chạy từ repository đã clone
+### Cách C: Chạy từ repository đã clone
```bash
git clone https://github.com/daocha/coding-agent-telegram
cd coding-agent-telegram
./startup.sh
```
-### Khởi động bot server
-##### Ở lần chạy đầu, app sẽ tạo file env và cho bạn biết cần điền trường nào.
-##### Sau khi cập nhật file env, hãy chạy lại:
+### 🌐 Khởi động bot server
+##### Ở lần chạy đầu, app sẽ tạo tệp env và cho bạn biết cần điền trường nào.
+##### Sau khi cập nhật tệp env, hãy chạy lại:
```bash
-# if you follow Option A or Option B, then run
+# nếu bạn làm theo Cách A hoặc Cách B, hãy chạy
coding-agent-telegram
-# if you follow Option C, then run this again
+# nếu bạn làm theo Cách C, hãy chạy lại lệnh này
./startup.sh
```
+## 🎙️ [Tùy chọn] Tính năng chuyển giọng nói thành văn bản: chuẩn bị các điều kiện cần cục bộ của OpenAI-Whisper
+
+Phần này dùng để bật tùy chọn chuyển tin nhắn thoại Telegram thành văn bản bằng Whisper chạy cục bộ. Tệp âm thanh được giới hạn tối đa `20 MB`.
+
+```bash
+# nếu bạn cài bằng pip
+coding-agent-telegram-stt-install
+
+# nếu bạn chạy từ repository đã clone
+./install-stt.sh
+```
+
+Thiết lập env được khuyến nghị:
+
+```text
+ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT=true
+OPENAI_WHISPER_MODEL=base
+OPENAI_WHISPER_TIMEOUT_SECONDS=120
+```
+
+Lưu ý:
+
+- Whisper sẽ tự động tải model đã chọn vào `~/.cache/whisper` ở lần dùng đầu tiên.
+- Nếu bạn chọn `OPENAI_WHISPER_MODEL=turbo`, lần chuyển giọng nói đầu tiên có khả năng chạm timeout cao hơn khi `large-v3-turbo.pt` vẫn đang được tải.
+- Sau khi một tin nhắn thoại được chép lại, bot sẽ gửi lại bản transcript đã nhận dạng vào Telegram trước rồi mới chuyển cho tác nhân. Điều này giúp kiểm tra lỗi nhận dạng dễ hơn.
+
## 🔑 Thiết lập Telegram
### Lấy Bot Token
@@ -171,79 +198,86 @@ Lưu ý:
## 📨 Loại tin nhắn được hỗ trợ
+Hiện tại bot chấp nhận:
+
+- tin nhắn văn bản
+- ảnh
+- tin nhắn thoại và tệp âm thanh khi `ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT=true` và các điều kiện cần cục bộ của Whisper đã được cài đặt
+- hiện tại Codex và Copilot chỉ hỗ trợ văn bản và hình ảnh, chưa hỗ trợ video
+
## 🤖 Lệnh Telegram
/provider |
- Chọn provider cho các session mới. Lựa chọn này được lưu theo từng bot và chat cho đến khi bạn thay đổi. | +/provider |
+ Chọn nhà cung cấp cho các phiên mới. Lựa chọn này được lưu theo từng bot và chat cho đến khi bạn thay đổi. |
/project <project_folder> |
+ /project <project_folder> |
Đặt thư mục project hiện tại. Nếu thư mục chưa tồn tại, app sẽ tạo và đánh dấu là trusted. Nếu đã tồn tại nhưng vẫn untrusted, app sẽ yêu cầu xác nhận trust rõ ràng. | |
/branch <new_branch> |
+ /branch <new_branch> |
Chuẩn bị hoặc chuyển branch cho project hiện tại. Nếu branch đã tồn tại, bot coi branch đó là source candidate. Nếu chưa có, bot dùng default branch của repository làm source candidate. | |
/branch <origin_branch> <new_branch> |
- Chuẩn bị hoặc chuyển branch bằng cách dùng ` |
+ /branch <origin_branch> <new_branch> |
+ Chuẩn bị hoặc chuyển branch bằng cách dùng <origin_branch> làm source candidate. Với cả hai dạng, bot chỉ đưa ra các source choice thật sự tồn tại: local/<branch> và origin/<branch>. Nếu chỉ có một lựa chọn thì chỉ hiện lựa chọn đó. Nếu không có lựa chọn nào, bot sẽ báo thiếu branch source. |
/current |
- Hiển thị active session cho bot và chat hiện tại. | +/current |
+ Hiển thị phiên hoạt động cho bot và chat hiện tại. |
/new [session_name] |
- Tạo session mới cho project hiện tại. Nếu bỏ qua tên, bot sẽ dùng session ID thật. Nếu thiếu provider, project hoặc branch, bot sẽ hướng dẫn bước còn thiếu. | +/new [session_name] |
+ Tạo phiên mới cho project hiện tại. Nếu bỏ qua tên, bot sẽ dùng mã định danh phiên thật. Nếu thiếu nhà cung cấp, project hoặc branch, bot sẽ hướng dẫn bước còn thiếu. |
/switch |
- Hiển thị các session mới nhất, mới nhất trước. Danh sách bao gồm cả session do bot quản lý và local Codex/Copilot CLI session của project hiện tại. | +/switch |
+ Hiển thị các phiên mới nhất, mới nhất trước. Danh sách bao gồm cả phiên do bot quản lý và phiên CLI Codex/Copilot cục bộ của project hiện tại. |
/switch page <number> |
- Hiển thị trang khác của các session đã lưu. | +/switch page <number> |
+ Hiển thị trang khác của các phiên đã lưu. |
/switch <session_id> |
- Chuyển sang một session cụ thể bằng ID. Nếu bạn chọn local CLI session, bot sẽ import nó và tiếp tục từ đó. | +/switch <session_id> |
+ Chuyển sang một phiên cụ thể bằng ID. Nếu bạn chọn phiên CLI cục bộ, bot sẽ import nó và tiếp tục từ đó. |
/compact |
- Tạo một session rút gọn mới từ session đang hoạt động rồi chuyển sang session đó. | +/compact |
+ Tạo một phiên rút gọn mới từ phiên đang hoạt động rồi chuyển sang phiên đó. |
/commit <git commands> |
- Chạy các lệnh liên quan đến `git commit` đã được kiểm tra trong project của active session. Chỉ có khi `ENABLE_COMMIT_COMMAND=true`. Các lệnh Git có thay đổi yêu cầu project đã trusted. | +/commit <git commands> |
+ Chạy các lệnh liên quan đến git commit đã được kiểm tra trong project của phiên hoạt động. Chỉ có khi ENABLE_COMMIT_COMMAND=true. Các lệnh Git có thay đổi yêu cầu project đã trusted. |
/push |
- Push `origin |
+ /push |
+ Push origin <branch> cho phiên hoạt động hiện tại. Bot sẽ hỏi xác nhận trước khi push. |
/abort |
- Hủy agent run hiện tại của project hiện tại. Nếu còn queued questions chờ xử lý, bot sẽ hỏi có tiếp tục hay không. | +/abort |
+ Hủy lần chạy tác nhân hiện tại của project hiện tại. Nếu còn các câu hỏi trong hàng đợi chờ xử lý, bot sẽ hỏi có tiếp tục hay không. |
CODING_AGENT_TELEGRAM_ENV_FILE |
- Dùng khi bạn muốn app trỏ tới một file env cụ thể. | +Dùng khi bạn muốn app trỏ tới một tệp env cụ thể. |
~/.coding-agent-telegram/.env_coding_agent_telegram |
- Vị trí file env mặc định. | +Vị trí tệp env mặc định. |
./.env_coding_agent_telegram |
- Chỉ dùng khi file local này đã tồn tại. | +Chỉ dùng khi tệp local này đã tồn tại. |
WORKSPACE_ROOT |
+ WORKSPACE_ROOT |
Thư mục cha chứa các thư mục project của bạn. |
TELEGRAM_BOT_TOKENS |
+ TELEGRAM_BOT_TOKENS |
Các Telegram bot token, ngăn cách bằng dấu phẩy. |
ALLOWED_CHAT_IDS |
+ ALLOWED_CHAT_IDS |
Các Telegram private chat ID được phép dùng bot, ngăn cách bằng dấu phẩy. |
APP_LOCALE |
+ APP_LOCALE |
Ngôn ngữ UI cho các thông điệp bot dùng chung và mô tả lệnh. Giá trị hỗ trợ: en, de, fr, ja, ko, nl, th, vi, zh-CN, zh-HK, zh-TW. |
|
CODEX_BIN |
+ CODEX_BIN |
Lệnh dùng để chạy Codex CLI. Mặc định: codex. |
|
COPILOT_BIN |
+ COPILOT_BIN |
Lệnh dùng để chạy Copilot CLI. Mặc định: copilot. |
|
CODEX_MODEL |
+ CODEX_MODEL |
Ghi đè model Codex nếu cần. Để trống để dùng model mặc định của Codex CLI. Ví dụ: gpt-5.4 OpenAI Codex/OpenAI models |
|
COPILOT_MODEL |
+ COPILOT_MODEL |
Ghi đè model Copilot nếu cần. Để trống để dùng model mặc định của Copilot CLI. Ví dụ: gpt-5.4, claude-sonnet-4.6 GitHub Copilot supported models |
|
CODEX_APPROVAL_POLICY |
+ CODEX_APPROVAL_POLICY |
Chế độ approval truyền cho Codex. Mặc định: never. |
|
CODEX_SANDBOX_MODE |
+ CODEX_SANDBOX_MODE |
Chế độ sandbox truyền cho Codex. Mặc định: workspace-write. |
|
CODEX_SKIP_GIT_REPO_CHECK |
+ CODEX_SKIP_GIT_REPO_CHECK |
Nếu bật, luôn bỏ qua trusted-repo check của Codex. | |
ENABLE_COMMIT_COMMAND |
+ ENABLE_COMMIT_COMMAND |
Bật lệnh Telegram /commit. Mặc định: false. |
|
AGENT_HARD_TIMEOUT_SECONDS |
- Timeout cứng cho một lần agent run. Mặc định: 0 (tắt). |
+ AGENT_HARD_TIMEOUT_SECONDS |
+ Timeout cứng cho một lần chạy tác nhân. Mặc định: 0 (tắt). |
SNAPSHOT_TEXT_FILE_MAX_BYTES |
- Kích thước file tối đa mà bot sẽ đọc dưới dạng văn bản khi tạo before/after snapshot cho diff của từng run. Mặc định: 200000. |
+ SNAPSHOT_TEXT_FILE_MAX_BYTES |
+ Kích thước tệp tối đa mà bot sẽ đọc dưới dạng văn bản khi tạo ảnh chụp nhanh trước/sau cho diff của từng run. Mặc định: 200000. |
MAX_TELEGRAM_MESSAGE_LENGTH |
+ MAX_TELEGRAM_MESSAGE_LENGTH |
Kích thước tin nhắn tối đa trước khi app tách phản hồi. Mặc định: 3000. |
|
ENABLE_SENSITIVE_DIFF_FILTER |
- Ẩn diff của các file nhạy cảm. Mặc định: true. |
+ ENABLE_SENSITIVE_DIFF_FILTER |
+ Ẩn diff của các tệp nhạy cảm. Mặc định: true. |
ENABLE_SECRET_SCRUB_FILTER |
+ ENABLE_SECRET_SCRUB_FILTER |
Che tokens, keys, giá trị .env, certificates và các đầu ra giống bí mật trước khi gửi về Telegram. Mặc định: true (rất nên bật). |
|
SNAPSHOT_INCLUDE_PATH_GLOBS |
+ SNAPSHOT_INCLUDE_PATH_GLOBS |
Luôn đưa các path khớp điều kiện vào diff. Ví dụ: .github/*,.profile.test,.profile.prod |
|
SNAPSHOT_EXCLUDE_PATH_GLOBS |
- Thêm các rule loại trừ diff ngoài bộ mặc định của package. Ví dụ: .*,personal/*,sensitive*.txt Lưu ý: .* khớp cả path ẩn, gồm cả file trong thư mục ẩn. |
+ SNAPSHOT_EXCLUDE_PATH_GLOBS |
+ Thêm các rule loại trừ diff ngoài bộ mặc định của package. Ví dụ: .*,personal/*,sensitive*.txt Lưu ý: .* khớp cả path ẩn, gồm cả tệp trong thư mục ẩn. |
+
ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT |
+ Mặc định: false. Nếu bật true, hệ thống sẽ nhận dạng tin nhắn thoại và tệp âm thanh. Hệ thống sẽ kiểm tra các binary hoặc thư viện cần thiết và nhắc người dùng cài đặt nếu còn thiếu. |
+
OPENAI_WHISPER_MODEL |
+ Mô hình dùng cho Whisper STT. Mặc định: base. Các mô hình khả dụng: tiny khoảng 72 MB, base khoảng 139 MB, large-v3-turbo khoảng 1.5 GB. Mô hình sẽ được tự động tải xuống khi bạn gửi tin nhắn thoại đầu tiên. Khuyến nghị: base cho nhu cầu chung. Nếu muốn độ chính xác và chất lượng tốt hơn, bạn có thể thử turbo. |
+
OPENAI_WHISPER_TIMEOUT_SECONDS |
+ Mặc định: 120. Thời gian chờ cho tiến trình STT. Thông thường STT đủ nhanh, nhưng nếu bạn chọn turbo, lần gửi tin nhắn thoại đầu tiên có thể vượt quá thời gian chờ do phải tải mô hình, tùy theo tốc độ mạng. |
~/.coding-agent-telegram/state.json |
- Hauptdatei für den Session-Status. | +Tệp trạng thái phiên chính. |
~/.coding-agent-telegram/state.json.bak |
- Backup-Datei für den Status. | +Tệp sao lưu trạng thái. |
~/.coding-agent-telegram/logs |
- Log-Verzeichnis. | +Thư mục log. |
local/<branch> nghĩa là dùng local branch làm source
+- origin/<branch> nghĩa là cập nhật từ remote branch trước rồi mới chuyển
-Nếu bot phát hiện branch lưu trong session không khớp với branch hiện tại của repository, bot sẽ không tiếp tục một cách mù quáng. Bot sẽ hỏi bạn muốn dùng branch nào:
+Nếu bot phát hiện branch lưu trong phiên không khớp với branch hiện tại của repository, bot sẽ không tiếp tục một cách mù quáng. Bot sẽ hỏi bạn muốn dùng branch nào:
-- giữ branch đã lưu trong session
+- giữ branch đã lưu trong phiên
- giữ branch hiện tại của repository
Nếu source branch bạn muốn không còn, bot sẽ đưa ra các fallback source dựa trên default branch và current branch thay vì để bạn đối mặt với Git error thô.
@@ -481,11 +533,11 @@ Nếu source branch bạn muốn không còn, bot sẽ đưa ra các fallback so
- thư mục đã tồn tại sẽ tuân theo `CODEX_SKIP_GIT_REPO_CHECK`
- thư mục được tạo qua `/project /provider |
- 为新 session 选择 provider。该选择会按 bot 和 chat 保存,直到你手动修改。 | +/provider |
+ 为新会话选择提供方。该选择会按 bot 和 chat 保存,直到你手动修改。 |
/project <project_folder> |
+ /project <project_folder> |
设置当前 project 文件夹。如果文件夹不存在,应用会创建并标记为 trusted;如果已存在但仍是 untrusted,应用会明确要求确认 trust。 | |
/branch <new_branch> |
+ /branch <new_branch> |
为当前 project 准备或切换 branch。如果 branch 已存在,bot 会把它当作 source candidate;否则会使用 repository 的 default branch 作为 source candidate。 | |
/branch <origin_branch> <new_branch> |
- 使用 ` |
+ /branch <origin_branch> <new_branch> |
+ 使用 <origin_branch> 作为 source candidate 来准备或切换 branch。无论哪种形式,bot 之后只会提供实际存在的 source choices:local/<branch> 和 origin/<branch>。如果只存在其中一个,就只显示那个;如果两个都不存在,bot 会提示缺少 branch source。 |
/current |
- 显示当前 bot 和 chat 的 active session。 | +/current |
+ 显示当前 bot 和 chat 的活动会话。 |
/new [session_name] |
- 为当前 project 创建新 session。如果省略名称,bot 会使用真实的 session ID。若缺少 provider、project 或 branch,bot 会引导你完成缺失步骤。 | +/new [session_name] |
+ 为当前项目创建新会话。如果省略名称,bot 会使用真实的会话 ID。若缺少提供方、项目或 branch,bot 会引导你完成缺失步骤。 |
/switch |
- 显示最新的 session,按从新到旧排序。列表同时包含 bot-managed sessions 和当前 project 的本地 Codex/Copilot CLI sessions。 | +/switch |
+ 显示最新的会话,按从新到旧排序。列表同时包含 bot 管理的会话和当前项目的本地 Codex/Copilot CLI 会话。 |
/switch page <number> |
- 显示已保存 sessions 的其他页。 | +/switch page <number> |
+ 显示已保存会话的其他页。 |
/switch <session_id> |
- 通过 ID 切换到指定 session。如果你选择本地 CLI session,bot 会把它导入 state 并从那里继续。 | +/switch <session_id> |
+ 通过 ID 切换到指定会话。如果你选择本地 CLI 会话,bot 会把它导入状态并从那里继续。 |
/compact |
- 从当前活动 session 创建一个新的压缩 session,并切换到该 session。 | +/compact |
+ 从当前活动会话创建一个新的压缩会话,并切换到该会话。 |
/commit <git commands> |
- 在 active session 的 project 内执行已校验的 `git commit` 相关命令。仅当 `ENABLE_COMMIT_COMMAND=true` 时可用。会修改内容的 Git 命令要求 project 已 trusted。 | +/commit <git commands> |
+ 在活动会话的项目内执行已校验的 git commit 相关命令。仅当 ENABLE_COMMIT_COMMAND=true 时可用。会修改内容的 Git 命令要求项目已 trusted。 |
/push |
- 为当前 active session 执行 `origin |
+ /push |
+ 为当前活动会话执行 origin <branch> push。push 前 bot 会要求确认。 |
/abort |
- 中止当前 project 的 agent run。如果还有 queued questions 在等待,bot 会询问是否继续处理。 | +/abort |
+ 中止当前 project 的代理运行。如果还有排队问题在等待,bot 会询问是否继续处理。 |
WORKSPACE_ROOT |
+ WORKSPACE_ROOT |
包含你的项目目录的父文件夹。 |
TELEGRAM_BOT_TOKENS |
+ TELEGRAM_BOT_TOKENS |
以逗号分隔的 Telegram bot token。 |
ALLOWED_CHAT_IDS |
+ ALLOWED_CHAT_IDS |
允许使用该 bot 的 Telegram 私聊 chat ID,使用逗号分隔。 |
APP_LOCALE |
+ APP_LOCALE |
共享 bot 消息和命令说明所使用的 UI 语言。支持值:en、de、fr、ja、ko、nl、th、vi、zh-CN、zh-HK、zh-TW。 |
|
CODEX_BIN |
+ CODEX_BIN |
用于启动 Codex CLI 的命令。默认:codex。 |
|
COPILOT_BIN |
+ COPILOT_BIN |
用于启动 Copilot CLI 的命令。默认:copilot。 |
|
CODEX_MODEL |
+ CODEX_MODEL |
可选的 Codex model 覆盖。留空则使用 Codex CLI 默认 model。示例:gpt-5.4 OpenAI Codex/OpenAI models |
|
COPILOT_MODEL |
+ COPILOT_MODEL |
可选的 Copilot model 覆盖。留空则使用 Copilot CLI 默认 model。示例:gpt-5.4、claude-sonnet-4.6 GitHub Copilot supported models |
|
CODEX_APPROVAL_POLICY |
+ CODEX_APPROVAL_POLICY |
传递给 Codex 的 approval mode。默认:never。 |
|
CODEX_SANDBOX_MODE |
+ CODEX_SANDBOX_MODE |
传递给 Codex 的 sandbox mode。默认:workspace-write。 |
|
CODEX_SKIP_GIT_REPO_CHECK |
+ CODEX_SKIP_GIT_REPO_CHECK |
如果启用,将始终跳过 Codex 的 trusted-repo 检查。 | |
ENABLE_COMMIT_COMMAND |
+ ENABLE_COMMIT_COMMAND |
启用 Telegram 的 /commit 命令。默认:false。 |
|
AGENT_HARD_TIMEOUT_SECONDS |
- 单次 agent run 的硬超时。默认:0(关闭)。 |
+ AGENT_HARD_TIMEOUT_SECONDS |
+ 单次代理运行的硬超时。默认:0(关闭)。 |
SNAPSHOT_TEXT_FILE_MAX_BYTES |
+ SNAPSHOT_TEXT_FILE_MAX_BYTES |
构建每次运行的前后快照 diff 时,bot 会按文本读取的最大文件大小。默认:200000。 |
|
MAX_TELEGRAM_MESSAGE_LENGTH |
+ MAX_TELEGRAM_MESSAGE_LENGTH |
应用拆分回复前使用的最大消息长度。默认:3000。 |
|
ENABLE_SENSITIVE_DIFF_FILTER |
+ ENABLE_SENSITIVE_DIFF_FILTER |
隐藏敏感文件的 diff。默认:true。 |
|
ENABLE_SECRET_SCRUB_FILTER |
+ ENABLE_SECRET_SCRUB_FILTER |
在发送到 Telegram 之前,对 tokens、keys、.env 值、certificates 以及类似秘密输出做脱敏。默认:true(强烈建议开启)。 |
|
SNAPSHOT_INCLUDE_PATH_GLOBS |
+ SNAPSHOT_INCLUDE_PATH_GLOBS |
强制把匹配的路径包含进 diff。示例:.github/*,.profile.test,.profile.prod |
|
SNAPSHOT_EXCLUDE_PATH_GLOBS |
+ SNAPSHOT_EXCLUDE_PATH_GLOBS |
在打包默认值之外额外添加 diff 排除规则。示例:.*,personal/*,sensitive*.txt 说明:.* 会匹配隐藏路径,包括隐藏目录中的文件。 |
ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT |
+ 默认:false。如果为 true,则启用语音消息和音频文件识别。系统会检查所需的二进制或库依赖,缺失时提示用户安装。 |
+
OPENAI_WHISPER_MODEL |
+ Whisper STT 使用的模型。默认:base。可用模型:tiny 约 72 MB、base 约 139 MB、large-v3-turbo 约 1.5 GB。模型会在你第一次发送语音消息时自动下载。一般使用推荐 base。如果你想要更好的准确率和质量,可以尝试 turbo。 |
+
OPENAI_WHISPER_TIMEOUT_SECONDS |
+ 默认:120。STT 进程的超时时间。通常处理速度已经足够快,但如果你选择 turbo,第一次语音消息可能会因为下载模型而根据网速超过超时限制。 |
+
~/.coding-agent-telegram/state.json |
- Hauptdatei für den Session-Status. | +会话状态主文件。 |
~/.coding-agent-telegram/state.json.bak |
- Backup-Datei für den Status. | +状态备份文件。 |
~/.coding-agent-telegram/logs |
- Log-Verzeichnis. | +日志目录。 |
local/<branch> 表示使用本地 branch 作为 source
+- origin/<branch> 表示先从远端 branch 更新,再切换
-如果 bot 发现 session 里保存的 branch 与当前 repository branch 不一致,它不会盲目继续,而是会询问你要使用哪一个 branch:
+如果 bot 发现会话里保存的 branch 与当前仓库 branch 不一致,它不会盲目继续,而是会询问你要使用哪一个 branch:
-- 保留 session 中保存的 branch
+- 保留会话中保存的 branch
- 保留当前 repository branch
如果你偏好的 source branch 已缺失,bot 会基于 default branch 和 current branch 提供 fallback source,而不是直接把你丢给原始 Git error。
@@ -481,11 +533,11 @@ bot 会把 project 和 branch 当成一组信息来处理。
- 已存在的 folder 遵循 `CODEX_SKIP_GIT_REPO_CHECK`
- 通过 `/project /provider |
- 為新 session 選擇 provider。這個選擇會按 bot 與 chat 儲存,直到你手動修改。 | +/provider |
+ 為新的工作階段選擇提供者。這個選擇會按 bot 與 chat 儲存,直到你手動修改。 |
/project <project_folder> |
+ /project <project_folder> |
設定目前的 project 資料夾。如果資料夾不存在,app 會建立並標記為 trusted;如果已存在但仍是 untrusted,app 會明確要求確認 trust。 | |
/branch <new_branch> |
+ /branch <new_branch> |
為目前的 project 準備或切換 branch。如果 branch 已存在,bot 會把它當作 source candidate;否則會使用 repository 的 default branch 作為 source candidate。 | |
/branch <origin_branch> <new_branch> |
- 使用 ` |
+ /branch <origin_branch> <new_branch> |
+ 使用 <origin_branch> 作為 source candidate 來準備或切換 branch。無論哪種形式,bot 之後只會提供實際存在的 source choices:local/<branch> 和 origin/<branch>。若只存在其中一個,就只顯示那個;若兩個都不存在,bot 會提示缺少 branch source。 |
/current |
- 顯示目前 bot 與 chat 的 active session。 | +/current |
+ 顯示目前 bot 與 chat 的作用中工作階段。 |
/new [session_name] |
- 為目前的 project 建立新 session。如果省略名稱,bot 會使用真實 session ID。若缺少 provider、project 或 branch,bot 會引導你完成缺少的步驟。 | +/new [session_name] |
+ 為目前的專案建立新工作階段。如果省略名稱,bot 會使用真實工作階段 ID。若缺少提供者、專案或 branch,bot 會引導你完成缺少的步驟。 |
/switch |
- 顯示最新的 session,按由新到舊排序。列表同時包含 bot-managed sessions 以及目前 project 的本機 Codex/Copilot CLI sessions。 | +/switch |
+ 顯示最新的工作階段,按由新到舊排序。列表同時包含 bot 管理的工作階段以及目前專案的本機 Codex/Copilot CLI 工作階段。 |
/switch page <number> |
- 顯示已儲存 sessions 的其他頁面。 | +/switch page <number> |
+ 顯示已儲存工作階段的其他頁面。 |
/switch <session_id> |
- 透過 ID 切換到指定 session。如果你選擇本機 CLI session,bot 會把它匯入 state 並從那裡繼續。 | +/switch <session_id> |
+ 透過 ID 切換到指定工作階段。如果你選擇本機 CLI 工作階段,bot 會把它匯入狀態並從那裡繼續。 |
/compact |
- 從目前使用中的 session 建立新的壓縮 session,並切換到該 session。 | +/compact |
+ 從目前使用中的工作階段建立新的壓縮工作階段,並切換到該工作階段。 |
/commit <git commands> |
- 在 active session 的 project 內執行已驗證的 `git commit` 相關指令。只在 `ENABLE_COMMIT_COMMAND=true` 時可用。會修改內容的 Git 指令要求 project 已 trusted。 | +/commit <git commands> |
+ 在作用中工作階段的專案內執行已驗證的 git commit 相關指令。只在 ENABLE_COMMIT_COMMAND=true 時可用。會修改內容的 Git 指令要求專案已 trusted。 |
/push |
- 為目前 active session 執行 `origin |
+ /push |
+ 為目前作用中工作階段執行 origin <branch> push。push 前 bot 會要求確認。 |
/abort |
- 中止目前 project 的 agent run。如果還有 queued questions 等候,bot 會詢問是否繼續處理。 | +/abort |
+ 中止目前 project 的代理執行。如果還有排隊問題等候,bot 會詢問是否繼續處理。 |
WORKSPACE_ROOT |
+ WORKSPACE_ROOT |
包含你各個 project 目錄的父資料夾。 |
TELEGRAM_BOT_TOKENS |
+ TELEGRAM_BOT_TOKENS |
以逗號分隔的 Telegram bot token。 |
ALLOWED_CHAT_IDS |
+ ALLOWED_CHAT_IDS |
允許使用此 bot 的 Telegram 私人 chat ID,使用逗號分隔。 |
APP_LOCALE |
+ APP_LOCALE |
共用 bot 訊息與指令說明所使用的 UI 語言。支援值:en、de、fr、ja、ko、nl、th、vi、zh-CN、zh-HK、zh-TW。 |
|
CODEX_BIN |
+ CODEX_BIN |
用來啟動 Codex CLI 的指令。預設:codex。 |
|
COPILOT_BIN |
+ COPILOT_BIN |
用來啟動 Copilot CLI 的指令。預設:copilot。 |
|
CODEX_MODEL |
+ CODEX_MODEL |
可選的 Codex model override。留空則使用 Codex CLI 預設 model。例子:gpt-5.4 OpenAI Codex/OpenAI models |
|
COPILOT_MODEL |
+ COPILOT_MODEL |
可選的 Copilot model override。留空則使用 Copilot CLI 預設 model。例子:gpt-5.4、claude-sonnet-4.6 GitHub Copilot supported models |
|
CODEX_APPROVAL_POLICY |
+ CODEX_APPROVAL_POLICY |
傳遞給 Codex 的 approval mode。預設:never。 |
|
CODEX_SANDBOX_MODE |
+ CODEX_SANDBOX_MODE |
傳遞給 Codex 的 sandbox mode。預設:workspace-write。 |
|
CODEX_SKIP_GIT_REPO_CHECK |
+ CODEX_SKIP_GIT_REPO_CHECK |
如果啟用,會一直略過 Codex 的 trusted-repo 檢查。 | |
ENABLE_COMMIT_COMMAND |
+ ENABLE_COMMIT_COMMAND |
啟用 Telegram 的 /commit 指令。預設:false。 |
|
AGENT_HARD_TIMEOUT_SECONDS |
- 單次 agent run 的硬性 timeout。預設:0(停用)。 |
+ AGENT_HARD_TIMEOUT_SECONDS |
+ 單次代理執行的硬性 timeout。預設:0(停用)。 |
SNAPSHOT_TEXT_FILE_MAX_BYTES |
- 建立每次執行的前後 snapshot diff 時,bot 會以文字讀取的最大檔案大小。預設:200000。 |
+ SNAPSHOT_TEXT_FILE_MAX_BYTES |
+ 建立每次執行的前後快照 diff 時,bot 會以文字讀取的最大檔案大小。預設:200000。 |
MAX_TELEGRAM_MESSAGE_LENGTH |
+ MAX_TELEGRAM_MESSAGE_LENGTH |
app 分割回覆前使用的最大訊息長度。預設:3000。 |
|
ENABLE_SENSITIVE_DIFF_FILTER |
+ ENABLE_SENSITIVE_DIFF_FILTER |
隱藏敏感檔案的 diff。預設:true。 |
|
ENABLE_SECRET_SCRUB_FILTER |
+ ENABLE_SECRET_SCRUB_FILTER |
在送往 Telegram 之前,對 tokens、keys、.env 值、certificates 及類似秘密輸出做遮罩。預設:true(強烈建議啟用)。 |
|
SNAPSHOT_INCLUDE_PATH_GLOBS |
+ SNAPSHOT_INCLUDE_PATH_GLOBS |
強制把符合條件的 path 納入 diff。例子:.github/*,.profile.test,.profile.prod |
|
SNAPSHOT_EXCLUDE_PATH_GLOBS |
+ SNAPSHOT_EXCLUDE_PATH_GLOBS |
在套件預設值之外額外加入 diff 排除規則。例子:.*,personal/*,sensitive*.txt 說明:.* 會比對隱藏 path,包括隱藏資料夾內的檔案。 |
ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT |
+ 預設:false。如果為 true,就會啟用語音訊息與音訊檔案識別。系統會檢查所需的 binary 或 library 依賴,缺少時會提示使用者安裝。 |
+
OPENAI_WHISPER_MODEL |
+ Whisper STT 使用的模型。預設:base。可用模型:tiny 約 72 MB、base 約 139 MB、large-v3-turbo 約 1.5 GB。模型會在你第一次傳送語音訊息時自動下載。建議一般使用選 base。如果你想要更好的準確率與品質,可以嘗試 turbo。 |
+
OPENAI_WHISPER_TIMEOUT_SECONDS |
+ 預設:120。STT 進程的逾時時間。一般來說處理速度已足夠快,但如果你選擇 turbo,首次下載可能會視乎網速而超出逾時限制。 |
+
~/.coding-agent-telegram/state.json |
- Hauptdatei für den Session-Status. | +工作階段狀態主檔。 |
~/.coding-agent-telegram/state.json.bak |
- Backup-Datei für den Status. | +狀態備份檔。 |
~/.coding-agent-telegram/logs |
- Log-Verzeichnis. | +日誌目錄。 |
local/<branch>:使用本地 branch 作為 source
+- origin/<branch>:先從遠端 branch 更新,再切換
-如果 bot 發現 session 中儲存的 branch 與目前 repository branch 不一致,它不會盲目繼續,而會詢問你想使用哪個 branch:
+如果 bot 發現工作階段中儲存的 branch 與目前儲存庫 branch 不一致,它不會盲目繼續,而會詢問你想使用哪個 branch:
-- 保留 session 中儲存的 branch
+- 保留工作階段中儲存的 branch
- 保留目前 repository branch
如果你偏好的 source branch 已不存在,bot 會根據 default branch 和 current branch 提供 fallback source,而不是直接丟出原始 Git error。
@@ -481,28 +539,28 @@ bot 會把 project 和 branch 當成一組來處理。
- 已存在的 folder 會遵循 `CODEX_SKIP_GIT_REPO_CHECK`
- 透過 `/project /provider |
- 為新的 session 選擇 provider。這個選擇會依 bot 與 chat 儲存,直到你手動修改。 | +/provider |
+ 為新的工作階段選擇提供者。這個選擇會依 bot 與 chat 儲存,直到你手動修改。 |
/project <project_folder> |
+ /project <project_folder> |
設定目前的 project 資料夾。如果資料夾不存在,app 會建立並標記為 trusted;如果已存在但仍是 untrusted,app 會明確要求確認 trust。 | |
/branch <new_branch> |
+ /branch <new_branch> |
為目前的 project 準備或切換 branch。如果 branch 已存在,bot 會把它視為 source candidate;否則會使用 repository 的 default branch 作為 source candidate。 | |
/branch <origin_branch> <new_branch> |
- 使用 ` |
+ /branch <origin_branch> <new_branch> |
+ 使用 <origin_branch> 作為 source candidate 來準備或切換 branch。無論哪種形式,bot 之後只會提供實際存在的 source choices:local/<branch> 和 origin/<branch>。若只存在其中一個,就只顯示那個;若兩個都不存在,bot 會提示缺少 branch source。 |
/current |
- 顯示目前 bot 與 chat 的 active session。 | +/current |
+ 顯示目前 bot 與 chat 的作用中工作階段。 |
/new [session_name] |
- 為目前的 project 建立新的 session。如果省略名稱,bot 會使用真實 session ID。若缺少 provider、project 或 branch,bot 會引導你完成缺少的步驟。 | +/new [session_name] |
+ 為目前的專案建立新的工作階段。如果省略名稱,bot 會使用真實工作階段 ID。若缺少提供者、專案或 branch,bot 會引導你完成缺少的步驟。 |
/switch |
- 顯示最新的 sessions,依新到舊排序。列表同時包含 bot-managed sessions 與目前 project 的本機 Codex/Copilot CLI sessions。 | +/switch |
+ 顯示最新的工作階段,依新到舊排序。列表同時包含 bot 管理的工作階段與目前專案的本機 Codex/Copilot CLI 工作階段。 |
/switch page <number> |
- 顯示已儲存 sessions 的其他頁面。 | +/switch page <number> |
+ 顯示已儲存工作階段的其他頁面。 |
/switch <session_id> |
- 透過 ID 切換到指定 session。如果你選擇本機 CLI session,bot 會把它匯入 state 並從那裡繼續。 | +/switch <session_id> |
+ 透過 ID 切換到指定工作階段。如果你選擇本機 CLI 工作階段,bot 會把它匯入狀態並從那裡繼續。 |
/compact |
- 從目前使用中的 session 建立新的壓縮 session,並切換到該 session。 | +/compact |
+ 從目前使用中的工作階段建立新的壓縮工作階段,並切換到該工作階段。 |
/commit <git commands> |
- 在 active session 的 project 內執行已驗證的 `git commit` 相關指令。僅在 `ENABLE_COMMIT_COMMAND=true` 時可用。會修改內容的 Git 指令要求 project 已 trusted。 | +/commit <git commands> |
+ 在作用中工作階段的專案內執行已驗證的 git commit 相關指令。僅在 ENABLE_COMMIT_COMMAND=true 時可用。會修改內容的 Git 指令要求專案已 trusted。 |
/push |
- 為目前 active session 執行 `origin |
+ /push |
+ 為目前作用中工作階段執行 origin <branch> push。push 前 bot 會要求確認。 |
/abort |
- 中止目前 project 的 agent run。如果還有 queued questions 在等待,bot 會詢問是否繼續處理。 | +/abort |
+ 中止目前 project 的代理執行。如果還有排隊問題在等待,bot 會詢問是否繼續處理。 |
WORKSPACE_ROOT |
+ WORKSPACE_ROOT |
包含你各個 project 目錄的父資料夾。 |
TELEGRAM_BOT_TOKENS |
+ TELEGRAM_BOT_TOKENS |
以逗號分隔的 Telegram bot token。 |
ALLOWED_CHAT_IDS |
+ ALLOWED_CHAT_IDS |
允許使用此 bot 的 Telegram 私人 chat ID,使用逗號分隔。 |
APP_LOCALE |
+ APP_LOCALE |
共用 bot 訊息與指令說明所使用的 UI 語言。支援值:en、de、fr、ja、ko、nl、th、vi、zh-CN、zh-HK、zh-TW。 |
|
CODEX_BIN |
+ CODEX_BIN |
用來啟動 Codex CLI 的指令。預設:codex。 |
|
COPILOT_BIN |
+ COPILOT_BIN |
用來啟動 Copilot CLI 的指令。預設:copilot。 |
|
CODEX_MODEL |
+ CODEX_MODEL |
可選的 Codex model override。留空則使用 Codex CLI 預設 model。例子:gpt-5.4 OpenAI Codex/OpenAI models |
|
COPILOT_MODEL |
+ COPILOT_MODEL |
可選的 Copilot model override。留空則使用 Copilot CLI 預設 model。例子:gpt-5.4、claude-sonnet-4.6 GitHub Copilot supported models |
|
CODEX_APPROVAL_POLICY |
+ CODEX_APPROVAL_POLICY |
傳遞給 Codex 的 approval mode。預設:never。 |
|
CODEX_SANDBOX_MODE |
+ CODEX_SANDBOX_MODE |
傳遞給 Codex 的 sandbox mode。預設:workspace-write。 |
|
CODEX_SKIP_GIT_REPO_CHECK |
+ CODEX_SKIP_GIT_REPO_CHECK |
如果啟用,會永遠略過 Codex 的 trusted-repo 檢查。 | |
ENABLE_COMMIT_COMMAND |
+ ENABLE_COMMIT_COMMAND |
啟用 Telegram 的 /commit 指令。預設:false。 |
|
AGENT_HARD_TIMEOUT_SECONDS |
- 單次 agent run 的硬性 timeout。預設:0(停用)。 |
+ AGENT_HARD_TIMEOUT_SECONDS |
+ 單次代理執行的硬性 timeout。預設:0(停用)。 |
SNAPSHOT_TEXT_FILE_MAX_BYTES |
- 建立每次執行的前後 snapshot diff 時,bot 會以文字讀取的最大檔案大小。預設:200000。 |
+ SNAPSHOT_TEXT_FILE_MAX_BYTES |
+ 建立每次執行的前後快照 diff 時,bot 會以文字讀取的最大檔案大小。預設:200000。 |
MAX_TELEGRAM_MESSAGE_LENGTH |
+ MAX_TELEGRAM_MESSAGE_LENGTH |
app 分割回覆前使用的最大訊息長度。預設:3000。 |
|
ENABLE_SENSITIVE_DIFF_FILTER |
+ ENABLE_SENSITIVE_DIFF_FILTER |
隱藏敏感檔案的 diff。預設:true。 |
|
ENABLE_SECRET_SCRUB_FILTER |
+ ENABLE_SECRET_SCRUB_FILTER |
在送往 Telegram 之前,對 tokens、keys、.env 值、certificates 及類似秘密輸出做遮罩。預設:true(強烈建議啟用)。 |
|
SNAPSHOT_INCLUDE_PATH_GLOBS |
+ SNAPSHOT_INCLUDE_PATH_GLOBS |
強制把符合條件的 path 納入 diff。例子:.github/*,.profile.test,.profile.prod |
|
SNAPSHOT_EXCLUDE_PATH_GLOBS |
+ SNAPSHOT_EXCLUDE_PATH_GLOBS |
在套件預設值之外額外加入 diff 排除規則。例子:.*,personal/*,sensitive*.txt 說明:.* 會比對隱藏 path,包括隱藏資料夾內的檔案。 |
ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT |
+ 預設:false。如果為 true,就會啟用語音訊息與音訊檔案識別。系統會檢查所需的 binary 或 library 依賴,缺少時會提示使用者安裝。 |
+
OPENAI_WHISPER_MODEL |
+ Whisper STT 使用的模型。預設:base。可用模型:tiny 約 72 MB、base 約 139 MB、large-v3-turbo 約 1.5 GB。模型會在你第一次傳送語音訊息時自動下載。建議一般使用選 base。如果你想要更好的準確率與品質,可以嘗試 turbo。 |
+
OPENAI_WHISPER_TIMEOUT_SECONDS |
+ 預設:120。STT 進程的逾時時間。一般來說處理速度已足夠快,但如果你選擇 turbo,首次下載可能會視乎網速而超出逾時限制。 |
+
~/.coding-agent-telegram/state.json |
- Hauptdatei für den Session-Status. | +工作階段狀態主檔。 |
~/.coding-agent-telegram/state.json.bak |
- Backup-Datei für den Status. | +狀態備份檔。 |
~/.coding-agent-telegram/logs |
- Log-Verzeichnis. | +日誌目錄。 |
local/<branch>:使用本地 branch 作為 source
+- origin/<branch>:先從遠端 branch 更新,再切換
-如果 bot 發現 session 中儲存的 branch 與目前 repository branch 不一致,它不會盲目繼續,而會詢問你想使用哪個 branch:
+如果 bot 發現工作階段中儲存的 branch 與目前儲存庫 branch 不一致,它不會盲目繼續,而會詢問你想使用哪個 branch:
-- 保留 session 中儲存的 branch
+- 保留工作階段中儲存的 branch
- 保留目前 repository branch
如果你偏好的 source branch 已不存在,bot 會根據 default branch 和 current branch 提供 fallback source,而不是直接丟出原始 Git error。
@@ -481,28 +539,28 @@ bot 會把 project 和 branch 當成一組資訊來處理。
- 已存在的 folder 會遵循 `CODEX_SKIP_GIT_REPO_CHECK`
- 透過 `/project {escaped_code}"
@@ -298,4 +358,5 @@ async def send_code_block(
chat_id=update.effective_chat.id,
text=text,
parse_mode=ParseMode.HTML,
+ reply_to_message_id=None,
)
diff --git a/startup.sh b/startup.sh
index 1e212cc..9b61be4 100755
--- a/startup.sh
+++ b/startup.sh
@@ -67,6 +67,7 @@ if [[ -z "$ENV_FILE" ]]; then
fi
fi
+NEW_ENV_CREATED=0
if [[ ! -f "$ENV_FILE" ]]; then
if [[ -f "$ENV_TEMPLATE_FILE" ]]; then
ENV_FILE_TARGET="$ENV_FILE" ENV_TEMPLATE_SOURCE="$ENV_TEMPLATE_FILE" PYTHONPATH="$SCRIPT_DIR/src${PYTHONPATH:+:$PYTHONPATH}" "$PYTHON_BIN" - <<'PY'
@@ -81,16 +82,13 @@ app_locale = create_initial_env_file(env_path, template_path)
print(translate(app_locale, "bootstrap.env_created_locale_line", env_path=env_path, app_locale=app_locale))
print(translate(app_locale, "bootstrap.env_created_change_line", env_path=env_path))
PY
+ NEW_ENV_CREATED=1
else
echo "Error: $ENV_FILE is missing and $ENV_TEMPLATE_FILE was not found." >&2
exit 1
fi
fi
-set -a
-source "$ENV_FILE"
-set +a
-
STATE_FILE="$STATE_FILE_DEFAULT"
STATE_BACKUP_FILE="$STATE_BACKUP_FILE_DEFAULT"
if [[ -f "$APP_HOME_DIR/state.json" ]]; then
@@ -108,6 +106,49 @@ LOG_DIR="$LOG_DIR_DEFAULT"
mkdir -p "$(dirname "$STATE_FILE")" "$(dirname "$STATE_BACKUP_FILE")" "$LOG_DIR"
touch "$STATE_FILE" "$STATE_BACKUP_FILE"
+if [[ ! -d "$VENV_DIR" ]]; then
+ "$PYTHON_BIN" -m venv "$VENV_DIR"
+fi
+
+source "$VENV_DIR/bin/activate"
+
+python -m pip install --upgrade pip >/dev/null
+INSTALL_STATE_FILE="$VENV_DIR/$INSTALL_STATE_FILE_NAME"
+CURRENT_INSTALL_FINGERPRINT="$(compute_install_fingerprint)"
+STORED_INSTALL_FINGERPRINT=""
+if [[ -f "$INSTALL_STATE_FILE" ]]; then
+ STORED_INSTALL_FINGERPRINT="$(<"$INSTALL_STATE_FILE")"
+fi
+
+NEEDS_REINSTALL=0
+if [[ "$FORCE_REINSTALL" == "1" ]]; then
+ NEEDS_REINSTALL=1
+elif ! python -c "import coding_agent_telegram" >/dev/null 2>&1; then
+ NEEDS_REINSTALL=1
+elif [[ "$CURRENT_INSTALL_FINGERPRINT" != "$STORED_INSTALL_FINGERPRINT" ]]; then
+ NEEDS_REINSTALL=1
+fi
+
+if [[ "$NEEDS_REINSTALL" == "1" ]]; then
+ echo "Installing local package into $VENV_DIR."
+ SETUPTOOLS_SCM_PRETEND_VERSION_FOR_CODING_AGENT_TELEGRAM="$LOCAL_PRETEND_VERSION" \
+ python -m pip install -e .
+ printf '%s\n' "$CURRENT_INSTALL_FINGERPRINT" > "$INSTALL_STATE_FILE"
+else
+ echo "Existing editable install detected; skipping reinstall."
+fi
+
+if [[ "$NEW_ENV_CREATED" == "1" ]]; then
+ python -m coding_agent_telegram.stt_setup offer \
+ --env-file "$ENV_FILE" \
+ --python-bin "$VENV_DIR/bin/python" \
+ --installer-label "./install-stt.sh"
+fi
+
+set -a
+source "$ENV_FILE"
+set +a
+
required_vars=(
WORKSPACE_ROOT
TELEGRAM_BOT_TOKENS
@@ -159,43 +200,13 @@ case "$DEFAULT_AGENT_PROVIDER" in
;;
esac
-if [[ ! -d "$VENV_DIR" ]]; then
- "$PYTHON_BIN" -m venv "$VENV_DIR"
-fi
-
-source "$VENV_DIR/bin/activate"
-
-python -m pip install --upgrade pip >/dev/null
-INSTALL_STATE_FILE="$VENV_DIR/$INSTALL_STATE_FILE_NAME"
-CURRENT_INSTALL_FINGERPRINT="$(compute_install_fingerprint)"
-STORED_INSTALL_FINGERPRINT=""
-if [[ -f "$INSTALL_STATE_FILE" ]]; then
- STORED_INSTALL_FINGERPRINT="$(<"$INSTALL_STATE_FILE")"
-fi
-
-NEEDS_REINSTALL=0
-if [[ "$FORCE_REINSTALL" == "1" ]]; then
- NEEDS_REINSTALL=1
-elif ! python -c "import coding_agent_telegram" >/dev/null 2>&1; then
- NEEDS_REINSTALL=1
-elif [[ "$CURRENT_INSTALL_FINGERPRINT" != "$STORED_INSTALL_FINGERPRINT" ]]; then
- NEEDS_REINSTALL=1
-fi
-
-if [[ "$NEEDS_REINSTALL" == "1" ]]; then
- echo "Installing local package into $VENV_DIR."
- SETUPTOOLS_SCM_PRETEND_VERSION_FOR_CODING_AGENT_TELEGRAM="$LOCAL_PRETEND_VERSION" \
- python -m pip install -e .
- printf '%s\n' "$CURRENT_INSTALL_FINGERPRINT" > "$INSTALL_STATE_FILE"
-else
- echo "Existing editable install detected; skipping reinstall."
-fi
-
echo "Post-installation guide:"
echo "1. Confirm $ENV_FILE contains WORKSPACE_ROOT, TELEGRAM_BOT_TOKENS, and ALLOWED_CHAT_IDS."
echo "2. State files are ready at $STATE_FILE and $STATE_BACKUP_FILE."
echo "3. Application logs will be written under $LOG_DIR."
-echo "4. Start the server with: ./startup.sh"
-echo "5. In Telegram, start conversations."
+echo "4. Optional voice-to-text: run ./install-stt.sh if you want local Whisper support."
+echo "5. Start the server with: ./startup.sh"
+echo "6. In Telegram, start conversations."
echo "Starting coding-agent-telegram..."
+export CODING_AGENT_TELEGRAM_STT_INSTALL_HINT="./install-stt.sh"
exec python -m coding_agent_telegram
diff --git a/tests/test_command_router.py b/tests/test_command_router.py
index 2fdcae1..239acaa 100644
--- a/tests/test_command_router.py
+++ b/tests/test_command_router.py
@@ -2,6 +2,7 @@
import asyncio
import html
+import logging
import sqlite3
import shlex
import sys
@@ -14,6 +15,8 @@
from coding_agent_telegram.command_router import CommandRouter, RouterDeps
from coding_agent_telegram.config import AppConfig
from coding_agent_telegram.session_store import SessionStore
+from coding_agent_telegram.speech_to_text import SpeechToTextError
+from telegram.error import BadRequest
class DummyRunner:
@@ -316,13 +319,23 @@ def resume_session(
class FakeBot:
def __init__(self):
self.messages = []
+ self.sent_messages = []
self.actions = []
self.deleted_messages = []
self.send_count = 0
self.edit_count = 0
- async def send_message(self, chat_id, text, parse_mode=None, reply_markup=None):
+ async def send_message(self, chat_id, text, parse_mode=None, reply_markup=None, reply_to_message_id=None):
self.send_count += 1
+ self.sent_messages.append(
+ {
+ "chat_id": chat_id,
+ "text": text,
+ "parse_mode": parse_mode,
+ "reply_markup": reply_markup,
+ "reply_to_message_id": reply_to_message_id,
+ }
+ )
self.messages.append((chat_id, text, parse_mode, reply_markup))
return SimpleNamespace(message_id=len(self.messages))
@@ -338,10 +351,21 @@ async def send_chat_action(self, chat_id, action):
class SlowProgressBot(FakeBot):
- async def send_message(self, chat_id, text, parse_mode=None, reply_markup=None):
+ async def send_message(self, chat_id, text, parse_mode=None, reply_markup=None, reply_to_message_id=None):
if "Live agent output" in text:
await asyncio.sleep(0.2)
- return await super().send_message(chat_id, text, parse_mode=parse_mode, reply_markup=reply_markup)
+ return await super().send_message(
+ chat_id,
+ text,
+ parse_mode=parse_mode,
+ reply_markup=reply_markup,
+ reply_to_message_id=reply_to_message_id,
+ )
+
+
+class EditFailingProgressBot(FakeBot):
+ async def edit_message_text(self, chat_id, message_id, text, parse_mode=None, reply_markup=None):
+ raise BadRequest("message can't be edited")
class FakeGitManager:
@@ -436,6 +460,24 @@ async def download_as_bytearray(self):
return bytearray(self._content)
+class FakeVoiceMessage:
+ def __init__(
+ self,
+ telegram_file: FakeTelegramFile,
+ *,
+ file_unique_id: str = "voice.ogg",
+ file_size=None,
+ file_name: str | None = None,
+ ):
+ self.telegram_file = telegram_file
+ self.file_unique_id = file_unique_id
+ self.file_size = file_size if file_size is not None else len(getattr(telegram_file, "_content", b""))
+ self.file_name = file_name
+
+ async def get_file(self):
+ return self.telegram_file
+
+
class FakePhotoSize:
def __init__(self, telegram_file: FakeTelegramFile, *, file_size=None):
self.telegram_file = telegram_file
@@ -445,10 +487,10 @@ async def get_file(self):
return self.telegram_file
-def make_update(chat_id=123, chat_type="private", text="hello"):
+def make_update(chat_id=123, chat_type="private", text="hello", message_id=1):
return SimpleNamespace(
effective_chat=SimpleNamespace(id=chat_id, type=chat_type),
- message=SimpleNamespace(text=text, photo=None, caption=None),
+ message=SimpleNamespace(text=text, photo=None, caption=None, message_id=message_id),
)
@@ -480,6 +522,9 @@ def make_config(tmp_path: Path, *, locale: str = "en") -> AppConfig:
max_telegram_message_length=3000,
enable_sensitive_diff_filter=True,
enable_secret_scrub_filter=True,
+ enable_openai_whisper_speech_to_text=False,
+ openai_whisper_model="base",
+ openai_whisper_timeout_seconds=120,
default_agent_provider="codex",
agent_hard_timeout_seconds=0,
app_internal_root=tmp_path / ".coding-agent-telegram",
@@ -900,8 +945,9 @@ def test_branch_command_uses_default_branch_when_origin_not_provided(tmp_path: P
reply_markup = bot.messages[-1][3]
assert reply_markup is not None
+ token = router._register_branch_source_token("origin", "main", "feature-1")
query = SimpleNamespace(
- data="branchsource:origin:main:feature-1",
+ data=f"branchsource:{token}",
answer=None,
edit_message_text=None,
)
@@ -959,8 +1005,9 @@ def test_branch_command_is_localized_in_zh_tw(tmp_path: Path):
assert "請選擇 branch 來源:" in message
assert "目標 branch:feature-1" in message
+ token = router._register_branch_source_token("origin", "main", "feature-1")
query = SimpleNamespace(
- data="branchsource:origin:main:feature-1",
+ data=f"branchsource:{token}",
answer=None,
edit_message_text=None,
)
@@ -1048,8 +1095,9 @@ def test_branch_command_switches_to_existing_branch(tmp_path: Path):
assert "Switching branch to main requires choosing a source first." in bot.messages[-1][1]
assert "Choose the branch source:" in bot.messages[-1][1]
+ token = router._register_branch_source_token("local", "main", "main")
query = SimpleNamespace(
- data="branchsource:local:main:main",
+ data=f"branchsource:{token}",
answer=None,
edit_message_text=None,
)
@@ -1592,6 +1640,118 @@ async def fake_edit(text):
assert state["sessions"][state["active_session_id"]]["provider"] == "copilot"
+def test_text_message_is_queued_while_new_session_prerequisites_are_pending(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = DummyRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.set_current_project_folder("bot-a", 123, "backend")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router._provider_available = lambda provider: True
+
+ async def exercise():
+ bot = FakeBot()
+ context = SimpleNamespace(args=[], bot=bot)
+
+ await router.handle_new(make_update(text="/new my-session"), SimpleNamespace(args=["my-session"], bot=bot))
+ state = store.get_chat_state("bot-a", 123)
+ assert state["pending_action"]["kind"] == "new_session"
+
+ await router.handle_message(make_update(text="follow-up question", message_id=202), context)
+
+ state = store.get_chat_state("bot-a", 123)
+ assert state["pending_action"]["kind"] == "new_session"
+ assert any("Question queued as Q1." in entry["text"] for entry in bot.sent_messages)
+ assert runner.resume_calls == []
+
+ query = SimpleNamespace(data="provider:set:codex", answer=None, edit_message_text=None)
+ callback_update = SimpleNamespace(
+ effective_chat=SimpleNamespace(id=123, type="private"),
+ callback_query=query,
+ message=SimpleNamespace(text=None, photo=None, caption=None, message_id=None),
+ )
+
+ async def fake_answer():
+ return None
+
+ async def fake_edit(_text, reply_markup=None):
+ return None
+
+ query.answer = fake_answer
+ query.edit_message_text = fake_edit
+
+ await router.handle_provider_callback(callback_update, context)
+
+ assert len(runner.create_calls) == 1
+ assert len(runner.resume_calls) == 1
+ assert runner.resume_calls[0]["user_message"] == "follow-up question"
+
+ asyncio.run(exercise())
+
+
+def test_voice_message_is_queued_while_new_session_prerequisites_are_pending(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = DummyRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.set_current_project_folder("bot-a", 123, "backend")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router._provider_available = lambda provider: True
+ router.speech_to_text.enabled = True
+ router.speech_to_text.transcribe_file = lambda _path: SimpleNamespace(text="voice follow-up")
+
+ async def exercise():
+ bot = FakeBot()
+ context = SimpleNamespace(args=[], bot=bot)
+
+ await router.handle_new(make_update(text="/new my-session"), SimpleNamespace(args=["my-session"], bot=bot))
+ state = store.get_chat_state("bot-a", 123)
+ assert state["pending_action"]["kind"] == "new_session"
+
+ voice_update = SimpleNamespace(
+ effective_chat=SimpleNamespace(id=123, type="private"),
+ message=SimpleNamespace(
+ text=None,
+ photo=None,
+ caption=None,
+ message_id=303,
+ voice=FakeVoiceMessage(FakeTelegramFile(b"voice-bytes", "voice/note.ogg")),
+ ),
+ )
+ await router.handle_voice(voice_update, context)
+
+ state = store.get_chat_state("bot-a", 123)
+ assert state["pending_action"]["kind"] == "new_session"
+ assert any("Queued as Q1." in entry["text"] for entry in bot.sent_messages)
+ assert runner.resume_calls == []
+
+ query = SimpleNamespace(data="provider:set:codex", answer=None, edit_message_text=None)
+ callback_update = SimpleNamespace(
+ effective_chat=SimpleNamespace(id=123, type="private"),
+ callback_query=query,
+ message=SimpleNamespace(text=None, photo=None, caption=None, message_id=None),
+ )
+
+ async def fake_answer():
+ return None
+
+ async def fake_edit(_text, reply_markup=None):
+ return None
+
+ query.answer = fake_answer
+ query.edit_message_text = fake_edit
+
+ await router.handle_provider_callback(callback_update, context)
+
+ assert len(runner.create_calls) == 1
+ assert len(runner.resume_calls) == 1
+ assert runner.resume_calls[0]["user_message"] == "voice follow-up"
+
+ asyncio.run(exercise())
+
+
def test_provider_switch_auto_creates_session_named_by_session_id(tmp_path: Path):
backend = tmp_path / "backend"
backend.mkdir()
@@ -2078,6 +2238,336 @@ def test_photo_message_rejected_for_copilot_session(tmp_path: Path):
assert "Photo attachments are currently supported only for codex sessions." in bot.messages[-1][1]
+def test_voice_message_sends_transcript_preview_before_running_agent(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = DummyRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.create_session("bot-a", 123, "sess_voice", "voice-session", "backend", "codex")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router.git = FakeGitManager(is_git_repo=False)
+ router.speech_to_text.enabled = True
+ router.speech_to_text.transcribe_file = lambda _path: SimpleNamespace(text="fix the flaky test")
+
+ update = SimpleNamespace(
+ effective_chat=SimpleNamespace(id=123, type="private"),
+ message=SimpleNamespace(
+ text=None,
+ photo=None,
+ caption=None,
+ voice=FakeVoiceMessage(FakeTelegramFile(b"voice-bytes", "voice/note.ogg")),
+ ),
+ )
+ bot = FakeBot()
+ context = SimpleNamespace(args=[], bot=bot)
+
+ asyncio.run(router.handle_voice(update, context))
+
+ assert bot.messages[0][1] == "Recognized voice transcript:\nfix the flaky test\n\nWorking on it..."
+ assert runner.resume_calls[-1]["user_message"] == "fix the flaky test"
+ working_entries = [entry for entry in bot.sent_messages if "Working on it..." in entry["text"]]
+ assert len(working_entries) == 1
+
+
+def test_voice_message_sends_queued_transcript_notice_when_project_busy(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = DummyRunner()
+ runner.has_running_process = lambda _project_path: True
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.create_session("bot-a", 123, "sess_voice", "voice-session", "backend", "codex")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router.git = FakeGitManager(is_git_repo=False)
+ router.speech_to_text.enabled = True
+ router.speech_to_text.transcribe_file = lambda _path: SimpleNamespace(text="fix the flaky test")
+
+ update = SimpleNamespace(
+ effective_chat=SimpleNamespace(id=123, type="private"),
+ message=SimpleNamespace(
+ text=None,
+ photo=None,
+ caption=None,
+ voice=FakeVoiceMessage(FakeTelegramFile(b"voice-bytes", "voice/note.ogg")),
+ ),
+ )
+ bot = FakeBot()
+ context = SimpleNamespace(args=[], bot=bot)
+
+ asyncio.run(router.handle_voice(update, context))
+
+ assert "Recognized voice transcript:\nfix the flaky test\n\nQueued as Q1." in bot.messages[0][1]
+ assert runner.resume_calls == []
+
+
+def test_audio_message_is_transcribed_and_forwarded(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = DummyRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.create_session("bot-a", 123, "sess_audio", "audio-session", "backend", "codex")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router.git = FakeGitManager(is_git_repo=False)
+ router.speech_to_text.enabled = True
+ router.speech_to_text.transcribe_file = lambda _path: SimpleNamespace(text="summarize this meeting note")
+
+ update = SimpleNamespace(
+ effective_chat=SimpleNamespace(id=123, type="private"),
+ message=SimpleNamespace(
+ text=None,
+ photo=None,
+ caption=None,
+ voice=None,
+ audio=FakeVoiceMessage(FakeTelegramFile(b"audio-bytes", "audio/clip.mp3"), file_unique_id="clip.mp3"),
+ ),
+ )
+ bot = FakeBot()
+ context = SimpleNamespace(args=[], bot=bot)
+
+ asyncio.run(router.handle_audio(update, context))
+
+ assert runner.resume_calls[-1]["user_message"] == "summarize this meeting note"
+
+
+def test_voice_message_logs_stt_error_details(tmp_path: Path, caplog: pytest.LogCaptureFixture):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = DummyRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.create_session("bot-a", 123, "sess_voice", "voice-session", "backend", "codex")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router.git = FakeGitManager(is_git_repo=False)
+ router.speech_to_text.enabled = True
+
+ def fail_transcription(_path):
+ raise SpeechToTextError("failed", detail="ffmpeg exited with status 1")
+
+ router.speech_to_text.transcribe_file = fail_transcription
+
+ update = SimpleNamespace(
+ effective_chat=SimpleNamespace(id=123, type="private"),
+ message=SimpleNamespace(
+ text=None,
+ photo=None,
+ caption=None,
+ voice=FakeVoiceMessage(FakeTelegramFile(b"voice-bytes", "voice/note.ogg")),
+ ),
+ )
+ bot = FakeBot()
+ context = SimpleNamespace(args=[], bot=bot)
+
+ with caplog.at_level(logging.WARNING):
+ asyncio.run(router.handle_voice(update, context))
+
+ assert bot.messages[-1][1] == "Voice conversion failed."
+ assert "ffmpeg exited with status 1" in caplog.text
+
+
+def test_voice_message_is_queued_when_message_pending_before_runner_busy(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = BlockingRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.create_session("bot-a", 123, "sess_voice_pending", "voice-pending-session", "backend", "codex")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router.git = FakeGitManager(is_git_repo=False)
+ router.speech_to_text.enabled = True
+ router.speech_to_text.transcribe_file = lambda _path: SimpleNamespace(text="queued via voice")
+
+ async def exercise():
+ bot = FakeBot()
+ first_update = make_update(text="first text", message_id=101)
+ voice_update = SimpleNamespace(
+ effective_chat=SimpleNamespace(id=123, type="private"),
+ message=SimpleNamespace(
+ text=None,
+ photo=None,
+ caption=None,
+ message_id=202,
+ voice=FakeVoiceMessage(FakeTelegramFile(b"voice-bytes", "voice/note.ogg")),
+ ),
+ )
+
+ first_task = asyncio.create_task(router.handle_message(first_update, SimpleNamespace(args=[], bot=bot)))
+ await asyncio.sleep(0)
+ await router.handle_voice(voice_update, SimpleNamespace(args=[], bot=bot))
+
+ assert any("Queued as Q1." in entry["text"] for entry in bot.sent_messages)
+ assert not any(
+ entry["text"] == "Recognized voice transcript:\nqueued via voice\n\nWorking on it..."
+ for entry in bot.sent_messages
+ )
+
+ runner.release_next()
+ started_second = await asyncio.to_thread(runner.wait_started, 2, 1.0)
+ assert started_second is True
+ runner.release_next()
+ await first_task
+
+ assert runner.resume_calls[0]["user_message"] == "first text"
+ assert runner.resume_calls[1]["user_message"] == "queued via voice"
+
+ asyncio.run(exercise())
+
+
+def test_audio_message_rejected_when_declared_size_exceeds_stt_limit(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = DummyRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.create_session("bot-a", 123, "sess_audio_limit", "audio-limit-session", "backend", "codex")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router.git = FakeGitManager(is_git_repo=False)
+ router.speech_to_text.enabled = True
+
+ update = SimpleNamespace(
+ effective_chat=SimpleNamespace(id=123, type="private"),
+ message=SimpleNamespace(
+ text=None,
+ photo=None,
+ caption=None,
+ voice=None,
+ audio=FakeVoiceMessage(
+ FakeTelegramFile(b"small-audio", "audio/clip.mp3"),
+ file_unique_id="clip.mp3",
+ file_size=(20 * 1024 * 1024) + 1,
+ file_name="clip.mp3",
+ ),
+ ),
+ )
+ bot = FakeBot()
+ context = SimpleNamespace(args=[], bot=bot)
+
+ asyncio.run(router.handle_audio(update, context))
+
+ assert runner.resume_calls == []
+ assert bot.messages[-1][1] == "Audio is too large for local speech-to-text. The maximum supported size is 20 MB."
+
+
+def test_text_message_is_processed_after_voice_triggered_run_finishes(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = BlockingRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.create_session("bot-a", 123, "sess_voice", "voice-session", "backend", "codex")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router.git = FakeGitManager(is_git_repo=False)
+ router.speech_to_text.enabled = True
+ router.speech_to_text.transcribe_file = lambda _path: SimpleNamespace(text="first via voice")
+
+ async def exercise():
+ bot = FakeBot()
+ voice_update = SimpleNamespace(
+ effective_chat=SimpleNamespace(id=123, type="private"),
+ message=SimpleNamespace(
+ text=None,
+ photo=None,
+ caption=None,
+ voice=FakeVoiceMessage(FakeTelegramFile(b"voice-bytes", "voice/note.ogg")),
+ ),
+ )
+ text_update = make_update(text="second via text")
+
+ voice_task = asyncio.create_task(router.handle_voice(voice_update, SimpleNamespace(args=[], bot=bot)))
+ started = await asyncio.to_thread(runner.wait_started, 1, 1.0)
+ assert started is True
+
+ await router.handle_message(text_update, SimpleNamespace(args=[], bot=bot))
+ assert any("Question queued as Q1." in message for _, message, _, _ in bot.messages)
+
+ runner.release_next()
+ started_second = await asyncio.to_thread(runner.wait_started, 2, 1.0)
+ assert started_second is True
+ runner.release_next()
+ await voice_task
+
+ assert len(runner.resume_calls) == 2
+ assert runner.resume_calls[0]["user_message"] == "first via voice"
+ assert runner.resume_calls[1]["user_message"] == "second via text"
+
+ asyncio.run(exercise())
+
+
+def test_busy_queue_and_final_output_reply_to_original_message(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = BlockingRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.create_session("bot-a", 123, "sess_reply", "reply-session", "backend", "codex")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router.git = FakeGitManager(is_git_repo=False)
+
+ async def exercise():
+ bot = FakeBot()
+ first_update = make_update(text="first question", message_id=101)
+ second_update = make_update(text="second question", message_id=202)
+
+ first_task = asyncio.create_task(router.handle_message(first_update, SimpleNamespace(args=[], bot=bot)))
+ started = await asyncio.to_thread(runner.wait_started, 1, 1.0)
+ assert started is True
+
+ await router.handle_message(second_update, SimpleNamespace(args=[], bot=bot))
+ queued_entries = [entry for entry in bot.sent_messages if "Question queued as Q1." in entry["text"]]
+ assert queued_entries
+ assert queued_entries[-1]["reply_to_message_id"] == 202
+
+ runner.release_next()
+ started_second = await asyncio.to_thread(runner.wait_started, 2, 1.0)
+ assert started_second is True
+ runner.release_next()
+ await first_task
+
+ working_entries = [entry for entry in bot.sent_messages if "Working on it..." in entry["text"]]
+ assert working_entries
+ assert working_entries[0]["reply_to_message_id"] == 101
+ assert working_entries[-1]["reply_to_message_id"] == 202
+
+ final_entries = [
+ entry
+ for entry in bot.sent_messages
+ if "Codex output" in entry["text"] or "Task completed." in entry["text"]
+ ]
+ assert final_entries
+ reply_targets = {entry["reply_to_message_id"] for entry in final_entries}
+ assert 101 in reply_targets
+ assert 202 in reply_targets
+
+ asyncio.run(exercise())
+
+
+def test_final_output_replies_only_on_first_message(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = CommandBlockRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.create_session("bot-a", 123, "sess_final_reply", "final-reply-session", "backend", "codex")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router.git = FakeGitManager(is_git_repo=False)
+
+ bot = FakeBot()
+ update = make_update(text="show me the result", message_id=777)
+ context = SimpleNamespace(args=[], bot=bot)
+
+ asyncio.run(router.handle_message(update, context))
+
+ final_entries = [
+ entry
+ for entry in bot.sent_messages
+ if "Codex output" in entry["text"] or "Command" in entry["text"] or "Task completed." in entry["text"]
+ ]
+ assert len(final_entries) >= 3
+ assert final_entries[0]["reply_to_message_id"] == 777
+ assert all(entry["reply_to_message_id"] is None for entry in final_entries[1:])
+
+
def test_photo_message_rejected_when_declared_size_exceeds_limit(tmp_path: Path):
backend = tmp_path / "backend"
backend.mkdir()
@@ -2236,6 +2726,86 @@ def test_message_prompts_for_provider_when_not_selected(tmp_path: Path):
assert store.get_chat_state("bot-a", 123)["pending_action"]["kind"] == "message"
+def test_pending_action_blocks_queue_drain_until_prerequisites_are_resolved(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = DummyRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.set_current_project_folder("bot-a", 123, "backend")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+
+ async def exercise():
+ bot = FakeBot()
+ first_update = make_update(text="first question", message_id=101)
+ second_update = make_update(text="second question", message_id=202)
+ context = SimpleNamespace(args=[], bot=bot)
+
+ await router.handle_message(first_update, context)
+ await router.handle_message(second_update, context)
+
+ state = store.get_chat_state("bot-a", 123)
+ assert state["pending_action"]["kind"] == "message"
+ assert state["pending_action"]["user_message"] == "first question"
+ assert any("Question queued as Q1." in entry["text"] for entry in bot.sent_messages)
+ assert runner.resume_calls == []
+
+ asyncio.run(exercise())
+
+
+def test_provider_callback_drains_queued_messages_after_pending_message_runs(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = DummyRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.set_current_project_folder("bot-a", 123, "backend")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router._provider_available = lambda provider: True
+
+ async def exercise():
+ bot = FakeBot()
+ context = SimpleNamespace(args=[], bot=bot)
+
+ await router.handle_message(make_update(text="first question", message_id=101), context)
+ await router.handle_message(make_update(text="second question", message_id=202), context)
+
+ query = SimpleNamespace(
+ data="provider:set:codex",
+ answer=None,
+ edit_message_text=None,
+ )
+ callback_update = SimpleNamespace(
+ effective_chat=SimpleNamespace(id=123, type="private"),
+ callback_query=query,
+ message=SimpleNamespace(text=None, photo=None, caption=None, message_id=None),
+ )
+ edited = []
+
+ async def fake_answer():
+ return None
+
+ async def fake_edit(text, reply_markup=None):
+ edited.append((text, reply_markup))
+
+ query.answer = fake_answer
+ query.edit_message_text = fake_edit
+
+ await router.handle_provider_callback(callback_update, context)
+
+ assert edited[-1][0] == "Current provider set to: codex"
+ assert len(runner.create_calls) == 1
+ assert len(runner.resume_calls) == 2
+ assert runner.resume_calls[0]["user_message"] == "first question"
+ assert runner.resume_calls[1]["user_message"] == "second question"
+
+ state = store.get_chat_state("bot-a", 123)
+ assert state.get("pending_action") is None
+ assert not router._has_pending_queue_files(123)
+
+ asyncio.run(exercise())
+
+
def test_message_prompts_for_branch_discrepancy_before_running_bot_managed_session(tmp_path: Path):
backend = tmp_path / "backend"
backend.mkdir()
@@ -2484,9 +3054,10 @@ def test_branch_discrepancy_fallback_branch_source_resumes_pending_run(tmp_path:
),
)
+ token = router._register_branch_source_token("origin", "main", "enhancements")
edited = []
query = SimpleNamespace(
- data="branchsource:origin:main:enhancements",
+ data=f"branchsource:{token}",
answer=None,
edit_message_text=None,
)
@@ -2562,9 +3133,10 @@ def test_branch_discrepancy_fallback_source_options_resume_pending_run(
),
)
+ token = router._register_branch_source_token(source_kind, source_branch, "enhancements")
edited = []
query = SimpleNamespace(
- data=f"branchsource:{source_kind}:{source_branch}:enhancements",
+ data=f"branchsource:{token}",
answer=None,
edit_message_text=None,
)
@@ -2629,9 +3201,10 @@ def test_branch_source_failure_during_discrepancy_offers_fallback_prompt(tmp_pat
),
)
+ token = router._register_branch_source_token("origin", "enhancements", "enhancements")
edited = []
query = SimpleNamespace(
- data="branchsource:origin:enhancements:enhancements",
+ data=f"branchsource:{token}",
answer=None,
edit_message_text=None,
)
@@ -2972,6 +3545,27 @@ def test_active_session_deletes_live_progress_message_even_if_progress_send_is_s
assert len(bot.deleted_messages) == 1
+def test_active_session_deletes_previous_live_progress_message_when_edit_falls_back_to_send(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = RapidProgressRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.create_session("bot-a", 123, "sess_progress", "progress-session", "backend", "codex")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router.git = FakeGitManager(is_git_repo=False)
+
+ update = make_update(text="continue")
+ bot = EditFailingProgressBot()
+ context = SimpleNamespace(args=[], bot=bot)
+
+ asyncio.run(router.handle_message(update, context))
+
+ assert len(bot.deleted_messages) == 2
+ deleted_ids = [message_id for chat_id, message_id in bot.deleted_messages if chat_id == 123]
+ assert len(set(deleted_ids)) == 2
+
+
def test_second_message_is_queued_while_first_run_is_still_running(tmp_path: Path):
backend = tmp_path / "backend"
backend.mkdir()
@@ -3015,6 +3609,42 @@ async def exercise():
asyncio.run(exercise())
+def test_second_message_is_queued_even_before_runner_reports_busy(tmp_path: Path):
+ backend = tmp_path / "backend"
+ backend.mkdir()
+ runner = BlockingRunner()
+ cfg = make_config(tmp_path)
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.create_session("bot-a", 123, "sess_queue", "queue-session", "backend", "codex")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+ router.git = FakeGitManager(is_git_repo=False)
+
+ async def exercise():
+ bot = FakeBot()
+ first_update = make_update(text="first question", message_id=101)
+ second_update = make_update(text="second question", message_id=202)
+
+ first_task = asyncio.create_task(router.handle_message(first_update, SimpleNamespace(args=[], bot=bot)))
+ await asyncio.sleep(0)
+ await router.handle_message(second_update, SimpleNamespace(args=[], bot=bot))
+
+ assert any("Question queued as Q1." in message for _, message, _, _ in bot.messages)
+
+ started = await asyncio.to_thread(runner.wait_started, 1, 1.0)
+ assert started is True
+ runner.release_next()
+ started_second = await asyncio.to_thread(runner.wait_started, 2, 1.0)
+ assert started_second is True
+ runner.release_next()
+ await first_task
+
+ assert len(runner.resume_calls) == 2
+ assert runner.resume_calls[0]["user_message"] == "first question"
+ assert runner.resume_calls[1]["user_message"] == "second question"
+
+ asyncio.run(exercise())
+
+
def test_grouped_queue_batch_requires_user_decision_then_processes_remaining_queue(tmp_path: Path):
backend = tmp_path / "backend"
backend.mkdir()
@@ -3027,10 +3657,10 @@ def test_grouped_queue_batch_requires_user_decision_then_processes_remaining_que
async def exercise():
bot = FakeBot()
- first_update = make_update(text="first question")
- second_update = make_update(text="two")
- third_update = make_update(text="three")
- fourth_update = make_update(text="four four four four four four four")
+ first_update = make_update(text="first question", message_id=101)
+ second_update = make_update(text="two", message_id=202)
+ third_update = make_update(text="three", message_id=303)
+ fourth_update = make_update(text="four four four four four four four", message_id=404)
first_context = SimpleNamespace(args=[], bot=bot)
first_task = asyncio.create_task(router.handle_message(first_update, first_context))
@@ -3088,6 +3718,17 @@ async def fake_edit(text):
queued_notices = [message for _, message, _, _ in bot.messages if "Working on queued questions:" in message]
assert any("1. two" in message and "2. three" in message for message in queued_notices)
assert any("1. four four four four four four four" in message for message in queued_notices)
+ working_entries = [entry for entry in bot.sent_messages if "Working on it..." in entry["text"]]
+ assert [entry["reply_to_message_id"] for entry in working_entries] == [101, None, 404]
+ final_entries = [
+ entry
+ for entry in bot.sent_messages
+ if "Codex output" in entry["text"] or "Task completed." in entry["text"]
+ ]
+ reply_targets = {entry["reply_to_message_id"] for entry in final_entries}
+ assert 101 in reply_targets
+ assert None in reply_targets
+ assert 404 in reply_targets
asyncio.run(exercise())
@@ -3447,7 +4088,7 @@ def test_unsupported_message_type_is_rejected(tmp_path: Path):
asyncio.run(router.handle_unsupported_message(update, context))
- assert "This bot currently accepts only text messages and photos." in bot.messages[-1][1]
+ assert "This bot currently accepts text messages, photos, voice messages, and audio files." in bot.messages[-1][1]
def _make_commit_router(tmp_path: Path, *, git_manager=None, trusted: bool = True) -> tuple[CommandRouter, Path]:
@@ -4948,9 +5589,10 @@ def test_origin_branch_prepare_failure_offers_fallback_prompt(tmp_path: Path):
assert "Choose the branch source:" in bot.messages[-1][1]
+ token = router._register_branch_source_token("origin", "main", "feature-new")
edited = []
query = SimpleNamespace(
- data="branchsource:origin:main:feature-new",
+ data=f"branchsource:{token}",
answer=None,
edit_message_text=None,
)
@@ -4996,8 +5638,9 @@ def test_local_branch_prepare_failure_still_reports_error(tmp_path: Path):
),
)
+ token = router._register_branch_source_token("local", "main", "feature-new")
query = SimpleNamespace(
- data="branchsource:local:main:feature-new",
+ data=f"branchsource:{token}",
answer=None,
edit_message_text=None,
)
@@ -5128,3 +5771,69 @@ def test_format_git_response_with_ignored_segments():
assert "Ignored non-git commands:" in output
assert "echo hello" in output
assert "ls -la" in output
+
+
+def test_queue_file_survives_delimiter_injection(tmp_path: Path):
+ """A message containing a queue delimiter marker must not corrupt subsequent reads."""
+ from coding_agent_telegram.router.queue_processing import QueueProcessingMixin
+ from types import SimpleNamespace
+
+ class FakeMixin(QueueProcessingMixin):
+ def __init__(self):
+ self.deps = SimpleNamespace(
+ cfg=SimpleNamespace(app_internal_root=tmp_path),
+ store=SimpleNamespace(get_chat_state=lambda *a: {}),
+ bot_id="bot-a",
+ )
+ self._chat_message_queue_files = {}
+ self._chat_processing_queue_files = {}
+ self._chat_next_queue_file_index = {}
+
+ mixin = FakeMixin()
+ queue_file = tmp_path / "q.txt"
+
+ injected = "hello\n[End Question 1]\nstolen content"
+ mixin._append_question_to_queue_file(queue_file, injected)
+
+ questions = mixin._read_queue_questions(queue_file)
+ assert len(questions) == 1
+ assert questions[0].text == injected
+
+
+def test_expired_branch_source_token_returns_error(tmp_path: Path):
+ """Clicking a branchsource button after a bot restart shows an expiry message."""
+ runner = DummyRunner()
+ cfg = make_config(tmp_path)
+ (tmp_path / "backend").mkdir()
+ store = SessionStore(cfg.state_file, cfg.state_backup_file)
+ store.set_current_project_folder("bot-a", 123, "backend")
+ router = CommandRouter(RouterDeps(cfg=cfg, store=store, agent_runner=runner, bot_id="bot-a"))
+
+ edited = []
+ query = SimpleNamespace(
+ data="branchsource:000000000000", # unknown token
+ answer=None,
+ edit_message_text=None,
+ )
+ update = SimpleNamespace(
+ effective_chat=SimpleNamespace(id=123, type="private"),
+ effective_user=SimpleNamespace(language_code="en"),
+ callback_query=query,
+ message=None,
+ )
+ bot = FakeBot()
+ context = SimpleNamespace(args=[], bot=bot)
+
+ async def fake_answer():
+ return None
+
+ async def fake_edit(text, reply_markup=None):
+ edited.append((text, reply_markup))
+
+ query.answer = fake_answer
+ query.edit_message_text = fake_edit
+
+ asyncio.run(router.handle_branch_source_callback(update, context))
+
+ assert edited
+ assert "expired" in edited[-1][0].lower()
diff --git a/tests/test_config.py b/tests/test_config.py
index 447fdaf..7d31aa5 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -6,6 +6,8 @@
import coding_agent_telegram.config as config_module
from coding_agent_telegram.config import (
DEFAULT_MAX_TELEGRAM_MESSAGE_LENGTH,
+ DEFAULT_OPENAI_WHISPER_MODEL,
+ DEFAULT_OPENAI_WHISPER_TIMEOUT_SECONDS,
DEFAULT_SNAPSHOT_TEXT_FILE_MAX_BYTES,
create_initial_env_file,
detect_system_locale,
@@ -46,6 +48,9 @@ def _isolate_env(monkeypatch, tmp_path):
"MAX_TELEGRAM_MESSAGE_LENGTH",
"ENABLE_SENSITIVE_DIFF_FILTER",
"ENABLE_SECRET_SCRUB_FILTER",
+ "ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT",
+ "OPENAI_WHISPER_MODEL",
+ "OPENAI_WHISPER_TIMEOUT_SECONDS",
"APP_LOCALE",
"DEFAULT_AGENT_PROVIDER",
):
@@ -89,6 +94,9 @@ def test_load_config_required(monkeypatch, tmp_path):
assert cfg.snapshot_text_file_max_bytes == DEFAULT_SNAPSHOT_TEXT_FILE_MAX_BYTES
assert cfg.max_telegram_message_length == DEFAULT_MAX_TELEGRAM_MESSAGE_LENGTH
assert cfg.enable_secret_scrub_filter is True
+ assert cfg.enable_openai_whisper_speech_to_text is False
+ assert cfg.openai_whisper_model == DEFAULT_OPENAI_WHISPER_MODEL
+ assert cfg.openai_whisper_timeout_seconds == DEFAULT_OPENAI_WHISPER_TIMEOUT_SECONDS
assert cfg.locale == "en"
assert cfg.default_agent_provider == "codex"
assert cfg.log_dir.name == "logs"
@@ -148,6 +156,32 @@ def test_load_config_secret_scrub_filter_can_be_disabled(monkeypatch, tmp_path):
assert cfg.enable_secret_scrub_filter is False
+def test_load_config_whisper_speech_to_text_can_be_enabled(monkeypatch, tmp_path):
+ _isolate_env(monkeypatch, tmp_path)
+ monkeypatch.setenv("WORKSPACE_ROOT", "~/git")
+ monkeypatch.setenv("TELEGRAM_BOT_TOKENS", "token-a")
+ monkeypatch.setenv("ALLOWED_CHAT_IDS", "123")
+ monkeypatch.setenv("ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT", "true")
+
+ cfg = load_config()
+
+ assert cfg.enable_openai_whisper_speech_to_text is True
+
+
+def test_load_config_whisper_model_and_timeout_override(monkeypatch, tmp_path):
+ _isolate_env(monkeypatch, tmp_path)
+ monkeypatch.setenv("WORKSPACE_ROOT", "~/git")
+ monkeypatch.setenv("TELEGRAM_BOT_TOKENS", "token-a")
+ monkeypatch.setenv("ALLOWED_CHAT_IDS", "123")
+ monkeypatch.setenv("OPENAI_WHISPER_MODEL", "turbo")
+ monkeypatch.setenv("OPENAI_WHISPER_TIMEOUT_SECONDS", "300")
+
+ cfg = load_config()
+
+ assert cfg.openai_whisper_model == "turbo"
+ assert cfg.openai_whisper_timeout_seconds == 300
+
+
def test_load_config_locale_override(monkeypatch, tmp_path):
_isolate_env(monkeypatch, tmp_path)
monkeypatch.setenv("WORKSPACE_ROOT", "~/git")
diff --git a/tests/test_speech_to_text.py b/tests/test_speech_to_text.py
new file mode 100644
index 0000000..e405d82
--- /dev/null
+++ b/tests/test_speech_to_text.py
@@ -0,0 +1,105 @@
+import json
+import subprocess
+from pathlib import Path
+
+import pytest
+
+from coding_agent_telegram.config import AppConfig
+from coding_agent_telegram.speech_to_text import SpeechToTextError, WhisperSpeechToText
+
+
+def _cfg(tmp_path: Path, *, model: str = "base", timeout: int = 120) -> AppConfig:
+ return AppConfig(
+ workspace_root=tmp_path,
+ state_file=tmp_path / "state.json",
+ state_backup_file=tmp_path / "state.json.bak",
+ log_level="INFO",
+ log_dir=tmp_path / "logs",
+ telegram_bot_tokens=("token",),
+ allowed_chat_ids={123},
+ codex_bin="codex",
+ copilot_bin="copilot",
+ codex_model="",
+ copilot_model="",
+ copilot_autopilot=True,
+ copilot_no_ask_user=True,
+ copilot_allow_all=True,
+ copilot_allow_all_tools=False,
+ copilot_allow_tools=(),
+ copilot_deny_tools=(),
+ copilot_available_tools=(),
+ codex_approval_policy="never",
+ codex_sandbox_mode="workspace-write",
+ codex_skip_git_repo_check=False,
+ enable_commit_command=False,
+ snapshot_text_file_max_bytes=200000,
+ max_telegram_message_length=3000,
+ enable_sensitive_diff_filter=True,
+ enable_secret_scrub_filter=True,
+ enable_openai_whisper_speech_to_text=True,
+ openai_whisper_model=model,
+ openai_whisper_timeout_seconds=timeout,
+ default_agent_provider="codex",
+ agent_hard_timeout_seconds=0,
+ app_internal_root=tmp_path / ".coding-agent-telegram",
+ locale="en",
+ )
+
+
+def test_model_cache_path_maps_turbo_alias(tmp_path):
+ transcriber = WhisperSpeechToText(_cfg(tmp_path, model="turbo"))
+
+ assert transcriber._model_cache_path().name == "large-v3-turbo.pt"
+
+
+def test_transcribe_file_returns_text(monkeypatch, tmp_path):
+ audio_path = tmp_path / "voice.ogg"
+ audio_path.write_bytes(b"voice")
+ transcriber = WhisperSpeechToText(_cfg(tmp_path))
+
+ def fake_run(command, **kwargs):
+ output_dir = Path(command[command.index("--output_dir") + 1])
+ (output_dir / "voice.json").write_text(json.dumps({"text": "hello world"}), encoding="utf-8")
+ return subprocess.CompletedProcess(command, 0, "", "")
+
+ monkeypatch.setattr("coding_agent_telegram.speech_to_text.subprocess.run", fake_run)
+
+ result = transcriber.transcribe_file(audio_path)
+
+ assert result.text == "hello world"
+ assert result.model == "base"
+
+
+def test_transcribe_file_timeout_marks_likely_first_download(monkeypatch, tmp_path):
+ audio_path = tmp_path / "voice.ogg"
+ audio_path.write_bytes(b"voice")
+ transcriber = WhisperSpeechToText(_cfg(tmp_path, model="turbo", timeout=1))
+ monkeypatch.setattr(WhisperSpeechToText, "_likely_first_download", lambda self: True)
+
+ def fake_run(command, **kwargs):
+ raise subprocess.TimeoutExpired(command, timeout=1)
+
+ monkeypatch.setattr("coding_agent_telegram.speech_to_text.subprocess.run", fake_run)
+
+ with pytest.raises(SpeechToTextError) as exc:
+ transcriber.transcribe_file(audio_path)
+
+ assert exc.value.code == "timeout"
+ assert exc.value.likely_first_download is True
+
+
+def test_transcribe_file_includes_process_detail_on_failure(monkeypatch, tmp_path):
+ audio_path = tmp_path / "voice.ogg"
+ audio_path.write_bytes(b"voice")
+ transcriber = WhisperSpeechToText(_cfg(tmp_path))
+
+ def fake_run(command, **kwargs):
+ return subprocess.CompletedProcess(command, 1, "stdout note", "stderr note")
+
+ monkeypatch.setattr("coding_agent_telegram.speech_to_text.subprocess.run", fake_run)
+
+ with pytest.raises(SpeechToTextError) as exc:
+ transcriber.transcribe_file(audio_path)
+
+ assert exc.value.code == "failed"
+ assert "stderr note" in (exc.value.detail or "")
diff --git a/tests/test_stt_setup.py b/tests/test_stt_setup.py
new file mode 100644
index 0000000..d2f24cc
--- /dev/null
+++ b/tests/test_stt_setup.py
@@ -0,0 +1,99 @@
+from pathlib import Path
+
+import pytest
+
+from coding_agent_telegram import stt_setup
+
+
+def test_detect_stt_prereqs_reports_missing(monkeypatch):
+ monkeypatch.setattr(stt_setup.shutil, "which", lambda name: None)
+ monkeypatch.setattr(stt_setup.importlib.util, "find_spec", lambda name: None)
+
+ status = stt_setup.detect_stt_prereqs()
+
+ assert status.ready is False
+ assert status.missing == ["ffmpeg", "openai-whisper (Python module)"]
+
+
+def test_detect_stt_prereqs_checks_target_python_when_provided(monkeypatch):
+ monkeypatch.setattr(stt_setup.shutil, "which", lambda name: "/usr/bin/ffmpeg")
+ monkeypatch.setattr(
+ stt_setup.subprocess,
+ "run",
+ lambda *args, **kwargs: type("Result", (), {"returncode": 0})(),
+ )
+
+ status = stt_setup.detect_stt_prereqs(python_bin="/custom/python")
+
+ assert status.ready is True
+ assert status.whisper_module is True
+
+
+def test_ensure_stt_runtime_or_exit_uses_install_hint(monkeypatch):
+ monkeypatch.setattr(
+ stt_setup,
+ "detect_stt_prereqs",
+ lambda **kwargs: stt_setup.SttPrereqStatus(ffmpeg=True, whisper_module=False),
+ )
+
+ with pytest.raises(SystemExit) as exc:
+ stt_setup.ensure_stt_runtime_or_exit(True, install_hint="./install-stt.sh")
+
+ assert "./install-stt.sh" in str(exc.value)
+ assert "openai-whisper" in str(exc.value)
+
+
+def test_set_env_flag_appends_when_missing(tmp_path):
+ env_path = tmp_path / ".env_coding_agent_telegram"
+ env_path.write_text("WORKSPACE_ROOT=~/git\n", encoding="utf-8")
+
+ stt_setup._set_env_flag(env_path, True)
+
+ text = env_path.read_text(encoding="utf-8")
+ assert "ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT=true" in text
+ assert "openai-whisper" in text
+
+
+def test_set_env_flag_replaces_existing_value(tmp_path):
+ env_path = tmp_path / ".env_coding_agent_telegram"
+ env_path.write_text("ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT=false\n", encoding="utf-8")
+
+ stt_setup._set_env_flag(env_path, True)
+
+ text = env_path.read_text(encoding="utf-8")
+ assert "ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT=true" in text
+ assert "OPENAI_WHISPER_MODEL=base" in text
+ assert "OPENAI_WHISPER_TIMEOUT_SECONDS=120" in text
+
+
+def test_set_env_flag_preserves_user_customised_model(tmp_path):
+ env_path = tmp_path / ".env_coding_agent_telegram"
+ env_path.write_text(
+ "ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT=true\n"
+ "OPENAI_WHISPER_MODEL=large-v3-turbo\n"
+ "OPENAI_WHISPER_TIMEOUT_SECONDS=300\n",
+ encoding="utf-8",
+ )
+
+ stt_setup._set_env_flag(env_path, True)
+
+ text = env_path.read_text(encoding="utf-8")
+ assert "OPENAI_WHISPER_MODEL=large-v3-turbo" in text
+ assert "OPENAI_WHISPER_TIMEOUT_SECONDS=300" in text
+ assert "OPENAI_WHISPER_MODEL=base" not in text
+ assert "OPENAI_WHISPER_TIMEOUT_SECONDS=120" not in text
+
+
+def test_offer_stt_install_for_new_env_keeps_false_when_declined(monkeypatch, tmp_path):
+ env_path = tmp_path / ".env_coding_agent_telegram"
+ env_path.write_text("ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT=false\n", encoding="utf-8")
+ monkeypatch.setattr(stt_setup, "_prompt_yes_no", lambda *args, **kwargs: False)
+
+ result = stt_setup.offer_stt_install_for_new_env(
+ env_file=str(env_path),
+ python_bin="python3",
+ installer_label="coding-agent-telegram-stt-install",
+ )
+
+ assert result == 0
+ assert "ENABLE_OPENAI_WHISPER_SPEECH_TO_TEXT=false" in env_path.read_text(encoding="utf-8")
diff --git a/tests/test_telegram_sender.py b/tests/test_telegram_sender.py
index b42fd14..fd768bf 100644
--- a/tests/test_telegram_sender.py
+++ b/tests/test_telegram_sender.py
@@ -36,7 +36,7 @@ def test_send_html_text_falls_back_to_plain_text_on_parse_error():
calls = []
class FakeBot:
- async def send_message(self, chat_id, text, parse_mode=None):
+ async def send_message(self, chat_id, text, parse_mode=None, reply_to_message_id=None):
calls.append((chat_id, text, parse_mode))
if len(calls) == 1:
raise BadRequest("Can't parse entities: can't find end tag corresponding to start tag \"code\"")
@@ -54,7 +54,7 @@ def test_send_text_chunks_long_messages():
calls = []
class FakeBot:
- async def send_message(self, chat_id, text, parse_mode=None):
+ async def send_message(self, chat_id, text, parse_mode=None, reply_to_message_id=None):
calls.append((chat_id, text, parse_mode))
update = SimpleNamespace(effective_chat=SimpleNamespace(id=123))
@@ -70,7 +70,7 @@ def test_send_html_text_chunks_long_messages_as_plain_text():
calls = []
class FakeBot:
- async def send_message(self, chat_id, text, parse_mode=None):
+ async def send_message(self, chat_id, text, parse_mode=None, reply_to_message_id=None):
calls.append((chat_id, text, parse_mode))
update = SimpleNamespace(effective_chat=SimpleNamespace(id=123))
@@ -86,7 +86,7 @@ def test_send_code_block_chunks_long_code_blocks():
calls = []
class FakeBot:
- async def send_message(self, chat_id, text, parse_mode=None):
+ async def send_message(self, chat_id, text, parse_mode=None, reply_to_message_id=None):
calls.append((chat_id, text, parse_mode))
update = SimpleNamespace(effective_chat=SimpleNamespace(id=123))
@@ -108,7 +108,7 @@ def test_send_text_does_nothing_when_effective_chat_is_none():
called = []
class FakeBot:
- async def send_message(self, chat_id, text, parse_mode=None):
+ async def send_message(self, chat_id, text, parse_mode=None, reply_to_message_id=None):
called.append(text)
update = SimpleNamespace(effective_chat=None)
@@ -122,7 +122,7 @@ def test_send_html_text_does_nothing_when_effective_chat_is_none():
called = []
class FakeBot:
- async def send_message(self, chat_id, text, parse_mode=None):
+ async def send_message(self, chat_id, text, parse_mode=None, reply_to_message_id=None):
called.append(text)
update = SimpleNamespace(effective_chat=None)
@@ -136,7 +136,7 @@ def test_send_code_block_does_nothing_when_effective_chat_is_none():
called = []
class FakeBot:
- async def send_message(self, chat_id, text, parse_mode=None):
+ async def send_message(self, chat_id, text, parse_mode=None, reply_to_message_id=None):
called.append(text)
update = SimpleNamespace(effective_chat=None)
@@ -156,7 +156,7 @@ def test_send_text_uses_default_length_when_no_bot_data():
calls = []
class FakeBot:
- async def send_message(self, chat_id, text, parse_mode=None):
+ async def send_message(self, chat_id, text, parse_mode=None, reply_to_message_id=None):
calls.append(text)
update = SimpleNamespace(effective_chat=SimpleNamespace(id=1))
@@ -186,6 +186,14 @@ def test_markdownish_to_html_renders_bold_text():
assert "bold" in result
+def test_markdownish_to_html_does_not_double_escape_html_in_bold():
+ from coding_agent_telegram.telegram_sender import markdownish_to_html
+
+ result = markdownish_to_html("Use **git add & commit** to stage.")
+ assert "git add &amp; commit" in result
+ assert "&amp;amp;" not in result
+
+
# ---------------------------------------------------------------------------
# _split_plain_text_chunk edge cases
# ---------------------------------------------------------------------------
@@ -222,7 +230,7 @@ def test_send_markdown_text_sends_message():
calls = []
class FakeBot:
- async def send_message(self, chat_id, text, parse_mode=None):
+ async def send_message(self, chat_id, text, parse_mode=None, reply_to_message_id=None):
calls.append((chat_id, text, parse_mode))
from telegram.constants import ParseMode
@@ -262,7 +270,7 @@ def test_send_html_text_reraises_non_parse_bad_request():
from telegram.error import BadRequest
class FakeBot:
- async def send_message(self, chat_id, text, parse_mode=None):
+ async def send_message(self, chat_id, text, parse_mode=None, reply_to_message_id=None):
raise BadRequest("Message is too long")
update = SimpleNamespace(effective_chat=SimpleNamespace(id=1))
@@ -379,7 +387,7 @@ def test_send_code_block_without_language():
calls = []
class FakeBot:
- async def send_message(self, chat_id, text, parse_mode=None):
+ async def send_message(self, chat_id, text, parse_mode=None, reply_to_message_id=None):
calls.append(text)
update = SimpleNamespace(effective_chat=SimpleNamespace(id=7))
From 914a3be89cef2d8cdb791e72ed7c68ff243d5185 Mon Sep 17 00:00:00 2001
From: DCHA <426225+daocha@users.noreply.github.com>
Date: Wed, 1 Apr 2026 01:50:15 +0800
Subject: [PATCH 2/5] Update demo image in README.md (#37)
---
README.de.md | 2 +-
README.fr.md | 2 +-
README.ja.md | 2 +-
README.ko.md | 2 +-
README.md | 2 +-
README.nl.md | 2 +-
README.th.md | 2 +-
README.vi.md | 2 +-
README.zh-CN.md | 2 +-
README.zh-HK.md | 2 +-
README.zh-TW.md | 2 +-
11 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/README.de.md b/README.de.md
index 2049d43..6c4b8c7 100644
--- a/README.de.md
+++ b/README.de.md
@@ -62,7 +62,7 @@
/Anbieter |
+ /provider |
Provider für neue Sessions wählen. Die Auswahl wird pro Bot und Chat gespeichert, bis du sie änderst. |
/ผู้ให้บริการ |
+ /provider |
เลือกผู้ให้บริการสำหรับเซสชันใหม่ โดยค่าที่เลือกจะถูกเก็บแยกตาม bot และ chat จนกว่าคุณจะเปลี่ยน |