Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions src/ccbot/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,53 @@
1. `ccbot hook` — delegates to hook.hook_main() for Claude Code hook processing.
2. Default — configures logging, initializes tmux session, and starts the
Telegram bot polling loop via bot.create_bot().

Also enforces a single-bot mutex via flock on ``$CCBOT_DIR/ccbot.lock``
— Telegram's getUpdates long-poll is exclusive per token, so a second
instance silently steals updates and the original starts logging
``Conflict: terminated by other getUpdates request`` until one dies.
The flock makes the second instance refuse to start instead.
"""

import fcntl
import logging
import sys
from pathlib import Path
from typing import IO, Any

# Module-level reference to the open lock file. main() stores the handle
# returned by _acquire_singleton_lock() here so the file object stays
# referenced (and therefore open) for the whole process lifetime. A handle
# kept only in a local variable could be garbage-collected and closed as
# soon as that scope exits, and closing the file releases the flock.
_singleton_lock_handle: IO[Any] | None = None


def _acquire_singleton_lock(lock_path: Path) -> IO[Any]:
"""Acquire an exclusive flock on ``lock_path`` or ``sys.exit(1)``.

Returns the file handle holding the lock; callers MUST keep the
handle alive for the process lifetime (we assign it to
``_singleton_lock_handle`` for this). ``FD_CLOEXEC`` is set so the
lock doesn't leak into ``subprocess`` / ``asyncio.subprocess``
children — a stray child outliving the parent would otherwise hold
the lock and block future bot starts.
"""
lock_path.parent.mkdir(parents=True, exist_ok=True)
fh = open(lock_path, "w")
fcntl.fcntl(fh.fileno(), fcntl.F_SETFD, fcntl.FD_CLOEXEC)
try:
fcntl.flock(fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
except OSError:
# Logging may not be configured yet on this path, so go via
# stderr too — the supervisor wrapper captures it either way.
msg = (
f"Another ccbot instance holds {lock_path}. "
"Refusing to start to avoid Telegram getUpdates conflict."
)
logging.getLogger(__name__).error(msg)
print(f"Error: {msg}", file=sys.stderr)
fh.close()
sys.exit(1)
return fh


def main() -> None:
Expand Down Expand Up @@ -40,6 +83,14 @@ def main() -> None:
print("Get your user ID from @userinfobot on Telegram.")
sys.exit(1)

# Singleton lock has to land BEFORE we touch tmux / create_bot /
# run_polling — otherwise a second instance would still race the
# getUpdates handshake before discovering it can't hold the lock.
global _singleton_lock_handle
from .utils import ccbot_dir

_singleton_lock_handle = _acquire_singleton_lock(ccbot_dir() / "ccbot.lock")

logging.getLogger("ccbot").setLevel(logging.DEBUG)
# AIORateLimiter (max_retries=5) handles retries itself; keep INFO for visibility
logging.getLogger("telegram.ext.AIORateLimiter").setLevel(logging.INFO)
Expand Down
63 changes: 63 additions & 0 deletions tests/ccbot/test_singleton_lock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Tests for the singleton flock that prevents two bot instances from
both calling Telegram's exclusive ``getUpdates``."""

from __future__ import annotations

import fcntl
from pathlib import Path

import pytest

from ccbot.main import _acquire_singleton_lock


def test_fresh_path_locks_and_returns_handle(tmp_path: Path) -> None:
    """A lock path that does not exist yet is created and an open handle returned."""
    lock_file = tmp_path / "ccbot.lock"
    # The context manager closes the handle (dropping the lock) on exit,
    # whether or not an assertion fails.
    with _acquire_singleton_lock(lock_file) as handle:
        assert lock_file.exists()
        assert not handle.closed
        # The descriptor must be close-on-exec so subprocess children
        # do not inherit the lock.
        fd_flags = fcntl.fcntl(handle.fileno(), fcntl.F_GETFD)
        assert fd_flags & fcntl.FD_CLOEXEC


def test_second_acquirer_exits(tmp_path: Path) -> None:
    """A second acquire on an already-held lock exits with status 1."""
    lock_file = tmp_path / "ccbot.lock"
    with _acquire_singleton_lock(lock_file):
        # The lock is held for the duration of this block, so the
        # non-blocking flock in the second call fails immediately and
        # the function falls through to sys.exit(1).
        with pytest.raises(SystemExit) as exit_info:
            _acquire_singleton_lock(lock_file)
        assert exit_info.value.code == 1


def test_released_lock_can_be_reacquired(tmp_path: Path) -> None:
    """Closing the holder's handle frees the lock for the next acquirer."""
    lock_file = tmp_path / "ccbot.lock"
    # Simulate the previous holder going away: closing the file releases
    # the lock, so no "stale lock file" cleanup should be needed before
    # a fresh start (e.g. the next supervisor cycle).
    _acquire_singleton_lock(lock_file).close()
    with _acquire_singleton_lock(lock_file) as reacquired:
        assert not reacquired.closed


def test_creates_parent_directory(tmp_path: Path) -> None:
    """Missing parent directories of the lock path are created on demand."""
    # On first launch the ccbot directory may not exist yet; acquiring the
    # lock must not fail with FileNotFoundError in that situation.
    lock_file = tmp_path / "fresh" / "subdir" / "ccbot.lock"
    with _acquire_singleton_lock(lock_file):
        assert lock_file.exists()
Loading