From 1de7232f74e6bcad1a36f966c4a001cb8166f1d4 Mon Sep 17 00:00:00 2001 From: Bob Date: Wed, 11 Mar 2026 07:59:55 +0000 Subject: [PATCH 1/5] fix(manager): detect existing server on startup, avoid port conflict When aw-server is already running (e.g. managed by systemd, Docker, or started manually for debugging), aw-qt previously failed to start its own instance with an "Address already in use" error, leaving the user confused and requiring a manual "Restart" click from the tray icon. This change adds a pre-startup check for server modules (aw-server and aw-server-rust): before launching a subprocess, aw-qt probes the expected port. If the server is already responding, it marks the module as started (external) and skips the subprocess launch entirely. - `Module._external_server` flag tracks this state - `is_alive()` returns True for external servers (not our process to track) - `stop()` skips termination for external servers (not ours to stop) Fixes the UX issue reported in #113: users running aw-server via systemd or alongside another process no longer see startup failures. --- aw_qt/manager.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/aw_qt/manager.py b/aw_qt/manager.py index bb22350..e1f8c67 100644 --- a/aw_qt/manager.py +++ b/aw_qt/manager.py @@ -3,6 +3,8 @@ import logging import subprocess import platform +import urllib.error +import urllib.request from pathlib import Path from glob import glob from time import sleep @@ -148,6 +150,7 @@ def __init__(self, name: str, path: Path, type: str) -> None: # self.location = "system" if _is_system_module(name) else "bundled" self._process: Optional[subprocess.Popen[str]] = None self._last_process: Optional[subprocess.Popen[str]] = None + self._external_server: bool = False # True if we detected an already-running server def __hash__(self) -> int: return hash((self.name, self.path)) @@ -161,6 +164,27 @@ def __repr__(self) -> str: def start(self, testing: bool) -> None: logger.info(f"Starting module {self.name}") + # For server modules, check if a server is already running before attempting + # to start one. This avoids port conflicts and the confusing "Restart" requirement + # when aw-server is managed externally (e.g. via systemd or Docker). + if self.name in ("aw-server", "aw-server-rust"): + from .config import _read_server_port + + port = _read_server_port(testing) + try: + urllib.request.urlopen( + f"http://localhost:{port}/api/0/info", timeout=1 + ) + logger.info( + f"{self.name}: server already running on port {port}, " + "using existing instance instead of starting a new one" + ) + self._external_server = True + self.started = True + return + except (urllib.error.URLError, OSError): + pass # No existing server, proceed with normal startup + exec_cmd = [str(self.path)] if testing: exec_cmd.append("--testing") @@ -195,6 +219,14 @@ def stop(self) -> None: f"Tried to stop module {self.name}, but it hasn't been started" ) return + elif self._external_server: + # We didn't start this server, so don't stop it — it's managed externally + logger.info( + f"Module {self.name} is using an external server instance, not stopping it" + ) + self._external_server = False + self.started = False + return elif not self.is_alive(): logger.warning(f"Tried to stop module {self.name}, but it wasn't running") else: @@ -223,6 +255,9 @@ def toggle(self, testing: bool) -> None: self.start(testing) def is_alive(self) -> bool: + if self._external_server: + # We don't own this process; assume it's alive (managed externally) + return True if self._process is None: return False From 5af82d82a4b6447b629c33d43c721199933a61bd Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 16 Mar 2026 12:18:34 +0000 Subject: [PATCH 2/5] fix(manager): tighten external server detection --- aw_qt/manager.py | 58 +++++++++++++++++++++++++++++-------------- tests/test_manager.py | 34 +++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 19 deletions(-) diff --git a/aw_qt/manager.py b/aw_qt/manager.py index e1f8c67..192d5a4 100644 --- a/aw_qt/manager.py +++ b/aw_qt/manager.py @@ -151,6 +151,7 @@ def __init__(self, name: str, path: Path, type: str) -> None: self._process: Optional[subprocess.Popen[str]] = None self._last_process: Optional[subprocess.Popen[str]] = None self._external_server: bool = False # True if we detected an already-running server + self._external_server_testing: bool = False def __hash__(self) -> int: return hash((self.name, self.path)) @@ -161,29 +162,43 @@ def __eq__(self, other: Hashable) -> bool: def __repr__(self) -> str: return f"" + def _get_server_port(self, testing: bool) -> Optional[int]: + if self.name not in ("aw-server", "aw-server-rust"): + return None + + from .config import _read_aw_server_port, _read_server_rust_port + + if self.name == "aw-server": + return _read_aw_server_port(testing) or (5666 if testing else 5600) + return _read_server_rust_port(testing) or (5666 if testing else 5600) + + def _probe_external_server(self, testing: bool) -> bool: + port = self._get_server_port(testing) + if port is None: + return False + + try: + urllib.request.urlopen(f"http://localhost:{port}/api/0/info", timeout=0.2) + except (urllib.error.URLError, OSError): + return False + return True + def start(self, testing: bool) -> None: logger.info(f"Starting module {self.name}") # For server modules, check if a server is already running before attempting # to start one. This avoids port conflicts and the confusing "Restart" requirement # when aw-server is managed externally (e.g. via systemd or Docker). - if self.name in ("aw-server", "aw-server-rust"): - from .config import _read_server_port - - port = _read_server_port(testing) - try: - urllib.request.urlopen( - f"http://localhost:{port}/api/0/info", timeout=1 - ) - logger.info( - f"{self.name}: server already running on port {port}, " - "using existing instance instead of starting a new one" - ) - self._external_server = True - self.started = True - return - except (urllib.error.URLError, OSError): - pass # No existing server, proceed with normal startup + if self._probe_external_server(testing): + port = self._get_server_port(testing) + logger.info( + f"{self.name}: server already running on port {port}, " + "using existing instance instead of starting a new one" + ) + self._external_server = True + self._external_server_testing = testing + self.started = True + return exec_cmd = [str(self.path)] if testing: @@ -225,6 +240,7 @@ def stop(self) -> None: f"Module {self.name} is using an external server instance, not stopping it" ) self._external_server = False + self._external_server_testing = False self.started = False return elif not self.is_alive(): @@ -256,8 +272,12 @@ def toggle(self, testing: bool) -> None: def is_alive(self) -> bool: if self._external_server: - # We don't own this process; assume it's alive (managed externally) - return True + # We don't own this process, so re-probe the server instead. + alive = self._probe_external_server(self._external_server_testing) + if not alive: + self._external_server = False + self._external_server_testing = False + return alive if self._process is None: return False diff --git a/tests/test_manager.py b/tests/test_manager.py index 6fde3c4..e3f7dd1 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -69,6 +69,26 @@ def test_toggle_restarts_crashed_module(self, module): assert not module.started # stop() was called to clean up +class TestModuleServerProbe: + def test_aw_server_uses_python_server_port(self): + mod = Module("aw-server", Path("/usr/bin/aw-server"), "system") + + with ( + patch("aw_qt.config._read_aw_server_port", return_value=5601), + patch("aw_qt.config._read_server_rust_port", return_value=6601), + ): + assert mod._get_server_port(testing=False) == 5601 + + def test_aw_server_rust_uses_rust_server_port(self): + mod = Module("aw-server-rust", Path("/usr/bin/aw-server-rust"), "system") + + with ( + patch("aw_qt.config._read_aw_server_port", return_value=5601), + patch("aw_qt.config._read_server_rust_port", return_value=6601), + ): + assert mod._get_server_port(testing=False) == 6601 + + class TestModuleIsAlive: """Tests for Module.is_alive() behavior.""" @@ -87,6 +107,20 @@ def test_is_alive_exited(self, module): module._process = mock_proc assert not module.is_alive() + def test_is_alive_reprobes_external_server(self): + mod = Module("aw-server", Path("/usr/bin/aw-server"), "system") + mod._external_server = True + mod._external_server_testing = True + + with patch.object(mod, "_probe_external_server", side_effect=[True, False]) as probe: + assert mod.is_alive() + assert mod.is_alive() is False + assert mod._external_server is False + assert mod._external_server_testing is False + + assert probe.call_args_list[0].args == (True,) + assert probe.call_args_list[1].args == (True,) + class TestGetUnexpectedStops: """Tests for Manager.get_unexpected_stops().""" From cc6015524002d5d25f359507875e15cc4c555f77 Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 16 Mar 2026 12:29:56 +0000 Subject: [PATCH 3/5] fix(manager): close and cache external server probes --- aw_qt/manager.py | 33 +++++++++++++++++++++++++++++---- tests/test_manager.py | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 5 deletions(-) diff --git a/aw_qt/manager.py b/aw_qt/manager.py index 192d5a4..d4bffdd 100644 --- a/aw_qt/manager.py +++ b/aw_qt/manager.py @@ -7,7 +7,7 @@ import urllib.request from pathlib import Path from glob import glob -from time import sleep +from time import monotonic, sleep from typing import Optional, List, Hashable, Set, Iterable import aw_core @@ -152,6 +152,8 @@ def __init__(self, name: str, path: Path, type: str) -> None: self._last_process: Optional[subprocess.Popen[str]] = None self._external_server: bool = False # True if we detected an already-running server self._external_server_testing: bool = False + self._external_server_probe_cache: Optional[bool] = None + self._external_server_probe_cache_at: float = 0.0 def __hash__(self) -> int: return hash((self.name, self.path)) @@ -178,10 +180,25 @@ def _probe_external_server(self, testing: bool) -> bool: return False try: - urllib.request.urlopen(f"http://localhost:{port}/api/0/info", timeout=0.2) + with urllib.request.urlopen( + f"http://localhost:{port}/api/0/info", timeout=0.2 + ): + return True except (urllib.error.URLError, OSError): return False - return True + + def _probe_external_server_cached(self, testing: bool, max_age: float = 1.0) -> bool: + now = monotonic() + if ( + self._external_server_probe_cache is not None + and now - self._external_server_probe_cache_at < max_age + ): + return self._external_server_probe_cache + + alive = self._probe_external_server(testing) + self._external_server_probe_cache = alive + self._external_server_probe_cache_at = now + return alive def start(self, testing: bool) -> None: logger.info(f"Starting module {self.name}") @@ -197,6 +214,8 @@ def start(self, testing: bool) -> None: ) self._external_server = True self._external_server_testing = testing + self._external_server_probe_cache = True + self._external_server_probe_cache_at = monotonic() self.started = True return @@ -241,6 +260,8 @@ def stop(self) -> None: ) self._external_server = False self._external_server_testing = False + self._external_server_probe_cache = None + self._external_server_probe_cache_at = 0.0 self.started = False return elif not self.is_alive(): @@ -273,10 +294,14 @@ def toggle(self, testing: bool) -> None: def is_alive(self) -> bool: if self._external_server: # We don't own this process, so re-probe the server instead. - alive = self._probe_external_server(self._external_server_testing) + # Cache short-lived probe results to avoid blocking repeated UI poll + # cycles on the main thread when multiple checks happen close together. + alive = self._probe_external_server_cached(self._external_server_testing) if not alive: self._external_server = False self._external_server_testing = False + self._external_server_probe_cache = None + self._external_server_probe_cache_at = 0.0 return alive if self._process is None: return False diff --git a/tests/test_manager.py b/tests/test_manager.py index e3f7dd1..d5a42c7 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -88,6 +88,42 @@ def test_aw_server_rust_uses_rust_server_port(self): ): assert mod._get_server_port(testing=False) == 6601 + def test_probe_external_server_closes_response(self): + mod = Module("aw-server", Path("/usr/bin/aw-server"), "system") + response = MagicMock() + + with ( + patch.object(mod, "_get_server_port", return_value=5600), + patch("urllib.request.urlopen", return_value=response) as urlopen, + ): + assert mod._probe_external_server(testing=False) is True + + urlopen.assert_called_once_with("http://localhost:5600/api/0/info", timeout=0.2) + response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once() + + def test_probe_external_server_cached_reuses_recent_result(self): + mod = Module("aw-server", Path("/usr/bin/aw-server"), "system") + + with patch.object(mod, "_probe_external_server", return_value=True) as probe: + assert mod._probe_external_server_cached(testing=True, max_age=1.0) is True + assert mod._probe_external_server_cached(testing=True, max_age=1.0) is True + + assert probe.call_count == 1 + + def test_probe_external_server_cached_refreshes_after_ttl(self): + mod = Module("aw-server", Path("/usr/bin/aw-server"), "system") + + with ( + patch("aw_qt.manager.monotonic", side_effect=[10.0, 10.4, 11.5]), + patch.object(mod, "_probe_external_server", side_effect=[True, False]) as probe, + ): + assert mod._probe_external_server_cached(testing=True, max_age=1.0) is True + assert mod._probe_external_server_cached(testing=True, max_age=1.0) is True + assert mod._probe_external_server_cached(testing=True, max_age=1.0) is False + + assert probe.call_count == 2 + class TestModuleIsAlive: """Tests for Module.is_alive() behavior.""" @@ -112,7 +148,10 @@ def test_is_alive_reprobes_external_server(self): mod._external_server = True mod._external_server_testing = True - with patch.object(mod, "_probe_external_server", side_effect=[True, False]) as probe: + with ( + patch("aw_qt.manager.monotonic", side_effect=[10.0, 11.5]), + patch.object(mod, "_probe_external_server", side_effect=[True, False]) as probe, + ): assert mod.is_alive() assert mod.is_alive() is False assert mod._external_server is False From 19c6a98078bf891d1f8a21446322e3f2a232f734 Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 16 Mar 2026 13:05:13 +0000 Subject: [PATCH 4/5] fix(manager): separate startup probe timeout --- aw_qt/manager.py | 6 +++--- tests/test_manager.py | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/aw_qt/manager.py b/aw_qt/manager.py index d4bffdd..2cda3e1 100644 --- a/aw_qt/manager.py +++ b/aw_qt/manager.py @@ -174,14 +174,14 @@ def _get_server_port(self, testing: bool) -> Optional[int]: return _read_aw_server_port(testing) or (5666 if testing else 5600) return _read_server_rust_port(testing) or (5666 if testing else 5600) - def _probe_external_server(self, testing: bool) -> bool: + def _probe_external_server(self, testing: bool, timeout: float = 0.2) -> bool: port = self._get_server_port(testing) if port is None: return False try: with urllib.request.urlopen( - f"http://localhost:{port}/api/0/info", timeout=0.2 + f"http://localhost:{port}/api/0/info", timeout=timeout ): return True except (urllib.error.URLError, OSError): @@ -206,7 +206,7 @@ def start(self, testing: bool) -> None: # For server modules, check if a server is already running before attempting # to start one. This avoids port conflicts and the confusing "Restart" requirement # when aw-server is managed externally (e.g. via systemd or Docker). - if self._probe_external_server(testing): + if self._probe_external_server(testing, timeout=1.0): port = self._get_server_port(testing) logger.info( f"{self.name}: server already running on port {port}, " diff --git a/tests/test_manager.py b/tests/test_manager.py index d5a42c7..cc88fc6 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -102,6 +102,18 @@ def test_probe_external_server_closes_response(self): response.__enter__.assert_called_once_with() response.__exit__.assert_called_once() + def test_probe_external_server_allows_custom_timeout(self): + mod = Module("aw-server", Path("/usr/bin/aw-server"), "system") + response = MagicMock() + + with ( + patch.object(mod, "_get_server_port", return_value=5600), + patch("urllib.request.urlopen", return_value=response) as urlopen, + ): + assert mod._probe_external_server(testing=False, timeout=1.0) is True + + urlopen.assert_called_once_with("http://localhost:5600/api/0/info", timeout=1.0) + def test_probe_external_server_cached_reuses_recent_result(self): mod = Module("aw-server", Path("/usr/bin/aw-server"), "system") @@ -125,6 +137,21 @@ def test_probe_external_server_cached_refreshes_after_ttl(self): assert probe.call_count == 2 +class TestModuleStart: + def test_start_uses_longer_timeout_for_external_server_probe(self): + mod = Module("aw-server", Path("/usr/bin/aw-server"), "system") + + with ( + patch.object(mod, "_probe_external_server", return_value=True) as probe, + patch.object(mod, "_get_server_port", return_value=5600), + ): + mod.start(testing=False) + + probe.assert_called_once_with(False, timeout=1.0) + assert mod.started is True + assert mod._external_server is True + + class TestModuleIsAlive: """Tests for Module.is_alive() behavior.""" From 3abf6e5b89321bf532a1ddd6603ae1ea56134681 Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 16 Mar 2026 13:37:36 +0000 Subject: [PATCH 5/5] test(manager): cover external-server stop and deterministic cache timing --- tests/test_manager.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/tests/test_manager.py b/tests/test_manager.py index cc88fc6..83506d2 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -1,5 +1,6 @@ """Unit tests for the Module manager.""" +import os import subprocess from pathlib import Path from unittest.mock import MagicMock, patch @@ -117,7 +118,10 @@ def test_probe_external_server_allows_custom_timeout(self): def test_probe_external_server_cached_reuses_recent_result(self): mod = Module("aw-server", Path("/usr/bin/aw-server"), "system") - with patch.object(mod, "_probe_external_server", return_value=True) as probe: + with ( + patch("aw_qt.manager.monotonic", side_effect=[10.0, 10.4]), + patch.object(mod, "_probe_external_server", return_value=True) as probe, + ): assert mod._probe_external_server_cached(testing=True, max_age=1.0) is True assert mod._probe_external_server_cached(testing=True, max_age=1.0) is True @@ -174,6 +178,7 @@ def test_is_alive_reprobes_external_server(self): mod = Module("aw-server", Path("/usr/bin/aw-server"), "system") mod._external_server = True mod._external_server_testing = True + mod.started = True with ( patch("aw_qt.manager.monotonic", side_effect=[10.0, 11.5]), @@ -181,12 +186,36 @@ def test_is_alive_reprobes_external_server(self): ): assert mod.is_alive() assert mod.is_alive() is False + assert mod.started is True assert mod._external_server is False assert mod._external_server_testing is False assert probe.call_args_list[0].args == (True,) assert probe.call_args_list[1].args == (True,) + def test_stop_external_server_resets_state_without_terminating_process(self): + mod = Module("aw-server", Path("/usr/bin/aw-server"), "system") + mod.started = True + mod._external_server = True + mod._external_server_testing = True + mock_proc = MagicMock(spec=subprocess.Popen) + mod._process = mock_proc + mod._last_process = None + mod._external_server_probe_cache = True + mod._external_server_probe_cache_at = 10.0 + + mod.stop() + + mock_proc.terminate.assert_not_called() + mock_proc.wait.assert_not_called() + assert mod.started is False + assert mod._external_server is False + assert mod._external_server_testing is False + assert mod._external_server_probe_cache is None + assert mod._external_server_probe_cache_at == 0.0 + assert mod._last_process is None + assert mod._process is mock_proc + class TestGetUnexpectedStops: """Tests for Manager.get_unexpected_stops()."""