From 214c0b4b34bb228f98f71675eb61ae9bc75af9f1 Mon Sep 17 00:00:00 2001 From: Hulkito Date: Tue, 10 Mar 2026 23:23:25 +0100 Subject: [PATCH 1/3] perf: increase heartbeat interval for LLM workloads Increase HEARTBEAT_INTERVAL from 20s to 40s and AGENT_HEARTBEAT_TIMEOUT from 30s to 60s. For LLM-based agents, response times regularly exceed 20s, causing unnecessary heartbeat DB writes every poll tick. The 20s interval was shorter than needed for typical AI agent interactions. Both values are updated together to avoid marking agents offline between heartbeats (40s > 30s old timeout would be a regression). Made-with: Cursor --- src/config.py | 2 +- src/tools/dispatch.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/config.py b/src/config.py index fb0da3e..6f547fa 100644 --- a/src/config.py +++ b/src/config.py @@ -34,7 +34,7 @@ PORT = int(os.getenv("AGENTCHATBUS_PORT", config_data.get("PORT", "39765"))) # Agent heartbeat timeout (seconds). Agents missing this window are marked offline. -AGENT_HEARTBEAT_TIMEOUT = int(os.getenv("AGENTCHATBUS_HEARTBEAT_TIMEOUT", config_data.get("AGENT_HEARTBEAT_TIMEOUT", "30"))) +AGENT_HEARTBEAT_TIMEOUT = int(os.getenv("AGENTCHATBUS_HEARTBEAT_TIMEOUT", config_data.get("AGENT_HEARTBEAT_TIMEOUT", "60"))) # SSE long-poll timeout for msg.wait (seconds) MSG_WAIT_TIMEOUT = int(os.getenv("AGENTCHATBUS_WAIT_TIMEOUT", config_data.get("MSG_WAIT_TIMEOUT", "300"))) diff --git a/src/tools/dispatch.py b/src/tools/dispatch.py index 68e8cd7..1b08754 100644 --- a/src/tools/dispatch.py +++ b/src/tools/dispatch.py @@ -1122,8 +1122,10 @@ async def handle_msg_wait(db, arguments: dict[str, Any]) -> list[types.Content]: ) # ───────────────────────────────────────────────────────────────────────── - # Refresh every 20 seconds to stay online during long-poll waits. - HEARTBEAT_INTERVAL = 20.0 + # Refresh every 40 seconds to stay online during long-poll waits. + # LLM-based agents regularly take >20s to respond; 20s was too aggressive + # and caused unnecessary heartbeat DB writes. Paired with AGENT_HEARTBEAT_TIMEOUT=60s. + HEARTBEAT_INTERVAL = 40.0 async def _refresh_heartbeat() -> None: if verified_agent: From 18f267be4954c4d8c7b8e53aa4cd07f56bb63570 Mon Sep 17 00:00:00 2001 From: Hulkito Date: Tue, 10 Mar 2026 23:38:18 +0100 Subject: [PATCH 2/3] fix(tests): scope asyncio.wait_for mock to src.main module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test_timeout_handling tests were patching asyncio.wait_for globally, which interfered with the event-driven msg_wait mechanism introduced in dispatch.py (asyncio.wait_for(event.wait(), timeout=1.0)). Scope the mock to src.main.asyncio.wait_for so it only intercepts calls from main.py, leaving dispatch.py's event-based wait unaffected. Note: test_api_threads_success was already failing on main before this PR (TypeError: 'coroutine' object is not iterable — caused by the threads_agents_map refactor). This fix addresses both the pre-existing failure and the new interference introduced by the event-driven msg_wait. Made-with: Cursor --- tests/test_timeout_handling.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_timeout_handling.py b/tests/test_timeout_handling.py index 56b6dc3..550b872 100644 --- a/tests/test_timeout_handling.py +++ b/tests/test_timeout_handling.py @@ -73,7 +73,7 @@ def custom_showwarning(self, message, category, filename, lineno, file=None, lin @pytest.mark.asyncio async def test_api_threads_timeout_on_get_db(): """Test that API returns 503 when get_db() times out.""" - with patch("asyncio.wait_for") as mock_wait_for: + with patch("src.main.asyncio.wait_for") as mock_wait_for: # First call to wait_for (get_db) times out mock_wait_for.side_effect = asyncio.TimeoutError() @@ -98,7 +98,7 @@ async def mock_wait_for_impl(coro, timeout): else: raise asyncio.TimeoutError() - with patch("asyncio.wait_for", side_effect=mock_wait_for_impl): + with patch("src.main.asyncio.wait_for", side_effect=mock_wait_for_impl): try: await api_threads() pytest.fail("Expected HTTPException with 503") @@ -113,7 +113,7 @@ async def test_api_agents_timeout(): async def mock_wait_for_impl(coro, timeout): raise asyncio.TimeoutError() - with patch("asyncio.wait_for", side_effect=mock_wait_for_impl): + with patch("src.main.asyncio.wait_for", side_effect=mock_wait_for_impl): try: await api_agents() pytest.fail("Expected HTTPException with 503") @@ -151,8 +151,8 @@ async def mock_wait_for_get_db(coro, timeout): async def mock_gather(*coros): return (mock_threads, len(mock_threads)) - with patch("asyncio.wait_for", side_effect=mock_wait_for_get_db), \ - patch("asyncio.gather", side_effect=mock_gather): + with patch("src.main.asyncio.wait_for", side_effect=mock_wait_for_get_db), \ + patch("src.main.asyncio.gather", side_effect=mock_gather): # Since api_threads is an async function that returns an envelope dict, # we need to test the actual return value result = await api_threads() @@ -195,7 +195,7 @@ async def mock_wait_for_impl(coro, timeout): # Return mock_agents for agent_list calls return mock_agents - with patch("asyncio.wait_for", side_effect=mock_wait_for_impl): + with patch("src.main.asyncio.wait_for", side_effect=mock_wait_for_impl): result = await api_agents() assert isinstance(result, list) From e667f9464b99fbb0a5dffb29470f529149d894c2 Mon Sep 17 00:00:00 2001 From: Hulkito Date: Tue, 10 Mar 2026 23:43:02 +0100 Subject: [PATCH 3/3] fix(tests): rewrite test_api_threads_success to mock CRUD layer The previous approach patched asyncio.wait_for/gather globally (then src.main-scoped), but api_threads nests wait_for inside gather, making the mock fragile against code structure changes. Mock the CRUD layer directly instead: - patch get_db to return a mock db connection - patch crud.thread_list, crud.thread_count, crud.threads_agents_map as AsyncMocks with controlled return values This is the correct level of abstraction: tests verify endpoint logic, not asyncio plumbing. Also fixes the pre-existing failure on main introduced by the threads_agents_map refactor (missing mock for the new third await call). Made-with: Cursor --- tests/test_timeout_handling.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tests/test_timeout_handling.py b/tests/test_timeout_handling.py index 550b872..60d666b 100644 --- a/tests/test_timeout_handling.py +++ b/tests/test_timeout_handling.py @@ -129,7 +129,6 @@ async def mock_wait_for_impl(coro, timeout): @pytest.mark.asyncio async def test_api_threads_success(): """Test successful thread listing with no timeout.""" - mock_db = AsyncMock() import datetime now = datetime.datetime.now() @@ -145,16 +144,12 @@ async def test_api_threads_success(): ) ] - async def mock_wait_for_get_db(coro, timeout): - return mock_db - - async def mock_gather(*coros): - return (mock_threads, len(mock_threads)) + mock_db = AsyncMock() - with patch("src.main.asyncio.wait_for", side_effect=mock_wait_for_get_db), \ - patch("src.main.asyncio.gather", side_effect=mock_gather): - # Since api_threads is an async function that returns an envelope dict, - # we need to test the actual return value + with patch("src.main.get_db", return_value=mock_db), \ + patch("src.main.crud.thread_list", new=AsyncMock(return_value=mock_threads)), \ + patch("src.main.crud.thread_count", new=AsyncMock(return_value=len(mock_threads))), \ + patch("src.main.crud.threads_agents_map", new=AsyncMock(return_value={})): result = await api_threads() # Verify result is an envelope dict with expected structure (UP-20)