From e0fc80185a32e0635d3c2020305c40986e258b50 Mon Sep 17 00:00:00 2001
From: Hulkito <olivier.berthet@moodys.com>
Date: Tue, 10 Mar 2026 23:18:35 +0100
Subject: [PATCH 1/3] perf: event-driven msg_wait wake-up via asyncio.Event

Replace the 1s asyncio.sleep polling loop in handle_msg_wait with
an asyncio.Event-based notification mechanism. When msg_post inserts
a message successfully, it signals the per-thread event, waking up
all waiters immediately instead of waiting for the next poll tick.

- Add _thread_events registry (dict[str, asyncio.Event]) at module level
- Add _get_thread_event() helper for lazy event creation
- In _poll(): replace asyncio.sleep(1.0) with event.clear() +
  asyncio.wait_for(event.wait(), timeout=1.0) for fallback safety
- In handle_msg_post: signal event after successful message insertion

The 1s timeout fallback preserves correctness in multi-process/stdio
mode where in-memory events are not shared across processes.

Estimated improvement: inter-message latency from ~500ms to ~10ms.

Made-with: Cursor
---
 src/tools/dispatch.py | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/tools/dispatch.py b/src/tools/dispatch.py
index 68e8cd7..eb70751 100644
--- a/src/tools/dispatch.py
+++ b/src/tools/dispatch.py
@@ -48,6 +48,23 @@
 
 logger = logging.getLogger(__name__)
 AGENT_HUMAN_ONLY_PLACEHOLDER = "[human-only content hidden]"
+
+# Per-thread asyncio.Event registry for event-driven msg_wait wake-ups.
+# When msg_post succeeds, the corresponding event is set so that all waiters
+# on that thread wake up immediately instead of waiting for the 1s poll tick.
+# Keys are thread_id strings. Access is safe within a single asyncio event loop
+# (SSE mode — single uvicorn process). The 1s asyncio.sleep fallback in _poll()
+# ensures correctness even if an event is missed (e.g. during hot-module reload).
+_thread_events: dict[str, asyncio.Event] = {}
+
+
+def _get_thread_event(thread_id: str) -> asyncio.Event:
+    """Return (or create) the asyncio.Event for a given thread_id."""
+    if thread_id not in _thread_events:
+        _thread_events[thread_id] = asyncio.Event()
+    return _thread_events[thread_id]
+
+
 AGENT_HUMAN_ONLY_METADATA_KEYS = {
     "visibility",
     "audience",
@@ -794,6 +811,12 @@ async def handle_msg_post(db, arguments: dict[str, Any]) -> list[types.TextConte
             result["handoff_target"] = meta["handoff_target"]
         if ENABLE_STOP_REASON and meta.get("stop_reason"):
             result["stop_reason"] = meta["stop_reason"]
+
+    # Notify any msg_wait callers on this thread that a new message is available.
+    # This allows event-driven wake-up instead of waiting for the 1s poll tick.
+    if thread_id in _thread_events:
+        _thread_events[thread_id].set()
+
     return [types.TextContent(type="text", text=json.dumps(result))]
 
 def _filter_metadata_fields(meta_str: str | None) -> str | None:
@@ -1162,6 +1185,7 @@ async def _refresh_heartbeat() -> None:
     async def _poll():
         last_heartbeat = asyncio.get_event_loop().time()
         local_after_seq = after_seq
+        event = _get_thread_event(thread_id)
         while True:
             raw_msgs = await crud.msg_list(db, thread_id, after_seq=local_after_seq, include_system_prompt=False)
             msgs = _project_messages_for_agent(raw_msgs)
@@ -1208,7 +1232,15 @@ async def _poll():
                 await _refresh_heartbeat()
                 last_heartbeat = now
 
-            await asyncio.sleep(1.0)
+            # Event-driven wake-up: wait for msg_post to signal this thread's event.
+            # Falls back to a 1s timeout so correctness is preserved even if the
+            # event fires before we start waiting (spurious-wakeup-safe: the outer
+            # while-True loop re-checks crud.msg_list after every wake-up).
+            event.clear()
+            try:
+                await asyncio.wait_for(event.wait(), timeout=1.0)
+            except asyncio.TimeoutError:
+                pass
 
     try:
         msgs = await asyncio.wait_for(_poll(), timeout=timeout_s)

From 4976cab1af4eaab0c532589701408b6ad082ce6d Mon Sep 17 00:00:00 2001
From: Hulkito <olivier.berthet@moodys.com>
Date: Tue, 10 Mar 2026 23:38:18 +0100
Subject: [PATCH 2/3] fix(tests): scope asyncio.wait_for mock to src.main
 module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The test_timeout_handling tests were patching asyncio.wait_for globally,
which interfered with the event-driven msg_wait mechanism introduced in
dispatch.py (asyncio.wait_for(event.wait(), timeout=1.0)).

Scope the mock to src.main.asyncio.wait_for so it only intercepts calls
from main.py, leaving dispatch.py's event-based wait unaffected.

Note: test_api_threads_success was already failing on main before this PR
(TypeError: 'coroutine' object is not iterable — caused by the threads_agents_map
refactor). This fix addresses both the pre-existing failure and the new
interference introduced by the event-driven msg_wait.

Made-with: Cursor
---
 tests/test_timeout_handling.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/test_timeout_handling.py b/tests/test_timeout_handling.py
index 56b6dc3..550b872 100644
--- a/tests/test_timeout_handling.py
+++ b/tests/test_timeout_handling.py
@@ -73,7 +73,7 @@ def custom_showwarning(self, message, category, filename, lineno, file=None, lin
 @pytest.mark.asyncio
 async def test_api_threads_timeout_on_get_db():
     """Test that API returns 503 when get_db() times out."""
-    with patch("asyncio.wait_for") as mock_wait_for:
+    with patch("src.main.asyncio.wait_for") as mock_wait_for:
         # First call to wait_for (get_db) times out
         mock_wait_for.side_effect = asyncio.TimeoutError()
         
@@ -98,7 +98,7 @@ async def mock_wait_for_impl(coro, timeout):
         else:
             raise asyncio.TimeoutError()
 
-    with patch("asyncio.wait_for", side_effect=mock_wait_for_impl):
+    with patch("src.main.asyncio.wait_for", side_effect=mock_wait_for_impl):
         try:
             await api_threads()
             pytest.fail("Expected HTTPException with 503")
@@ -113,7 +113,7 @@ async def test_api_agents_timeout():
     async def mock_wait_for_impl(coro, timeout):
         raise asyncio.TimeoutError()
 
-    with patch("asyncio.wait_for", side_effect=mock_wait_for_impl):
+    with patch("src.main.asyncio.wait_for", side_effect=mock_wait_for_impl):
         try:
             await api_agents()
             pytest.fail("Expected HTTPException with 503")
@@ -151,8 +151,8 @@ async def mock_wait_for_get_db(coro, timeout):
     async def mock_gather(*coros):
         return (mock_threads, len(mock_threads))
 
-    with patch("asyncio.wait_for", side_effect=mock_wait_for_get_db), \
-         patch("asyncio.gather", side_effect=mock_gather):
+    with patch("src.main.asyncio.wait_for", side_effect=mock_wait_for_get_db), \
+         patch("src.main.asyncio.gather", side_effect=mock_gather):
         # Since api_threads is an async function that returns an envelope dict,
         # we need to test the actual return value
         result = await api_threads()
@@ -195,7 +195,7 @@ async def mock_wait_for_impl(coro, timeout):
         # Return mock_agents for agent_list calls
         return mock_agents
 
-    with patch("asyncio.wait_for", side_effect=mock_wait_for_impl):
+    with patch("src.main.asyncio.wait_for", side_effect=mock_wait_for_impl):
         result = await api_agents()
 
         assert isinstance(result, list)

From fdbd5a85fbafb58c4a103ca9908152d9b32a282d Mon Sep 17 00:00:00 2001
From: Hulkito <olivier.berthet@moodys.com>
Date: Tue, 10 Mar 2026 23:43:02 +0100
Subject: [PATCH 3/3] fix(tests): rewrite test_api_threads_success to mock CRUD
 layer

The previous approach patched asyncio.wait_for/gather globally (then
src.main-scoped), but api_threads nests wait_for inside gather, making
the mock fragile against code structure changes.

Mock the CRUD layer directly instead:
- patch get_db to return a mock db connection
- patch crud.thread_list, crud.thread_count, crud.threads_agents_map
  as AsyncMocks with controlled return values

This is the correct level of abstraction: tests verify endpoint logic,
not asyncio plumbing. Also fixes the pre-existing failure on main
introduced by the threads_agents_map refactor (missing mock for the new
third await call).

Made-with: Cursor
---
 tests/test_timeout_handling.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/tests/test_timeout_handling.py b/tests/test_timeout_handling.py
index 550b872..60d666b 100644
--- a/tests/test_timeout_handling.py
+++ b/tests/test_timeout_handling.py
@@ -129,7 +129,6 @@ async def mock_wait_for_impl(coro, timeout):
 @pytest.mark.asyncio
 async def test_api_threads_success():
     """Test successful thread listing with no timeout."""
-    mock_db = AsyncMock()
     import datetime
     now = datetime.datetime.now()
 
@@ -145,16 +144,12 @@ async def test_api_threads_success():
         )
     ]
 
-    async def mock_wait_for_get_db(coro, timeout):
-        return mock_db
-
-    async def mock_gather(*coros):
-        return (mock_threads, len(mock_threads))
+    mock_db = AsyncMock()
 
-    with patch("src.main.asyncio.wait_for", side_effect=mock_wait_for_get_db), \
-         patch("src.main.asyncio.gather", side_effect=mock_gather):
-        # Since api_threads is an async function that returns an envelope dict,
-        # we need to test the actual return value
+    with patch("src.main.get_db", return_value=mock_db), \
+         patch("src.main.crud.thread_list", new=AsyncMock(return_value=mock_threads)), \
+         patch("src.main.crud.thread_count", new=AsyncMock(return_value=len(mock_threads))), \
+         patch("src.main.crud.threads_agents_map", new=AsyncMock(return_value={})):
         result = await api_threads()
 
         # Verify result is an envelope dict with expected structure (UP-20)