From 7dded37b2781f45901e08622672d4b8d209b371f Mon Sep 17 00:00:00 2001 From: Jason Tung Date: Sun, 24 May 2026 05:45:08 -0400 Subject: [PATCH 1/8] fix: harden against subagent-triggered crash cascade Cursor subagents are short-lived processes that disconnect mid-stream as normal lifecycle behaviour. The rapid churn of broken-pipe events was destabilising the proxy, causing a cascade of crashes and failed restarts. Changes: - server.py: add SO_REUSEPORT (with graceful fallback) so a freshly-started proxy can bind even while a dying predecessor holds the socket in CLOSE_WAIT - server.py: downgrade client-disconnect log messages from WARNING to INFO; broken pipes are expected noise with subagents, not errors - server.py: abort upstream socket drain immediately on client disconnect instead of continuing to read DeepSeek tokens that will never be delivered; also skip the final reasoning-cache write when there is nothing to cache - reasoning_store.py: add PRAGMA busy_timeout=5000 so SQLite waits up to 5 s for a concurrent write lock instead of immediately raising OperationalError("database is locked") - reasoning_store.py: enable WAL journal mode for better concurrent read/write throughput under parallel subagent requests - reasoning_store.py: retry startup prune up to 3 times with 0.5 s wait; degrade gracefully (log warning, skip prune) rather than crashing if the DB is still locked after retries - tests/test_server.py: update two tests that asserted WARNING-level logs for client-disconnect events to assert INFO-level instead Co-authored-by: Cursor --- src/deepseek_cursor_proxy/reasoning_store.py | 38 +++++++++++- src/deepseek_cursor_proxy/server.py | 65 ++++++++++++++------ tests/test_server.py | 4 +- 3 files changed, 86 insertions(+), 21 deletions(-) diff --git a/src/deepseek_cursor_proxy/reasoning_store.py b/src/deepseek_cursor_proxy/reasoning_store.py index 58ca933..e27f851 100644 --- a/src/deepseek_cursor_proxy/reasoning_store.py +++ b/src/deepseek_cursor_proxy/reasoning_store.py @@ -177,6 +177,10 @@ def portable_reasoning_keys( return keys +_STARTUP_PRUNE_BUSY_RETRIES = 3 +_STARTUP_PRUNE_BUSY_WAIT = 0.5 # seconds between retries + + class ReasoningStore: def __init__( self, @@ -197,6 +201,15 @@ def __init__( self._conn = sqlite3.connect( self.reasoning_content_path, check_same_thread=False ) + # busy_timeout lets SQLite wait up to 5 s for a concurrent writer to + # finish instead of immediately raising OperationalError("database is + # locked"). This prevents crash-restart races when multiple proxy + # instances start at nearly the same time (e.g. several terminals open + # in quick succession or after a subagent-triggered crash). + self._conn.execute("PRAGMA busy_timeout = 5000") + # WAL mode allows one writer and multiple concurrent readers without + # locking, which is far better for the threading model of this proxy. + self._conn.execute("PRAGMA journal_mode = WAL") if isinstance(self.reasoning_content_path, Path): self.reasoning_content_path.chmod(0o600) self._conn.execute( @@ -210,7 +223,30 @@ def __init__( """ ) self._conn.commit() - self.prune() + self._startup_prune() + + def _startup_prune(self) -> None: + """Prune on startup with retry + graceful degradation. + + Another proxy instance starting simultaneously can hold a write lock + for a short window. busy_timeout handles most of this, but if the + lock persists longer than busy_timeout we catch the error and skip + the prune rather than crashing — stale rows left behind are harmless + and will be cleaned up on the next successful prune. + """ + for attempt in range(_STARTUP_PRUNE_BUSY_RETRIES): + try: + self.prune() + return + except sqlite3.OperationalError as exc: + if "locked" not in str(exc).lower(): + raise + if attempt < _STARTUP_PRUNE_BUSY_RETRIES - 1: + time.sleep(_STARTUP_PRUNE_BUSY_WAIT) + import logging as _logging + _logging.getLogger("deepseek_cursor_proxy").warning( + "reasoning store: skipped startup prune (database is locked)" + ) def close(self) -> None: with self._lock: diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py index 0202062..82e110a 100644 --- a/src/deepseek_cursor_proxy/server.py +++ b/src/deepseek_cursor_proxy/server.py @@ -7,6 +7,7 @@ import json from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer from pathlib import Path +import socket import sys import time from typing import Any @@ -51,6 +52,19 @@ class DeepSeekProxyServer(ThreadingHTTPServer): reasoning_store: ReasoningStore trace_writer: TraceWriter | None + def server_bind(self) -> None: + # SO_REUSEADDR is already set by HTTPServer.allow_reuse_address=True, but + # SO_REUSEPORT (Linux/BSD) lets a freshly-started proxy bind immediately even + # while a dying predecessor still holds the socket in CLOSE_WAIT, which is the + # most common cause of the "Address already in use" cascade after a crash. + try: + self.socket.setsockopt( + socket.SOL_SOCKET, socket.SO_REUSEPORT, 1 # type: ignore[attr-defined] + ) + except AttributeError: + pass # Windows / older BSD — graceful degradation + super().server_bind() + class DeepSeekProxyHandler(BaseHTTPRequestHandler): server_version = "DeepSeekPythonProxy/0.1" @@ -426,7 +440,8 @@ def _send_response_headers( self.send_header(name, value) self.end_headers() except (BrokenPipeError, ConnectionError) as exc: - LOG.warning("client disconnected while %s: %s", disconnect_context, exc) + # Cursor subagents are short-lived; mid-stream disconnects are normal. + LOG.info("client disconnected while %s: %s", disconnect_context, exc) return False return True @@ -442,7 +457,8 @@ def _write_to_client( if flush: self.wfile.flush() except (BrokenPipeError, ConnectionError) as exc: - LOG.warning("client disconnected while %s: %s", disconnect_context, exc) + # Cursor subagents are short-lived; mid-stream disconnects are normal. + LOG.info("client disconnected while %s: %s", disconnect_context, exc) return False return True @@ -693,6 +709,7 @@ def _proxy_streaming_response( else [(scope, response_prior_messages)] ) finalized = False + client_disconnected = False pending_recovery_notice = recovery_notice try: while True: @@ -725,33 +742,45 @@ def _proxy_streaming_response( if not self._write_to_client( rewritten, "sending streaming response chunk", flush=True ): + # Client disconnected (expected for short-lived subagents). + # Stop draining the upstream socket immediately — there is no + # point in reading more DeepSeek tokens that nobody will receive. + client_disconnected = True return ProxyResponseResult(False, usage) if finalized: break finally: - # Store partial reasoning whenever the stream exits without - # the upstream's [DONE] terminator (client disconnect, upstream - # read failure, exception). Without this, a Stop pressed mid-stream + # Store partial reasoning whenever the stream exits without the + # upstream's [DONE] terminator (client disconnect, upstream read + # failure, exception). Without this, a Stop pressed mid-stream # would discard any reasoning the proxy received but never cached. + # Skip the store when the client disconnected early AND the + # accumulator has nothing worth caching (no reasoning content yet) + # to avoid touching the DB unnecessarily on subagent churn. if not finalized: if self.config.verbose: log_json( "model streaming assistant messages", accumulator.messages() ) - stored = sum( - accumulator.store_reasoning( - self.reasoning_store, - ctx_scope, - cache_namespace, - prior_messages, - ) - for ctx_scope, prior_messages in response_contexts - ) - if self.config.verbose and stored: - LOG.info( - "stored %s streaming reasoning cache key(s) before exit", - stored, + messages_with_reasoning = [ + m for m in accumulator.messages() + if m.get("reasoning_content") + ] + if not client_disconnected or messages_with_reasoning: + stored = sum( + accumulator.store_reasoning( + self.reasoning_store, + ctx_scope, + cache_namespace, + prior_messages, + ) + for ctx_scope, prior_messages in response_contexts ) + if self.config.verbose and stored: + LOG.info( + "stored %s streaming reasoning cache key(s) before exit", + stored, + ) return ProxyResponseResult(True, usage) def _rewrite_sse_line( diff --git a/tests/test_server.py b/tests/test_server.py index db89f0f..cff056c 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -264,7 +264,7 @@ def test_regular_response_handles_client_disconnect(self) -> None: } ).encode("utf-8") try: - with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured: + with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured: result = handler._proxy_regular_response( _FakeResponse(body), "deepseek-v4-pro", @@ -290,7 +290,7 @@ def test_streaming_response_stops_on_client_disconnect(self) -> None: ] ) try: - with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured: + with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured: result = handler._proxy_streaming_response( response, "deepseek-v4-pro", From 9195fa9e2e28074f3240d255ce2d9dc38b49c690 Mon Sep 17 00:00:00 2001 From: Jason Tung Date: Sun, 24 May 2026 05:49:34 -0400 Subject: [PATCH 2/8] fix: remove SO_REUSEPORT (wrong approach, requires both sockets to set it) Co-authored-by: Cursor --- src/deepseek_cursor_proxy/server.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py index 82e110a..6fe31da 100644 --- a/src/deepseek_cursor_proxy/server.py +++ b/src/deepseek_cursor_proxy/server.py @@ -7,7 +7,6 @@ import json from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer from pathlib import Path -import socket import sys import time from typing import Any @@ -52,18 +51,6 @@ class DeepSeekProxyServer(ThreadingHTTPServer): reasoning_store: ReasoningStore trace_writer: TraceWriter | None - def server_bind(self) -> None: - # SO_REUSEADDR is already set by HTTPServer.allow_reuse_address=True, but - # SO_REUSEPORT (Linux/BSD) lets a freshly-started proxy bind immediately even - # while a dying predecessor still holds the socket in CLOSE_WAIT, which is the - # most common cause of the "Address already in use" cascade after a crash. - try: - self.socket.setsockopt( - socket.SOL_SOCKET, socket.SO_REUSEPORT, 1 # type: ignore[attr-defined] - ) - except AttributeError: - pass # Windows / older BSD — graceful degradation - super().server_bind() class DeepSeekProxyHandler(BaseHTTPRequestHandler): From 9ff08150a4ce5afc9508a5d53be772ea72357487 Mon Sep 17 00:00:00 2001 From: Jason Tung Date: Sun, 24 May 2026 15:54:33 -0400 Subject: [PATCH 3/8] fix: apply black formatting --- src/deepseek_cursor_proxy/reasoning_store.py | 1 + src/deepseek_cursor_proxy/server.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/deepseek_cursor_proxy/reasoning_store.py b/src/deepseek_cursor_proxy/reasoning_store.py index e27f851..7f9e03e 100644 --- a/src/deepseek_cursor_proxy/reasoning_store.py +++ b/src/deepseek_cursor_proxy/reasoning_store.py @@ -244,6 +244,7 @@ def _startup_prune(self) -> None: if attempt < _STARTUP_PRUNE_BUSY_RETRIES - 1: time.sleep(_STARTUP_PRUNE_BUSY_WAIT) import logging as _logging + _logging.getLogger("deepseek_cursor_proxy").warning( "reasoning store: skipped startup prune (database is locked)" ) diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py index 6fe31da..c20012f 100644 --- a/src/deepseek_cursor_proxy/server.py +++ b/src/deepseek_cursor_proxy/server.py @@ -52,7 +52,6 @@ class DeepSeekProxyServer(ThreadingHTTPServer): trace_writer: TraceWriter | None - class DeepSeekProxyHandler(BaseHTTPRequestHandler): server_version = "DeepSeekPythonProxy/0.1" @@ -750,8 +749,7 @@ def _proxy_streaming_response( "model streaming assistant messages", accumulator.messages() ) messages_with_reasoning = [ - m for m in accumulator.messages() - if m.get("reasoning_content") + m for m in accumulator.messages() if m.get("reasoning_content") ] if not client_disconnected or messages_with_reasoning: stored = sum( From 24f74b3d1d4be63c41b3da80b0796fef3e63986e Mon Sep 17 00:00:00 2001 From: Jason Tung Date: Sun, 24 May 2026 17:01:29 -0400 Subject: [PATCH 4/8] perf: eliminate read/write lock contention under concurrent subagent load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The core issue causing multi-second freezes was that every streaming write called _prune_locked() — a full-table O(N log N) sort — while holding the only lock in the store, blocking all concurrent reads. Changes: - Thread-local read connections: each handler thread gets its own SQLite connection; WAL mode handles file-level concurrency so reads never wait on writes and vice versa. - created_at index: turns age and row-count prune queries from O(N log N) to O(log N) / O(max_rows). - Throttled row-count prune: skip the expensive NOT-IN subquery when the DB is well under capacity; always prune when over the limit. - SQLite pragma tuning: 32 MiB write cache, 8 MiB per read connection, synchronous=NORMAL (safe under WAL). - _proxy_already_running() health check in main(): new startup attempts exit cleanly if the proxy is already serving, eliminating the "Address already in use" cascade. - autostart.bash rewritten as a status-check-only script: the proxy is started once manually; agents never trigger auto-start logic. - CONCURRENT_DESIGN.md: full architecture + root-cause + fix narrative. All 96 tests pass. Co-authored-by: Cursor --- CONCURRENT_DESIGN.md | 248 +++++++++++++++++++ src/deepseek_cursor_proxy/reasoning_store.py | 199 ++++++++++++--- src/deepseek_cursor_proxy/server.py | 35 +++ 3 files changed, 444 insertions(+), 38 deletions(-) create mode 100644 CONCURRENT_DESIGN.md diff --git a/CONCURRENT_DESIGN.md b/CONCURRENT_DESIGN.md new file mode 100644 index 0000000..90c8b02 --- /dev/null +++ b/CONCURRENT_DESIGN.md @@ -0,0 +1,248 @@ +# Concurrency Design & Performance Guide + +This document explains how the proxy handles concurrent requests, what went wrong +under multi-agent Cursor load, what the `fix/subagent-resilience` branch changed, +and what the performance optimisations added on top of that branch do. + +--- + +## 1. What the proxy does + +Cursor treats this proxy as a standard OpenAI-compatible API endpoint. The +proxy's unique job is to solve a **DeepSeek-specific API requirement**: when you +send a conversation history that contains an assistant turn with tool calls, +DeepSeek's API requires that turn to also include the original `reasoning_content` +(chain-of-thought tokens) from when it was first generated. Cursor strips that +field before echoing the history back, so the proxy must: + +1. **Cache** every `reasoning_content` it receives from DeepSeek into SQLite. +2. **Restore** cached reasoning into outgoing messages before forwarding to DeepSeek. + +Everything else — CORS handling, model name rewriting, ngrok tunnelling, recovery +notices — is scaffolding around that core loop. + +### Per-request flow + +``` +Cursor IDE + │ + │ POST /v1/chat/completions (OpenAI-compatible) + ▼ +DeepSeekProxyHandler.do_POST() + │ + ├─ parse & validate request body + │ + ├─ prepare_upstream_request() + │ ├─ normalize messages (strip unknown fields, fix content types) + │ └─ for each assistant message with tool_calls: + │ └─ ReasoningStore.get() ← look up cached reasoning_content + │ + ├─ forward to DeepSeek API (urlopen, streaming or non-streaming) + │ + ├─ [streaming path] + │ ├─ for each SSE chunk: + │ │ ├─ rewrite model name, mirror reasoning into content display + │ │ └─ StreamAccumulator.store_ready_reasoning() + │ │ └─ ReasoningStore.put() ← cache reasoning when tool_calls complete + │ └─ ReasoningStore.put() ← cache on [DONE] or client disconnect + │ + └─ [non-streaming path] + └─ rewrite_response_body() → ReasoningStore.put() ← cache the response +``` + +--- + +## 2. Concurrency model + +The server is a Python `ThreadingHTTPServer`: each incoming connection spawns a +new OS thread. When Cursor runs multiple agents or subagents simultaneously, +several threads are active at once — each reading from the SQLite cache before +forwarding its request upstream, and each writing back after receiving a response. + +### The SQLite layer + +SQLite in its default (journal) mode allows only one writer at a time and blocks +readers during writes. This made the original code prone to lock contention under +parallel requests. + +--- + +## 3. Bug history + +### Bug 1 — Startup crash cascade (pre-branch) + +**Symptom:** Multiple "database is locked" tracebacks in `autostart.log`, followed +by repeated "Address already in use" errors. Agents froze because the proxy +crashed in a loop and never became available. + +**Root cause:** + +1. Every new Cursor terminal (including agent tool-call shells) sources `~/.bashrc`, + which runs the proxy autostart script. +2. The script uses `flock` + PID check to be idempotent, but has a startup window. +3. When several agents opened simultaneously, multiple proxy processes started. +4. Each called `ReasoningStore.__init__()` → `self.prune()`. +5. Without `WAL` mode or `busy_timeout`, the second process immediately received + `sqlite3.OperationalError: database is locked` and crashed. +6. The crash triggered another autostart. Loop. +7. Meanwhile the first healthy process held the port, so restarted processes hit + `OSError: [Errno 98] Address already in use` and crashed again. + +**Fix (in this branch):** + +- `PRAGMA journal_mode = WAL` — readers never block writers; writers never block + readers at the SQLite file level. +- `PRAGMA busy_timeout = 5000` — wait up to 5 s for a lock instead of crashing. +- `_startup_prune()` — retries + gracefully skips if still locked after retries. +- `_proxy_already_running()` health check in `main()` — if the proxy is already + responding on `/healthz`, a new startup attempt exits `0` immediately without + touching the port or the database. + +### Bug 2 — Multi-second freezes under concurrent agent load (pre- and post-branch) + +**Symptom:** Log lines showing 10+ seconds between "┌ cursor request" and +"├ context filled", followed by Cursor dropping the connection with a broken-pipe +warning. + +**Root cause:** + +The `ReasoningStore` used a single `threading.RLock` that serialised **all** +database operations — reads and writes — across all handler threads. The +`put()` method called `_prune_locked()` on **every single write**: + +```python +# OLD code — runs a full-table sort on every write +def _prune_locked(self): + self._conn.execute(""" + DELETE FROM reasoning_cache + WHERE key NOT IN ( + SELECT key FROM reasoning_cache + ORDER BY created_at DESC -- ← full-table sort, no index + LIMIT 100000 + ) + """) +``` + +With 55,925 rows and no index on `created_at`, this was an O(N log N) sort on +every write. While one thread held the lock for this sort: + +- Every other thread's `store.get()` call (which normalises 15–30 assistant + messages per request) queued behind the lock. +- Each blocked `get()` added to the "context filled" latency visible in the logs. + +**Fix (this PR):** — see Section 4. + +--- + +## 4. Performance optimisations + +### 4a — Thread-local read connections + +**Before:** One shared `sqlite3.Connection`, one `threading.RLock`. Reads blocked +on writes and vice versa. + +**After:** Two connection layers: + +| Layer | Object | Protected by | Used for | +|---|---|---|---| +| Write | `self._write_conn` | `self._write_lock` (plain `Lock`) | INSERT / UPDATE / DELETE / COMMIT | +| Read | `self._local.conn` (per-thread) | Nothing — WAL handles it | SELECT | + +In WAL mode SQLite guarantees that concurrent readers never see a partial write and +writers never wait for readers. Each request handler thread opens its own read +connection the first time it calls `get()`. Reads across multiple concurrent +requests never contend with each other or with the writer. + +The special case `":memory:"` (used in tests) cannot share data across multiple +connections, so it falls back to the write connection for reads. + +### 4b — `created_at` index + +```sql +CREATE INDEX IF NOT EXISTS idx_reasoning_cache_created_at +ON reasoning_cache(created_at) +``` + +Applied on first startup against existing databases (`CREATE INDEX IF NOT EXISTS` +is idempotent). This index turns: + +| Query | Before | After | +|---|---|---| +| Age prune (`WHERE created_at < ?`) | O(N) full scan | O(log N) range seek | +| Row-count prune (top-N keep) | O(N log N) sort | O(max\_rows) index walk | + +### 4c — Throttled row-count prune + +The age-based prune (`DELETE WHERE created_at < cutoff`) is now fast with the +index and still runs on every write. + +The row-count prune (keeping at most `max_rows` entries) only runs when the +approximate row count exceeds `max_rows` **or** every `_ROWCOUNT_PRUNE_INTERVAL` +(default 50) writes. The row count is seeded from the real DB size at startup so +the threshold is accurate from the first write. + +When the DB is well under the limit (the common case — 55k rows vs 100k limit), +the row-count DELETE skips entirely, saving a subquery evaluation on every request. + +### 4d — SQLite PRAGMA tuning + +| PRAGMA | Value | Effect | +|---|---|---| +| `journal_mode` | `WAL` | Concurrent readers + writer | +| `busy_timeout` | `5000` ms | Wait instead of crash on contention | +| `synchronous` | `NORMAL` | Skip extra fsync; safe under WAL | +| `cache_size` (write conn) | `-32768` (32 MiB) | Hot rows stay in RAM | +| `cache_size` (read conns) | `-8192` (8 MiB) | Per-thread read cache | + +--- + +## 5. Autostart design change + +The old `autostart.bash` automatically launched the proxy inside every new bash +session (including Cursor agent tool-call shells) using `flock` to prevent +duplicates. The flock race window was the original source of the cascade. + +**New behaviour:** The script is a **status check only**. + +- Interactive shell, proxy running → silent (no noise on every tab open). +- Interactive shell, proxy **not** running → one clear warning + start command. +- Non-interactive shell (agent tool calls) → does nothing at all. + +The proxy is started manually once. Agents just talk to it; they never try to +start it. + +--- + +## 6. Reasoning cache lifetime + +Entries are pruned by two independent policies (both configurable): + +| Policy | Default | Purpose | +|---|---|---| +| `reasoning_cache_max_age_seconds` | 30 days | Remove old conversation history | +| `reasoning_cache_max_rows` | 100,000 | Cap total DB size | + +**Active conversations are never pruned** as long as entries are younger than +`max_age_seconds`. If an entry is pruned (old conversation, restarted machine, +cache cleared), the proxy falls back to "recover" mode: it truncates the +conversation to the latest user message, adds a recovery notice, and continues. + +On Cursor restart, reasoning_content for recent tool calls is restored from the +SQLite cache transparently, so the conversation resumes correctly. + +--- + +## 7. Summary of changes in `fix/subagent-resilience` + +| File | Change | Fixes | +|---|---|---| +| `reasoning_store.py` | WAL mode + busy\_timeout | Startup crash cascade | +| `reasoning_store.py` | `_startup_prune()` with retry | Startup crash cascade | +| `reasoning_store.py` | `created_at` index | Multi-second read latency | +| `reasoning_store.py` | Thread-local read connections | Read/write contention | +| `reasoning_store.py` | Throttled row-count prune | Per-write lock contention | +| `reasoning_store.py` | 32 MiB write + 8 MiB read caches | I/O reduction | +| `server.py` | `_proxy_already_running()` health check | "Address already in use" cascade | +| `server.py` | `LOG.info` instead of `LOG.warning` for broken pipes | Log noise | +| `server.py` | `client_disconnected` flag | Unnecessary DB writes on subagent churn | +| `autostart.bash` | Status check only, no auto-start | Cascade crash root cause | diff --git a/src/deepseek_cursor_proxy/reasoning_store.py b/src/deepseek_cursor_proxy/reasoning_store.py index 7f9e03e..04d3db6 100644 --- a/src/deepseek_cursor_proxy/reasoning_store.py +++ b/src/deepseek_cursor_proxy/reasoning_store.py @@ -180,8 +180,39 @@ def portable_reasoning_keys( _STARTUP_PRUNE_BUSY_RETRIES = 3 _STARTUP_PRUNE_BUSY_WAIT = 0.5 # seconds between retries +# Prune the max-rows limit at most once every N writes. Running the full-table +# DELETE on every write was the primary cause of multi-second lock contention +# under concurrent subagent load. Age-based pruning is cheap (index range +# scan) and still happens on every write; row-count pruning is expensive +# (table scan) and only needs to run occasionally. +_ROWCOUNT_PRUNE_INTERVAL = 50 + class ReasoningStore: + """Thread-safe reasoning_content cache backed by SQLite. + + Concurrency model + ----------------- + The proxy runs as a ThreadingHTTPServer: each request is handled on its + own thread. To allow concurrent reads without blocking on writes we use + two connection layers: + + * **Write connection** (``self._write_conn``) — one shared connection, + serialised by ``self._write_lock`` (a plain ``threading.Lock``). All + INSERT/UPDATE/DELETE/COMMIT operations go through this path. + + * **Thread-local read connections** (``self._local.conn``) — each handler + thread opens its own SQLite connection the first time it calls ``get()``. + In WAL mode SQLite guarantees that readers never block writers and + writers never block readers at the *file* level; the Python-level + per-thread connections ensure there is no intra-process contention + either. + + The special case ``":memory:"`` (used in tests) cannot have multiple + connections pointing at the same data, so it falls back to the write + connection for reads (still serialised by ``_write_lock``). + """ + def __init__( self, reasoning_content_path: str | Path, @@ -190,29 +221,19 @@ def __init__( ) -> None: self.max_age_seconds = max_age_seconds self.max_rows = max_rows - if str(reasoning_content_path) == ":memory:": + self._in_memory = str(reasoning_content_path) == ":memory:" + if self._in_memory: self.reasoning_content_path: str | Path = ":memory:" else: self.reasoning_content_path = Path(reasoning_content_path).expanduser() self.reasoning_content_path.parent.mkdir( mode=0o700, parents=True, exist_ok=True ) - self._lock = threading.RLock() - self._conn = sqlite3.connect( - self.reasoning_content_path, check_same_thread=False - ) - # busy_timeout lets SQLite wait up to 5 s for a concurrent writer to - # finish instead of immediately raising OperationalError("database is - # locked"). This prevents crash-restart races when multiple proxy - # instances start at nearly the same time (e.g. several terminals open - # in quick succession or after a subagent-triggered crash). - self._conn.execute("PRAGMA busy_timeout = 5000") - # WAL mode allows one writer and multiple concurrent readers without - # locking, which is far better for the threading model of this proxy. - self._conn.execute("PRAGMA journal_mode = WAL") + self._write_lock = threading.Lock() + self._write_conn = self._open_conn(writer=True) if isinstance(self.reasoning_content_path, Path): self.reasoning_content_path.chmod(0o600) - self._conn.execute( + self._write_conn.execute( """ CREATE TABLE IF NOT EXISTS reasoning_cache ( key TEXT PRIMARY KEY, @@ -222,9 +243,72 @@ def __init__( ) """ ) - self._conn.commit() + # Index on created_at powers both the age-based DELETE (range scan) and + # the max-rows DELETE (top-N scan), turning O(N log N) full-table sorts + # into O(log N) index seeks. CREATE INDEX IF NOT EXISTS is a no-op on + # databases that already have the index, so it is safe to run every + # startup and will transparently add the index to existing DBs. + self._write_conn.execute( + """ + CREATE INDEX IF NOT EXISTS idx_reasoning_cache_created_at + ON reasoning_cache(created_at) + """ + ) + self._write_conn.commit() + if not self._in_memory: + self._local: threading.local = threading.local() + # Seed the write counter with the current row count so the rowcount + # prune triggers correctly: once _write_count exceeds max_rows we know + # the DB could be over capacity and prune on every write; below that + # threshold we only prune every _ROWCOUNT_PRUNE_INTERVAL writes. + row = self._write_conn.execute( + "SELECT COUNT(*) FROM reasoning_cache" + ).fetchone() + self._write_count: int = int(row[0]) if row else 0 self._startup_prune() + # ------------------------------------------------------------------ + # Connection helpers + # ------------------------------------------------------------------ + + def _open_conn(self, *, writer: bool = False) -> sqlite3.Connection: + """Open a new SQLite connection with appropriate PRAGMAs.""" + conn = sqlite3.connect( + str(self.reasoning_content_path), check_same_thread=False + ) + # busy_timeout: wait up to 5 s for a concurrent writer instead of + # immediately raising OperationalError("database is locked"). + conn.execute("PRAGMA busy_timeout = 5000") + # WAL mode: readers never block writers; writers never block readers. + conn.execute("PRAGMA journal_mode = WAL") + # NORMAL durability is safe under WAL (no data loss on OS crash) and + # avoids the extra fsync that FULL mode adds on every commit. + conn.execute("PRAGMA synchronous = NORMAL") + if writer: + # Give the single write connection a large page cache so hot rows + # stay in RAM during high-concurrency bursts from parallel subagents. + conn.execute("PRAGMA cache_size = -32768") # 32 MiB + else: + # Read connections share a smaller per-thread cache. + conn.execute("PRAGMA cache_size = -8192") # 8 MiB per thread + return conn + + def _read_conn(self) -> sqlite3.Connection: + """Return the thread-local read connection, creating it if needed.""" + if self._in_memory: + # All connections to :memory: are independent DBs; share the write + # connection (serialised by _write_lock in the caller). + return self._write_conn + conn: sqlite3.Connection | None = getattr(self._local, "conn", None) + if conn is None: + conn = self._open_conn(writer=False) + self._local.conn = conn + return conn + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + def _startup_prune(self) -> None: """Prune on startup with retry + graceful degradation. @@ -250,15 +334,21 @@ def _startup_prune(self) -> None: ) def close(self) -> None: - with self._lock: - self._conn.close() + with self._write_lock: + self._write_conn.close() + # Thread-local read connections are closed when their threads exit or + # when the process terminates; the OS reclaims file handles either way. + + # ------------------------------------------------------------------ + # Core read / write + # ------------------------------------------------------------------ def put(self, key: str, reasoning: str, message: dict[str, Any]) -> None: if not isinstance(reasoning, str): return message_json = json.dumps(message, ensure_ascii=False, sort_keys=True) - with self._lock: - self._conn.execute( + with self._write_lock: + self._write_conn.execute( """ INSERT INTO reasoning_cache(key, reasoning, message_json, created_at) VALUES (?, ?, ?, ?) @@ -269,12 +359,37 @@ def put(self, key: str, reasoning: str, message: dict[str, Any]) -> None: """, (key, reasoning, message_json, time.time()), ) - self._prune_locked() - self._conn.commit() + self._write_count += 1 + # Skip the expensive rowcount prune when we're well under + # capacity, running it at most every _ROWCOUNT_PRUNE_INTERVAL + # writes. Once _write_count exceeds max_rows the DB could be + # over capacity, so prune on every write until we're safely + # back under the limit. + at_or_over_limit = ( + self.max_rows is not None + and self._write_count > self.max_rows + ) + skip_rowcount = ( + not at_or_over_limit + and self._write_count % _ROWCOUNT_PRUNE_INTERVAL != 0 + ) + deleted = self._prune_write_locked(skip_rowcount=skip_rowcount) + self._write_count -= deleted + self._write_conn.commit() def get(self, key: str) -> str | None: - with self._lock: - row = self._conn.execute( + # File-based DB: no lock needed — WAL allows concurrent reads even + # while the write connection is mid-transaction. Each thread has its + # own connection so there is no intra-process sharing either. + # :memory: DB: falls back to the write connection (see _read_conn). + if self._in_memory: + with self._write_lock: + row = self._read_conn().execute( + "SELECT reasoning FROM reasoning_cache WHERE key = ?", + (key,), + ).fetchone() + else: + row = self._read_conn().execute( "SELECT reasoning FROM reasoning_cache WHERE key = ?", (key,), ).fetchone() @@ -342,36 +457,44 @@ def backfill_portable_aliases( return len(keys) def clear(self) -> int: - with self._lock: - row = self._conn.execute("SELECT COUNT(*) FROM reasoning_cache").fetchone() + with self._write_lock: + row = self._write_conn.execute( + "SELECT COUNT(*) FROM reasoning_cache" + ).fetchone() count = int(row[0] if row else 0) - self._conn.execute("DELETE FROM reasoning_cache") - self._conn.commit() + self._write_conn.execute("DELETE FROM reasoning_cache") + self._write_conn.commit() return count def prune(self) -> int: - with self._lock: - deleted = self._prune_locked() - self._conn.commit() + with self._write_lock: + deleted = self._prune_write_locked() + self._write_conn.commit() return deleted - def _prune_locked(self) -> int: + def _prune_write_locked(self, *, skip_rowcount: bool = False) -> int: + """Run pruning queries. Must be called with ``_write_lock`` held.""" deleted = 0 if self.max_age_seconds is not None and self.max_age_seconds > 0: cutoff = time.time() - self.max_age_seconds - cursor = self._conn.execute( + # idx_reasoning_cache_created_at makes this a fast index range scan. + cursor = self._write_conn.execute( "DELETE FROM reasoning_cache WHERE created_at < ?", (cutoff,), ) deleted += cursor.rowcount if cursor.rowcount != -1 else 0 - if self.max_rows is not None and self.max_rows > 0: - cursor = self._conn.execute( + if not skip_rowcount and self.max_rows is not None and self.max_rows > 0: + # Keep the top max_rows rows by created_at; delete everything else. + # The subquery walks the created_at index and materialises exactly + # max_rows rowids — O(max_rows) index seek. The outer DELETE is + # O(rows deleted). Using rowid NOT IN handles equal-timestamp ties + # correctly (rowid is unique; created_at is not). + cursor = self._write_conn.execute( """ DELETE FROM reasoning_cache - WHERE key NOT IN ( - SELECT key - FROM reasoning_cache + WHERE rowid NOT IN ( + SELECT rowid FROM reasoning_cache ORDER BY created_at DESC LIMIT ? ) diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py index c20012f..2ffcffc 100644 --- a/src/deepseek_cursor_proxy/server.py +++ b/src/deepseek_cursor_proxy/server.py @@ -1257,6 +1257,28 @@ def warn_if_insecure_upstream(url: str) -> None: LOG.warning("upstream base_url uses plain HTTP; bearer tokens may be exposed") +def _proxy_already_running(host: str, port: int) -> bool: + """Return True if a healthy proxy is already listening at host:port. + + Called before we attempt to bind the server socket. When Cursor spawns + multiple agent / subagent processes simultaneously, each one may trigger + the shell autostart hook. Rather than crashing with 'Address already in + use', a new startup attempt that finds the proxy healthy should just exit + cleanly — the existing process handles requests just fine. + """ + try: + import urllib.request as _urlreq + + req = _urlreq.Request( + f"http://{host}:{port}/healthz", + headers={"User-Agent": "deepseek-cursor-proxy/startup-check"}, + ) + with _urlreq.urlopen(req, timeout=2.0) as resp: + return resp.status == 200 + except Exception: + return False + + def main(argv: list[str] | None = None) -> int: args = build_arg_parser().parse_args(argv) try: @@ -1330,6 +1352,19 @@ def main(argv: list[str] | None = None) -> int: LOG.error("failed to initialize trace directory: %s", exc) store.close() return 2 + if _proxy_already_running(config.host, config.port): + # Another proxy instance is already healthy on this address. This can + # happen when the shell autostart hook fires in multiple Cursor agent + # terminals in quick succession. Exit cleanly so the running instance + # keeps serving requests undisturbed. + LOG.info( + "proxy already running at %s:%s — exiting cleanly", + config.host, + config.port, + ) + store.close() + return 0 + server = DeepSeekProxyServer((config.host, config.port), DeepSeekProxyHandler) server.config = config server.reasoning_store = store From 1e1715266d65fa709ceae1f7a5f924eee2eb5188 Mon Sep 17 00:00:00 2001 From: Jason Tung Date: Sun, 24 May 2026 17:04:42 -0400 Subject: [PATCH 5/8] perf: increase SQLite cache + add mmap for 128 GB RAM machine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Write connection cache_size: 8 MiB → 512 MiB (holds entire 300 MB DB) - Read connection cache_size: 8 MiB → 64 MiB per thread - mmap_size = 2 GiB: maps the DB file into virtual address space so all connections share OS page-cache pages; reads become zero-copy and the 300 MB DB stays warm in RAM indefinitely on a 128 GB machine Co-authored-by: Cursor --- src/deepseek_cursor_proxy/reasoning_store.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/deepseek_cursor_proxy/reasoning_store.py b/src/deepseek_cursor_proxy/reasoning_store.py index 04d3db6..f77ce71 100644 --- a/src/deepseek_cursor_proxy/reasoning_store.py +++ b/src/deepseek_cursor_proxy/reasoning_store.py @@ -285,12 +285,21 @@ def _open_conn(self, *, writer: bool = False) -> sqlite3.Connection: # avoids the extra fsync that FULL mode adds on every commit. conn.execute("PRAGMA synchronous = NORMAL") if writer: - # Give the single write connection a large page cache so hot rows - # stay in RAM during high-concurrency bursts from parallel subagents. - conn.execute("PRAGMA cache_size = -32768") # 32 MiB + # Large page cache for the write connection. The negative value + # is in kibibytes; 524288 = 512 MiB, enough to hold the entire + # 300 MB database in Python heap and absorb large write bursts. + conn.execute("PRAGMA cache_size = -524288") # 512 MiB else: - # Read connections share a smaller per-thread cache. - conn.execute("PRAGMA cache_size = -8192") # 8 MiB per thread + # Read connections each get their own cache, but mmap_size (below) + # means most reads never touch this cache at all. + conn.execute("PRAGMA cache_size = -65536") # 64 MiB per thread + # Memory-map the database file directly into the process address space. + # With mmap enabled the OS page cache becomes the working set — all + # connections share the same physical pages, reads are zero-copy, and + # the entire 300 MB DB can stay warm in RAM indefinitely as long as + # there is free memory. 2 GiB ceiling is far above the current DB + # size; SQLite only maps what actually exists. + conn.execute("PRAGMA mmap_size = 2147483648") # 2 GiB ceiling return conn def _read_conn(self) -> sqlite3.Connection: From 2cb86469710fd4cf57c767e7709f459524a462a1 Mon Sep 17 00:00:00 2001 From: Jason Tung Date: Sun, 24 May 2026 17:16:44 -0400 Subject: [PATCH 6/8] perf: raise limits to 5M rows / 1yr age, mmap ceiling to 32 GiB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Row limit: 100k → 5M (theoretical max ~27 GB at current row size) Max age: 30d → 1yr (conversations from the last year always restore) mmap_size: 2 GiB → 32 GiB ceiling (covers full 5M-row DB in virtual space) The row-count prune fires only when rows actually exceed 5M; at current usage (55,925 rows) it will not trigger for years. Co-authored-by: Cursor --- src/deepseek_cursor_proxy/reasoning_store.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/deepseek_cursor_proxy/reasoning_store.py b/src/deepseek_cursor_proxy/reasoning_store.py index f77ce71..c3fdcbd 100644 --- a/src/deepseek_cursor_proxy/reasoning_store.py +++ b/src/deepseek_cursor_proxy/reasoning_store.py @@ -296,10 +296,11 @@ def _open_conn(self, *, writer: bool = False) -> sqlite3.Connection: # Memory-map the database file directly into the process address space. # With mmap enabled the OS page cache becomes the working set — all # connections share the same physical pages, reads are zero-copy, and - # the entire 300 MB DB can stay warm in RAM indefinitely as long as - # there is free memory. 2 GiB ceiling is far above the current DB - # size; SQLite only maps what actually exists. - conn.execute("PRAGMA mmap_size = 2147483648") # 2 GiB ceiling + # the entire DB can stay warm in RAM indefinitely on a 128 GB machine. + # 32 GiB ceiling covers the ~27 GB theoretical max at 5M rows; + # SQLite only maps pages that actually exist, so unused address space + # costs nothing. + conn.execute("PRAGMA mmap_size = 34359738368") # 32 GiB ceiling return conn def _read_conn(self) -> sqlite3.Connection: From 57f14e87ddd258bd43e5e5d9ce449518778e1bc1 Mon Sep 17 00:00:00 2001 From: Jason Tung Date: Sun, 24 May 2026 17:17:44 -0400 Subject: [PATCH 7/8] style: apply black formatting Co-authored-by: Cursor --- src/deepseek_cursor_proxy/reasoning_store.py | 29 ++++++++++++-------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/deepseek_cursor_proxy/reasoning_store.py b/src/deepseek_cursor_proxy/reasoning_store.py index c3fdcbd..1c62fb4 100644 --- a/src/deepseek_cursor_proxy/reasoning_store.py +++ b/src/deepseek_cursor_proxy/reasoning_store.py @@ -292,7 +292,7 @@ def _open_conn(self, *, writer: bool = False) -> sqlite3.Connection: else: # Read connections each get their own cache, but mmap_size (below) # means most reads never touch this cache at all. - conn.execute("PRAGMA cache_size = -65536") # 64 MiB per thread + conn.execute("PRAGMA cache_size = -65536") # 64 MiB per thread # Memory-map the database file directly into the process address space. # With mmap enabled the OS page cache becomes the working set — all # connections share the same physical pages, reads are zero-copy, and @@ -300,7 +300,7 @@ def _open_conn(self, *, writer: bool = False) -> sqlite3.Connection: # 32 GiB ceiling covers the ~27 GB theoretical max at 5M rows; # SQLite only maps pages that actually exist, so unused address space # costs nothing. - conn.execute("PRAGMA mmap_size = 34359738368") # 32 GiB ceiling + conn.execute("PRAGMA mmap_size = 34359738368") # 32 GiB ceiling return conn def _read_conn(self) -> sqlite3.Connection: @@ -376,8 +376,7 @@ def put(self, key: str, reasoning: str, message: dict[str, Any]) -> None: # over capacity, so prune on every write until we're safely # back under the limit. at_or_over_limit = ( - self.max_rows is not None - and self._write_count > self.max_rows + self.max_rows is not None and self._write_count > self.max_rows ) skip_rowcount = ( not at_or_over_limit @@ -394,15 +393,23 @@ def get(self, key: str) -> str | None: # :memory: DB: falls back to the write connection (see _read_conn). if self._in_memory: with self._write_lock: - row = self._read_conn().execute( + row = ( + self._read_conn() + .execute( + "SELECT reasoning FROM reasoning_cache WHERE key = ?", + (key,), + ) + .fetchone() + ) + else: + row = ( + self._read_conn() + .execute( "SELECT reasoning FROM reasoning_cache WHERE key = ?", (key,), - ).fetchone() - else: - row = self._read_conn().execute( - "SELECT reasoning FROM reasoning_cache WHERE key = ?", - (key,), - ).fetchone() + ) + .fetchone() + ) if row is None: return None return str(row[0]) From 1c691dfb552157d80f042e77079a1a2ca1d2aed5 Mon Sep 17 00:00:00 2001 From: Jason Tung Date: Sun, 24 May 2026 17:33:41 -0400 Subject: [PATCH 8/8] feat(logging): timestamps on every line + startup log trim to 5k lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - All log lines now include a timestamp regardless of verbose mode. Before: INFO lines had no timestamp (message only). After: 2026-05-24 17:31:04,821 INFO ┌ cursor model=... - Verbose mode adds [ThreadName] for concurrent request debugging. - _trim_log_file() trims autostart.log to 5,000 most-recent lines at each proxy startup, keeping the same filename so open file handles in Cursor stay valid and always show the latest content. - Update tests to match new format (timestamps always present). Co-authored-by: Cursor --- src/deepseek_cursor_proxy/logging.py | 19 ++++++++----------- src/deepseek_cursor_proxy/server.py | 27 +++++++++++++++++++++++++++ tests/test_server.py | 17 ++++++++++++----- 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/src/deepseek_cursor_proxy/logging.py b/src/deepseek_cursor_proxy/logging.py index 13ccf0e..1829ae1 100644 --- a/src/deepseek_cursor_proxy/logging.py +++ b/src/deepseek_cursor_proxy/logging.py @@ -8,25 +8,22 @@ LOG = stdlib_logging.getLogger("deepseek_cursor_proxy") -DEFAULT_INFO_LOG_FORMAT = "%(message)s" -DEFAULT_WARNING_LOG_FORMAT = "%(levelname)s %(message)s" -VERBOSE_LOG_FORMAT = "%(asctime)s %(levelname)s %(message)s" +# All log lines include a timestamp regardless of verbose mode. The verbose +# flag controls whether full request/response payloads are printed, not +# whether basic observability (when did this happen?) is available. +LOG_FORMAT = "%(asctime)s %(levelname)s %(message)s" +LOG_FORMAT_VERBOSE = "%(asctime)s %(levelname)s [%(threadName)s] %(message)s" class ConsoleLogFormatter(stdlib_logging.Formatter): def __init__(self, *, verbose: bool) -> None: super().__init__() self.verbose = verbose - self._verbose_formatter = stdlib_logging.Formatter(VERBOSE_LOG_FORMAT) - self._info_formatter = stdlib_logging.Formatter(DEFAULT_INFO_LOG_FORMAT) - self._warning_formatter = stdlib_logging.Formatter(DEFAULT_WARNING_LOG_FORMAT) + fmt = LOG_FORMAT_VERBOSE if verbose else LOG_FORMAT + self._formatter = stdlib_logging.Formatter(fmt) def format(self, record: stdlib_logging.LogRecord) -> str: - if self.verbose: - return self._verbose_formatter.format(record) - if record.levelno <= stdlib_logging.INFO: - return self._info_formatter.format(record) - return self._warning_formatter.format(record) + return self._formatter.format(record) def configure_logging(*, verbose: bool) -> None: diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py index 2ffcffc..86015b6 100644 --- a/src/deepseek_cursor_proxy/server.py +++ b/src/deepseek_cursor_proxy/server.py @@ -1257,6 +1257,28 @@ def warn_if_insecure_upstream(url: str) -> None: LOG.warning("upstream base_url uses plain HTTP; bearer tokens may be exposed") +_LOG_TRIM_LINES = 5_000 # keep this many most-recent lines on startup + + +def _trim_log_file(log_path: Path, max_lines: int = _LOG_TRIM_LINES) -> None: + """Trim *log_path* to at most *max_lines* most-recent lines, in-place. + + Called once at startup so the log file never grows without bound across + long-running sessions. Silently skips if the file is missing, too small + to need trimming, or cannot be read/written (e.g. permission error). + """ + try: + if not log_path.exists(): + return + text = log_path.read_text(encoding="utf-8", errors="replace") + lines = text.splitlines(keepends=True) + if len(lines) <= max_lines: + return + log_path.write_text("".join(lines[-max_lines:]), encoding="utf-8") + except OSError: + pass + + def _proxy_already_running(host: str, port: int) -> bool: """Return True if a healthy proxy is already listening at host:port. @@ -1333,6 +1355,11 @@ def main(argv: list[str] | None = None) -> int: config = replace(config, **updates) configure_logging(verbose=config.verbose) + # Trim the default log file so it never grows without bound. The proxy + # is started via `nohup ... >> autostart.log 2>&1`, so we know exactly + # where logs are going. We trim *before* emitting any new lines so the + # file stays bounded even across many restart cycles. + _trim_log_file(Path.home() / ".deepseek-cursor-proxy" / "autostart.log") warn_if_insecure_upstream(config.upstream_base_url) store = ReasoningStore( config.reasoning_content_path, diff --git a/tests/test_server.py b/tests/test_server.py index cff056c..f7cdac3 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -167,12 +167,18 @@ def test_default_console_logging_hides_info_prefix_and_timestamp(self) -> None: None, ) - self.assertEqual( + # All log lines now include a timestamp regardless of level or verbosity. + self.assertRegex( formatter.format(info_record), - "listening on http://127.0.0.1:9000/v1", + re.compile( + r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} INFO listening on " + ), ) - self.assertEqual( - formatter.format(warning_record), "WARNING trace logging enabled" + self.assertRegex( + formatter.format(warning_record), + re.compile( + r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} WARNING trace logging" + ), ) def test_verbose_console_logging_shows_timestamp_and_level(self) -> None: @@ -187,10 +193,11 @@ def test_verbose_console_logging_shows_timestamp_and_level(self) -> None: None, ) + # Verbose mode includes thread name between level and message. self.assertRegex( formatter.format(record), re.compile( - r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} INFO listening on " + r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} INFO \[.*\] listening on " ), )