From 8d1000ab9cce890abf51f03317a6b5796f78bdfe Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Fri, 1 May 2026 14:41:38 +0200 Subject: [PATCH 1/7] docs(fork): analyze patches and recommend actions --- docs/fork-analysis.md | 192 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 docs/fork-analysis.md diff --git a/docs/fork-analysis.md b/docs/fork-analysis.md new file mode 100644 index 0000000..9462ef1 --- /dev/null +++ b/docs/fork-analysis.md @@ -0,0 +1,192 @@ +# Fork analysis: `mipselqq/deepseek-cursor-proxy` + +A user maintains a fork of this project at with a few patches on top of our `main`. This doc groups those patches into independent **changes** (ignoring how they were split into commits in the fork), describes the real-world problem each one tries to solve, evaluates the trade-offs, and gives a recommendation on whether we should bring it into our `main`. + +The analysis assumes our `main` is at PR #33 (the audit refactor). + +One change from the fork — collapsible `
` blocks for the thinking display — is already on our `main` as PR #32, so it does not need analysis. The remaining changes are below. + +--- + +## Change 1 — Namespace-Independent (NI) cache keys + +**Status:** not on our `main`. + +### What is the problem they are trying to solve? + +The proxy stores DeepSeek's `reasoning_content` in a SQLite cache, keyed by something called a **cache namespace**. The namespace is a hash of: + +- The upstream base URL +- The model family (e.g. `deepseek-v4-pro` vs `deepseek-v4-flash`) +- The thinking mode (`enabled` vs `disabled`) +- The reasoning effort +- A hash of the API key + +This namespace is intentional — it isolates one user's reasoning content from another user's, and one model's reasoning from another model's. + +But it has a side effect. Imagine the user has a long conversation going with `deepseek-v4-pro` in Cursor's **Agent** mode. They've sent many tool-call turns; the proxy has cached the `reasoning_content` for each assistant message. All of those cache entries are stored under namespace `A`. + +Now the user switches Cursor to **Ask** mode, or switches the model to `deepseek-v4-flash`. The thinking-mode default may change, the reasoning effort may change, the model family changed — so the namespace becomes `B`. None of the cache entries under namespace `A` are visible from namespace `B`. + +The proxy then sees assistant messages in the conversation history that have tool-calls but no `reasoning_content`, can't find any matching cache entry under the new namespace, marks them as "missing", and falls back to the `latest_user` recovery strategy — which **strips out almost all the conversation history** down to the latest user message and prefixes the response with `[deepseek-cursor-proxy] Refreshed reasoning_content history.`. + +To the user this looks like the proxy ate their conversation. From the proxy's perspective everything worked correctly — but the namespace boundary turned a model-switch into a context-loss event. + +### How does the fork fix it? + +It introduces a second set of cache keys that have **no namespace prefix**: + +``` +ni:signature:{message_signature} +ni:tool_call:{tool_call_id} +ni:tool_call_signature:{tool_call_signature} +``` + +These are stored *alongside* the existing scope/portable keys. Whenever the proxy stores reasoning, it also stores it under these "namespace-independent" keys. Whenever it looks up reasoning, after trying the scope-based and namespace-portable keys, it falls back to looking under these NI keys. + +Since the NI key is derived purely from the message content (not from the namespace), it survives any model/mode/thinking switch. The cached reasoning becomes findable again, the proxy patches it back in, and the conversation continues without `latest_user` recovery firing. + +### Trade-off + +The NI key includes neither model, mode, nor **API key hash**. That last omission is meaningful: + +- On a single-user local proxy: harmless. There is only one user. +- On a shared proxy (anyone running this in a multi-tenant context): **the cache becomes visible across users**. If user X and user Y happen to send an assistant message with identical content + tool calls, user Y's lookup finds user X's reasoning. The fork's authors dismiss this as "harmless because same content means same reasoning" — that is true for the *content* but loses *tenant isolation*. + +The fork's authors are running this for themselves locally, so they don't care. We need to decide whether we want to defend the multi-tenant case. + +### Recommendation + +**Take the idea, but include `auth_hash` in the NI key**: + +``` +ni:auth:{auth_hash}:signature:{message_signature} +ni:auth:{auth_hash}:tool_call:{tool_call_id} +ni:auth:{auth_hash}:tool_call_signature:{tool_call_signature} +``` + +That keeps the user's cache reachable across model/mode/thinking changes (which is what the fork was trying to fix), while preserving the per-user isolation that our existing namespace was already giving us. It is a small tweak to their patch. + +--- + +## Change 2 — 409 strategy guard + +**Status:** not on our `main`. + +### What is the problem? + +In `server.py`, when the proxy detects assistant messages with missing reasoning, it does this: + +```python +if prepared.missing_reasoning_messages: + self._send_json(409, {"error": {...}}) + return +``` + +The intent of this 409 is to back off only when the user has explicitly opted into the strict mode — i.e. when the config says `missing_reasoning_strategy == "reject"`. In any other mode (including the default `"recover"`), the proxy should soldier on, not 409. + +But the `if` above does not actually check the strategy. It fires the 409 whenever there are missing reasoning messages, period. + +So why does this not blow up in practice today? Because in `"recover"` mode, the recovery loop in `transform.py` runs first and (almost always) drives `missing_indexes` to empty before the server even sees it. The 409 path is reachable, but only in edge cases where recovery exits with `not dropped_messages` — i.e. the latest_user fallback couldn't drop anything more, but missing indexes remain. Rare, but possible, and when it happens the user gets a 409 on a request they explicitly asked the proxy to recover from. + +### The fork's fix + +Add the strategy check explicitly: + +```python +if ( + prepared.missing_reasoning_messages + and self.config.missing_reasoning_strategy == "reject" +): + self._send_json(409, ...) + return +``` + +### Recommendation + +**Take it as-is.** It is a one-line correctness fix. The current code is plainly inconsistent with the user's stated intent in `recover` mode, and the fix makes the gate match the documented contract. No downside. + +--- + +## Change 3 — Passthrough non-DeepSeek models + +**Status:** not on our `main`. + +### The problem + +Cursor lets you set a custom URL for the OpenAI-compatible endpoint. Once you do, **every** chat-completions request goes through that URL — not just calls to your DeepSeek model. That includes `/summarize` (Cursor's auto-summary), Composer, GPT-4o requests for other features, and so on. + +Our proxy currently handles those by silently rewriting the model name to `deepseek-v4-pro` (with a `WARNING` log we just added in PR #33). The result: Cursor's `/summarize` call asks for `gpt-4o-mini` but receives a DeepSeek answer — wrong tokenizer, wrong style, sometimes nonsense. + +### The fork's fix + +When the requested model does not start with `deepseek-`: + +1. Skip our entire pipeline (no `prepare_upstream_request`, no reasoning patching, no response rewriting). +2. Forward the request payload byte-for-byte to a new configurable upstream — `passthrough_url`, defaulting to `https://api.openai.com`. +3. Relay the response (regular or SSE) byte-for-byte back to Cursor. + +This is implemented as ~180 lines added to `server.py` (`_proxy_passthrough`, `_relay_passthrough_regular`, `_relay_passthrough_streaming`) plus a new config field. + +### Why this fix is broken + +The proxy forwards **Cursor's `Authorization` header** to the passthrough URL. But Cursor's bearer token is the user's **DeepSeek** API key (the user typed it into Cursor's "API key" field). Sending a DeepSeek key to OpenAI's `/v1/chat/completions` produces a 401 instantly — OpenAI does not know what to do with it. + +So the passthrough only "works" if the user happened to configure their OpenAI key in Cursor's API-key field. But then their DeepSeek calls would fail, because DeepSeek does not know what to do with an OpenAI key. There is no setup where both work simultaneously, and the fork does not add a separate `passthrough_api_key` config to disentangle them. + +In other words: the fork ships ~180 lines of code that 401 in production for almost everyone. + +A complete passthrough would also need: + +- A separate `passthrough_api_key` config field (and a way to thread it through without leaking it to DeepSeek). +- Probably a way to map model names (`gpt-4o-mini` from Cursor → whatever the upstream actually calls it). +- Documentation around tenancy, cost, and which features Cursor will then route through us. + +That is real work, real surface area, and real risk. + +### Recommendation + +**Don't take this as-is.** A simpler, equivalent UX improvement is to **stop silently rewriting non-DeepSeek models** and instead return a clean 400: + +```json +{ + "error": { + "message": "deepseek-cursor-proxy only serves models starting with `deepseek-`; received `gpt-4o-mini`. Configure that model directly in Cursor's settings instead of routing it through this proxy.", + "type": "unsupported_model", + "code": "unsupported_model" + } +} +``` + +That is a few lines, no new pipeline, no auth confusion, and tells the user exactly what went wrong and how to fix it. We are honest about being a DeepSeek-only proxy. If passthrough ever becomes a real requirement, we can implement it correctly with an opt-in config and a separate auth path; we shouldn't ship a half-working version in the meantime. + +--- + +## Change 4 — CHANGELOG.md + +**Status:** not on our `main`. + +The fork adds a `CHANGELOG.md` documenting the two fork-only fixes plus a tour of the project's architecture (request flow, cache key hierarchy, file-by-file responsibilities, why the tests are structured the way they are). + +The architecture description is accurate as of the fork point but is now slightly out of date relative to our `main` (e.g. it references `test_proxy_end_to_end.py`, which we removed in PR #33; it doesn't mention the strict fake DeepSeek harness in `test_protocol.py`). + +### Recommendation + +If we want a `CHANGELOG.md` we should write our own from our own commit history. Theirs is fine as inspiration but not worth importing wholesale. + +--- + +## Summary + +| Change | Verdict | Why | +|---|---|---| +| **1. NI cache keys** | **Take, with `auth_hash` included** | Solves the real "switching model nukes context" UX problem. The fork's version drops the auth hash, which leaks reasoning across tenants on a shared proxy. Adding `auth_hash` keeps isolation. | +| **2. 409 strategy guard** | **Take** | One-line correctness fix; current code 409s in `recover` mode in edge cases despite the user's explicit opt-in. | +| **3. Non-DeepSeek passthrough** | **Don't take** | The auth model is wrong (forwards DeepSeek key to OpenAI → 401). Better fix: return a clear 400 instead of silently rewriting. | +| **4. CHANGELOG** | Optional | Write our own if we want one; theirs is partly out of date relative to our current `main`. | + +If you agree with these, the natural next step is one PR off `main` doing: + +1. Auth-scoped NI keys (`ni:auth:{h}:signature:{...}` etc.) in `reasoning_store.py` + `transform.py`, with a test that switches `cache_namespace` mid-conversation and verifies reasoning is still found. +2. The 409 strategy guard (one-line `server.py` change + a `test_protocol.py` test for `recover` mode with a non-recoverable history). +3. A clean 400 response for non-`deepseek-*` models in `server.py`, replacing today's silent rewrite + warning log. From cc358b2da88e45b8692dd4c1620d93a5a58fc811 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Fri, 1 May 2026 15:17:24 +0200 Subject: [PATCH 2/7] docs(fork-analysis): revise ni key recommendation --- docs/fork-analysis.md | 87 +++++++++++++++++++++++++++++++++---------- 1 file changed, 67 insertions(+), 20 deletions(-) diff --git a/docs/fork-analysis.md b/docs/fork-analysis.md index 9462ef1..439968c 100644 --- a/docs/fork-analysis.md +++ b/docs/fork-analysis.md @@ -10,14 +10,14 @@ One change from the fork — collapsible `
` blocks for the thinking dis ## Change 1 — Namespace-Independent (NI) cache keys -**Status:** not on our `main`. +**Status:** not on our `main`. **However, the underlying problem is largely already solved by PR #28**, which is on our `main`. See below. ### What is the problem they are trying to solve? The proxy stores DeepSeek's `reasoning_content` in a SQLite cache, keyed by something called a **cache namespace**. The namespace is a hash of: - The upstream base URL -- The model family (e.g. `deepseek-v4-pro` vs `deepseek-v4-flash`) +- The model family - The thinking mode (`enabled` vs `disabled`) - The reasoning effort - A hash of the API key @@ -26,11 +26,9 @@ This namespace is intentional — it isolates one user's reasoning content from But it has a side effect. Imagine the user has a long conversation going with `deepseek-v4-pro` in Cursor's **Agent** mode. They've sent many tool-call turns; the proxy has cached the `reasoning_content` for each assistant message. All of those cache entries are stored under namespace `A`. -Now the user switches Cursor to **Ask** mode, or switches the model to `deepseek-v4-flash`. The thinking-mode default may change, the reasoning effort may change, the model family changed — so the namespace becomes `B`. None of the cache entries under namespace `A` are visible from namespace `B`. - -The proxy then sees assistant messages in the conversation history that have tool-calls but no `reasoning_content`, can't find any matching cache entry under the new namespace, marks them as "missing", and falls back to the `latest_user` recovery strategy — which **strips out almost all the conversation history** down to the latest user message and prefixes the response with `[deepseek-cursor-proxy] Refreshed reasoning_content history.`. +Now the user switches Cursor to **Ask** mode, or switches the model to `deepseek-v4-flash`. If the namespace changes to `B`, none of the cache entries under namespace `A` are visible. -To the user this looks like the proxy ate their conversation. From the proxy's perspective everything worked correctly — but the namespace boundary turned a model-switch into a context-loss event. +The proxy then sees assistant messages in the conversation history that have tool-calls but no `reasoning_content`, can't find any matching cache entry under the new namespace, marks them as "missing", and falls back to the `latest_user` recovery strategy — which strips out almost all the conversation history down to the latest user message and prefixes the response with `[deepseek-cursor-proxy] Refreshed reasoning_content history.`. To the user this looks like the proxy ate their conversation. ### How does the fork fix it? @@ -42,22 +40,68 @@ ni:tool_call:{tool_call_id} ni:tool_call_signature:{tool_call_signature} ``` -These are stored *alongside* the existing scope/portable keys. Whenever the proxy stores reasoning, it also stores it under these "namespace-independent" keys. Whenever it looks up reasoning, after trying the scope-based and namespace-portable keys, it falls back to looking under these NI keys. +These are stored *alongside* the existing scope/portable keys. Whenever the proxy stores reasoning, it also stores it under these "namespace-independent" keys. Whenever it looks up reasoning, after trying the scope-based and namespace-portable keys, it falls back to looking under these NI keys. Since the NI key is derived purely from the message content (not from the namespace), it survives any model/mode/thinking switch. -Since the NI key is derived purely from the message content (not from the namespace), it survives any model/mode/thinking switch. The cached reasoning becomes findable again, the proxy patches it back in, and the conversation continues without `latest_user` recovery firing. +### What our `main` already does (PR #28) -### Trade-off +I verified the following by reading the code, not just the PR description. -The NI key includes neither model, mode, nor **API key hash**. That last omission is meaningful: +1. **Family normalization** — `reasoning_model_family()` in `transform.py:670` maps both `deepseek-v4-pro` and `deepseek-v4-flash` to the family `deepseek-v4`. The namespace (`reasoning_cache_namespace()` in `transform.py:676`) hashes the *family*, not the model, so switching between Pro and Flash is invisible to the cache. **Caveat:** the family list is a hardcoded set `{deepseek-v4-pro, deepseek-v4-flash}`. A future `deepseek-v5-pro` will diverge silently until we update this function — that is a maintenance hazard worth tracking. -- On a single-user local proxy: harmless. There is only one user. -- On a shared proxy (anyone running this in a multi-tenant context): **the cache becomes visible across users**. If user X and user Y happen to send an assistant message with identical content + tool calls, user Y's lookup finds user X's reasoning. The fork's authors dismiss this as "harmless because same content means same reasoning" — that is true for the *content* but loses *tenant isolation*. +2. **Portable turn-scoped keys** — `portable_reasoning_keys()` in `reasoning_store.py:131` builds keys of the form `namespace:{ns}:turn:{turn_signature}:signature:{...}`. The `turn_signature` (`turn_context_signature()` in `reasoning_store.py:94`) hashes only messages from the latest user turn onward, **explicitly skipping system messages**. So even when Cursor's Agent↔Plan mode swap changes the system prompt or the tool surface, past assistant messages keep the same turn signature and are still findable. -The fork's authors are running this for themselves locally, so they don't care. We need to decide whether we want to defend the multi-tenant case. +3. **Strict-hit backfill** — `transform.py:303` calls `store.backfill_portable_aliases()` whenever a scope-only key hits. Existing scope-keyed entries become mode-stable on the way through, without requiring a fresh write. -### Recommendation +4. **Recovery-boundary handling** — `active_messages_from_recovery_boundary()` retires the prefix when a recovery notice is detected; the `continued_recovery_boundary` flag stops the recovery loop from cascading on every later request. + +PR #28's validation trace: 21 requests across Agent↔Plan and Pro↔Flash, only **one** recovery triggered (when the user routed through `composer-2` in the middle and came back), and **no cascade** afterward. Screenshots in the PR show the exact traces. + +So in practice, on our current `main`: -**Take the idea, but include `auth_hash` in the NI key**: +- Pro↔Flash switching: no namespace change, cache hits cleanly. No NI keys needed. +- Agent↔Plan switching (within same model/mode): portable keys catch it. No NI keys needed. +- Going through a non-DeepSeek model and back: triggers one recovery, then continues cleanly. PR #28 documents this as "expected". + +**Action item — restore PR #28's regression tests.** PR #28 originally shipped five regression tests in `tests/test_transform.py`: + +- `test_deepseek_pro_and_flash_share_reasoning_namespace` +- `test_strict_hit_backfills_portable_cache_for_mode_switch` +- `test_portable_turn_cache_restores_final_assistant_after_tool_result` +- `test_portable_turn_cache_isolated_for_reused_tool_call_id` +- `test_recovered_response_is_recorded_under_pre_recovery_scope` + +All five were dropped by PR #33's test refactor (they were in `test_transform.py`, which was trimmed from 1489 → 321 lines). None were migrated to `test_protocol.py`. The fix code is intact, but the regression coverage is gone — anyone refactoring `reasoning_store.py` or `transform.py`'s recovery path could re-break this without a test failing. + +**Plan:** restore the originals from git history (commit `5f14da3`, the merge of PR #28) and re-home them in `test_protocol.py` as a `CrossModeAndModelTests` class. The original tests already operate against the in-process store and `prepare_upstream_request` API, so they should drop in with minimal adjustment for the post-PR-#33 imports and helper layout. Concretely: + +```bash +# Recover the five tests verbatim from PR #28's commit. +git show 5f14da3:tests/test_transform.py > /tmp/pr28_test_transform.py +# Then port the relevant test methods into a new class inside +# tests/test_protocol.py and adjust imports. +``` + +This should land before — or alongside — any further work on the cross-mode/model code path, so we don't rely on PR #28's mechanisms long-term without test coverage. + +### What NI keys would still buy us + +NI keys would close gaps that PR #28 does *not* cover: + +- Switching `thinking` (`enabled`↔`disabled`) +- Switching `reasoning_effort` (e.g. `high`↔`max`) +- Switching `base_url` +- Eliminating that one "expected" recovery when bouncing through a non-DeepSeek model + +These are all real, but they are also rare and arguably *intentional* user actions (the user is saying "I want a different mode now"). It is not obviously wrong for the proxy to treat those as a fresh boundary. + +### Trade-off + +The fork's NI key drops not just the model and mode but also the **API key hash**. That last omission is meaningful: + +- On a single-user local proxy: harmless. There is only one user. +- On a shared proxy: the cache becomes visible across users. If user X and user Y happen to send an assistant message with identical content + tool calls, user Y's lookup finds user X's reasoning. The fork dismisses this as "harmless because same content means same reasoning" — that is true for the *content* but loses *tenant isolation*. + +If we want NI keys, we should at minimum scope them by `auth_hash`: ``` ni:auth:{auth_hash}:signature:{message_signature} @@ -65,7 +109,11 @@ ni:auth:{auth_hash}:tool_call:{tool_call_id} ni:auth:{auth_hash}:tool_call_signature:{tool_call_signature} ``` -That keeps the user's cache reachable across model/mode/thinking changes (which is what the fork was trying to fix), while preserving the per-user isolation that our existing namespace was already giving us. It is a small tweak to their patch. +### Recommendation + +**Likely skip.** The motivating problem (cross-mode/model context loss) is already addressed by PR #28 for the common cases. NI keys would only help with rare config switches (`thinking`, `reasoning_effort`, `base_url`), and even there the user-visible damage is one recovery notice — not the cascading context loss the fork's CHANGELOG describes. The marginal benefit is small, the complexity is non-trivial (two more key namespaces, more lookups per request, more storage), and the fork's specific implementation has a tenant-isolation hole. + +If we ever do adopt NI keys (e.g. user reports show `thinking`-mode switches eating context in real workflows), use the auth-scoped variant above so we don't lose isolation. --- @@ -180,13 +228,12 @@ If we want a `CHANGELOG.md` we should write our own from our own commit history. | Change | Verdict | Why | |---|---|---| -| **1. NI cache keys** | **Take, with `auth_hash` included** | Solves the real "switching model nukes context" UX problem. The fork's version drops the auth hash, which leaks reasoning across tenants on a shared proxy. Adding `auth_hash` keeps isolation. | +| **1. NI cache keys** | **Likely skip** | PR #28 already solves the common cases (Pro↔Flash family normalization, Agent↔Plan portable keys, recovery-boundary handling). NI keys would only help rare switches (`thinking`, `reasoning_effort`, `base_url`); marginal benefit, real complexity, and the fork's version leaks reasoning across tenants. | | **2. 409 strategy guard** | **Take** | One-line correctness fix; current code 409s in `recover` mode in edge cases despite the user's explicit opt-in. | | **3. Non-DeepSeek passthrough** | **Don't take** | The auth model is wrong (forwards DeepSeek key to OpenAI → 401). Better fix: return a clear 400 instead of silently rewriting. | | **4. CHANGELOG** | Optional | Write our own if we want one; theirs is partly out of date relative to our current `main`. | If you agree with these, the natural next step is one PR off `main` doing: -1. Auth-scoped NI keys (`ni:auth:{h}:signature:{...}` etc.) in `reasoning_store.py` + `transform.py`, with a test that switches `cache_namespace` mid-conversation and verifies reasoning is still found. -2. The 409 strategy guard (one-line `server.py` change + a `test_protocol.py` test for `recover` mode with a non-recoverable history). -3. A clean 400 response for non-`deepseek-*` models in `server.py`, replacing today's silent rewrite + warning log. +1. The 409 strategy guard (one-line `server.py` change + a `test_protocol.py` test for `recover` mode with a non-recoverable history). +2. A clean 400 response for non-`deepseek-*` models in `server.py`, replacing today's silent rewrite + warning log. From 09b81532422447f0722701743f862fb82536eb83 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Fri, 1 May 2026 15:23:27 +0200 Subject: [PATCH 3/7] fix(server): only 409 in reject mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unconditional 409 short-circuit fired for any payload where `missing_reasoning_messages > 0`, even in `recover` mode. The recovery loop in `transform.py` cannot always drive missing indexes to empty — e.g. when the request has no user message — so users who explicitly opted into `recover` were getting 409s the proxy reserves for `reject`. Add the strategy guard. Adds a regression test in `test_protocol.py` exercising the no-user-message case in recover mode and asserting the proxy forwards to upstream (which 400s) instead of pre-empting with 409. --- docs/fork-analysis.md | 42 +++++++++++++++++++++++++---- src/deepseek_cursor_proxy/server.py | 5 +++- tests/test_protocol.py | 37 +++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 6 deletions(-) diff --git a/docs/fork-analysis.md b/docs/fork-analysis.md index 439968c..1693c2d 100644 --- a/docs/fork-analysis.md +++ b/docs/fork-analysis.md @@ -123,19 +123,38 @@ If we ever do adopt NI keys (e.g. user reports show `thinking`-mode switches eat ### What is the problem? -In `server.py`, when the proxy detects assistant messages with missing reasoning, it does this: +In `server.py:164`, when the proxy detects assistant messages with missing reasoning, it does this: ```python if prepared.missing_reasoning_messages: + LOG.warning("strict missing-reasoning mode rejected request ...") self._send_json(409, {"error": {...}}) return ``` -The intent of this 409 is to back off only when the user has explicitly opted into the strict mode — i.e. when the config says `missing_reasoning_strategy == "reject"`. In any other mode (including the default `"recover"`), the proxy should soldier on, not 409. +Three things to notice: -But the `if` above does not actually check the strategy. It fires the 409 whenever there are missing reasoning messages, period. +1. The `if` gate has no strategy check. +2. The `LOG.warning` claims strict mode is the reason regardless of actual mode. +3. The 409 error body recommends switching to `--missing-reasoning-strategy recover` — i.e. the message itself assumes this should only fire in `reject` mode. -So why does this not blow up in practice today? Because in `"recover"` mode, the recovery loop in `transform.py` runs first and (almost always) drives `missing_indexes` to empty before the server even sees it. The 409 path is reachable, but only in edge cases where recovery exits with `not dropped_messages` — i.e. the latest_user fallback couldn't drop anything more, but missing indexes remain. Rare, but possible, and when it happens the user gets a 409 on a request they explicitly asked the proxy to recover from. +The *intent* is clearly "only fire when the user has opted into `reject`". But the gate fires whenever `missing_reasoning_messages > 0`, regardless of strategy. + +### Is the bug actually reachable? + +I traced through the recovery loop (`transform.py:817-839`) and `recover_messages_from_missing_reasoning()` (`transform.py:554-641`). The loop only runs when strategy is `"recover"`. It calls the recovery function, then breaks early if `not dropped_messages`. The recovery function has three return paths and **two of them can return `dropped_messages = 0`**: + +- **No user message in the conversation:** `last_user_index = -1` (line 612) → returns `(messages, 0, None, ...)`. The loop hits `if not dropped_messages: break` and exits with `missing_indexes` still populated. +- **Recovery boundary at index 0** (a recovery notice was at the very start, with no real content before it): `omitted_messages = recovery_boundary_index - len(leading_messages) - kept_context_messages = 0` → same break. + +When either fires, `missing_reasoning_messages` stays non-zero, the recovery-loop exits without calling `normalize_messages` again, and the unconditional 409 in `server.py:164` triggers — even though the user is in `recover` mode. + +Concrete cases that can hit this: + +- Cursor sends `[system, assistant_with_tool_calls, tool]` with no user message — possible for `/summarize` or auto-generated traffic. +- A continuation where the only user-visible content above is a recovery notice the proxy itself emitted earlier, with no preceding user turn before that notice. + +Neither is common in ordinary chat flow, but both *can* occur — especially around Cursor's auto-summary endpoints. ### The fork's fix @@ -150,9 +169,22 @@ if ( return ``` +### What changes after the fix? + +In `recover` mode with leftover `missing_indexes`: + +- The proxy stops 409ing. +- It forwards the request to DeepSeek as-is. +- DeepSeek will probably 400 with "the reasoning_content in the thinking mode must be passed back". +- The proxy relays that 400 to Cursor. + +That is consistent with the contract we offer in `recover` mode: we try, and if DeepSeek refuses, we relay the refusal. We do not pre-empt with a synthetic 409. + ### Recommendation -**Take it as-is.** It is a one-line correctness fix. The current code is plainly inconsistent with the user's stated intent in `recover` mode, and the fix makes the gate match the documented contract. No downside. +**Take it as-is.** One-line correctness fix. The current code is inconsistent with the documented `recover` contract, the bug is reachable in edge-case Cursor traffic (verified by tracing the recovery loop), and the fix has no downside. + +We should also add a `test_protocol.py` test that constructs one of the no-user-message cases above in `recover` mode and asserts the proxy *does not* 409 (it should forward to upstream and propagate whatever DeepSeek returns). This locks the contract in. --- diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py index 6479d31..326c58d 100644 --- a/src/deepseek_cursor_proxy/server.py +++ b/src/deepseek_cursor_proxy/server.py @@ -161,7 +161,10 @@ def do_POST(self) -> None: if trace is not None: trace.record_transform(prepared) log_context_summary(prepared) - if prepared.missing_reasoning_messages: + if ( + prepared.missing_reasoning_messages + and self.config.missing_reasoning_strategy == "reject" + ): LOG.warning( ( "strict missing-reasoning mode rejected request path=%s " diff --git a/tests/test_protocol.py b/tests/test_protocol.py index 6f565ae..e763356 100644 --- a/tests/test_protocol.py +++ b/tests/test_protocol.py @@ -658,6 +658,43 @@ def test_recovery_notice_is_stripped_before_upstream_replay(self) -> None: continue self.assertNotIn("deepseek-cursor-proxy", message.get("content", "")) + def test_recover_mode_does_not_short_circuit_with_409(self) -> None: + """In `recover` mode, a payload with no user message leaves the + recovery loop unable to drop anything (`dropped_messages == 0`), + so `missing_indexes` stays populated. The proxy must NOT 409 in + that case — it must forward to upstream and relay whatever + DeepSeek decides. 409 is reserved for `reject` mode.""" + status, _ = _post( + f"{self.proxy.url}/v1/chat/completions", + { + "model": "deepseek-v4-pro", + "messages": [ + {"role": "system", "content": "Be brief."}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": CALL_ID_1, + "type": "function", + "function": {"name": "get_date", "arguments": "{}"}, + } + ], + }, + { + "role": "tool", + "tool_call_id": CALL_ID_1, + "content": "2026-04-24", + }, + ], + }, + ) + # Strict upstream rejects the missing-reasoning history with 400. + # The point of this test is the proxy did NOT pre-empt with 409. + self.assertNotEqual(status, 409) + self.assertEqual(status, 400) + self.assertEqual(len(StrictFakeDeepSeek.requests), 1) + # --------------------------------------------------------------------------- # Streaming behaviour From d1abb6c272559a6d65471717886e72ff1f664356 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Fri, 1 May 2026 15:28:52 +0200 Subject: [PATCH 4/7] docs(fork-analysis): mark change 3 as investigate-later MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the deeper observation: even with proper auth, the fork's passthrough has no valid destination. Cursor's internal non-DeepSeek calls (/summarize, GPT-4o, Composer) normally route to api.cursor.com, gated by Cursor's own session auth that we don't have. OpenAI is a wrong default; we can't replicate Cursor's routing. Also note that real proxy logs from this user's setup show only deepseek-v4-pro traffic — Cursor appears to keep non-DeepSeek calls on its own backend in current versions, so the problem the fork was solving may not be observable in practice. Hold off until we see real traffic show up. --- docs/fork-analysis.md | 66 +++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/docs/fork-analysis.md b/docs/fork-analysis.md index 1693c2d..5718645 100644 --- a/docs/fork-analysis.md +++ b/docs/fork-analysis.md @@ -190,13 +190,11 @@ We should also add a `test_protocol.py` test that constructs one of the no-user- ## Change 3 — Passthrough non-DeepSeek models -**Status:** not on our `main`. - -### The problem +**Status:** not on our `main`. **Verdict: investigate later, do nothing for now.** -Cursor lets you set a custom URL for the OpenAI-compatible endpoint. Once you do, **every** chat-completions request goes through that URL — not just calls to your DeepSeek model. That includes `/summarize` (Cursor's auto-summary), Composer, GPT-4o requests for other features, and so on. +### The problem the fork was trying to solve -Our proxy currently handles those by silently rewriting the model name to `deepseek-v4-pro` (with a `WARNING` log we just added in PR #33). The result: Cursor's `/summarize` call asks for `gpt-4o-mini` but receives a DeepSeek answer — wrong tokenizer, wrong style, sometimes nonsense. +Cursor lets you set a custom URL for the OpenAI-compatible endpoint. The fork's CHANGELOG claims that once you do, **every** chat-completions request goes through that URL — not just calls to your DeepSeek model — including `/summarize` (Cursor's auto-summary), Composer, GPT-4o requests, and so on. Our proxy then silently rewrites the model name to `deepseek-v4-pro` (with a `WARNING` log we added in PR #33), so Cursor's `/summarize` call asks for `gpt-4o-mini` but receives a DeepSeek answer. ### The fork's fix @@ -206,39 +204,46 @@ When the requested model does not start with `deepseek-`: 2. Forward the request payload byte-for-byte to a new configurable upstream — `passthrough_url`, defaulting to `https://api.openai.com`. 3. Relay the response (regular or SSE) byte-for-byte back to Cursor. -This is implemented as ~180 lines added to `server.py` (`_proxy_passthrough`, `_relay_passthrough_regular`, `_relay_passthrough_streaming`) plus a new config field. +Implemented as ~180 lines added to `server.py` plus a new `passthrough_url` config field. + +### Why the fork's fix is broken -### Why this fix is broken +Two layers of "broken", in order of seriousness: -The proxy forwards **Cursor's `Authorization` header** to the passthrough URL. But Cursor's bearer token is the user's **DeepSeek** API key (the user typed it into Cursor's "API key" field). Sending a DeepSeek key to OpenAI's `/v1/chat/completions` produces a 401 instantly — OpenAI does not know what to do with it. +**1. Auth doesn't line up.** The proxy forwards Cursor's `Authorization` header to the passthrough URL. Cursor's bearer is the user's *DeepSeek* API key (it's what they typed into Cursor's API-key field). Sending a DeepSeek key to OpenAI 401s instantly. The fork ships ~180 lines of code that won't authenticate in production. -So the passthrough only "works" if the user happened to configure their OpenAI key in Cursor's API-key field. But then their DeepSeek calls would fail, because DeepSeek does not know what to do with an OpenAI key. There is no setup where both work simultaneously, and the fork does not add a separate `passthrough_api_key` config to disentangle them. +**2. There is no "right" passthrough destination, even with proper auth.** The non-DeepSeek requests Cursor would normally make for `/summarize`, GPT-4o calls, Composer, etc. don't go to OpenAI directly — they go to **Cursor's own backend** (`api.cursor.com` / `api.cursor.sh`), which proxies to OpenAI/Anthropic on Cursor's dime via the user's Cursor subscription. We can't replicate that routing because: -In other words: the fork ships ~180 lines of code that 401 in production for almost everyone. +- We don't have an OpenAI API key (the user only set up DeepSeek auth). +- We don't have Anthropic credentials. +- We **definitely** don't have the user's Cursor session token to call `api.cursor.com` on their behalf — that's gated by Cursor's own auth, invisible to us, and probably not even valid as a generic bearer token. -A complete passthrough would also need: +So the fork's choice of OpenAI as the default `passthrough_url` is a guess that only works in one specific configuration (user has a paid OpenAI account *and* swaps the DeepSeek key in Cursor for an OpenAI one, at which point DeepSeek calls themselves break). There is no setup where both work simultaneously without invasive config redesign. -- A separate `passthrough_api_key` config field (and a way to thread it through without leaking it to DeepSeek). -- Probably a way to map model names (`gpt-4o-mini` from Cursor → whatever the upstream actually calls it). -- Documentation around tenancy, cost, and which features Cursor will then route through us. +### Empirical observation: it might not even be a problem in practice -That is real work, real surface area, and real risk. +Looking at real proxy logs from this user's setup: + +``` +INFO ┌ cursor model=deepseek-v4-pro messages=3 tools=19 +INFO ┌ cursor model=deepseek-v4-pro messages=7 tools=19 +INFO ┌ cursor model=deepseek-v4-pro messages=9 tools=19 +``` + +Every request is `deepseek-v4-pro`. **No `gpt-4o-mini`, no `composer-2`, no `/summarize` calls.** Cursor appears to keep its internal/non-DeepSeek calls on its own backend in current versions, and only routes the user-selected DeepSeek model through the custom URL. That contradicts the fork's premise that "every request" gets diverted. + +So either the fork was based on older Cursor behavior, an edge config, or a misunderstanding. Either way, the problem they're solving isn't visible in our actual usage. ### Recommendation -**Don't take this as-is.** A simpler, equivalent UX improvement is to **stop silently rewriting non-DeepSeek models** and instead return a clean 400: +**Investigate later. Leave the current silent-rewrite + WARNING in place for now.** Reasons: -```json -{ - "error": { - "message": "deepseek-cursor-proxy only serves models starting with `deepseek-`; received `gpt-4o-mini`. Configure that model directly in Cursor's settings instead of routing it through this proxy.", - "type": "unsupported_model", - "code": "unsupported_model" - } -} -``` +- Real traffic shows non-DeepSeek requests aren't even hitting the proxy — there's nothing to fix yet. +- There is no valid passthrough destination (Cursor's backend is gated, no alternative provider creds), so the fork's approach is structurally broken regardless of how we wire it up. +- The current silent rewrite is a harmless fallback for the rare case a non-DeepSeek request *does* slip through (older Cursor versions, edge features, future changes). +- A WARNING log is already in place, so a curious user can find out. -That is a few lines, no new pipeline, no auth confusion, and tells the user exactly what went wrong and how to fix it. We are honest about being a DeepSeek-only proxy. If passthrough ever becomes a real requirement, we can implement it correctly with an opt-in config and a separate auth path; we shouldn't ship a half-working version in the meantime. +When to revisit: if non-DeepSeek requests start showing up in real proxy logs *and* they're causing user-visible bad behavior. At that point the most honest fix is probably a clean 400 ("this proxy only serves `deepseek-*` models") rather than passthrough — but that decision can wait until we actually see the traffic. --- @@ -262,10 +267,11 @@ If we want a `CHANGELOG.md` we should write our own from our own commit history. |---|---|---| | **1. NI cache keys** | **Likely skip** | PR #28 already solves the common cases (Pro↔Flash family normalization, Agent↔Plan portable keys, recovery-boundary handling). NI keys would only help rare switches (`thinking`, `reasoning_effort`, `base_url`); marginal benefit, real complexity, and the fork's version leaks reasoning across tenants. | | **2. 409 strategy guard** | **Take** | One-line correctness fix; current code 409s in `recover` mode in edge cases despite the user's explicit opt-in. | -| **3. Non-DeepSeek passthrough** | **Don't take** | The auth model is wrong (forwards DeepSeek key to OpenAI → 401). Better fix: return a clear 400 instead of silently rewriting. | +| **3. Non-DeepSeek passthrough** | **Investigate later** | Fork's auth model is wrong (forwards DeepSeek key to OpenAI → 401), and the "real" destination (Cursor's own backend) is gated and unreachable from us. Real proxy logs show non-DeepSeek requests aren't even hitting us in practice, so there's nothing user-visible to fix yet. | | **4. CHANGELOG** | Optional | Write our own if we want one; theirs is partly out of date relative to our current `main`. | -If you agree with these, the natural next step is one PR off `main` doing: +Concrete next steps: -1. The 409 strategy guard (one-line `server.py` change + a `test_protocol.py` test for `recover` mode with a non-recoverable history). -2. A clean 400 response for non-`deepseek-*` models in `server.py`, replacing today's silent rewrite + warning log. +1. **Done in this branch:** Change 2 (409 strategy guard) — one-line `server.py` fix + `test_recover_mode_does_not_short_circuit_with_409` regression test in `test_protocol.py`. +2. **Pending:** Restore PR #28's five regression tests from commit `5f14da3`'s `tests/test_transform.py` into a `CrossModeAndModelTests` class in `test_protocol.py`. Pure test recovery; no production-code changes. +3. **Investigate later:** Change 3 (non-DeepSeek model handling) only if real traffic logs start showing non-DeepSeek requests causing user-visible problems. From d0699d19942dd0f7d2d02d5d92b8107c307f171f Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Fri, 1 May 2026 15:32:36 +0200 Subject: [PATCH 5/7] test(transform): restore PR #28's cross-mode regression tests PR #33's test refactor trimmed test_transform.py from 1489 to 321 lines and accidentally dropped five regression tests that locked in PR #28's cross-mode/model context-preservation mechanisms (Pro/Flash family normalization, portable turn-scoped keys, recovery-boundary continuation). The production code is intact, but coverage was gone. Restore the originals verbatim from commit 5f14da3 as a new CrossModeAndModelTests class: - test_deepseek_pro_and_flash_share_reasoning_namespace - test_strict_hit_backfills_portable_cache_for_mode_switch - test_portable_turn_cache_restores_final_assistant_after_tool_result - test_portable_turn_cache_isolated_for_reused_tool_call_id - test_recovered_response_is_recorded_under_pre_recovery_scope Adjusted only the imports and the namespace/scope helpers to fit the post-PR-#33 layout. All 87 tests pass. --- docs/fork-analysis.md | 17 +- tests/test_transform.py | 381 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 384 insertions(+), 14 deletions(-) diff --git a/docs/fork-analysis.md b/docs/fork-analysis.md index 5718645..c0f20cd 100644 --- a/docs/fork-analysis.md +++ b/docs/fork-analysis.md @@ -62,7 +62,7 @@ So in practice, on our current `main`: - Agent↔Plan switching (within same model/mode): portable keys catch it. No NI keys needed. - Going through a non-DeepSeek model and back: triggers one recovery, then continues cleanly. PR #28 documents this as "expected". -**Action item — restore PR #28's regression tests.** PR #28 originally shipped five regression tests in `tests/test_transform.py`: +**Done — PR #28's regression tests have been restored.** They were originally shipped in `tests/test_transform.py`: - `test_deepseek_pro_and_flash_share_reasoning_namespace` - `test_strict_hit_backfills_portable_cache_for_mode_switch` @@ -70,18 +70,7 @@ So in practice, on our current `main`: - `test_portable_turn_cache_isolated_for_reused_tool_call_id` - `test_recovered_response_is_recorded_under_pre_recovery_scope` -All five were dropped by PR #33's test refactor (they were in `test_transform.py`, which was trimmed from 1489 → 321 lines). None were migrated to `test_protocol.py`. The fix code is intact, but the regression coverage is gone — anyone refactoring `reasoning_store.py` or `transform.py`'s recovery path could re-break this without a test failing. - -**Plan:** restore the originals from git history (commit `5f14da3`, the merge of PR #28) and re-home them in `test_protocol.py` as a `CrossModeAndModelTests` class. The original tests already operate against the in-process store and `prepare_upstream_request` API, so they should drop in with minimal adjustment for the post-PR-#33 imports and helper layout. Concretely: - -```bash -# Recover the five tests verbatim from PR #28's commit. -git show 5f14da3:tests/test_transform.py > /tmp/pr28_test_transform.py -# Then port the relevant test methods into a new class inside -# tests/test_protocol.py and adjust imports. -``` - -This should land before — or alongside — any further work on the cross-mode/model code path, so we don't rely on PR #28's mechanisms long-term without test coverage. +All five were dropped by PR #33's test refactor (they were in `test_transform.py`, which was trimmed from 1489 → 321 lines). They are now restored in this branch as a `CrossModeAndModelTests` class in `tests/test_transform.py`, recovered verbatim from commit `5f14da3` and adapted for the post-PR-#33 imports and helper layout. They run as part of the standard `unittest discover` and lock in PR #28's mechanisms so anyone refactoring `reasoning_store.py` or `transform.py`'s recovery path will see a CI failure if they break it. ### What NI keys would still buy us @@ -273,5 +262,5 @@ If we want a `CHANGELOG.md` we should write our own from our own commit history. Concrete next steps: 1. **Done in this branch:** Change 2 (409 strategy guard) — one-line `server.py` fix + `test_recover_mode_does_not_short_circuit_with_409` regression test in `test_protocol.py`. -2. **Pending:** Restore PR #28's five regression tests from commit `5f14da3`'s `tests/test_transform.py` into a `CrossModeAndModelTests` class in `test_protocol.py`. Pure test recovery; no production-code changes. +2. **Done in this branch:** Restored PR #28's five regression tests from commit `5f14da3` as a `CrossModeAndModelTests` class in `tests/test_transform.py`. Pure test recovery; no production-code changes. 3. **Investigate later:** Change 3 (non-DeepSeek model handling) only if real traffic logs start showing non-DeepSeek requests causing user-visible problems. diff --git a/tests/test_transform.py b/tests/test_transform.py index 649a409..5b194db 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -23,12 +23,26 @@ extract_text_content, normalize_reasoning_effort, prepare_upstream_request, + reasoning_cache_namespace, rewrite_response_body, strip_cursor_thinking_blocks, strip_recovery_notice_for_upstream, ) +def _default_cache_namespace() -> str: + return reasoning_cache_namespace( + ProxyConfig(), + "deepseek-v4-pro", + {"type": "enabled"}, + "high", + ) + + +def _cache_scope(messages: list[dict]) -> str: + return conversation_scope(messages, _default_cache_namespace()) + + class ContentHelpersTests(unittest.TestCase): def test_extract_text_content_flattens_multipart_array(self) -> None: content = [ @@ -334,5 +348,372 @@ def test_preserves_prompt_cache_usage_fields(self) -> None: self.assertEqual(usage["prompt_cache_miss_tokens"], 4) +class CrossModeAndModelTests(unittest.TestCase): + """Regression coverage for PR #28's cross-mode/model context preservation + (Pro↔Flash family normalization, portable turn-scoped keys, recovery + boundary continuation). Originally shipped with PR #28 in test_transform.py + and dropped by PR #33's test refactor; restored from commit 5f14da3.""" + + def setUp(self) -> None: + self.store = ReasoningStore(":memory:") + + def tearDown(self) -> None: + self.store.close() + + def test_deepseek_pro_and_flash_share_reasoning_namespace(self) -> None: + config = ProxyConfig() + namespace_pro = reasoning_cache_namespace( + config, + "deepseek-v4-pro", + {"type": "enabled"}, + "high", + "Bearer key-a", + ) + namespace_flash = reasoning_cache_namespace( + config, + "deepseek-v4-flash", + {"type": "enabled"}, + "high", + "Bearer key-a", + ) + self.assertEqual(namespace_pro, namespace_flash) + + prior = [{"role": "user", "content": "read README"}] + tool_call = { + "id": "call_shared", + "type": "function", + "function": { + "name": "read_file", + "arguments": '{"path":"README.md"}', + }, + } + self.store.store_assistant_message( + { + "role": "assistant", + "content": "", + "reasoning_content": "Shared DeepSeek reasoning.", + "tool_calls": [tool_call], + }, + conversation_scope(prior, namespace_pro), + namespace_pro, + prior, + ) + + prepared = prepare_upstream_request( + { + "model": "deepseek-v4-flash", + "messages": [ + *prior, + {"role": "assistant", "content": "", "tool_calls": [tool_call]}, + ], + }, + config, + self.store, + authorization="Bearer key-a", + ) + + self.assertEqual(prepared.missing_reasoning_messages, 0) + self.assertEqual( + prepared.payload["messages"][1]["reasoning_content"], + "Shared DeepSeek reasoning.", + ) + + def test_strict_hit_backfills_portable_cache_for_mode_switch(self) -> None: + agent_prior = [ + {"role": "system", "content": "Agent mode."}, + {"role": "user", "content": "set up the task"}, + {"role": "user", "content": "read README"}, + ] + plan_prior = [ + {"role": "system", "content": "Plan mode."}, + {"role": "user", "content": "set up the task"}, + {"role": "user", "content": "read README"}, + ] + tool_call = { + "id": "call_mode_switch", + "type": "function", + "function": {"name": "read_file", "arguments": '{"path":"README.md"}'}, + } + assistant_message = { + "role": "assistant", + "content": "", + "reasoning_content": "Need README before answering.", + "tool_calls": [tool_call], + } + # Store under Agent scope only — no portable aliases yet. + self.store.store_assistant_message( + assistant_message, + _cache_scope(agent_prior), + ) + + # Agent re-request: strict scope hit, should backfill portable. + strict_prepared = prepare_upstream_request( + { + "model": "deepseek-v4-pro", + "messages": [ + *agent_prior, + {"role": "assistant", "content": "", "tool_calls": [tool_call]}, + ], + }, + ProxyConfig(), + self.store, + ) + # Plan re-request: scope changed (different system prompt) but the + # turn signature still matches, so the portable alias hits. + portable_prepared = prepare_upstream_request( + { + "model": "deepseek-v4-pro", + "messages": [ + *plan_prior, + {"role": "assistant", "content": "", "tool_calls": [tool_call]}, + ], + }, + ProxyConfig(), + self.store, + ) + + self.assertEqual(strict_prepared.patched_reasoning_messages, 1) + self.assertEqual(portable_prepared.patched_reasoning_messages, 1) + self.assertEqual(portable_prepared.missing_reasoning_messages, 0) + self.assertEqual( + portable_prepared.payload["messages"][3]["reasoning_content"], + "Need README before answering.", + ) + self.assertTrue( + str(portable_prepared.reasoning_diagnostics[-1]["hit_kind"]).startswith( + "portable_" + ) + ) + + def test_portable_turn_cache_restores_final_assistant_after_tool_result( + self, + ) -> None: + agent_user = {"role": "user", "content": "look up project state"} + plan_user = dict(agent_user) + tool_call = { + "id": "call_project_state", + "type": "function", + "function": {"name": "lookup", "arguments": '{"query":"state"}'}, + } + tool_result = { + "role": "tool", + "tool_call_id": "call_project_state", + "content": '{"state":"ready"}', + } + tool_assistant = { + "role": "assistant", + "content": "", + "reasoning_content": "Need the project state.", + "tool_calls": [tool_call], + } + final_assistant = { + "role": "assistant", + "content": "The project is ready.", + "reasoning_content": "The tool result is enough to answer.", + } + agent_initial_prior = [ + {"role": "system", "content": "Agent mode."}, + agent_user, + ] + agent_final_prior = [*agent_initial_prior, tool_assistant, tool_result] + self.store.store_assistant_message( + tool_assistant, + _cache_scope(agent_initial_prior), + _default_cache_namespace(), + agent_initial_prior, + ) + self.store.store_assistant_message( + final_assistant, + _cache_scope(agent_final_prior), + _default_cache_namespace(), + agent_final_prior, + ) + + prepared = prepare_upstream_request( + { + "model": "deepseek-v4-pro", + "messages": [ + {"role": "system", "content": "Plan mode."}, + plan_user, + {"role": "assistant", "content": "", "tool_calls": [tool_call]}, + tool_result, + {"role": "assistant", "content": "The project is ready."}, + {"role": "user", "content": "continue"}, + ], + }, + ProxyConfig(missing_reasoning_strategy="reject"), + self.store, + ) + + self.assertEqual(prepared.missing_reasoning_messages, 0) + self.assertEqual(prepared.patched_reasoning_messages, 2) + self.assertEqual( + prepared.payload["messages"][4]["reasoning_content"], + "The tool result is enough to answer.", + ) + + def test_portable_turn_cache_isolated_for_reused_tool_call_id(self) -> None: + # Two different conversations both happen to reuse the same + # tool_call.id. Cache must NOT cross-contaminate. + tool_call = { + "id": "call_reused", + "type": "function", + "function": {"name": "lookup", "arguments": "{}"}, + } + assistant_a = { + "role": "assistant", + "content": "", + "reasoning_content": "Reasoning for thread A.", + "tool_calls": [tool_call], + } + assistant_b = { + "role": "assistant", + "content": "", + "reasoning_content": "Reasoning for thread B.", + "tool_calls": [tool_call], + } + prior_a = [ + {"role": "system", "content": "Agent mode."}, + {"role": "user", "content": "thread A"}, + ] + prior_b = [ + {"role": "system", "content": "Agent mode."}, + {"role": "user", "content": "thread B"}, + ] + self.store.store_assistant_message( + assistant_a, + _cache_scope(prior_a), + _default_cache_namespace(), + prior_a, + ) + self.store.store_assistant_message( + assistant_b, + _cache_scope(prior_b), + _default_cache_namespace(), + prior_b, + ) + + # Plan-mode replay of thread A — should retrieve A's reasoning, not B's. + prepared = prepare_upstream_request( + { + "model": "deepseek-v4-pro", + "messages": [ + {"role": "system", "content": "Plan mode."}, + {"role": "user", "content": "thread A"}, + {"role": "assistant", "content": "", "tool_calls": [tool_call]}, + ], + }, + ProxyConfig(), + self.store, + ) + + self.assertEqual( + prepared.payload["messages"][2]["reasoning_content"], + "Reasoning for thread A.", + ) + + def test_recovered_response_is_recorded_under_pre_recovery_scope(self) -> None: + old_tool_call = { + "id": "call_old", + "type": "function", + "function": { + "name": "read_file", + "arguments": '{"path":"README.md"}', + }, + } + new_tool_call = { + "id": "call_new", + "type": "function", + "function": {"name": "lookup", "arguments": '{"query":"new"}'}, + } + first_payload = { + "model": "deepseek-v4-pro", + "messages": [ + {"role": "user", "content": "old model turn"}, + {"role": "assistant", "content": "", "tool_calls": [old_tool_call]}, + {"role": "tool", "tool_call_id": "call_old", "content": "old result"}, + {"role": "user", "content": "continue with DeepSeek"}, + ], + } + first_recovered = prepare_upstream_request( + first_payload, + ProxyConfig(missing_reasoning_strategy="recover"), + self.store, + ) + self.assertEqual(first_recovered.recovered_reasoning_messages, 1) + + # Simulate DeepSeek's response to the recovered request. + response_body = json.dumps( + { + "id": "chatcmpl-test", + "object": "chat.completion", + "model": "deepseek-v4-pro", + "choices": [ + { + "index": 0, + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "content": "", + "reasoning_content": "Need the new lookup.", + "tool_calls": [new_tool_call], + }, + } + ], + } + ).encode() + rewritten = rewrite_response_body( + response_body, + "deepseek-v4-pro", + self.store, + first_recovered.payload["messages"], + first_recovered.cache_namespace, + content_prefix=first_recovered.recovery_notice, + recording_contexts=first_recovered.record_response_contexts, + ) + recovered_assistant = json.loads(rewritten)["choices"][0]["message"] + + # Reasoning must be recorded under BOTH scopes — pre-recovery (so + # subsequent Cursor requests echoing the with-prefix history hit) and + # post-recovery (so an immediate continuation also hits). + self.assertEqual(len(first_recovered.record_response_contexts), 2) + for scope, _messages in first_recovered.record_response_contexts: + self.assertEqual( + self.store.get( + f"scope:{scope}:signature:{message_signature(recovered_assistant)}" + ), + "Need the new lookup.", + ) + recovered_assistant.pop("reasoning_content", None) + + # Cursor's next request echoes the recovered assistant + tool result. + # The proxy should detect the recovery boundary, retire the prefix, + # and continue cleanly without recovering again. + second_payload = { + "model": "deepseek-v4-pro", + "messages": [ + *first_payload["messages"], + recovered_assistant, + {"role": "tool", "tool_call_id": "call_new", "content": "new result"}, + ], + } + + second_prepared = prepare_upstream_request( + second_payload, + ProxyConfig(missing_reasoning_strategy="recover"), + self.store, + ) + + self.assertEqual(second_prepared.missing_reasoning_messages, 0) + self.assertEqual(second_prepared.recovered_reasoning_messages, 0) + self.assertEqual(second_prepared.recovery_dropped_messages, 0) + self.assertTrue(second_prepared.continued_recovery_boundary) + self.assertGreater(second_prepared.retired_prefix_messages, 0) + self.assertEqual( + second_prepared.payload["messages"][2]["reasoning_content"], + "Need the new lookup.", + ) + + if __name__ == "__main__": unittest.main() From b00184b5944b33ebe84bcd24f619cb564b3664a0 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Fri, 1 May 2026 22:11:48 +0800 Subject: [PATCH 6/7] feat(server): trace all requests and fix 409 guard --- docs/fork-analysis.md | 266 ---------------------------- src/deepseek_cursor_proxy/server.py | 132 ++++++++++++-- src/deepseek_cursor_proxy/trace.py | 22 ++- tests/test_trace.py | 79 +++++++++ 4 files changed, 219 insertions(+), 280 deletions(-) delete mode 100644 docs/fork-analysis.md diff --git a/docs/fork-analysis.md b/docs/fork-analysis.md deleted file mode 100644 index c0f20cd..0000000 --- a/docs/fork-analysis.md +++ /dev/null @@ -1,266 +0,0 @@ -# Fork analysis: `mipselqq/deepseek-cursor-proxy` - -A user maintains a fork of this project at with a few patches on top of our `main`. This doc groups those patches into independent **changes** (ignoring how they were split into commits in the fork), describes the real-world problem each one tries to solve, evaluates the trade-offs, and gives a recommendation on whether we should bring it into our `main`. - -The analysis assumes our `main` is at PR #33 (the audit refactor). - -One change from the fork — collapsible `
` blocks for the thinking display — is already on our `main` as PR #32, so it does not need analysis. The remaining changes are below. - ---- - -## Change 1 — Namespace-Independent (NI) cache keys - -**Status:** not on our `main`. **However, the underlying problem is largely already solved by PR #28**, which is on our `main`. See below. - -### What is the problem they are trying to solve? - -The proxy stores DeepSeek's `reasoning_content` in a SQLite cache, keyed by something called a **cache namespace**. The namespace is a hash of: - -- The upstream base URL -- The model family -- The thinking mode (`enabled` vs `disabled`) -- The reasoning effort -- A hash of the API key - -This namespace is intentional — it isolates one user's reasoning content from another user's, and one model's reasoning from another model's. - -But it has a side effect. Imagine the user has a long conversation going with `deepseek-v4-pro` in Cursor's **Agent** mode. They've sent many tool-call turns; the proxy has cached the `reasoning_content` for each assistant message. All of those cache entries are stored under namespace `A`. - -Now the user switches Cursor to **Ask** mode, or switches the model to `deepseek-v4-flash`. If the namespace changes to `B`, none of the cache entries under namespace `A` are visible. - -The proxy then sees assistant messages in the conversation history that have tool-calls but no `reasoning_content`, can't find any matching cache entry under the new namespace, marks them as "missing", and falls back to the `latest_user` recovery strategy — which strips out almost all the conversation history down to the latest user message and prefixes the response with `[deepseek-cursor-proxy] Refreshed reasoning_content history.`. To the user this looks like the proxy ate their conversation. - -### How does the fork fix it? - -It introduces a second set of cache keys that have **no namespace prefix**: - -``` -ni:signature:{message_signature} -ni:tool_call:{tool_call_id} -ni:tool_call_signature:{tool_call_signature} -``` - -These are stored *alongside* the existing scope/portable keys. Whenever the proxy stores reasoning, it also stores it under these "namespace-independent" keys. Whenever it looks up reasoning, after trying the scope-based and namespace-portable keys, it falls back to looking under these NI keys. Since the NI key is derived purely from the message content (not from the namespace), it survives any model/mode/thinking switch. - -### What our `main` already does (PR #28) - -I verified the following by reading the code, not just the PR description. - -1. **Family normalization** — `reasoning_model_family()` in `transform.py:670` maps both `deepseek-v4-pro` and `deepseek-v4-flash` to the family `deepseek-v4`. The namespace (`reasoning_cache_namespace()` in `transform.py:676`) hashes the *family*, not the model, so switching between Pro and Flash is invisible to the cache. **Caveat:** the family list is a hardcoded set `{deepseek-v4-pro, deepseek-v4-flash}`. A future `deepseek-v5-pro` will diverge silently until we update this function — that is a maintenance hazard worth tracking. - -2. **Portable turn-scoped keys** — `portable_reasoning_keys()` in `reasoning_store.py:131` builds keys of the form `namespace:{ns}:turn:{turn_signature}:signature:{...}`. The `turn_signature` (`turn_context_signature()` in `reasoning_store.py:94`) hashes only messages from the latest user turn onward, **explicitly skipping system messages**. So even when Cursor's Agent↔Plan mode swap changes the system prompt or the tool surface, past assistant messages keep the same turn signature and are still findable. - -3. **Strict-hit backfill** — `transform.py:303` calls `store.backfill_portable_aliases()` whenever a scope-only key hits. Existing scope-keyed entries become mode-stable on the way through, without requiring a fresh write. - -4. **Recovery-boundary handling** — `active_messages_from_recovery_boundary()` retires the prefix when a recovery notice is detected; the `continued_recovery_boundary` flag stops the recovery loop from cascading on every later request. - -PR #28's validation trace: 21 requests across Agent↔Plan and Pro↔Flash, only **one** recovery triggered (when the user routed through `composer-2` in the middle and came back), and **no cascade** afterward. Screenshots in the PR show the exact traces. - -So in practice, on our current `main`: - -- Pro↔Flash switching: no namespace change, cache hits cleanly. No NI keys needed. -- Agent↔Plan switching (within same model/mode): portable keys catch it. No NI keys needed. -- Going through a non-DeepSeek model and back: triggers one recovery, then continues cleanly. PR #28 documents this as "expected". - -**Done — PR #28's regression tests have been restored.** They were originally shipped in `tests/test_transform.py`: - -- `test_deepseek_pro_and_flash_share_reasoning_namespace` -- `test_strict_hit_backfills_portable_cache_for_mode_switch` -- `test_portable_turn_cache_restores_final_assistant_after_tool_result` -- `test_portable_turn_cache_isolated_for_reused_tool_call_id` -- `test_recovered_response_is_recorded_under_pre_recovery_scope` - -All five were dropped by PR #33's test refactor (they were in `test_transform.py`, which was trimmed from 1489 → 321 lines). They are now restored in this branch as a `CrossModeAndModelTests` class in `tests/test_transform.py`, recovered verbatim from commit `5f14da3` and adapted for the post-PR-#33 imports and helper layout. They run as part of the standard `unittest discover` and lock in PR #28's mechanisms so anyone refactoring `reasoning_store.py` or `transform.py`'s recovery path will see a CI failure if they break it. - -### What NI keys would still buy us - -NI keys would close gaps that PR #28 does *not* cover: - -- Switching `thinking` (`enabled`↔`disabled`) -- Switching `reasoning_effort` (e.g. `high`↔`max`) -- Switching `base_url` -- Eliminating that one "expected" recovery when bouncing through a non-DeepSeek model - -These are all real, but they are also rare and arguably *intentional* user actions (the user is saying "I want a different mode now"). It is not obviously wrong for the proxy to treat those as a fresh boundary. - -### Trade-off - -The fork's NI key drops not just the model and mode but also the **API key hash**. That last omission is meaningful: - -- On a single-user local proxy: harmless. There is only one user. -- On a shared proxy: the cache becomes visible across users. If user X and user Y happen to send an assistant message with identical content + tool calls, user Y's lookup finds user X's reasoning. The fork dismisses this as "harmless because same content means same reasoning" — that is true for the *content* but loses *tenant isolation*. - -If we want NI keys, we should at minimum scope them by `auth_hash`: - -``` -ni:auth:{auth_hash}:signature:{message_signature} -ni:auth:{auth_hash}:tool_call:{tool_call_id} -ni:auth:{auth_hash}:tool_call_signature:{tool_call_signature} -``` - -### Recommendation - -**Likely skip.** The motivating problem (cross-mode/model context loss) is already addressed by PR #28 for the common cases. NI keys would only help with rare config switches (`thinking`, `reasoning_effort`, `base_url`), and even there the user-visible damage is one recovery notice — not the cascading context loss the fork's CHANGELOG describes. The marginal benefit is small, the complexity is non-trivial (two more key namespaces, more lookups per request, more storage), and the fork's specific implementation has a tenant-isolation hole. - -If we ever do adopt NI keys (e.g. user reports show `thinking`-mode switches eating context in real workflows), use the auth-scoped variant above so we don't lose isolation. - ---- - -## Change 2 — 409 strategy guard - -**Status:** not on our `main`. - -### What is the problem? - -In `server.py:164`, when the proxy detects assistant messages with missing reasoning, it does this: - -```python -if prepared.missing_reasoning_messages: - LOG.warning("strict missing-reasoning mode rejected request ...") - self._send_json(409, {"error": {...}}) - return -``` - -Three things to notice: - -1. The `if` gate has no strategy check. -2. The `LOG.warning` claims strict mode is the reason regardless of actual mode. -3. The 409 error body recommends switching to `--missing-reasoning-strategy recover` — i.e. the message itself assumes this should only fire in `reject` mode. - -The *intent* is clearly "only fire when the user has opted into `reject`". But the gate fires whenever `missing_reasoning_messages > 0`, regardless of strategy. - -### Is the bug actually reachable? - -I traced through the recovery loop (`transform.py:817-839`) and `recover_messages_from_missing_reasoning()` (`transform.py:554-641`). The loop only runs when strategy is `"recover"`. It calls the recovery function, then breaks early if `not dropped_messages`. The recovery function has three return paths and **two of them can return `dropped_messages = 0`**: - -- **No user message in the conversation:** `last_user_index = -1` (line 612) → returns `(messages, 0, None, ...)`. The loop hits `if not dropped_messages: break` and exits with `missing_indexes` still populated. -- **Recovery boundary at index 0** (a recovery notice was at the very start, with no real content before it): `omitted_messages = recovery_boundary_index - len(leading_messages) - kept_context_messages = 0` → same break. - -When either fires, `missing_reasoning_messages` stays non-zero, the recovery-loop exits without calling `normalize_messages` again, and the unconditional 409 in `server.py:164` triggers — even though the user is in `recover` mode. - -Concrete cases that can hit this: - -- Cursor sends `[system, assistant_with_tool_calls, tool]` with no user message — possible for `/summarize` or auto-generated traffic. -- A continuation where the only user-visible content above is a recovery notice the proxy itself emitted earlier, with no preceding user turn before that notice. - -Neither is common in ordinary chat flow, but both *can* occur — especially around Cursor's auto-summary endpoints. - -### The fork's fix - -Add the strategy check explicitly: - -```python -if ( - prepared.missing_reasoning_messages - and self.config.missing_reasoning_strategy == "reject" -): - self._send_json(409, ...) - return -``` - -### What changes after the fix? - -In `recover` mode with leftover `missing_indexes`: - -- The proxy stops 409ing. -- It forwards the request to DeepSeek as-is. -- DeepSeek will probably 400 with "the reasoning_content in the thinking mode must be passed back". -- The proxy relays that 400 to Cursor. - -That is consistent with the contract we offer in `recover` mode: we try, and if DeepSeek refuses, we relay the refusal. We do not pre-empt with a synthetic 409. - -### Recommendation - -**Take it as-is.** One-line correctness fix. The current code is inconsistent with the documented `recover` contract, the bug is reachable in edge-case Cursor traffic (verified by tracing the recovery loop), and the fix has no downside. - -We should also add a `test_protocol.py` test that constructs one of the no-user-message cases above in `recover` mode and asserts the proxy *does not* 409 (it should forward to upstream and propagate whatever DeepSeek returns). This locks the contract in. - ---- - -## Change 3 — Passthrough non-DeepSeek models - -**Status:** not on our `main`. **Verdict: investigate later, do nothing for now.** - -### The problem the fork was trying to solve - -Cursor lets you set a custom URL for the OpenAI-compatible endpoint. The fork's CHANGELOG claims that once you do, **every** chat-completions request goes through that URL — not just calls to your DeepSeek model — including `/summarize` (Cursor's auto-summary), Composer, GPT-4o requests, and so on. Our proxy then silently rewrites the model name to `deepseek-v4-pro` (with a `WARNING` log we added in PR #33), so Cursor's `/summarize` call asks for `gpt-4o-mini` but receives a DeepSeek answer. - -### The fork's fix - -When the requested model does not start with `deepseek-`: - -1. Skip our entire pipeline (no `prepare_upstream_request`, no reasoning patching, no response rewriting). -2. Forward the request payload byte-for-byte to a new configurable upstream — `passthrough_url`, defaulting to `https://api.openai.com`. -3. Relay the response (regular or SSE) byte-for-byte back to Cursor. - -Implemented as ~180 lines added to `server.py` plus a new `passthrough_url` config field. - -### Why the fork's fix is broken - -Two layers of "broken", in order of seriousness: - -**1. Auth doesn't line up.** The proxy forwards Cursor's `Authorization` header to the passthrough URL. Cursor's bearer is the user's *DeepSeek* API key (it's what they typed into Cursor's API-key field). Sending a DeepSeek key to OpenAI 401s instantly. The fork ships ~180 lines of code that won't authenticate in production. - -**2. There is no "right" passthrough destination, even with proper auth.** The non-DeepSeek requests Cursor would normally make for `/summarize`, GPT-4o calls, Composer, etc. don't go to OpenAI directly — they go to **Cursor's own backend** (`api.cursor.com` / `api.cursor.sh`), which proxies to OpenAI/Anthropic on Cursor's dime via the user's Cursor subscription. We can't replicate that routing because: - -- We don't have an OpenAI API key (the user only set up DeepSeek auth). -- We don't have Anthropic credentials. -- We **definitely** don't have the user's Cursor session token to call `api.cursor.com` on their behalf — that's gated by Cursor's own auth, invisible to us, and probably not even valid as a generic bearer token. - -So the fork's choice of OpenAI as the default `passthrough_url` is a guess that only works in one specific configuration (user has a paid OpenAI account *and* swaps the DeepSeek key in Cursor for an OpenAI one, at which point DeepSeek calls themselves break). There is no setup where both work simultaneously without invasive config redesign. - -### Empirical observation: it might not even be a problem in practice - -Looking at real proxy logs from this user's setup: - -``` -INFO ┌ cursor model=deepseek-v4-pro messages=3 tools=19 -INFO ┌ cursor model=deepseek-v4-pro messages=7 tools=19 -INFO ┌ cursor model=deepseek-v4-pro messages=9 tools=19 -``` - -Every request is `deepseek-v4-pro`. **No `gpt-4o-mini`, no `composer-2`, no `/summarize` calls.** Cursor appears to keep its internal/non-DeepSeek calls on its own backend in current versions, and only routes the user-selected DeepSeek model through the custom URL. That contradicts the fork's premise that "every request" gets diverted. - -So either the fork was based on older Cursor behavior, an edge config, or a misunderstanding. Either way, the problem they're solving isn't visible in our actual usage. - -### Recommendation - -**Investigate later. Leave the current silent-rewrite + WARNING in place for now.** Reasons: - -- Real traffic shows non-DeepSeek requests aren't even hitting the proxy — there's nothing to fix yet. -- There is no valid passthrough destination (Cursor's backend is gated, no alternative provider creds), so the fork's approach is structurally broken regardless of how we wire it up. -- The current silent rewrite is a harmless fallback for the rare case a non-DeepSeek request *does* slip through (older Cursor versions, edge features, future changes). -- A WARNING log is already in place, so a curious user can find out. - -When to revisit: if non-DeepSeek requests start showing up in real proxy logs *and* they're causing user-visible bad behavior. At that point the most honest fix is probably a clean 400 ("this proxy only serves `deepseek-*` models") rather than passthrough — but that decision can wait until we actually see the traffic. - ---- - -## Change 4 — CHANGELOG.md - -**Status:** not on our `main`. - -The fork adds a `CHANGELOG.md` documenting the two fork-only fixes plus a tour of the project's architecture (request flow, cache key hierarchy, file-by-file responsibilities, why the tests are structured the way they are). - -The architecture description is accurate as of the fork point but is now slightly out of date relative to our `main` (e.g. it references `test_proxy_end_to_end.py`, which we removed in PR #33; it doesn't mention the strict fake DeepSeek harness in `test_protocol.py`). - -### Recommendation - -If we want a `CHANGELOG.md` we should write our own from our own commit history. Theirs is fine as inspiration but not worth importing wholesale. - ---- - -## Summary - -| Change | Verdict | Why | -|---|---|---| -| **1. NI cache keys** | **Likely skip** | PR #28 already solves the common cases (Pro↔Flash family normalization, Agent↔Plan portable keys, recovery-boundary handling). NI keys would only help rare switches (`thinking`, `reasoning_effort`, `base_url`); marginal benefit, real complexity, and the fork's version leaks reasoning across tenants. | -| **2. 409 strategy guard** | **Take** | One-line correctness fix; current code 409s in `recover` mode in edge cases despite the user's explicit opt-in. | -| **3. Non-DeepSeek passthrough** | **Investigate later** | Fork's auth model is wrong (forwards DeepSeek key to OpenAI → 401), and the "real" destination (Cursor's own backend) is gated and unreachable from us. Real proxy logs show non-DeepSeek requests aren't even hitting us in practice, so there's nothing user-visible to fix yet. | -| **4. CHANGELOG** | Optional | Write our own if we want one; theirs is partly out of date relative to our current `main`. | - -Concrete next steps: - -1. **Done in this branch:** Change 2 (409 strategy guard) — one-line `server.py` fix + `test_recover_mode_does_not_short_circuit_with_409` regression test in `test_protocol.py`. -2. **Done in this branch:** Restored PR #28's five regression tests from commit `5f14da3` as a `CrossModeAndModelTests` class in `tests/test_transform.py`. Pure test recovery; no production-code changes. -3. **Investigate later:** Change 3 (non-DeepSeek model handling) only if real traffic logs start showing non-DeepSeek requests causing user-visible problems. diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py index 326c58d..aea8650 100644 --- a/src/deepseek_cursor_proxy/server.py +++ b/src/deepseek_cursor_proxy/server.py @@ -72,25 +72,52 @@ def log_message(self, fmt: str, *args: Any) -> None: def do_OPTIONS(self) -> None: request_path = urlparse(self.path).path - if self.config.verbose: + trace = self._start_trace(request_path) + if self.config.verbose or trace is not None: LOG.info( "incoming OPTIONS %s from %s", request_path, self.client_address[0], ) - self._send_response_headers(204, [], "sending CORS preflight response") + if trace is not None: + trace.record_cursor_response(status=204, headers={}) + sent_headers = self._send_response_headers( + 204, [], "sending CORS preflight response" + ) + self._finish_trace( + trace, + "completed" if sent_headers else "client_disconnected", + http_status=204, + ) def do_GET(self) -> None: request_path = urlparse(self.path).path - if self.config.verbose: + trace = self._start_trace(request_path) + if self.config.verbose or trace is not None: LOG.info("incoming GET %s from %s", request_path, self.client_address[0]) if request_path in {"/healthz", "/v1/healthz"}: - self._send_json(200, {"ok": True}) + self._send_json(200, {"ok": True}, trace=trace) + self._finish_trace(trace, "completed", http_status=200) return if request_path in {"/models", "/v1/models"}: - self._send_models() + self._send_models(trace=trace) + self._finish_trace(trace, "completed", http_status=200) return - self._send_json(404, {"error": {"message": "Not found"}}) + LOG.warning("rejected unsupported GET path=%s status=404", request_path) + self._send_json(404, {"error": {"message": "Not found"}}, trace=trace) + self._finish_trace(trace, "rejected", http_status=404) + + def do_HEAD(self) -> None: + self._reject_unsupported_method(read_body=False) + + def do_PUT(self) -> None: + self._reject_unsupported_method(read_body=True) + + def do_PATCH(self) -> None: + self._reject_unsupported_method(read_body=True) + + def do_DELETE(self) -> None: + self._reject_unsupported_method(read_body=True) def do_POST(self) -> None: started = time.monotonic() @@ -106,6 +133,7 @@ def do_POST(self) -> None: ) if request_path not in {"/chat/completions", "/v1/chat/completions"}: LOG.warning("rejected unsupported POST path=%s status=404", request_path) + self._record_request_body_for_trace(trace) self._send_json( 404, {"error": {"message": "Only /v1/chat/completions is supported"}}, @@ -119,6 +147,7 @@ def do_POST(self) -> None: "rejected request path=%s status=401 reason=missing_bearer_token", request_path, ) + self._record_request_body_for_trace(trace) self._send_json( 401, {"error": {"message": "Missing Authorization bearer token"}}, @@ -128,7 +157,7 @@ def do_POST(self) -> None: return try: - payload = self._read_json_body() + payload = self._read_json_body(trace=trace) except RequestBodyTooLarge as exc: LOG.warning( "rejected request path=%s status=413 reason=%s", request_path, exc @@ -144,9 +173,6 @@ def do_POST(self) -> None: self._finish_trace(trace, "rejected", http_status=400, reason=str(exc)) return - if trace is not None: - trace.record_cursor_body(payload) - if self.config.verbose: log_json("cursor request body", payload) @@ -396,6 +422,48 @@ def _send_json( if sent_headers: self._write_to_client(body, "sending JSON response body") + def _send_empty( + self, + status: int, + headers: list[tuple[str, str]] | None = None, + *, + trace: TraceRequest | None = None, + ) -> bool: + response_headers = [("Content-Length", "0")] + if headers: + response_headers.extend(headers) + if trace is not None: + trace.record_cursor_response( + status=status, + headers={name: value for name, value in response_headers}, + ) + return self._send_response_headers( + status, + response_headers, + "sending empty response headers", + ) + + def _reject_unsupported_method(self, *, read_body: bool) -> None: + request_path = urlparse(self.path).path + trace = self._start_trace(request_path) + LOG.warning( + "rejected unsupported %s path=%s status=404", + self.command, + request_path, + ) + if read_body: + self._record_request_body_for_trace(trace) + if self.command == "HEAD": + sent_headers = self._send_empty(404, trace=trace) + self._finish_trace( + trace, + "rejected" if sent_headers else "client_disconnected", + http_status=404, + ) + return + self._send_json(404, {"error": {"message": "Not found"}}, trace=trace) + self._finish_trace(trace, "rejected", http_status=404) + def _send_response_headers( self, status: int, @@ -429,7 +497,7 @@ def _write_to_client( return False return True - def _send_models(self) -> None: + def _send_models(self, *, trace: TraceRequest | None = None) -> None: created = int(time.time()) model_ids = list( dict.fromkeys( @@ -449,20 +517,32 @@ def _send_models(self) -> None: } for model_id in model_ids ] - self._send_json(200, {"object": "list", "data": models}) + self._send_json(200, {"object": "list", "data": models}, trace=trace) - def _read_json_body(self) -> dict[str, Any]: + def _read_json_body(self, *, trace: TraceRequest | None = None) -> dict[str, Any]: try: length = int(self.headers.get("Content-Length") or 0) except ValueError as exc: + if trace is not None: + trace.record_cursor_body_omitted(reason="invalid_content_length") raise ValueError("Invalid Content-Length") from exc if length < 0: + if trace is not None: + trace.record_cursor_body_omitted( + reason="invalid_content_length", body_bytes=length + ) raise ValueError("Invalid Content-Length") if length > self.config.max_request_body_bytes: + if trace is not None: + trace.record_cursor_body_omitted( + reason="body_too_large", body_bytes=length + ) raise RequestBodyTooLarge( f"Request body is too large; limit is {self.config.max_request_body_bytes} bytes" ) raw_body = self.rfile.read(length) + if trace is not None: + trace.record_cursor_body_bytes(raw_body) if not raw_body: raise ValueError("Request body is empty") try: @@ -473,6 +553,32 @@ def _read_json_body(self) -> dict[str, Any]: raise ValueError("Request body must be a JSON object") return payload + def _record_request_body_for_trace(self, trace: TraceRequest | None) -> None: + if trace is None: + return + try: + length = int(self.headers.get("Content-Length") or 0) + except ValueError: + trace.record_cursor_body_omitted(reason="invalid_content_length") + return + if length < 0: + trace.record_cursor_body_omitted( + reason="invalid_content_length", body_bytes=length + ) + return + if length > self.config.max_request_body_bytes: + trace.record_cursor_body_omitted(reason="body_too_large", body_bytes=length) + self.close_connection = True + return + try: + raw_body = self.rfile.read(length) + except OSError as exc: + trace.record_cursor_body_omitted( + reason=f"read_failed:{exc}", body_bytes=length + ) + return + trace.record_cursor_body_bytes(raw_body) + def _upstream_headers(self, stream: bool, authorization: str) -> dict[str, str]: headers = { "Authorization": authorization, diff --git a/src/deepseek_cursor_proxy/trace.py b/src/deepseek_cursor_proxy/trace.py index ec2fa11..2716770 100644 --- a/src/deepseek_cursor_proxy/trace.py +++ b/src/deepseek_cursor_proxy/trace.py @@ -206,7 +206,7 @@ def _write_manifest(self) -> None: "pid": os.getpid(), "base_dir": str(self.base_dir), "session_dir": str(self.session_dir), - "format": "one JSON file per proxied POST request", + "format": "one JSON file per traced HTTP request", }, ) @@ -224,6 +224,26 @@ def record_cursor_body(self, payload: dict[str, Any]) -> None: self.data["request"]["body"] = payload self.data["request"]["summary"] = payload_summary(payload) + def record_cursor_body_bytes(self, body: bytes) -> None: + self.data["request"]["body_bytes"] = len(body) + text = body.decode("utf-8", errors="replace") + try: + payload = json.loads(text) + except json.JSONDecodeError: + self.data["request"]["body"] = {"text": text} + return + self.data["request"]["body"] = payload + if isinstance(payload, dict): + self.data["request"]["summary"] = payload_summary(payload) + + def record_cursor_body_omitted( + self, *, reason: str, body_bytes: int | None = None + ) -> None: + omitted: dict[str, Any] = {"reason": reason} + if body_bytes is not None: + omitted["body_bytes"] = body_bytes + self.data["request"]["body_omitted"] = omitted + def record_transform(self, prepared: Any) -> None: self.data["transform"] = { "original_model": prepared.original_model, diff --git a/tests/test_trace.py b/tests/test_trace.py index 261c35d..a320b1e 100644 --- a/tests/test_trace.py +++ b/tests/test_trace.py @@ -11,6 +11,7 @@ from tempfile import TemporaryDirectory import time import unittest +from urllib.error import HTTPError from urllib.request import Request, urlopen from deepseek_cursor_proxy.config import ProxyConfig @@ -207,6 +208,84 @@ def _post(self, payload: dict) -> dict: with urlopen(request, timeout=5) as response: return json.loads(response.read()) + def test_traces_unsupported_post_path_with_body(self) -> None: + request = Request( + f"{self.proxy.url}/v1/summarize", + data=json.dumps( + { + "model": "gpt-4o-mini", + "messages": [{"role": "user", "content": "summarize"}], + } + ).encode("utf-8"), + method="POST", + headers={ + "Authorization": "Bearer sk-from-cursor", + "Content-Type": "application/json", + }, + ) + with self.assertRaises(HTTPError) as captured: + urlopen(request, timeout=5) + self.assertEqual(captured.exception.code, 404) + captured.exception.read() + + trace = _read_single_trace(self.writer.session_dir) + self.assertEqual(trace["request"]["method"], "POST") + self.assertEqual(trace["request"]["path"], "/v1/summarize") + self.assertEqual(trace["request"]["body"]["model"], "gpt-4o-mini") + self.assertEqual(trace["request"]["summary"]["model"], "gpt-4o-mini") + self.assertEqual(trace["completion"]["status"], "rejected") + self.assertEqual(trace["completion"]["http_status"], 404) + self.assertEqual(trace["transform"], {}) + self.assertEqual(_CannedUpstream.requests, []) + + def test_traces_unsupported_get_path(self) -> None: + request = Request(f"{self.proxy.url}/v1/summarize", method="GET") + with self.assertRaises(HTTPError) as captured: + urlopen(request, timeout=5) + self.assertEqual(captured.exception.code, 404) + captured.exception.read() + + trace = _read_single_trace(self.writer.session_dir) + self.assertEqual(trace["request"]["method"], "GET") + self.assertEqual(trace["request"]["path"], "/v1/summarize") + self.assertEqual(trace["completion"]["status"], "rejected") + self.assertEqual(trace["completion"]["http_status"], 404) + self.assertEqual(trace["cursor_response"]["status"], 404) + self.assertEqual(trace["transform"], {}) + + def test_traces_unsupported_put_path_with_body(self) -> None: + request = Request( + f"{self.proxy.url}/v1/summarize", + data=json.dumps({"model": "gpt-4o-mini"}).encode("utf-8"), + method="PUT", + headers={"Content-Type": "application/json"}, + ) + with self.assertRaises(HTTPError) as captured: + urlopen(request, timeout=5) + self.assertEqual(captured.exception.code, 404) + captured.exception.read() + + trace = _read_single_trace(self.writer.session_dir) + self.assertEqual(trace["request"]["method"], "PUT") + self.assertEqual(trace["request"]["path"], "/v1/summarize") + self.assertEqual(trace["request"]["body"]["model"], "gpt-4o-mini") + self.assertEqual(trace["completion"]["status"], "rejected") + self.assertEqual(trace["completion"]["http_status"], 404) + self.assertEqual(trace["transform"], {}) + + def test_traces_options_request(self) -> None: + request = Request(f"{self.proxy.url}/v1/chat/completions", method="OPTIONS") + with urlopen(request, timeout=5) as response: + self.assertEqual(response.status, 204) + response.read() + + trace = _read_single_trace(self.writer.session_dir) + self.assertEqual(trace["request"]["method"], "OPTIONS") + self.assertEqual(trace["request"]["path"], "/v1/chat/completions") + self.assertEqual(trace["completion"]["status"], "completed") + self.assertEqual(trace["completion"]["http_status"], 204) + self.assertEqual(trace["cursor_response"]["status"], 204) + def test_captures_non_streaming_replay_without_api_key(self) -> None: self._post( { From 50a20742a40426a5277198d5c074cf70e6d3fa5d Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Fri, 1 May 2026 22:22:02 +0800 Subject: [PATCH 7/7] refactor(trace): limit tracing to POST requests --- src/deepseek_cursor_proxy/server.py | 104 ++++------------------------ src/deepseek_cursor_proxy/trace.py | 2 +- tests/test_trace.py | 48 ------------- 3 files changed, 14 insertions(+), 140 deletions(-) diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py index aea8650..ec611fc 100644 --- a/src/deepseek_cursor_proxy/server.py +++ b/src/deepseek_cursor_proxy/server.py @@ -72,52 +72,25 @@ def log_message(self, fmt: str, *args: Any) -> None: def do_OPTIONS(self) -> None: request_path = urlparse(self.path).path - trace = self._start_trace(request_path) - if self.config.verbose or trace is not None: + if self.config.verbose: LOG.info( "incoming OPTIONS %s from %s", request_path, self.client_address[0], ) - if trace is not None: - trace.record_cursor_response(status=204, headers={}) - sent_headers = self._send_response_headers( - 204, [], "sending CORS preflight response" - ) - self._finish_trace( - trace, - "completed" if sent_headers else "client_disconnected", - http_status=204, - ) + self._send_response_headers(204, [], "sending CORS preflight response") def do_GET(self) -> None: request_path = urlparse(self.path).path - trace = self._start_trace(request_path) - if self.config.verbose or trace is not None: + if self.config.verbose: LOG.info("incoming GET %s from %s", request_path, self.client_address[0]) if request_path in {"/healthz", "/v1/healthz"}: - self._send_json(200, {"ok": True}, trace=trace) - self._finish_trace(trace, "completed", http_status=200) + self._send_json(200, {"ok": True}) return if request_path in {"/models", "/v1/models"}: - self._send_models(trace=trace) - self._finish_trace(trace, "completed", http_status=200) + self._send_models() return - LOG.warning("rejected unsupported GET path=%s status=404", request_path) - self._send_json(404, {"error": {"message": "Not found"}}, trace=trace) - self._finish_trace(trace, "rejected", http_status=404) - - def do_HEAD(self) -> None: - self._reject_unsupported_method(read_body=False) - - def do_PUT(self) -> None: - self._reject_unsupported_method(read_body=True) - - def do_PATCH(self) -> None: - self._reject_unsupported_method(read_body=True) - - def do_DELETE(self) -> None: - self._reject_unsupported_method(read_body=True) + self._send_json(404, {"error": {"message": "Not found"}}) def do_POST(self) -> None: started = time.monotonic() @@ -157,7 +130,7 @@ def do_POST(self) -> None: return try: - payload = self._read_json_body(trace=trace) + payload = self._read_json_body() except RequestBodyTooLarge as exc: LOG.warning( "rejected request path=%s status=413 reason=%s", request_path, exc @@ -173,6 +146,9 @@ def do_POST(self) -> None: self._finish_trace(trace, "rejected", http_status=400, reason=str(exc)) return + if trace is not None: + trace.record_cursor_body(payload) + if self.config.verbose: log_json("cursor request body", payload) @@ -422,48 +398,6 @@ def _send_json( if sent_headers: self._write_to_client(body, "sending JSON response body") - def _send_empty( - self, - status: int, - headers: list[tuple[str, str]] | None = None, - *, - trace: TraceRequest | None = None, - ) -> bool: - response_headers = [("Content-Length", "0")] - if headers: - response_headers.extend(headers) - if trace is not None: - trace.record_cursor_response( - status=status, - headers={name: value for name, value in response_headers}, - ) - return self._send_response_headers( - status, - response_headers, - "sending empty response headers", - ) - - def _reject_unsupported_method(self, *, read_body: bool) -> None: - request_path = urlparse(self.path).path - trace = self._start_trace(request_path) - LOG.warning( - "rejected unsupported %s path=%s status=404", - self.command, - request_path, - ) - if read_body: - self._record_request_body_for_trace(trace) - if self.command == "HEAD": - sent_headers = self._send_empty(404, trace=trace) - self._finish_trace( - trace, - "rejected" if sent_headers else "client_disconnected", - http_status=404, - ) - return - self._send_json(404, {"error": {"message": "Not found"}}, trace=trace) - self._finish_trace(trace, "rejected", http_status=404) - def _send_response_headers( self, status: int, @@ -497,7 +431,7 @@ def _write_to_client( return False return True - def _send_models(self, *, trace: TraceRequest | None = None) -> None: + def _send_models(self) -> None: created = int(time.time()) model_ids = list( dict.fromkeys( @@ -517,32 +451,20 @@ def _send_models(self, *, trace: TraceRequest | None = None) -> None: } for model_id in model_ids ] - self._send_json(200, {"object": "list", "data": models}, trace=trace) + self._send_json(200, {"object": "list", "data": models}) - def _read_json_body(self, *, trace: TraceRequest | None = None) -> dict[str, Any]: + def _read_json_body(self) -> dict[str, Any]: try: length = int(self.headers.get("Content-Length") or 0) except ValueError as exc: - if trace is not None: - trace.record_cursor_body_omitted(reason="invalid_content_length") raise ValueError("Invalid Content-Length") from exc if length < 0: - if trace is not None: - trace.record_cursor_body_omitted( - reason="invalid_content_length", body_bytes=length - ) raise ValueError("Invalid Content-Length") if length > self.config.max_request_body_bytes: - if trace is not None: - trace.record_cursor_body_omitted( - reason="body_too_large", body_bytes=length - ) raise RequestBodyTooLarge( f"Request body is too large; limit is {self.config.max_request_body_bytes} bytes" ) raw_body = self.rfile.read(length) - if trace is not None: - trace.record_cursor_body_bytes(raw_body) if not raw_body: raise ValueError("Request body is empty") try: diff --git a/src/deepseek_cursor_proxy/trace.py b/src/deepseek_cursor_proxy/trace.py index 2716770..e070f31 100644 --- a/src/deepseek_cursor_proxy/trace.py +++ b/src/deepseek_cursor_proxy/trace.py @@ -206,7 +206,7 @@ def _write_manifest(self) -> None: "pid": os.getpid(), "base_dir": str(self.base_dir), "session_dir": str(self.session_dir), - "format": "one JSON file per traced HTTP request", + "format": "one JSON file per traced POST request", }, ) diff --git a/tests/test_trace.py b/tests/test_trace.py index a320b1e..bf6db0e 100644 --- a/tests/test_trace.py +++ b/tests/test_trace.py @@ -238,54 +238,6 @@ def test_traces_unsupported_post_path_with_body(self) -> None: self.assertEqual(trace["transform"], {}) self.assertEqual(_CannedUpstream.requests, []) - def test_traces_unsupported_get_path(self) -> None: - request = Request(f"{self.proxy.url}/v1/summarize", method="GET") - with self.assertRaises(HTTPError) as captured: - urlopen(request, timeout=5) - self.assertEqual(captured.exception.code, 404) - captured.exception.read() - - trace = _read_single_trace(self.writer.session_dir) - self.assertEqual(trace["request"]["method"], "GET") - self.assertEqual(trace["request"]["path"], "/v1/summarize") - self.assertEqual(trace["completion"]["status"], "rejected") - self.assertEqual(trace["completion"]["http_status"], 404) - self.assertEqual(trace["cursor_response"]["status"], 404) - self.assertEqual(trace["transform"], {}) - - def test_traces_unsupported_put_path_with_body(self) -> None: - request = Request( - f"{self.proxy.url}/v1/summarize", - data=json.dumps({"model": "gpt-4o-mini"}).encode("utf-8"), - method="PUT", - headers={"Content-Type": "application/json"}, - ) - with self.assertRaises(HTTPError) as captured: - urlopen(request, timeout=5) - self.assertEqual(captured.exception.code, 404) - captured.exception.read() - - trace = _read_single_trace(self.writer.session_dir) - self.assertEqual(trace["request"]["method"], "PUT") - self.assertEqual(trace["request"]["path"], "/v1/summarize") - self.assertEqual(trace["request"]["body"]["model"], "gpt-4o-mini") - self.assertEqual(trace["completion"]["status"], "rejected") - self.assertEqual(trace["completion"]["http_status"], 404) - self.assertEqual(trace["transform"], {}) - - def test_traces_options_request(self) -> None: - request = Request(f"{self.proxy.url}/v1/chat/completions", method="OPTIONS") - with urlopen(request, timeout=5) as response: - self.assertEqual(response.status, 204) - response.read() - - trace = _read_single_trace(self.writer.session_dir) - self.assertEqual(trace["request"]["method"], "OPTIONS") - self.assertEqual(trace["request"]["path"], "/v1/chat/completions") - self.assertEqual(trace["completion"]["status"], "completed") - self.assertEqual(trace["completion"]["http_status"], 204) - self.assertEqual(trace["cursor_response"]["status"], 204) - def test_captures_non_streaming_replay_without_api_key(self) -> None: self._post( {