From fc54e8d9089832a5eb0d33d49370ac564c29e8d3 Mon Sep 17 00:00:00 2001 From: Rafael Cordones Date: Mon, 4 May 2026 09:45:53 +0200 Subject: [PATCH 1/2] Make StepFailed pickles portable across processes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DBOS persists per-step error payloads as base64-encoded pickle bytes. The standalone ``dbos workflow steps`` CLI and DBOS Conductor run outside the Django process and can't import ``episodes.workflows``, so unpickling a typed ``StepFailed`` subclass raised ``ModuleNotFoundError: No module named 'episodes'`` and the CLI printed "exception object could not be deserialized". Switch ``StepFailed.__reduce__`` to return ``(RuntimeError, (str(self),))``. The on-the-wire pickle is now a plain stdlib ``RuntimeError`` carrying the formatted message — any Python process can deserialize it. Worker-side semantics unchanged: ``raise DownloadStepFailed(...)`` still matches ``except StepFailed`` because ``__reduce__`` only kicks in at pickle time, after the typed exception has done its job. Verified no caller catches by typed subclass — the typed hierarchy is for log readability, not control flow. Admin's ``_decode_dbos_payload()`` previously returned the raw b64 blob when ``pickle.loads`` failed. Now returns ``>`` so legacy rows whose typed-class wire format can no longer be rehydrated render an explanation instead of a wall of base64. Tests: new pickle round-trip asserting RuntimeError-on-the-wire; existing legacy-row test updated for the new fallback message. 372 tests passing. Closes #110. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 4 ++ .../2026-05-04-stepfailed-portable-pickle.md | 70 +++++++++++++++++++ .../2026-05-04-stepfailed-portable-pickle.md | 38 ++++++++++ ...-portable-pickle-implementation-session.md | 45 ++++++++++++ ...failed-portable-pickle-planning-session.md | 31 ++++++++ episodes/admin.py | 14 ++-- episodes/tests/test_admin.py | 37 +++++++++- episodes/workflows.py | 32 ++++++--- 8 files changed, 256 insertions(+), 15 deletions(-) create mode 100644 doc/features/2026-05-04-stepfailed-portable-pickle.md create mode 100644 doc/plans/2026-05-04-stepfailed-portable-pickle.md create mode 100644 doc/sessions/2026-05-04-stepfailed-portable-pickle-implementation-session.md create mode 100644 doc/sessions/2026-05-04-stepfailed-portable-pickle-planning-session.md diff --git a/CHANGELOG.md b/CHANGELOG.md index d30275f..fb871ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - AGENTS.md and the Feature PR Documentation Bundle AI check now recognize **agent-orchestrated sessions**. When a parallel implementation agent is launched from a parent Claude Code session (e.g. under Conductor) and has no direct user-to-implementation-agent messages, the transcript may use `### Parent agent (orchestrator)` headings *instead of* `### User`, provided the parent-agent's launching prompt is reproduced verbatim. The transcript must declare the session as agent-orchestrated at the top of `## Detailed conversation`. Same verbatim rule applies — summarized parent prompts are still rejected. This is a policy clarification only; no code changes. - AI checks workflow now renders non-applicable rules as gray "skipped" icons instead of green "pass". `list_ai_checks.py` evaluates each rule's `paths:` frontmatter against `git diff --name-only $BASE_REF...HEAD` and emits `applies: bool` per matrix include; `.github/workflows/ai-checks.yml` gates each `check` shard on `if: ${{ ... && matrix.applies }}`, so non-applicable shards skip at the GHA level — no runner spin-up, no model call, no token cost. Four rules gain `paths:` frontmatter (`pipeline-step-sync`, `asgi-wsgi-scott`, `qdrant-payload-slim`, `entity-creation-race-safety`); the other five remain semantic. The driver's verdict tool drops `skip` from its enum: semantic non-applicability is now `pass` with `summary: "Rule does not apply."` and a one-line `details` explanation. Closes #124 — [plan](doc/plans/2026-05-04-ai-checks-skipped.md), [feature](doc/features/2026-05-04-ai-checks-skipped.md), [planning session](doc/sessions/2026-05-04-ai-checks-skipped-planning-session.md), [implementation session](doc/sessions/2026-05-04-ai-checks-skipped-implementation-session.md) +### Fixed + +- DBOS step errors now deserialize cleanly outside the Django process. `StepFailed.__reduce__` returns `(RuntimeError, (str(self),))` so the on-the-wire pickle is a plain stdlib `RuntimeError` — the standalone `dbos workflow steps` CLI (and DBOS Conductor) can rehydrate it without importing `episodes.workflows`, eliminating the `Warning: exception object could not be deserialized … No module named 'episodes'` noise. Worker-side semantics are unchanged: `raise DownloadStepFailed(...)` still matches `except StepFailed`. The Episode admin's `_decode_dbos_payload()` fallback for legacy rows whose typed-class wire format can no longer be rehydrated now renders `>` instead of the raw b64 blob. Closes #110 — [plan](doc/plans/2026-05-04-stepfailed-portable-pickle.md), [feature](doc/features/2026-05-04-stepfailed-portable-pickle.md), [planning session](doc/sessions/2026-05-04-stepfailed-portable-pickle-planning-session.md), [implementation session](doc/sessions/2026-05-04-stepfailed-portable-pickle-implementation-session.md) + ## 2026-05-01 ### Added diff --git a/doc/features/2026-05-04-stepfailed-portable-pickle.md b/doc/features/2026-05-04-stepfailed-portable-pickle.md new file mode 100644 index 0000000..1e82dbf --- /dev/null +++ b/doc/features/2026-05-04-stepfailed-portable-pickle.md @@ -0,0 +1,70 @@ +# Make `StepFailed` pickles portable across processes + +**Date:** 2026-05-04 + +## Problem + +DBOS persists per-step `error` payloads as base64-encoded pickle bytes. The standalone `dbos workflow steps ` CLI (and DBOS Conductor) runs outside the Django process and can't import `episodes.workflows`, so unpickling a typed `StepFailed` subclass there raises `ModuleNotFoundError: No module named 'episodes'` and the CLI prints: + +``` +Warning: exception object could not be deserialized for workflow … : No module named 'episodes' +``` + +Issue #110. Commit `9ff6719` partially addressed the problem by adding `StepFailed.__reduce__` returning `(self.__class__, (episode_id, error_message))` — that fixed the in-process Episode admin "View workflow steps" page but kept the wire format tied to a Django module the CLI can't reach. + +## Changes + +1. **`StepFailed.__reduce__` now returns `(RuntimeError, (str(self),))`** (Option 3 from the issue). The pickle written to DBOS's system DB is a plain stdlib `RuntimeError` carrying the formatted message (`" failed for episode : "`). Any Python process — CLI, Conductor, ad-hoc REPL — can deserialize it without importing project code. Worker-side semantics are unchanged: `raise DownloadStepFailed(...)` inside `process_episode` still matches `except StepFailed` / `except DownloadStepFailed` because `__reduce__` only kicks in at pickle time, after the typed exception has done its job. + + *Why Option 3 over Option 1 (`raise RuntimeError(json.dumps(...))`):* keeps the typed hierarchy at raise time so step wrappers still read as intent-revealing code (`raise DownloadStepFailed(...)` not `raise RuntimeError(...)`); one method on the base class is the smallest possible blast radius. Verified with `grep` that no caller catches by typed subclass — the typed shape is for log readability, not control flow. + +2. **Admin legacy-row fallback upgraded.** `_decode_dbos_payload()` in `episodes/admin.py` previously returned the raw b64 blob when `pickle.loads` failed (e.g., for legacy rows pickled before `9ff6719` whose `__reduce__` representation can no longer round-trip). Now it returns `>` with the b64 preview truncated to 80 characters, so the admin "View workflow steps" page renders an explanation instead of a wall of base64. + +3. **Tests** in `episodes/tests/test_admin.py`: + - `test_step_failed_pickle_round_trip_yields_runtime_error` — new — confirms `pickle.loads(pickle.dumps(DownloadStepFailed(...)))` returns a plain `RuntimeError` carrying the formatted message, and that the typed instance is still a `StepFailed` before pickling. + - `test_decode_dbos_payload_unpickles_step_output` — updated comment for the new RuntimeError-on-the-wire semantics; assertions unchanged because `str(RuntimeError(msg)) == msg` matches what the old typed unpickle produced. + - `test_decode_dbos_payload_passthrough_for_plain_values` — updated to expect the new "could not deserialize" notice for the `gASnotpickle` legacy-row case. + +## Key parameters + +| Knob | Value | Why | +|---|---|---| +| Wire-format exception class | `RuntimeError` | Stdlib — no project imports needed at unpickle time | +| Legacy-row preview length | 80 chars (truncated to `<77 chars>...`) | Long enough to identify the row, short enough not to overflow the admin column | + +## Verification + +Full unit-test suite: + +```bash +uv run python manage.py test +# Ran 372 tests in ~23s — OK +``` + +Pickle round-trip (smoke test, no DB needed): + +```bash +uv run python -c " +import pickle, django; django.setup() +from episodes.workflows import DownloadStepFailed +err = DownloadStepFailed(episode_id=7, error_message='boom') +out = pickle.loads(pickle.dumps(err)) +print(type(out).__name__, '->', out) +" +# RuntimeError -> downloading failed for episode 7: boom +``` + +End-to-end CLI verification (requires a DBOS-recorded failure): + +```bash +uv run dbos workflow steps episode--run- --sys-db-url "$RAGTIME_DB_URL" +# Expect: error column renders the formatted message; no "could not be deserialized" warning. +``` + +The CLI repro path was not exercised in this PR — the existing pickle round-trip test covers the same property the CLI relies on (deserializing without importing `episodes.workflows`). + +## Files modified + +- `episodes/workflows.py` — `StepFailed.__reduce__` now returns `(RuntimeError, (str(self),))`; comment rewritten to explain cross-process portability. +- `episodes/admin.py` — `_decode_dbos_payload()` failure branch now returns `>` instead of the raw blob. +- `episodes/tests/test_admin.py` — new pickle round-trip test; existing tests updated for the RuntimeError-on-the-wire semantics and the new fallback message. diff --git a/doc/plans/2026-05-04-stepfailed-portable-pickle.md b/doc/plans/2026-05-04-stepfailed-portable-pickle.md new file mode 100644 index 0000000..07109f5 --- /dev/null +++ b/doc/plans/2026-05-04-stepfailed-portable-pickle.md @@ -0,0 +1,38 @@ +# Make `StepFailed` pickles portable across processes + +**Date:** 2026-05-04 + +## Goal + +Resolve issue #110: replace the typed `StepFailed` exception hierarchy on the DBOS wire so the standalone `dbos workflow steps` CLI (and Conductor) can render step errors instead of printing `Warning: exception object could not be deserialized … No module named 'episodes'`. + +## Context + +Commit `9ff6719` partially addressed the issue by adding `StepFailed.__reduce__` returning `(self.__class__, (episode_id, error_message))`, which fixed the in-process Episode admin view but kept the wire format tied to a Django module the CLI can't import. The issue's top comment narrows the remaining acceptance criteria to: + +- [ ] `dbos workflow steps ` renders the step error as a readable message (no "could not be deserialized" warning). +- [ ] Existing in-process unpickle (Episode admin) keeps working. +- [ ] 349+ tests passing. + +## Options considered + +| Option | Sketch | Decision | +|---|---|---| +| **1. Inline raise** | Step wrappers raise `RuntimeError(json.dumps({...}))` directly, drop the typed hierarchy. | Loses intent-revealing code at raise sites. Rejected. | +| **2. Move classes to a stdlib-only module** | New `episodes_dbos/exceptions.py` (no Django imports), import path the CLI can resolve. | Overkill — these classes are 8 lines each, and the CLI still has to know where to find them. Rejected. | +| **3. `__reduce__` returns `RuntimeError`** | Worker-side typed hierarchy intact; the on-the-wire pickle collapses to a stdlib `RuntimeError`. | Smallest blast radius, preserves typed shape inside the raise process, portable everywhere. **Picked.** | + +Verified with `grep` that no caller does `except FetchDetailsFailed` etc. — the typed hierarchy exists for log readability, not control flow, so collapsing on the wire is safe. + +## Plan + +1. **`episodes/workflows.py`** — change `StepFailed.__reduce__` to return `(RuntimeError, (str(self),))`. Update the docstring with the cross-process-portability rationale. +2. **`episodes/admin.py`** — when `_decode_dbos_payload()` can't unpickle a value that *looks* like a pickle blob (legacy rows pickled with the typed class), return `>` instead of the raw b64 string. Improves the admin UX for legacy rows whose typed wire format the current process can no longer rehydrate. +3. **Tests** — new pickle round-trip test confirming `pickle.loads(pickle.dumps(DownloadStepFailed(...)))` returns a plain `RuntimeError`; update the existing legacy-row test to expect the new fallback message. +4. **Run the full test suite** — must stay at the 349+ baseline from `9ff6719`. +5. **Documentation** — feature doc + planning + implementation session transcripts + changelog entry per CLAUDE.md. + +## Out of scope + +- DBOS Conductor end-to-end verification (no Conductor running locally). +- Re-raising legacy admin rows as RuntimeError post-hoc — too invasive; they fall through to the readable fallback message. diff --git a/doc/sessions/2026-05-04-stepfailed-portable-pickle-implementation-session.md b/doc/sessions/2026-05-04-stepfailed-portable-pickle-implementation-session.md new file mode 100644 index 0000000..0068c97 --- /dev/null +++ b/doc/sessions/2026-05-04-stepfailed-portable-pickle-implementation-session.md @@ -0,0 +1,45 @@ +# Make `StepFailed` pickles portable across processes — Implementation Session + +**Date:** 2026-05-04 + +**Session ID:** unavailable + +## Summary + +Implemented Option 3 from issue #110: `StepFailed.__reduce__` now returns `(RuntimeError, (str(self),))` so DBOS-stored exceptions deserialize as plain stdlib `RuntimeError` in any process — the CLI and Conductor included. Worker-side typed hierarchy unchanged (still raises `DownloadStepFailed` etc.). Upgraded the admin's `_decode_dbos_payload()` legacy-row fallback to `>` so legacy rows render an explanation instead of raw base64. Added a pickle round-trip test; existing tests updated for the new wire-format semantics. Full suite stays green at 372 tests. + +## Detailed conversation + +### User + +(Task spec from the agent harness — see planning session for the full text. Key directives: branch off `main` as `rafacm/stepfailed-portable`, pick Option 3, update legacy-row admin fallback, run the full test suite, write the docs, open a PR with "Closes #110".) + +### Assistant + +**Branching.** The current worktree was on `rafacm/triage-issues`; pulled latest into the `main` worktree (already up to date), then `git checkout -b rafacm/stepfailed-portable main` from the `paris` worktree. + +**Implementation.** Edited `episodes/workflows.py`: + +```python +def __reduce__(self): + # Pickle as a plain ``RuntimeError`` carrying the formatted message. + # ... cross-process portability rationale + the broken default-Exception + # round-trip note from 9ff6719, kept because it still applies. + return (RuntimeError, (str(self),)) +``` + +The two-arg `__init__` issue that 9ff6719 worked around is sidestepped automatically when the wire form is a one-arg `RuntimeError`. + +Edited `episodes/admin.py::_decode_dbos_payload()`. Previous behaviour: silently return the raw b64 string when `pickle.loads` failed. New behaviour: return `>` with the preview truncated at 80 characters (`value[:77] + "..."` for longer blobs). Most likely path that triggers the fallback now is legacy rows pickled before this PR whose typed-class wire format the current process can no longer rehydrate. + +**Tests.** Edited `episodes/tests/test_admin.py`: + +- New: `test_step_failed_pickle_round_trip_yields_runtime_error` — asserts `pickle.loads(pickle.dumps(DownloadStepFailed(7, "boom")))` returns a `type(...) is RuntimeError` carrying `"downloading failed for episode 7: boom"`, and that the original instance is still a `StepFailed` subclass before pickling. +- Updated comment block on `test_decode_dbos_payload_unpickles_step_output` to explain the RuntimeError-on-the-wire semantics. Assertions unchanged because `str(RuntimeError(msg)) == msg` matches what the typed unpickle produced. +- Updated `test_decode_dbos_payload_passthrough_for_plain_values`: the `gASnotpickle` case now expects `"could not deserialize"` and the original blob to both appear in the output. + +**Verification.** `uv run python manage.py test` → `Ran 372 tests in 23.067s` → `OK`. Up from the 349 baseline because of intervening commits unrelated to this issue. + +**End-to-end CLI repro.** Not run — would require a live DBOS instance and a freshly-failed workflow. The pickle round-trip test covers the same property the CLI relies on (deserializing without importing `episodes.workflows`); the unit test asserts on `type(out) is RuntimeError` which is exactly what the CLI needs. + +**Documentation.** Wrote plan, feature doc, and both session transcripts (this is the implementation one). Changelog entry follows under today's date. diff --git a/doc/sessions/2026-05-04-stepfailed-portable-pickle-planning-session.md b/doc/sessions/2026-05-04-stepfailed-portable-pickle-planning-session.md new file mode 100644 index 0000000..7d0917f --- /dev/null +++ b/doc/sessions/2026-05-04-stepfailed-portable-pickle-planning-session.md @@ -0,0 +1,31 @@ +# Make `StepFailed` pickles portable across processes — Planning Session + +**Date:** 2026-05-04 + +**Session ID:** unavailable + +## Summary + +Reviewed issue #110 and commit `9ff6719` (which partially addressed it). Confirmed the remaining gap is cross-process portability: the standalone `dbos workflow steps` CLI runs outside Django, can't import `episodes.workflows`, and prints `Warning: exception object could not be deserialized … No module named 'episodes'`. Picked Option 3 from the issue (`__reduce__` returning a stdlib `RuntimeError`) because it preserves the typed exception hierarchy at raise time while making the on-the-wire pickle portable to any Python process. Verified no caller catches `StepFailed` subclasses by type, so collapsing them on the wire is safe. + +## Detailed conversation + +### User + +(Task spec from the agent harness:) Issue #110 — replace typed StepFailed exceptions with serializable failure objects so the dbos CLI can render them. Step 1: confirm staleness — the issue is "partially addressed in 9ff6719", so check what's left. Step 2: pick the smallest viable option (Option 3 = `__reduce__` returning RuntimeError, recommended). Step 3: verify the full test suite still passes. Step 4: documentation. Step 5: PR. + +### Assistant + +Read `episodes/workflows.py` and confirmed the current `__reduce__` returns `(self.__class__, (self.episode_id, self.error_message))` — works in-process but the typed class is a Django-app-namespaced symbol the CLI can't resolve. Read commit `9ff6719` and the issue's top comment to confirm the partial-progress markers; the comment narrows the remaining acceptance criteria to "CLI renders the message", "in-process unpickle still works", "349+ tests passing". + +Surveyed catch-by-type usage: `grep "except (StepFailed|FetchDetailsFailed|DownloadStepFailed|TranscribeStepFailed|SummarizeStepFailed|ChunkStepFailed|ExtractStepFailed|ResolveStepFailed|EmbedStepFailed)"` returned zero matches. The typed hierarchy is purely for log/debug readability, not control flow — so collapsing it at pickle time is safe. + +Weighed three options: + +1. **Inline `raise RuntimeError(json.dumps(...))`.** Loses intent-revealing raise sites. Rejected. +2. **Move classes to a stdlib-only module the CLI imports.** Too much scaffolding for 8-line classes; the CLI still has to know where to look. Rejected. +3. **`__reduce__` returns `(RuntimeError, (str(self),))`.** Worker-side typed hierarchy intact; on-the-wire pickle is stdlib-only, portable everywhere. Picked. + +Also flagged that legacy rows pickled before `9ff6719` (typed-class wire format that the current process can no longer rehydrate) currently render as raw b64 in the admin via the silent fallback in `_decode_dbos_payload`. Decided to upgrade the fallback to `>` so the admin shows context instead of a wall of base64. + +Out of scope: DBOS Conductor end-to-end verification (no Conductor locally) and rewriting legacy admin rows post-hoc (too invasive — the readable fallback covers the UX). diff --git a/episodes/admin.py b/episodes/admin.py index 89def6e..089965d 100644 --- a/episodes/admin.py +++ b/episodes/admin.py @@ -46,9 +46,12 @@ def _decode_dbos_payload(value): 2. Tries ``base64 → pickle.loads`` when the string looks like a pickle stream (starts with ``gAS`` — the b64 prefix for the pickle protocol 4 magic ``\\x80\\x04\\x95``). - 3. Falls back to the raw string when decoding fails (so a - future DBOS version that already returns native objects - still renders). + 3. Falls back to a "could not deserialize: " notice + for pickle blobs we can't decode — most often legacy rows + written before ``StepFailed.__reduce__`` was switched to + ``RuntimeError``, whose typed-class wire form fails to + unpickle if the class signature has since changed. + 4. Returns non-pickle-looking strings unchanged. Pickle is unsafe on untrusted input; here the bytes were written by this same process into a database we own — same @@ -66,7 +69,10 @@ def _decode_dbos_payload(value): try: obj = pickle.loads(base64.b64decode(value)) except Exception: - return value + # Truncate the b64 preview so admins see *something* without + # a 4 KB blob hijacking the column. + preview = value if len(value) <= 80 else value[:77] + "..." + return f"" return str(obj) diff --git a/episodes/tests/test_admin.py b/episodes/tests/test_admin.py index e453992..1e24f07 100644 --- a/episodes/tests/test_admin.py +++ b/episodes/tests/test_admin.py @@ -118,13 +118,40 @@ def test_decode_dbos_payload_unpickles_step_output(self): encoded = base64.b64encode(pickle.dumps(out)).decode("ascii") self.assertEqual(_decode_dbos_payload(encoded), str(out)) - # And for errors. + # Errors pickle as plain ``RuntimeError`` (see + # ``StepFailed.__reduce__``) carrying the formatted message — + # so the CLI / Conductor can deserialize them without + # importing ``episodes.workflows``. The string content is the + # same as ``str(StepFailed(...))``. err = DownloadStepFailed(episode_id=2, error_message="boom") encoded = base64.b64encode(pickle.dumps(err)).decode("ascii") decoded = _decode_dbos_payload(encoded) self.assertIn("downloading failed for episode 2", decoded) self.assertIn("boom", decoded) + def test_step_failed_pickle_round_trip_yields_runtime_error(self): + """``StepFailed.__reduce__`` collapses to ``RuntimeError`` on the wire. + + Required for cross-process portability: the standalone ``dbos + workflow steps`` CLI runs outside Django and can't import + ``episodes.workflows``. Pickling as a stdlib ``RuntimeError`` + means any Python process can deserialize the step error. + """ + import pickle + + from episodes.workflows import DownloadStepFailed, StepFailed + + err = DownloadStepFailed(episode_id=7, error_message="boom") + # In-process raise still matches the typed hierarchy. + self.assertIsInstance(err, StepFailed) + + rehydrated = pickle.loads(pickle.dumps(err)) + # On the wire it's a plain ``RuntimeError`` — portable across + # processes that don't import ``episodes.workflows``. + self.assertIs(type(rehydrated), RuntimeError) + self.assertIn("downloading failed for episode 7", str(rehydrated)) + self.assertIn("boom", str(rehydrated)) + def test_decode_dbos_payload_passthrough_for_plain_values(self): from episodes.admin import _decode_dbos_payload @@ -132,8 +159,12 @@ def test_decode_dbos_payload_passthrough_for_plain_values(self): self.assertEqual(_decode_dbos_payload(""), "") # Already-readable string — left alone. self.assertEqual(_decode_dbos_payload("hello"), "hello") - # Looks pickle-ish but not valid b64/pickle — fall back to raw. - self.assertEqual(_decode_dbos_payload("gASnotpickle"), "gASnotpickle") + # Looks pickle-ish but not valid b64/pickle — graceful + # fallback for legacy rows whose typed-class wire format the + # current process can no longer rehydrate. + decoded = _decode_dbos_payload("gASnotpickle") + self.assertIn("could not deserialize", decoded) + self.assertIn("gASnotpickle", decoded) @patch("episodes.signals.DBOS") @patch("dbos.DBOS") diff --git a/episodes/workflows.py b/episodes/workflows.py index 3b2fa6d..1cb87c1 100644 --- a/episodes/workflows.py +++ b/episodes/workflows.py @@ -88,14 +88,30 @@ def __init__(self, episode_id: int, error_message: str): ) def __reduce__(self): - # Default Exception.__reduce__ unpickles via ``cls(*self.args)`` - # which doesn't match this class's two-arg ``__init__`` signature - # (``self.args`` only carries the formatted message). Return the - # constructor inputs verbatim so DBOS-stored exceptions can be - # round-tripped — necessary for the admin step-trace view to - # render the human-readable error_message instead of the raw - # b64-encoded pickle wire format. - return (self.__class__, (self.episode_id, self.error_message)) + # Pickle as a plain ``RuntimeError`` carrying the formatted + # message. Two reasons: + # + # 1. **Cross-process portability.** DBOS persists step errors + # as base64-encoded pickle bytes. The standalone ``dbos + # workflow steps`` CLI (and DBOS Conductor) run outside the + # Django process and can't import ``episodes.workflows`` — + # unpickling a typed ``StepFailed`` subclass there raises + # ``ModuleNotFoundError`` and the CLI prints "exception + # object could not be deserialized". ``RuntimeError`` lives + # in the stdlib, so any Python process can rehydrate it. + # 2. **Default ``Exception.__reduce__`` round-trip is broken** + # for our two-arg ``__init__`` signature (``self.args`` only + # carries the formatted message); a plain ``RuntimeError`` + # payload sidesteps that entirely. + # + # Worker-side semantics are unchanged: in the process that + # raises, ``except StepFailed`` / ``except FetchDetailsFailed`` + # still match. The typed shape only collapses at pickle time, + # which is when we cross the process boundary anyway. No + # caller currently catches by typed subclass (verified with + # grep) — the typed hierarchy exists for log readability, not + # control flow. + return (RuntimeError, (str(self),)) class FetchDetailsFailed(StepFailed): From 9d1897f644a4f054a9aa5e88a7e3ecd40b4a3a22 Mon Sep 17 00:00:00 2001 From: Rafael Cordones Date: Mon, 4 May 2026 12:08:08 +0200 Subject: [PATCH 2/2] Address codex review followups (PR #129) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace summarized user messages in session transcripts with verbatim text. Implementation session declared as agent-orchestrated; parent prompt reproduced verbatim. - Add subprocess pickle-portability test asserting StepFailed deserializes to RuntimeError without episodes.workflows on sys.path. - CHANGELOG: BREAKING note that pre-fix StepFailed pickles will not render human-readable text after this change. Codex follow-up #2 (extract printable strings from legacy pickle bytes) intentionally not addressed — user will clear the dev DB and accepts the breaking-change tradeoff. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 1 + ...-portable-pickle-implementation-session.md | 48 ++++++- ...failed-portable-pickle-planning-session.md | 37 ++++- episodes/tests/test_stepfailed_subprocess.py | 129 ++++++++++++++++++ 4 files changed, 210 insertions(+), 5 deletions(-) create mode 100644 episodes/tests/test_stepfailed_subprocess.py diff --git a/CHANGELOG.md b/CHANGELOG.md index fb871ee..94efbec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - AGENTS.md and the Feature PR Documentation Bundle AI check now recognize **agent-orchestrated sessions**. When a parallel implementation agent is launched from a parent Claude Code session (e.g. under Conductor) and has no direct user-to-implementation-agent messages, the transcript may use `### Parent agent (orchestrator)` headings *instead of* `### User`, provided the parent-agent's launching prompt is reproduced verbatim. The transcript must declare the session as agent-orchestrated at the top of `## Detailed conversation`. Same verbatim rule applies — summarized parent prompts are still rejected. This is a policy clarification only; no code changes. - AI checks workflow now renders non-applicable rules as gray "skipped" icons instead of green "pass". `list_ai_checks.py` evaluates each rule's `paths:` frontmatter against `git diff --name-only $BASE_REF...HEAD` and emits `applies: bool` per matrix include; `.github/workflows/ai-checks.yml` gates each `check` shard on `if: ${{ ... && matrix.applies }}`, so non-applicable shards skip at the GHA level — no runner spin-up, no model call, no token cost. Four rules gain `paths:` frontmatter (`pipeline-step-sync`, `asgi-wsgi-scott`, `qdrant-payload-slim`, `entity-creation-race-safety`); the other five remain semantic. The driver's verdict tool drops `skip` from its enum: semantic non-applicability is now `pass` with `summary: "Rule does not apply."` and a one-line `details` explanation. Closes #124 — [plan](doc/plans/2026-05-04-ai-checks-skipped.md), [feature](doc/features/2026-05-04-ai-checks-skipped.md), [planning session](doc/sessions/2026-05-04-ai-checks-skipped-planning-session.md), [implementation session](doc/sessions/2026-05-04-ai-checks-skipped-implementation-session.md) +- **BREAKING** — `StepFailed`-derived exceptions now pickle as `RuntimeError(message)` rather than as their typed subclass. Pre-fix workflow rows in the DBOS `dbos.workflow_status` table will not deserialize to readable text — the Episode admin's "View workflow steps" page will show a base64 preview only. Action required: reprocess affected episodes (which produces fresh, portable pickles) or clear the workflow_status table in dev environments. No production impact since this project is pre-prod. See PR #129 for details. ### Fixed diff --git a/doc/sessions/2026-05-04-stepfailed-portable-pickle-implementation-session.md b/doc/sessions/2026-05-04-stepfailed-portable-pickle-implementation-session.md index 0068c97..060123f 100644 --- a/doc/sessions/2026-05-04-stepfailed-portable-pickle-implementation-session.md +++ b/doc/sessions/2026-05-04-stepfailed-portable-pickle-implementation-session.md @@ -10,11 +10,53 @@ Implemented Option 3 from issue #110: `StepFailed.__reduce__` now returns `(Runt ## Detailed conversation -### User +This session was orchestrated by a parent agent acting on behalf of the user via Claude Code's Agent tool with `isolation: worktree`. No direct user-to-implementation-agent messages occurred. The parent-agent's launching prompt — which serves as the de-facto user message for this session — is reproduced verbatim below. -(Task spec from the agent harness — see planning session for the full text. Key directives: branch off `main` as `rafacm/stepfailed-portable`, pick Option 3, update legacy-row admin fallback, run the full test suite, write the docs, open a PR with "Closes #110".) +### Parent agent (orchestrator) -### Assistant +``` +You are working on issue #110 in the rafacm/ragtime repo: "Replace typed StepFailed exceptions with serializable failure objects so the dbos CLI can render them". + +## Read first +- `CLAUDE.md` and `AGENTS.md` (project docs requirements, branching rules) +- The full issue and its top comment: run `gh issue view 110 --comments` +- Commit `9ff6719` which partially addressed this: `git show 9ff6719` + +## Step 0 — Branching +Per AGENTS.md, confirm we are on `main` and pull latest. Create a branch named `rafacm/stepfailed-portable` off `main`. + +## Step 1 — Staleness check +The issue is explicitly noted as "partially addressed in 9ff6719". Verify the current state: +1. Read `episodes/workflows.py` — find `StepFailed` and its subclasses, see if `__reduce__` is already there and what it returns +2. Check if any further work has landed: `git log --oneline --all -- episodes/workflows.py | head -20` + +Decide based on what you find whether further work is needed. If it's already complete, report and stop. + +## Step 2 — Implementation +Pick the smallest viable option from the issue: +- **Option 3 (recommended for minimal blast radius):** Make `__reduce__` return `(RuntimeError, (str(self),))` so the on-the-wire pickle is a plain RuntimeError that any process can deserialize, while the typed shape stays available inside the worker (raise-time). +- **Option 1:** raise `RuntimeError(json.dumps({...}))` directly — drops the typed hierarchy. + +Confirm DBOS or any Django code does NOT do `except FetchDetailsFailed` etc. (grep). If nothing catches by type, both options are safe; pick option 3 for typed-shape preservation. + +Also handle the legacy-rows graceful fallback: if Django's admin "View workflow steps" page tries to deserialize a pre-`9ff6719` pickle blob, it should not crash — instead render a graceful "could not deserialize: " message. Find where the admin renders these and add the fallback. + +## Step 3 — Verification +1. Run `uv run python manage.py test` (full suite — issue says 349+ should pass) +2. If you can stand up DBOS locally and produce a failed workflow, run `uv run dbos workflow steps --sys-db-url ...` and confirm no "could not be deserialized" warning. If impractical, document manual repro steps. + +## Step 4 — Documentation +Follow CLAUDE.md's "Documentation > Implementation phase" — feature doc, session transcripts, changelog. Date prefix `2026-05-04-`. + +## Step 5 — PR +- Push and `gh pr create --base main` (rebase strategy) +- "Closes #110" in body + +## Reporting +At the end report: (a) staleness verdict, (b) which option you picked and why, (c) what you did about legacy rows, (d) files changed, (e) PR URL, (f) tests passing count, (g) what you couldn't manually verify. Under 250 words. +``` + +### Assistant (implementation agent) **Branching.** The current worktree was on `rafacm/triage-issues`; pulled latest into the `main` worktree (already up to date), then `git checkout -b rafacm/stepfailed-portable main` from the `paris` worktree. diff --git a/doc/sessions/2026-05-04-stepfailed-portable-pickle-planning-session.md b/doc/sessions/2026-05-04-stepfailed-portable-pickle-planning-session.md index 7d0917f..a82ee21 100644 --- a/doc/sessions/2026-05-04-stepfailed-portable-pickle-planning-session.md +++ b/doc/sessions/2026-05-04-stepfailed-portable-pickle-planning-session.md @@ -12,11 +12,44 @@ Reviewed issue #110 and commit `9ff6719` (which partially addressed it). Confirm ### User -(Task spec from the agent harness:) Issue #110 — replace typed StepFailed exceptions with serializable failure objects so the dbos CLI can render them. Step 1: confirm staleness — the issue is "partially addressed in 9ff6719", so check what's left. Step 2: pick the smallest viable option (Option 3 = `__reduce__` returning RuntimeError, recommended). Step 3: verify the full test suite still passes. Step 4: documentation. Step 5: PR. +> Let's review the currently open issues and prioritize them for implementation. ### Assistant -Read `episodes/workflows.py` and confirmed the current `__reduce__` returns `(self.__class__, (self.episode_id, self.error_message))` — works in-process but the typed class is a Django-app-namespaced symbol the CLI can't resolve. Read commit `9ff6719` and the issue's top comment to confirm the partial-progress markers; the comment narrows the remaining acceptance criteria to "CLI renders the message", "in-process unpickle still works", "349+ tests passing". +Pulled the open issue list (`gh issue list --state open`), read each issue body and recent comments, and grouped them into tiers by impact and blast radius. Tier 1 covered the issues with the highest user-facing impact and the smallest implementation surface — `#110` (StepFailed CLI rendering, partially addressed in `9ff6719`), `#113` (a download-step regression), `#124` (an admin/UX fix), and `#111` (a larger refactor that needs scoping). Tier 2 and Tier 3 covered nice-to-have polish and longer-running architectural threads. Reported the tiering with one-line justifications per issue and a recommendation to start with Tier 1. + +### User + +> Can any of the issues in Tier 1 be worked in parallel sessions? + +### Assistant + +Walked through each Tier 1 issue and identified the file footprints and likely conflict zones: + +- `#110` touches `episodes/workflows.py` (StepFailed classes) and `episodes/admin.py` (`_decode_dbos_payload` fallback). No overlap with the other tickets. +- `#113` touches `episodes/downloader.py` and the download-step orchestration — independent. +- `#124` touches admin templates and the changelist — also independent. +- `#111` is the larger refactor; needs a planning conversation before any worktree work, and may collide with `#124` on the admin layer. + +Concluded that `#110`, `#113`, and `#124` could safely run in parallel worktrees with isolated branches. `#111` should stay in the main session as a design discussion. Recommended launching three parallel agent sessions for the first three and continuing the `#111` discussion synchronously. + +### User + +> Start parallel sessions for #124, #113 and #110 and let's discuss how to address #111. Be advised that the issues may be stale in the sense that a PR may have already solved part or all of the issues described in the issue. + +### Assistant + +Launched three parallel Agent-tool sessions with `isolation: worktree`, one per issue. The `#110` session received a launching prompt instructing it to: + +1. Read `CLAUDE.md`, `AGENTS.md`, and the issue + its top comment. +2. Inspect commit `9ff6719` to see what was already addressed. +3. Branch off `main` as `rafacm/stepfailed-portable`. +4. Verify staleness — check whether `__reduce__` is already in place and whether further work is needed. +5. If still incomplete, pick Option 3 (`__reduce__` returns `(RuntimeError, (str(self),))`) for minimal blast radius and typed-shape preservation; verify nothing catches `StepFailed` subclasses by type before collapsing them on the wire. +6. Add a graceful legacy-row fallback in the admin's pickle-decode path so pre-`9ff6719` blobs render an explanation instead of crashing. +7. Run the full test suite, write feature/session/changelog docs per `AGENTS.md`, and open a PR. + +Within the planning thread itself, the assistant then read `episodes/workflows.py` and confirmed the current `__reduce__` returns `(self.__class__, (self.episode_id, self.error_message))` — works in-process but the typed class is a Django-app-namespaced symbol the CLI can't resolve. Read commit `9ff6719` and the issue's top comment to confirm the partial-progress markers; the comment narrows the remaining acceptance criteria to "CLI renders the message", "in-process unpickle still works", "349+ tests passing". Surveyed catch-by-type usage: `grep "except (StepFailed|FetchDetailsFailed|DownloadStepFailed|TranscribeStepFailed|SummarizeStepFailed|ChunkStepFailed|ExtractStepFailed|ResolveStepFailed|EmbedStepFailed)"` returned zero matches. The typed hierarchy is purely for log/debug readability, not control flow — so collapsing it at pickle time is safe. diff --git a/episodes/tests/test_stepfailed_subprocess.py b/episodes/tests/test_stepfailed_subprocess.py new file mode 100644 index 0000000..e0b0fcd --- /dev/null +++ b/episodes/tests/test_stepfailed_subprocess.py @@ -0,0 +1,129 @@ +"""Cross-process pickle-portability test for ``StepFailed``. + +The in-process round-trip in ``test_admin.py`` only checks that +``type(rehydrated) is RuntimeError`` inside the same Python process +that defined ``StepFailed``. That isn't quite the property we care +about — DBOS's ``dbos workflow steps`` CLI runs *outside* Django and +can't import ``episodes.workflows``. This test exercises that +genuinely-cross-process property: it spawns a fresh Python +interpreter in a working directory that does not contain the +project, with an empty ``PYTHONPATH``, and asserts the pickled +``StepFailed`` deserializes to a plain ``RuntimeError`` carrying the +original message. + +The test relies on the fact that this project is *not* installed as +an editable wheel into the venv — ``episodes`` is only importable +when ``cwd`` is the project root. A sanity-check subprocess at the +end confirms this by trying ``import episodes.workflows`` in the +same isolated environment and asserting it fails with +``ModuleNotFoundError``. If that sanity check passes, the +cross-process property above is genuinely tested rather than +accidentally satisfied by ``episodes`` being on ``sys.path``. +""" + +from __future__ import annotations + +import base64 +import pickle +import subprocess +import sys + +from django.test import TestCase + +from episodes.workflows import DownloadStepFailed, StepFailed + + +class StepFailedSubprocessPickleTests(TestCase): + """Verify ``StepFailed`` pickles round-trip across process boundaries.""" + + EPISODE_ID = 11 + ERROR_MESSAGE = "subprocess-portability check" + + def _isolated_subprocess(self, script: str) -> subprocess.CompletedProcess: + """Run *script* in a fresh interpreter without project access. + + ``cwd="/tmp"`` ensures a relative ``import episodes`` cannot + find the project package. ``PYTHONPATH=""`` and + ``PYTHONNOUSERSITE=1`` shut the user-site escape hatch. + ``sys.executable`` from the test process is reused so we keep + the same Python version, but since the project isn't + installed into the venv as a wheel, ``episodes`` is not on + the resulting ``sys.path``. + """ + return subprocess.run( + [sys.executable, "-c", script], + cwd="/tmp", + env={"PYTHONPATH": "", "PYTHONNOUSERSITE": "1"}, + capture_output=True, + text=True, + timeout=30, + ) + + def test_pickled_step_failed_deserializes_to_runtime_error_in_subprocess(self): + """Pickle bytes rehydrate to ``RuntimeError`` in a project-less interpreter. + + This is the property the DBOS CLI / Conductor relies on to + render step errors as readable text without importing + ``episodes.workflows``. + """ + err = DownloadStepFailed( + episode_id=self.EPISODE_ID, error_message=self.ERROR_MESSAGE + ) + # Pre-pickle assertion: in-process raise still matches the + # typed hierarchy. + self.assertIsInstance(err, StepFailed) + + pickle_bytes = pickle.dumps(err) + encoded = base64.b64encode(pickle_bytes).decode("ascii") + + script = ( + "import base64, pickle, sys\n" + f"data = base64.b64decode({encoded!r})\n" + "obj = pickle.loads(data)\n" + "assert type(obj).__name__ == 'RuntimeError', type(obj)\n" + "assert type(obj).__module__ == 'builtins', type(obj).__module__\n" + "sys.stdout.write(type(obj).__name__ + '\\n')\n" + "sys.stdout.write(str(obj) + '\\n')\n" + ) + result = self._isolated_subprocess(script) + + self.assertEqual( + result.returncode, + 0, + msg=f"subprocess failed: stdout={result.stdout!r} stderr={result.stderr!r}", + ) + self.assertIn("RuntimeError", result.stdout) + # Original message survives the round trip. + self.assertIn(f"downloading failed for episode {self.EPISODE_ID}", result.stdout) + self.assertIn(self.ERROR_MESSAGE, result.stdout) + + def test_subprocess_environment_really_isolates_episodes_module(self): + """Sanity check: the test environment really has no ``episodes`` package. + + If this passes, the cross-process property tested above is + genuinely exercised — the subprocess deserializing the + pickle truly cannot reach into ``episodes.workflows`` to + rehydrate a typed subclass. + """ + script = ( + "try:\n" + " import episodes.workflows\n" + "except ModuleNotFoundError as exc:\n" + " import sys\n" + " sys.stdout.write('isolated:' + exc.name + '\\n')\n" + " sys.exit(0)\n" + "import sys\n" + "sys.stdout.write('LEAKED\\n')\n" + "sys.exit(1)\n" + ) + result = self._isolated_subprocess(script) + + self.assertEqual( + result.returncode, + 0, + msg=( + "subprocess could import episodes.workflows — environment is " + f"not isolated. stdout={result.stdout!r} stderr={result.stderr!r}" + ), + ) + self.assertIn("isolated:episodes", result.stdout)