From f10383d7bd80ec6d7fd9df225d5ff6db9c6eb4d4 Mon Sep 17 00:00:00 2001 From: Brian McMahon Date: Wed, 20 May 2026 15:52:20 -0700 Subject: [PATCH] fix(archive): persist consolidated_report to S3 so dashboard archive doesn't stale MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit archive_writer was building the consolidated morning brief into `consolidated_report` state, passing it to email_sender for the weekly email, and then dropping it on the floor — ArchiveManager.save_consolidated_report() existed but had no caller for ~2 months. Last `consolidated/{date}/morning.md` write was 2026-03-16; from then through 2026-05-20 the dashboard's Research Briefing Archive page correctly showed "Latest 2026-03-16" because that was genuinely the last persisted artifact in S3. The wiring loss almost certainly happened in the mid-March LangGraph refactor — the persistence call lived in the legacy pre-graph path and didn't get carried into the new archive_writer node. Fix: a guarded save_consolidated_report() call in archive_writer right next to write_signals_json (13 added lines including the explanatory comment), same try/except shape so a save failure doesn't block the rest of the archive pipeline. Regression tests: - save_consolidated_report writes consolidated/{date}/morning.md to S3 with the brief body. - archive_writer's source contains `save_consolidated_report` — structural pin so removing the call again fails CI instead of silently staling the archive surface for another two months. Dashboard reads from `consolidated/` so no consumer change needed; the next Saturday SF research run will repopulate from 2026-05-23 onward. Historical 2026-03-16 → 2026-05-17 gap can't be backfilled (the briefs were emailed, not retained anywhere else). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- graph/research_graph.py | 13 +++++++++++ tests/test_archive.py | 48 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/graph/research_graph.py b/graph/research_graph.py index 6297064c..e4b5db68 100644 --- a/graph/research_graph.py +++ b/graph/research_graph.py @@ -2687,6 +2687,19 @@ def archive_writer(state: ResearchState) -> dict: except Exception as e: logger.error("Failed to write signals.json: %s", e) + # Persist the consolidated morning brief alongside signals.json so + # the dashboard's Research Briefing Archive page can read it. The + # brief is the same body that goes out in the morning email + # (`email_sender` node downstream) — emailing it without persisting + # it leaves no audit trail and the archive page stales out, which + # is what happened from 2026-03-16 through 2026-05-20. + consolidated = state.get("consolidated_report", "") or "" + if consolidated: + try: + am.save_consolidated_report(run_date, consolidated) + except Exception as e: + logger.error("Failed to save consolidated_report: %s", e) + # Extract semantic memories from this run (Phase 3) try: from memory.semantic import extract_semantic_memories diff --git a/tests/test_archive.py b/tests/test_archive.py index 10a1ef4c..58652ec2 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -428,3 +428,51 @@ def test_dated_and_latest_have_identical_macro(self, archive_in_memory): if call.kwargs.get("Key", "").startswith("population/") } assert bodies["population/latest.json"] == bodies["population/2026-05-11.json"] + + +# ── Consolidated morning brief persistence ────────────────────────────── +# +# Regression coverage for the 2026-05-20 finding: archive_writer was +# building the consolidated_report state field, emailing it via +# email_sender, and then dropping it on the floor — save_consolidated_report +# existed but had no caller for ~2 months (last morning.md write +# 2026-03-16). 
The dashboard's Research Briefing Archive page was +# correctly reading what was in S3, which was nothing. + + +class TestConsolidatedReportPersistence: + def test_save_consolidated_report_writes_morning_md_to_s3( + self, archive_in_memory + ): + archive_in_memory.save_consolidated_report( + "2026-05-20", "# Weekly research brief\n\nTop picks: ..." + ) + calls = archive_in_memory.s3.put_object.call_args_list + morning_calls = [ + c for c in calls + if c.kwargs.get("Key", "").endswith("/morning.md") + ] + assert len(morning_calls) == 1 + c = morning_calls[0] + assert c.kwargs["Key"] == "consolidated/2026-05-20/morning.md" + body = c.kwargs["Body"] + if isinstance(body, bytes): + body = body.decode("utf-8") + assert "Weekly research brief" in body + + def test_archive_writer_wires_save_consolidated_report(self): + # Structural regression: pin that archive_writer's source calls + # save_consolidated_report. If the call is removed again, this + # test fails at CI time instead of staling the archive page + # silently for two months. + import inspect + rg = pytest.importorskip( + "graph.research_graph", + reason="graph.research_graph requires gitignored config", + ) + src = inspect.getsource(rg.archive_writer) + assert "save_consolidated_report" in src, ( + "archive_writer must persist consolidated_report — without " + "this call the dashboard's Research Briefing Archive stales " + "out (regression of 2026-03-16 silent drop, fixed 2026-05-20)" + )