cipher813 · cipher813 · May 20, 2026 · May 20, 2026 · May 20, 2026 · May 20, 2026
diff --git a/executor/eod_reconcile.py b/executor/eod_reconcile.py
@@ -18,6 +18,7 @@
 import boto3
 import pandas as pd
 import yaml
+from pydantic import BaseModel, Field, ValidationError
 sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
 
 from executor.eod_emailer import send_eod_email
@@ -257,8 +258,54 @@ def _build_position_contexts(
     return contexts, data_warnings
 
 
+class _Narrative(BaseModel):  # element type of _RationalesResponse.narratives; Field descriptions feed the tool schema
+    """One per-position rationale."""
+
+    ticker: str = Field(..., description="Position ticker symbol (e.g. AAPL).")
+    narrative: str = Field(
+        ...,
+        description=(
+            "2-3 sentences explaining why this position is held today, citing the "
+            "research thesis, technical signals, and GBM predictions where relevant. "
+            "If a trade was made today, the narrative also explains why."
+        ),
+    )
+
+
+class _RationalesResponse(BaseModel):
+    """Per-position rationales for the EOD report: one entry per input position."""
+    # Maintainer notes are kept in comments because Pydantic copies the class
+    # docstring into the JSON schema "description" that is sent with the tool.
+    # Pydantic validation of tool_use.input is the enforcement point for this
+    # shape. Replaces the legacy prompt-for-JSON + json.loads pattern that
+    # L1248 / L2669 documented as recurrence-prone (fences, preamble, trailing text).
+
+    narratives: list[_Narrative] = Field(
+        ...,
+        description="One narrative per position in the input list.",
+    )
+
+
+_RATIONALES_TOOL = {  # tool definition passed as tools=[...] to client.messages.create
+    "name": "emit_rationales",
+    "description": (
+        "Emit per-position rationales for the EOD report. Call this tool exactly "
+        "once with the full list — one narrative per input position."
+    ),
+    "input_schema": _RationalesResponse.model_json_schema(),  # JSON Schema generated from the Pydantic model
+}
+
+
 def _synthesize_rationales(contexts: list[dict]) -> dict[str, str]:
-    """Call Haiku to synthesize per-position narratives. Falls back to templates."""
+    """Call Haiku via Anthropic tool-use + Pydantic validation to synthesize
+    per-position narratives. Falls back to templates on any failure.
+
+    L1248 / L2669: previous implementation read Haiku's freeform text and
+    tried to ``json.loads`` it — recurrence-prone (markdown fences /
+    preamble / trailing text). Tool-use removes that text-parsing failure
+    mode: Haiku returns a ``tool_use`` block whose ``input`` is already a
+    dict. Its shape is not guaranteed, so Pydantic validates it below.
+    """
     if not contexts:
         return {}
 
@@ -272,17 +319,38 @@ def _synthesize_rationales(contexts: list[dict]) -> dict[str, str]:
             "For each position below, write 2-3 sentences explaining why it is held, "
             "focusing on near-term catalysts (research thesis, technical signals, GBM predictions). "
             "If a trade was made today, explain why. Be specific about numbers.\n\n"
-            "Return valid JSON only: {\"narratives\": [{\"ticker\": \"XXX\", \"narrative\": \"...\"}]}\n\n"
+            "Call the emit_rationales tool exactly once with one narrative per position.\n\n"
             f"Positions:\n{json.dumps(contexts, indent=2, default=str)}"
         )
 
         response = client.messages.create(
             model="claude-haiku-4-5-20251001",
             max_tokens=2000,
+            tools=[_RATIONALES_TOOL],
+            tool_choice={"type": "tool", "name": "emit_rationales"},
             messages=[{"role": "user", "content": prompt}],
         )
-        result = json.loads(response.content[0].text)
-        return {n["ticker"]: n["narrative"] for n in result.get("narratives", [])}
+        # tool_choice={"type": "tool", "name": ...} forces Haiku to emit a
+        # tool_use block — but Anthropic still allows additional text blocks
+        # alongside it. Pick the tool_use block explicitly.
+        tool_use = next(
+            (b for b in response.content if getattr(b, "type", None) == "tool_use"),
+            None,
+        )
+        if tool_use is None:
+            raise RuntimeError(
+                "Haiku response missing the forced emit_rationales tool_use block — "
+                f"stop_reason={response.stop_reason!r}"
+            )
+        try:
+            parsed = _RationalesResponse.model_validate(tool_use.input)
+        except ValidationError as e:
+            logger.warning(
+                f"LLM rationale tool_use failed Pydantic validation: {e} — "
+                f"input={tool_use.input!r}"
+            )
+            raise
+        return {n.ticker: n.narrative for n in parsed.narratives}
     except Exception as e:
         logger.warning(f"LLM rationale synthesis failed: {e} — using template fallback")
 

diff --git a/tests/test_eod_reconcile_logic.py b/tests/test_eod_reconcile_logic.py
@@ -10,6 +10,8 @@
     _apply_dividend_delta,
     _compute_unattributed_residual_pct,
     _load_constituents_sector_map,
+    _Narrative,
+    _RationalesResponse,
     _resolve_prior_price,
     _synthesize_rationales,
 )
@@ -193,6 +195,137 @@ def test_multiple_tickers(self):
         assert "MSFT" in result
 
 
+class TestRationalesResponsePydantic:
+    """L1248/L2669: Pydantic model that validates the tool-use payload
+    returned by Haiku. Tool-use removes the text-parsing failure mode that
+    bare json.loads used to hit (markdown fences, preamble, trailing
+    text); these tests pin the model's own contract — required fields
+    and field types — which model_validate enforces on tool_use.input."""
+
+    def test_valid_payload(self):
+        payload = {"narratives": [{"ticker": "AAPL", "narrative": "x" * 50}]}
+        parsed = _RationalesResponse.model_validate(payload)
+        assert len(parsed.narratives) == 1
+        assert parsed.narratives[0].ticker == "AAPL"
+
+    def test_empty_narratives_list_valid(self):
+        # The model permits an empty list — Haiku may emit zero narratives if
+        # the contexts list was empty (the caller short-circuits this earlier,
+        # but the contract should still accept it).
+        parsed = _RationalesResponse.model_validate({"narratives": []})
+        assert parsed.narratives == []
+
+    def test_missing_narratives_field_raises(self):
+        from pydantic import ValidationError as PydValidationError
+        with pytest.raises(PydValidationError):
+            _RationalesResponse.model_validate({})
+
+    def test_narrative_missing_ticker_raises(self):
+        from pydantic import ValidationError as PydValidationError
+        with pytest.raises(PydValidationError):
+            _RationalesResponse.model_validate({"narratives": [{"narrative": "no ticker"}]})
+
+    def test_narrative_missing_narrative_raises(self):
+        from pydantic import ValidationError as PydValidationError
+        with pytest.raises(PydValidationError):
+            _RationalesResponse.model_validate({"narratives": [{"ticker": "AAPL"}]})
+
+    def test_narrative_wrong_type_raises(self):
+        from pydantic import ValidationError as PydValidationError
+        with pytest.raises(PydValidationError):
+            _RationalesResponse.model_validate({"narratives": [{"ticker": 123, "narrative": "x"}]})
+
+
+class TestSynthesizeRationalesToolUse:
+    """End-to-end coverage of the Anthropic tool-use path. The Anthropic
+    client is fully mocked so no real API call happens; the assertions
+    pin (a) the tool/tool_choice wiring (b) Pydantic-validated input
+    flows through to the returned dict (c) malformed / missing tool_use
+    blocks fall back to the template path."""
+
+    def _make_mock_anthropic(self, tool_use_input: dict | None, *, stop_reason: str = "tool_use", include_text_block: bool = False):
+        """Build a MagicMock anthropic module + client + response chain.
+        ``tool_use_input=None`` simulates a response with no tool_use block
+        (degenerate-mode probe). Otherwise the mocked tool_use block carries
+        ``input=tool_use_input``."""
+        mock_anthropic = MagicMock()
+        mock_client = MagicMock()
+        mock_anthropic.Anthropic.return_value = mock_client
+
+        blocks = []
+        if include_text_block:
+            text_block = MagicMock()
+            text_block.type = "text"
+            text_block.text = "Sure, here are the rationales:"
+            blocks.append(text_block)
+        if tool_use_input is not None:
+            tool_block = MagicMock()
+            tool_block.type = "tool_use"
+            tool_block.input = tool_use_input
+            blocks.append(tool_block)
+
+        mock_response = MagicMock()
+        mock_response.content = blocks
+        mock_response.stop_reason = stop_reason
+        mock_client.messages.create.return_value = mock_response
+        return mock_anthropic, mock_client
+
+    def test_tool_use_happy_path(self):
+        mock_anthropic, mock_client = self._make_mock_anthropic(
+            {"narratives": [
+                {"ticker": "AAPL", "narrative": "Held — research score 82, GBM UP."},
+                {"ticker": "MSFT", "narrative": "Reduced 5 shares today on profit-take."},
+            ]}
+        )
+        contexts = [{"ticker": "AAPL"}, {"ticker": "MSFT"}]
+        with patch.dict("sys.modules", {"anthropic": mock_anthropic}):
+            result = _synthesize_rationales(contexts)
+        assert result == {
+            "AAPL": "Held — research score 82, GBM UP.",
+            "MSFT": "Reduced 5 shares today on profit-take.",
+        }
+        # Verify the SDK was invoked with the forced tool_choice wiring.
+        call_kwargs = mock_client.messages.create.call_args.kwargs
+        assert call_kwargs["tool_choice"] == {"type": "tool", "name": "emit_rationales"}
+        assert call_kwargs["tools"][0]["name"] == "emit_rationales"
+
+    def test_tool_use_with_preceding_text_block(self):
+        # Anthropic permits a text block before the tool_use block — the
+        # synthesizer must pick the tool_use block, not the first content block.
+        mock_anthropic, _ = self._make_mock_anthropic(
+            {"narratives": [{"ticker": "GOOG", "narrative": "y" * 40}]},
+            include_text_block=True,
+        )
+        with patch.dict("sys.modules", {"anthropic": mock_anthropic}):
+            result = _synthesize_rationales([{"ticker": "GOOG"}])
+        assert result == {"GOOG": "y" * 40}
+
+    def test_missing_tool_use_falls_back_to_template(self):
+        # Haiku stopped without emitting the forced tool — template fallback.
+        mock_anthropic, _ = self._make_mock_anthropic(None, stop_reason="end_turn", include_text_block=True)
+        with patch.dict("sys.modules", {"anthropic": mock_anthropic}):
+            result = _synthesize_rationales([{"ticker": "AAPL", "research_score": 82.0, "conviction": "rising"}])
+        # Template fallback populates from the context — research_score should
+        # be in the rendered text.
+        assert "AAPL" in result
+        assert "82" in result["AAPL"]
+        assert "rising" in result["AAPL"]
+
+    def test_malformed_tool_input_falls_back_to_template(self):
+        # tool_use block present but input doesn't match the Pydantic schema.
+        mock_anthropic, _ = self._make_mock_anthropic({"narratives": [{"ticker": "AAPL"}]})  # missing 'narrative'
+        with patch.dict("sys.modules", {"anthropic": mock_anthropic}):
+            result = _synthesize_rationales([{"ticker": "AAPL", "research_score": 90.0}])
+        # Template fallback fires; AAPL is still rendered from the context.
+        assert "AAPL" in result
+        assert "90" in result["AAPL"]
+
+    def test_empty_contexts_short_circuits(self):
+        # Empty input returns {} before the SDK is reached, so anthropic is
+        # left unmocked here. Only the return value is asserted.
+        assert _synthesize_rationales([]) == {}
+
+
 class TestLoadConstituentsSectorMap:
     """Sector enrichment fallback reads latest weekly constituents.json."""