Mathews-Tom · Mathews-Tom · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,23 @@ All notable changes to Augur are recorded in this file. Format follows [Keep a C
 
 ## [Unreleased]
 
+### Added — Gated LLM Secondary Formatter
+
+- `src/augur_format/llm/` package — the only location in the codebase where LLM SDK imports live, complementing the CI grep guard over `src/augur_signals/`.
+- Expanded `IntelligenceBrief` contract with `headline` (≤ 90 chars), `body_markdown` (≤ 800 chars), `formatter_version`, `generated_at`, and model validators locking `interpretation_mode="llm_assisted"` and `forbidden_token_check="passed"`. Schema re-exported at `schemas/IntelligenceBrief-1.0.0.json`.
+- `AbstractLLMBackend` protocol with two concrete adapters: `OllamaBackend` (plain httpx against the local daemon) and `AnthropicBackend` (lazy-imported anthropic SDK). Both accept retry budgets and raise `BackendError` on exhaustion.
+- Deterministic `PromptBuilder` producing `(system, user)` prompt pairs with the sorted forbidden-phrase list, `IntelligenceBrief` schema summary, `ConsumerType` enum, and per-signal-type templates. Templates live under `augur_format/llm/prompts/templates/` and ship in the wheel.
+- `ForbiddenTokenLinter` with `load_forbidden_phrases` that flattens every `[category].phrases` block in `config/forbidden_tokens.toml`. Matching is case-insensitive; a matched phrase drops the brief before `IntelligenceBrief` construction.
+- `SchemaValidator` wrapping Pydantic `IntelligenceBrief.model_validate` and returning a stable `ValidationResult`.
+- `ProvenanceStamp` carrying model-backend pair, SHA-256 prompt hash, and installed `formatter_version`. Auditors reproduce the hash from the deterministic prompt output.
+- `ConsumerGate` enforcing `accepts_llm_assisted` opt-in per `docs/contracts/consumer-registry.md`.
+- `LLMInterpreter` orchestrator composing backend + prompt + linter + schema validator + stamp. `set_suspended` wires into the Phase-1 `StormController` so briefs stop generating under storm-mode pressure.
+- `config/llm.toml` with `[interpreter] enabled=false` default, Ollama and Anthropic backend blocks, and the prompt template directory path.
+
+### Operational Handoff — LLM Formatter
+
+After merge, an operator who edits `config/llm.toml` to set `enabled = true`, installs the chosen backend (`augur-format[llm-local]` for Ollama, `augur-format[llm-cloud]` for Anthropic), and provisions any required credentials (`ANTHROPIC_API_KEY`) receives LLM-rendered briefs alongside the deterministic JSON and Markdown — but only for consumers whose `accepts_llm_assisted = true`. The deterministic pipeline runs regardless of LLM state.
+
 ### Added — Deterministic Formatters
 
 - `src/augur_format/deterministic/json_feed.py` — `to_canonical_json` emits UTF-8 JSON bytes with stable key ordering (top-level, signal block, related-market block), six-decimal float rounding (configurable), and Z-suffix UTC timestamps. Byte-identical across invocations.

diff --git a/config/llm.toml b/config/llm.toml
@@ -0,0 +1,30 @@
+# Gated LLM secondary formatter configuration.
+#
+# Default off per phase-4 §17.1. An operator enables the interpreter
+# by setting [interpreter] enabled = true after reviewing the
+# reputation-risk example in docs/examples/negative-paths.md §Example 4.
+#
+# Model identifiers live in this file per ~/.claude/CLAUDE.md §Models
+# & Configuration — never in source code.
+
+[interpreter]
+enabled = false
+default_backend = "ollama"
+max_tokens = 512
+temperature = 0.2
+suspend_during_storm = true
+
+[backends.ollama]
+endpoint = "http://localhost:11434"
+model = "gemma2:27b"
+timeout_seconds = 30
+max_retries = 2
+
+[backends.anthropic]
+model = "claude-haiku-4-5-20251001"
+timeout_seconds = 20
+max_retries = 3
+api_key_env = "ANTHROPIC_API_KEY"
+
+[prompts]
+template_dir = "src/augur_format/augur_format/llm/prompts/templates"
diff --git a/schemas/IntelligenceBrief-1.0.0.json b/schemas/IntelligenceBrief-1.0.0.json
@@ -15,7 +15,7 @@
     }
   },
   "additionalProperties": false,
-  "description": "Gated LLM formatter output contract.\n\n``actionable_for`` is constrained to the ConsumerType registry in\ndocs/contracts/consumer-registry.md via the Pydantic field type;\nthe closed-enum validator rechecks this at the formatter boundary\nso even dynamically-constructed instances fail loud on unknown\nvalues.",
+  "description": "Gated LLM formatter output contract.\n\nStructural invariants are enforced by Pydantic at construction:\nthe headline is capped at 90 characters so it fits a Slack header,\nbody_markdown is capped at 800 characters so it stays readable on\na dashboard card, ``actionable_for`` is typed as list[ConsumerType]\nso unknown consumers fail immediately, and ``interpretation_mode``\nplus ``forbidden_token_check`` are Literal singletons \u2014 any\nconstruction path that bypasses the linter or the deterministic-\nmode check would have to forge the literal, which is caught in\ncode review.",
   "properties": {
     "actionable_for": {
       "items": {
@@ -25,6 +25,7 @@
       "type": "array"
     },
     "body_markdown": {
+      "maxLength": 800,
       "title": "Body Markdown",
       "type": "string"
     },
@@ -38,7 +39,17 @@
       "title": "Forbidden Token Check",
       "type": "string"
     },
+    "formatter_version": {
+      "title": "Formatter Version",
+      "type": "string"
+    },
+    "generated_at": {
+      "format": "date-time",
+      "title": "Generated At",
+      "type": "string"
+    },
     "headline": {
+      "maxLength": 90,
       "title": "Headline",
       "type": "string"
     },
@@ -83,7 +94,9 @@
     "body_markdown",
     "severity",
     "model",
-    "prompt_hash"
+    "prompt_hash",
+    "formatter_version",
+    "generated_at"
   ],
   "title": "IntelligenceBrief",
   "type": "object"

diff --git a/src/augur_format/augur_format/llm/backends/__init__.py b/src/augur_format/augur_format/llm/backends/__init__.py
@@ -0,0 +1,3 @@
+"""LLM backend abstraction and concrete adapters."""
+
+from __future__ import annotations
diff --git a/src/augur_format/augur_format/llm/backends/anthropic.py b/src/augur_format/augur_format/llm/backends/anthropic.py
@@ -0,0 +1,114 @@
+"""Anthropic backend adapter.
+
+Imports the anthropic SDK lazily via ``importlib.import_module`` so
+that the llm-isolation test continues to assert anthropic is NOT
+importable in the default environment. Operators install anthropic
+via the ``augur-format[llm-cloud]`` extra before enabling the
+backend.
+"""
+
+from __future__ import annotations
+
+import importlib
+import os
+import time
+from typing import Any
+
+from augur_format.llm.backends.base import (
+    AbstractLLMBackend,
+    BackendError,
+    CompletionResult,
+)
+
+
+class AnthropicBackend(AbstractLLMBackend):
+    """AbstractLLMBackend implementation routed through the anthropic SDK."""
+
+    backend_id: str = "anthropic"
+
+    def __init__(
+        self,
+        model: str = "claude-haiku-4-5-20251001",
+        api_key_env: str = "ANTHROPIC_API_KEY",
+        timeout_seconds: float = 20.0,
+        max_retries: int = 3,
+        client: Any | None = None,
+    ) -> None:
+        key = os.environ.get(api_key_env)
+        if key is None and client is None:
+            raise BackendError(
+                f"AnthropicBackend requires {api_key_env} environment variable "
+                "or an injected client"
+            )
+        self._model = model
+        self._timeout = timeout_seconds
+        self._max_retries = max(1, max_retries)
+        if client is None:
+            # Lazy import so the module is safely loadable when the
+            # anthropic extra is not installed; the adapter itself
+            # only runs when the operator opts in.
+            anthropic = importlib.import_module("anthropic")
+            client = anthropic.AsyncAnthropic(api_key=key)
+        self._client = client
+
+    def model_id(self) -> str:
+        return self._model
+
+    async def health_check(self) -> bool:
+        # The SDK does not expose a cheap ping; surface True when the
+        # client constructs successfully and let the first real
+        # completion surface any runtime errors.
+        return self._client is not None
+
+    async def complete(
+        self,
+        system: str,
+        prompt: str,
+        max_tokens: int,
+        temperature: float,
+    ) -> CompletionResult:
+        last_error: BaseException | None = None
+        for _ in range(self._max_retries):
+            started = time.perf_counter()
+            try:
+                response = await self._client.messages.create(
+                    model=self._model,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                    system=system,
+                    messages=[{"role": "user", "content": prompt}],
+                    timeout=self._timeout,
+                )
+            except Exception as err:
+                # Narrow retry to transient failures. Authentication
+                # and permission errors raise immediately so auth
+                # failures do not burn the retry budget. The class
+                # lookup is string-based so the module stays loadable
+                # without the anthropic SDK installed.
+                class_path = f"{type(err).__module__}.{type(err).__name__}"
+                terminal = {
+                    "anthropic.AuthenticationError",
+                    "anthropic.PermissionDeniedError",
+                    "anthropic.BadRequestError",
+                }
+                if class_path in terminal:
+                    raise BackendError(f"anthropic terminal error: {err!r}") from err
+                last_error = err
+                continue
+            duration_ms = int((time.perf_counter() - started) * 1000)
+            content_blocks = getattr(response, "content", [])
+            text_parts = [
+                getattr(block, "text", "")
+                for block in content_blocks
+                if getattr(block, "type", "") == "text"
+            ]
+            usage = getattr(response, "usage", None)
+            return CompletionResult(
+                text="".join(text_parts),
+                input_tokens=int(getattr(usage, "input_tokens", 0)) if usage else 0,
+                output_tokens=int(getattr(usage, "output_tokens", 0)) if usage else 0,
+                duration_ms=duration_ms,
+            )
+        raise BackendError(
+            f"anthropic completion failed after {self._max_retries} attempts: {last_error!r}"
+        )
diff --git a/src/augur_format/augur_format/llm/backends/base.py b/src/augur_format/augur_format/llm/backends/base.py
@@ -0,0 +1,53 @@
+"""AbstractLLMBackend protocol and completion result model.
+
+Concrete adapters (Ollama, Anthropic) implement the same async
+``complete`` surface so the interpreter dispatches uniformly. The
+completion result exposes only the fields downstream actually needs:
+the raw text, token counts for observability, and the duration in
+milliseconds for the generation-latency SLO.
+"""
+
+from __future__ import annotations
+
+from typing import Protocol
+
+from pydantic import BaseModel, ConfigDict
+
+
+class CompletionResult(BaseModel):
+    """One backend completion's payload plus timing.
+
+    Instances are immutable and reject unknown fields (see
+    ``model_config``). Only ``text`` is required; the numeric fields
+    default to 0.
+    """
+
+    # frozen=True makes instances read-only after construction;
+    # extra="forbid" turns unexpected fields into validation errors.
+    model_config = ConfigDict(frozen=True, extra="forbid")
+
+    # Raw completion text produced by the backend.
+    text: str
+    # Prompt-side token count, kept for observability.
+    input_tokens: int = 0
+    # Generated-token count, kept for observability.
+    output_tokens: int = 0
+    # Duration of the completion call in milliseconds.
+    duration_ms: int = 0
+
+
+class BackendError(RuntimeError):
+    """Raised when a backend cannot produce a completion.
+
+    Backend adapters raise this single exception type so callers need
+    only one ``except`` clause regardless of which adapter is active.
+    """
+
+
+class AbstractLLMBackend(Protocol):
+    """Uniform surface every LLM backend adapter implements.
+
+    Declared as a ``typing.Protocol``: the method bodies below are
+    placeholders (``...``) and each adapter supplies the behavior.
+    """
+
+    # Short identifier of the adapter (string-typed).
+    backend_id: str
+
+    async def complete(
+        self,
+        system: str,
+        prompt: str,
+        max_tokens: int,
+        temperature: float,
+    ) -> CompletionResult:
+        """Return the model's completion for (*system*, *prompt*).
+
+        Args:
+            system: System prompt.
+            prompt: User prompt.
+            max_tokens: Upper bound on generated tokens.
+            temperature: Sampling temperature.
+
+        Returns:
+            A ``CompletionResult`` carrying the text, token counts, and
+            duration in milliseconds.
+        """
+        ...
+
+    def model_id(self) -> str:
+        """Return the active model identifier (e.g. ``gemma2:27b``)."""
+        ...
+
+    async def health_check(self) -> bool:
+        """Verify the backend is reachable and serving the configured model."""
+        ...
diff --git a/src/augur_format/augur_format/llm/backends/ollama.py b/src/augur_format/augur_format/llm/backends/ollama.py
@@ -0,0 +1,101 @@
+"""Ollama backend adapter.
+
+Uses plain httpx against the local Ollama daemon (default
+``http://localhost:11434``) so the adapter has no hard dependency on
+the ``ollama`` Python client. By default the adapter makes at most two
+attempts (``max_retries`` counts total attempts), retrying on request
+exceptions and 5xx responses; local daemon outages should surface
+quickly, not retry for a minute.
+"""
+
+from __future__ import annotations
+
+import time
+from typing import Any
+
+import httpx
+
+from augur_format.llm.backends.base import (
+    AbstractLLMBackend,
+    BackendError,
+    CompletionResult,
+)
+
+
+class OllamaBackend(AbstractLLMBackend):
+    """AbstractLLMBackend implementation routed through the local daemon."""
+
+    backend_id: str = "ollama"
+
+    def __init__(
+        self,
+        client: httpx.AsyncClient,
+        endpoint: str = "http://localhost:11434",
+        model: str = "gemma2:27b",
+        timeout_seconds: float = 30.0,
+        max_retries: int = 2,
+    ) -> None:
+        self._client = client
+        self._endpoint = endpoint.rstrip("/")
+        self._model = model
+        self._timeout = timeout_seconds
+        self._max_retries = max(1, max_retries)
+
+    def model_id(self) -> str:
+        return self._model
+
+    async def health_check(self) -> bool:
+        try:
+            response = await self._client.get(f"{self._endpoint}/api/tags", timeout=self._timeout)
+        except Exception:
+            return False
+        return response.status_code == 200
+
+    async def complete(
+        self,
+        system: str,
+        prompt: str,
+        max_tokens: int,
+        temperature: float,
+    ) -> CompletionResult:
+        payload: dict[str, Any] = {
+            "model": self._model,
+            "system": system,
+            "prompt": prompt,
+            "stream": False,
+            "options": {
+                "num_predict": max_tokens,
+                "temperature": temperature,
+            },
+        }
+        last_error: BaseException | None = None
+        for _ in range(self._max_retries):
+            started = time.perf_counter()
+            try:
+                response = await self._client.post(
+                    f"{self._endpoint}/api/generate",
+                    json=payload,
+                    timeout=self._timeout,
+                )
+            except Exception as err:
+                last_error = err
+                continue
+            if response.status_code != 200:
+                status_error = BackendError(f"ollama returned status {response.status_code}")
+                # 4xx indicates a malformed request from the adapter;
+                # retrying will not recover. Surface the error
+                # immediately so callers see the root cause.
+                if 400 <= response.status_code < 500:
+                    raise status_error
+                last_error = status_error
+                continue
+            data: dict[str, Any] = response.json()
+            duration_ms = int((time.perf_counter() - started) * 1000)
+            return CompletionResult(
+                text=str(data.get("response", "")),
+                input_tokens=int(data.get("prompt_eval_count", 0)),
+                output_tokens=int(data.get("eval_count", 0)),
+                duration_ms=duration_ms,
+            )
+        raise BackendError(
+            f"ollama completion failed after {self._max_retries} attempts: {last_error!r}"
+        )
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		"""LLM backend abstraction and concrete adapters."""

		from __future__ import annotations