From 8d64b2e22e1b248c277770bf50708b7dd057cbf7 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:01:51 +0530 Subject: [PATCH 01/16] feat(models): add pydantic contracts and closed enums for signal extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Land the binding data contracts between every layer of the extraction pipeline — MarketSnapshot, FeatureVector, MarketSignal, SignalContext, RelatedMarketState — plus the closed enums SignalType, ManipulationFlag, ConsumerType, and InterpretationMode. Field sets mirror docs/contracts/schema-and-versioning.md verbatim; all models are frozen and reject unknown fields so a producer cannot silently add a member that a consumer does not recognize. MarketSignal carries a model_validator that rejects any instance whose raw_features lacks a non-empty calibration_provenance string. This is the project-wide invariant from the development plan (§7.2): no uncalibrated signal escapes the producer. Tests cover both the missing-key and empty-string cases. uuid7-based signal identifiers preserve time ordering, which lets the bus and storage layers sort by identifier and still recover temporal order — a prerequisite for byte-identical backtest replay. scripts/export_schemas.py registers the four shipped models and emits their JSON schemas to schemas/*.json with deterministic key ordering. The --check mode is now load-bearing: a modification to any model shape fails CI until the export is regenerated. --- pyproject.toml | 4 + schemas/FeatureVector-1.0.0.json | 101 +++++++ schemas/MarketSignal-1.0.0.json | 145 ++++++++++ schemas/MarketSnapshot-1.0.0.json | 139 +++++++++ schemas/SignalContext-1.0.0.json | 266 ++++++++++++++++++ scripts/export_schemas.py | 19 +- .../augur_signals/models/__init__.py | 32 ++- .../augur_signals/models/_identifiers.py | 16 ++ .../augur_signals/models/context.py | 47 ++++ .../augur_signals/models/enums.py | 48 ++++ .../augur_signals/models/features.py | 36 +++ .../augur_signals/models/signal.py | 50 ++++ .../augur_signals/models/snapshot.py | 36 +++ tests/signals/test_models.py | 219 ++++++++++++++ 14 files changed, 1153 insertions(+), 5 deletions(-) create mode 100644 schemas/FeatureVector-1.0.0.json create mode 100644 schemas/MarketSignal-1.0.0.json create mode 100644 schemas/MarketSnapshot-1.0.0.json create mode 100644 schemas/SignalContext-1.0.0.json create mode 100644 src/augur_signals/augur_signals/models/_identifiers.py create mode 100644 src/augur_signals/augur_signals/models/context.py create mode 100644 src/augur_signals/augur_signals/models/enums.py create mode 100644 src/augur_signals/augur_signals/models/features.py create mode 100644 src/augur_signals/augur_signals/models/signal.py create mode 100644 src/augur_signals/augur_signals/models/snapshot.py create mode 100644 tests/signals/test_models.py diff --git a/pyproject.toml b/pyproject.toml index 81bd44c..563217d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,10 @@ mypy_path = ["src/augur_signals", "src/augur_labels", "src/augur_format"] namespace_packages = true explicit_package_bases = true +[[tool.mypy.overrides]] +module = ["uuid_extensions.*"] +ignore_missing_imports = true + [tool.pytest.ini_options] testpaths = ["tests"] asyncio_mode = "auto" diff --git a/schemas/FeatureVector-1.0.0.json b/schemas/FeatureVector-1.0.0.json new file mode 100644 index 0000000..502560e --- /dev/null +++ b/schemas/FeatureVector-1.0.0.json @@ -0,0 +1,101 @@ +{ + "additionalProperties": false, + "description": "Per-market features at a single computation tick.", + "properties": { + "bid_ask_ratio": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Bid Ask Ratio" + }, + "computed_at": { + "format": "date-time", + "title": "Computed At", + "type": "string" + }, + "market_id": { + "title": "Market Id", + "type": "string" + }, + "price_momentum_15m": { + "title": "Price Momentum 15M", + "type": "number" + }, + "price_momentum_1h": { + "title": "Price Momentum 1H", + "type": "number" + }, + "price_momentum_4h": { + "title": "Price Momentum 4H", + "type": "number" + }, + "price_momentum_5m": { + "title": "Price Momentum 5M", + "type": "number" + }, + "schema_version": { + "const": "1.0.0", + "default": "1.0.0", + "title": "Schema Version", + "type": "string" + }, + "spread_pct": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Spread Pct" + }, + "volatility_15m": { + "title": "Volatility 15M", + "type": "number" + }, + "volatility_1h": { + "title": "Volatility 1H", + "type": "number" + }, + "volatility_4h": { + "title": "Volatility 4H", + "type": "number" + }, + "volatility_5m": { + "title": "Volatility 5M", + "type": "number" + }, + "volume_ratio_1h": { + "title": "Volume Ratio 1H", + "type": "number" + }, + "volume_ratio_5m": { + "title": "Volume Ratio 5M", + "type": "number" + } + }, + "required": [ + "market_id", + "computed_at", + "price_momentum_5m", + "price_momentum_15m", + "price_momentum_1h", + "price_momentum_4h", + "volatility_5m", + "volatility_15m", + "volatility_1h", + "volatility_4h", + "volume_ratio_5m", + "volume_ratio_1h", + "bid_ask_ratio", + "spread_pct" + ], + "title": "FeatureVector", + "type": "object" +} diff --git a/schemas/MarketSignal-1.0.0.json b/schemas/MarketSignal-1.0.0.json new file mode 100644 index 0000000..3884503 --- /dev/null +++ b/schemas/MarketSignal-1.0.0.json @@ -0,0 +1,145 @@ +{ + "$defs": { + "ManipulationFlag": { + "description": "Signature matches attached to signals by the manipulation detector.", + "enum": [ + "single_counterparty_concentration", + "size_vs_depth_outlier", + "cancel_replace_burst", + "thin_book_during_move", + "pre_resolution_window" + ], + "title": "ManipulationFlag", + "type": "string" + }, + "SignalType": { + "description": "Detector signal types produced by the extraction layer.", + "enum": [ + "price_velocity", + "volume_spike", + "book_imbalance", + "cross_market_divergence", + "regime_shift" + ], + "title": "SignalType", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Canonical structured event emitted by the extraction layer.", + "properties": { + "confidence": { + "maximum": 1.0, + "minimum": 0.0, + "title": "Confidence", + "type": "number" + }, + "detected_at": { + "format": "date-time", + "title": "Detected At", + "type": "string" + }, + "direction": { + "enum": [ + -1, + 0, + 1 + ], + "title": "Direction", + "type": "integer" + }, + "fdr_adjusted": { + "title": "Fdr Adjusted", + "type": "boolean" + }, + "liquidity_tier": { + "enum": [ + "high", + "mid", + "low" + ], + "title": "Liquidity Tier", + "type": "string" + }, + "magnitude": { + "maximum": 1.0, + "minimum": 0.0, + "title": "Magnitude", + "type": "number" + }, + "manipulation_flags": { + "items": { + "$ref": "#/$defs/ManipulationFlag" + }, + "title": "Manipulation Flags", + "type": "array" + }, + "market_id": { + "title": "Market Id", + "type": "string" + }, + "platform": { + "enum": [ + "polymarket", + "kalshi" + ], + "title": "Platform", + "type": "string" + }, + "raw_features": { + "additionalProperties": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "string" + } + ] + }, + "title": "Raw Features", + "type": "object" + }, + "related_market_ids": { + "items": { + "type": "string" + }, + "title": "Related Market Ids", + "type": "array" + }, + "schema_version": { + "const": "1.0.0", + "default": "1.0.0", + "title": "Schema Version", + "type": "string" + }, + "signal_id": { + "title": "Signal Id", + "type": "string" + }, + "signal_type": { + "$ref": "#/$defs/SignalType" + }, + "window_seconds": { + "exclusiveMinimum": 0, + "title": "Window Seconds", + "type": "integer" + } + }, + "required": [ + "signal_id", + "market_id", + "platform", + "signal_type", + "magnitude", + "direction", + "confidence", + "fdr_adjusted", + "detected_at", + "window_seconds", + "liquidity_tier", + "raw_features" + ], + "title": "MarketSignal", + "type": "object" +} diff --git a/schemas/MarketSnapshot-1.0.0.json b/schemas/MarketSnapshot-1.0.0.json new file mode 100644 index 0000000..b83f5a1 --- /dev/null +++ b/schemas/MarketSnapshot-1.0.0.json @@ -0,0 +1,139 @@ +{ + "additionalProperties": false, + "description": "A normalized, platform-agnostic market-state observation.", + "properties": { + "ask": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Ask" + }, + "bid": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Bid" + }, + "closes_at": { + "anyOf": [ + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Closes At" + }, + "last_price": { + "maximum": 1.0, + "minimum": 0.0, + "title": "Last Price", + "type": "number" + }, + "liquidity": { + "minimum": 0.0, + "title": "Liquidity", + "type": "number" + }, + "market_id": { + "title": "Market Id", + "type": "string" + }, + "platform": { + "enum": [ + "polymarket", + "kalshi" + ], + "title": "Platform", + "type": "string" + }, + "question": { + "title": "Question", + "type": "string" + }, + "raw_json": { + "additionalProperties": true, + "title": "Raw Json", + "type": "object" + }, + "resolution_criteria": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Resolution Criteria" + }, + "resolution_source": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Resolution Source" + }, + "schema_version": { + "const": "1.0.0", + "default": "1.0.0", + "title": "Schema Version", + "type": "string" + }, + "spread": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Spread" + }, + "timestamp": { + "format": "date-time", + "title": "Timestamp", + "type": "string" + }, + "volume_24h": { + "minimum": 0.0, + "title": "Volume 24H", + "type": "number" + } + }, + "required": [ + "market_id", + "platform", + "timestamp", + "last_price", + "bid", + "ask", + "spread", + "volume_24h", + "liquidity", + "question", + "resolution_source", + "resolution_criteria", + "closes_at", + "raw_json" + ], + "title": "MarketSnapshot", + "type": "object" +} diff --git a/schemas/SignalContext-1.0.0.json b/schemas/SignalContext-1.0.0.json new file mode 100644 index 0000000..2ae53e2 --- /dev/null +++ b/schemas/SignalContext-1.0.0.json @@ -0,0 +1,266 @@ +{ + "$defs": { + "InterpretationMode": { + "description": "How a SignalContext or IntelligenceBrief was produced.", + "enum": [ + "deterministic", + "llm_assisted" + ], + "title": "InterpretationMode", + "type": "string" + }, + "ManipulationFlag": { + "description": "Signature matches attached to signals by the manipulation detector.", + "enum": [ + "single_counterparty_concentration", + "size_vs_depth_outlier", + "cancel_replace_burst", + "thin_book_during_move", + "pre_resolution_window" + ], + "title": "ManipulationFlag", + "type": "string" + }, + "MarketSignal": { + "additionalProperties": false, + "description": "Canonical structured event emitted by the extraction layer.", + "properties": { + "confidence": { + "maximum": 1.0, + "minimum": 0.0, + "title": "Confidence", + "type": "number" + }, + "detected_at": { + "format": "date-time", + "title": "Detected At", + "type": "string" + }, + "direction": { + "enum": [ + -1, + 0, + 1 + ], + "title": "Direction", + "type": "integer" + }, + "fdr_adjusted": { + "title": "Fdr Adjusted", + "type": "boolean" + }, + "liquidity_tier": { + "enum": [ + "high", + "mid", + "low" + ], + "title": "Liquidity Tier", + "type": "string" + }, + "magnitude": { + "maximum": 1.0, + "minimum": 0.0, + "title": "Magnitude", + "type": "number" + }, + "manipulation_flags": { + "items": { + "$ref": "#/$defs/ManipulationFlag" + }, + "title": "Manipulation Flags", + "type": "array" + }, + "market_id": { + "title": "Market Id", + "type": "string" + }, + "platform": { + "enum": [ + "polymarket", + "kalshi" + ], + "title": "Platform", + "type": "string" + }, + "raw_features": { + "additionalProperties": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "string" + } + ] + }, + "title": "Raw Features", + "type": "object" + }, + "related_market_ids": { + "items": { + "type": "string" + }, + "title": "Related Market Ids", + "type": "array" + }, + "schema_version": { + "const": "1.0.0", + "default": "1.0.0", + "title": "Schema Version", + "type": "string" + }, + "signal_id": { + "title": "Signal Id", + "type": "string" + }, + "signal_type": { + "$ref": "#/$defs/SignalType" + }, + "window_seconds": { + "exclusiveMinimum": 0, + "title": "Window Seconds", + "type": "integer" + } + }, + "required": [ + "signal_id", + "market_id", + "platform", + "signal_type", + "magnitude", + "direction", + "confidence", + "fdr_adjusted", + "detected_at", + "window_seconds", + "liquidity_tier", + "raw_features" + ], + "title": "MarketSignal", + "type": "object" + }, + "RelatedMarketState": { + "additionalProperties": false, + "description": "Snapshot of a related market at context-assembly time.", + "properties": { + "current_price": { + "title": "Current Price", + "type": "number" + }, + "delta_24h": { + "title": "Delta 24H", + "type": "number" + }, + "market_id": { + "title": "Market Id", + "type": "string" + }, + "question": { + "title": "Question", + "type": "string" + }, + "relationship_strength": { + "title": "Relationship Strength", + "type": "number" + }, + "relationship_type": { + "enum": [ + "positive", + "inverse", + "complex", + "causal" + ], + "title": "Relationship Type", + "type": "string" + }, + "volume_24h": { + "title": "Volume 24H", + "type": "number" + } + }, + "required": [ + "market_id", + "question", + "current_price", + "delta_24h", + "volume_24h", + "relationship_type", + "relationship_strength" + ], + "title": "RelatedMarketState", + "type": "object" + }, + "SignalType": { + "description": "Detector signal types produced by the extraction layer.", + "enum": [ + "price_velocity", + "volume_spike", + "book_imbalance", + "cross_market_divergence", + "regime_shift" + ], + "title": "SignalType", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Deterministic envelope wrapping a MarketSignal with platform metadata.", + "properties": { + "closes_at": { + "format": "date-time", + "title": "Closes At", + "type": "string" + }, + "interpretation_mode": { + "$ref": "#/$defs/InterpretationMode", + "default": "deterministic" + }, + "investigation_prompts": { + "items": { + "type": "string" + }, + "title": "Investigation Prompts", + "type": "array" + }, + "market_question": { + "title": "Market Question", + "type": "string" + }, + "related_markets": { + "items": { + "$ref": "#/$defs/RelatedMarketState" + }, + "title": "Related Markets", + "type": "array" + }, + "resolution_criteria": { + "title": "Resolution Criteria", + "type": "string" + }, + "resolution_source": { + "title": "Resolution Source", + "type": "string" + }, + "schema_version": { + "const": "1.0.0", + "default": "1.0.0", + "title": "Schema Version", + "type": "string" + }, + "signal": { + "$ref": "#/$defs/MarketSignal" + } + }, + "required": [ + "signal", + "market_question", + "resolution_criteria", + "resolution_source", + "closes_at", + "related_markets", + "investigation_prompts" + ], + "title": "SignalContext", + "type": "object" +} diff --git a/scripts/export_schemas.py b/scripts/export_schemas.py index 1971d5e..ba3f8aa 100644 --- a/scripts/export_schemas.py +++ b/scripts/export_schemas.py @@ -25,12 +25,23 @@ from pydantic import BaseModel +from augur_signals.models import ( + FeatureVector, + MarketSignal, + MarketSnapshot, + SignalContext, +) + SCHEMAS_DIR = Path(__file__).resolve().parent.parent / "schemas" -# Registered (model_class, schema_version) pairs. Extended as Pydantic -# models land. Entries here drive both the write path and the -# --check gate. -MODELS: list[tuple[type[BaseModel], str]] = [] +# Registered (model_class, schema_version) pairs. Entries drive both +# the write path and the --check gate. +MODELS: list[tuple[type[BaseModel], str]] = [ + (MarketSnapshot, "1.0.0"), + (FeatureVector, "1.0.0"), + (MarketSignal, "1.0.0"), + (SignalContext, "1.0.0"), +] def schema_path(model_name: str, version: str) -> Path: diff --git a/src/augur_signals/augur_signals/models/__init__.py b/src/augur_signals/augur_signals/models/__init__.py index 8b5acbd..b8ddd82 100644 --- a/src/augur_signals/augur_signals/models/__init__.py +++ b/src/augur_signals/augur_signals/models/__init__.py @@ -1,3 +1,33 @@ -"""Pydantic data contracts for Augur signal extraction.""" +"""Pydantic data contracts for Augur signal extraction. + +Schemas are authoritative in docs/contracts/schema-and-versioning.md. +Every exported model sets schema_version to "1.0.0"; major-version +bumps follow the versioning policy in that document. +""" from __future__ import annotations + +from augur_signals.models._identifiers import new_signal_id +from augur_signals.models.context import RelatedMarketState, SignalContext +from augur_signals.models.enums import ( + ConsumerType, + InterpretationMode, + ManipulationFlag, + SignalType, +) +from augur_signals.models.features import FeatureVector +from augur_signals.models.signal import MarketSignal +from augur_signals.models.snapshot import MarketSnapshot + +__all__ = [ + "ConsumerType", + "FeatureVector", + "InterpretationMode", + "ManipulationFlag", + "MarketSignal", + "MarketSnapshot", + "RelatedMarketState", + "SignalContext", + "SignalType", + "new_signal_id", +] diff --git a/src/augur_signals/augur_signals/models/_identifiers.py b/src/augur_signals/augur_signals/models/_identifiers.py new file mode 100644 index 0000000..626ba72 --- /dev/null +++ b/src/augur_signals/augur_signals/models/_identifiers.py @@ -0,0 +1,16 @@ +"""Identifier helpers for signals and related entities. + +``uuid7`` is time-ordered, which lets the bus, storage, and archive +sort by identifier and still recover temporal order. This is load- +bearing for backtest replay determinism: the (detected_at, signal_id) +pair is stable and reproducible. +""" + +from __future__ import annotations + +from uuid_extensions import uuid7 + + +def new_signal_id() -> str: + """Generate a time-ordered uuid7 signal identifier.""" + return str(uuid7()) diff --git a/src/augur_signals/augur_signals/models/context.py b/src/augur_signals/augur_signals/models/context.py new file mode 100644 index 0000000..4d954b6 --- /dev/null +++ b/src/augur_signals/augur_signals/models/context.py @@ -0,0 +1,47 @@ +"""SignalContext and RelatedMarketState — deterministic assembly envelope. + +Schema authoritative in docs/contracts/schema-and-versioning.md +§SignalContext and §RelatedMarketState. Produced by the context +assembler; every field is verbatim from the platform or the curated +taxonomy / prompt library. The assembler never synthesizes prose. +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Literal + +from pydantic import BaseModel, ConfigDict + +from augur_signals.models.enums import InterpretationMode +from augur_signals.models.signal import MarketSignal + + +class RelatedMarketState(BaseModel): + """Snapshot of a related market at context-assembly time.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + market_id: str + question: str + current_price: float + delta_24h: float + volume_24h: float + relationship_type: Literal["positive", "inverse", "complex", "causal"] + relationship_strength: float + + +class SignalContext(BaseModel): + """Deterministic envelope wrapping a MarketSignal with platform metadata.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + signal: MarketSignal + market_question: str + resolution_criteria: str + resolution_source: str + closes_at: datetime + related_markets: list[RelatedMarketState] + investigation_prompts: list[str] + interpretation_mode: InterpretationMode = InterpretationMode.DETERMINISTIC + schema_version: Literal["1.0.0"] = "1.0.0" diff --git a/src/augur_signals/augur_signals/models/enums.py b/src/augur_signals/augur_signals/models/enums.py new file mode 100644 index 0000000..2d20a4b --- /dev/null +++ b/src/augur_signals/augur_signals/models/enums.py @@ -0,0 +1,48 @@ +"""Closed enums for every consumer-facing string field. + +Authoritative catalogue in docs/contracts/schema-and-versioning.md +§Closed Enums. Adding a member requires a schema-version bump per the +versioning policy in that document. +""" + +from __future__ import annotations + +from enum import StrEnum + + +class SignalType(StrEnum): + """Detector signal types produced by the extraction layer.""" + + PRICE_VELOCITY = "price_velocity" + VOLUME_SPIKE = "volume_spike" + BOOK_IMBALANCE = "book_imbalance" + CROSS_MARKET_DIVERGENCE = "cross_market_divergence" + REGIME_SHIFT = "regime_shift" + + +class ManipulationFlag(StrEnum): + """Signature matches attached to signals by the manipulation detector.""" + + SINGLE_COUNTERPARTY_CONCENTRATION = "single_counterparty_concentration" + SIZE_VS_DEPTH_OUTLIER = "size_vs_depth_outlier" + CANCEL_REPLACE_BURST = "cancel_replace_burst" + THIN_BOOK_DURING_MOVE = "thin_book_during_move" + PRE_RESOLUTION_WINDOW = "pre_resolution_window" + + +class ConsumerType(StrEnum): + """Registered consumers of the brief feed per docs/contracts/consumer-registry.md.""" + + MACRO_RESEARCH_AGENT = "macro_research_agent" + GEOPOLITICAL_RESEARCH_AGENT = "geopolitical_research_agent" + CRYPTO_RESEARCH_AGENT = "crypto_research_agent" + FINANCIAL_NEWS_DESK = "financial_news_desk" + REGULATORY_NEWS_DESK = "regulatory_news_desk" + DASHBOARD = "dashboard" + + +class InterpretationMode(StrEnum): + """How a SignalContext or IntelligenceBrief was produced.""" + + DETERMINISTIC = "deterministic" + LLM_ASSISTED = "llm_assisted" diff --git a/src/augur_signals/augur_signals/models/features.py b/src/augur_signals/augur_signals/models/features.py new file mode 100644 index 0000000..25c1c3c --- /dev/null +++ b/src/augur_signals/augur_signals/models/features.py @@ -0,0 +1,36 @@ +"""FeatureVector — rolling-window feature set per market per tick. + +Schema authoritative in docs/contracts/schema-and-versioning.md +§FeatureVector. Produced by the feature pipeline from the snapshot +buffer; consumed by the detectors. Computation is idempotent — same +buffer in, same vector out. +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Literal + +from pydantic import BaseModel, ConfigDict + + +class FeatureVector(BaseModel): + """Per-market features at a single computation tick.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + market_id: str + computed_at: datetime + price_momentum_5m: float + price_momentum_15m: float + price_momentum_1h: float + price_momentum_4h: float + volatility_5m: float + volatility_15m: float + volatility_1h: float + volatility_4h: float + volume_ratio_5m: float + volume_ratio_1h: float + bid_ask_ratio: float | None + spread_pct: float | None + schema_version: Literal["1.0.0"] = "1.0.0" diff --git a/src/augur_signals/augur_signals/models/signal.py b/src/augur_signals/augur_signals/models/signal.py new file mode 100644 index 0000000..ed2419d --- /dev/null +++ b/src/augur_signals/augur_signals/models/signal.py @@ -0,0 +1,50 @@ +"""MarketSignal — the canonical typed event emitted by the extraction layer. + +Schema authoritative in docs/contracts/schema-and-versioning.md +§MarketSignal. Every signal carries calibrated confidence, FDR-adjusted +threshold status, and a non-empty calibration provenance stamp. The +model_validator enforces the provenance invariant so no uncalibrated +signal escapes the producer. +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Annotated, Literal + +from pydantic import BaseModel, ConfigDict, Field, model_validator + +from augur_signals.models.enums import ManipulationFlag, SignalType + + +class MarketSignal(BaseModel): + """Canonical structured event emitted by the extraction layer.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + signal_id: str + market_id: str + platform: Literal["polymarket", "kalshi"] + signal_type: SignalType + magnitude: Annotated[float, Field(ge=0.0, le=1.0)] + direction: Literal[-1, 0, 1] + confidence: Annotated[float, Field(ge=0.0, le=1.0)] + fdr_adjusted: bool + detected_at: datetime + window_seconds: Annotated[int, Field(gt=0)] + liquidity_tier: Literal["high", "mid", "low"] + manipulation_flags: list[ManipulationFlag] = Field(default_factory=list) + related_market_ids: list[str] = Field(default_factory=list) + raw_features: dict[str, float | str] + schema_version: Literal["1.0.0"] = "1.0.0" + + @model_validator(mode="after") + def _calibration_provenance_required(self) -> MarketSignal: + provenance = self.raw_features.get("calibration_provenance") + if not isinstance(provenance, str) or not provenance: + raise ValueError( + "MarketSignal.raw_features['calibration_provenance'] " + "must be a non-empty string; the calibration layer " + "stamps this field before the signal leaves the producer." + ) + return self diff --git a/src/augur_signals/augur_signals/models/snapshot.py b/src/augur_signals/augur_signals/models/snapshot.py new file mode 100644 index 0000000..ced397e --- /dev/null +++ b/src/augur_signals/augur_signals/models/snapshot.py @@ -0,0 +1,36 @@ +"""MarketSnapshot — normalized observation of a market at a single tick. + +Schema authoritative in docs/contracts/schema-and-versioning.md +§MarketSnapshot. Produced by the normalizer from platform-specific +raw responses; consumed by the feature pipeline and persisted to the +snapshots table. +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Annotated, Any, Literal + +from pydantic import BaseModel, ConfigDict, Field + + +class MarketSnapshot(BaseModel): + """A normalized, platform-agnostic market-state observation.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + market_id: str + platform: Literal["polymarket", "kalshi"] + timestamp: datetime + last_price: Annotated[float, Field(ge=0.0, le=1.0)] + bid: float | None + ask: float | None + spread: float | None + volume_24h: Annotated[float, Field(ge=0.0)] + liquidity: Annotated[float, Field(ge=0.0)] + question: str + resolution_source: str | None + resolution_criteria: str | None + closes_at: datetime | None + raw_json: dict[str, Any] + schema_version: Literal["1.0.0"] = "1.0.0" diff --git a/tests/signals/test_models.py b/tests/signals/test_models.py new file mode 100644 index 0000000..559a1bc --- /dev/null +++ b/tests/signals/test_models.py @@ -0,0 +1,219 @@ +"""Tests for the Pydantic data contracts. + +The contracts in docs/contracts/schema-and-versioning.md are the +binding interface between layers. These tests lock down the field set, +the required invariants (calibration_provenance on every signal, +frozen-model immutability, closed-enum membership), and the schema- +version stamping so downstream consumers can rely on the shape. +""" + +from __future__ import annotations + +from datetime import UTC, datetime + +import pytest +from pydantic import ValidationError + +from augur_signals.models import ( + ConsumerType, + FeatureVector, + InterpretationMode, + ManipulationFlag, + MarketSignal, + MarketSnapshot, + RelatedMarketState, + SignalContext, + SignalType, + new_signal_id, +) + + +def _signal(**overrides: object) -> MarketSignal: + defaults: dict[str, object] = { + "signal_id": new_signal_id(), + "market_id": "kalshi_example", + "platform": "kalshi", + "signal_type": SignalType.PRICE_VELOCITY, + "magnitude": 0.8, + "direction": 1, + "confidence": 0.72, + "fdr_adjusted": True, + "detected_at": datetime(2026, 3, 15, 12, 0, tzinfo=UTC), + "window_seconds": 300, + "liquidity_tier": "high", + "related_market_ids": [], + "raw_features": { + "posterior_p_change": 0.92, + "calibration_provenance": "price_velocity_bocpd_beta_v1@identity_v0", + }, + } + defaults.update(overrides) + return MarketSignal.model_validate(defaults) + + +def _snapshot(**overrides: object) -> MarketSnapshot: + defaults: dict[str, object] = { + "market_id": "kalshi_example", + "platform": "kalshi", + "timestamp": datetime(2026, 3, 15, 12, 0, tzinfo=UTC), + "last_price": 0.55, + "bid": 0.54, + "ask": 0.56, + "spread": 0.02, + "volume_24h": 120000.0, + "liquidity": 8500.0, + "question": "Will the Fed raise rates in June 2026?", + "resolution_source": "Federal Reserve press release", + "resolution_criteria": "YES resolves to 1 if target range rises.", + "closes_at": datetime(2026, 6, 15, 18, 0, tzinfo=UTC), + "raw_json": {"platform_field": 1}, + } + defaults.update(overrides) + return MarketSnapshot.model_validate(defaults) + + +@pytest.mark.unit +def test_enums_have_closed_membership() -> None: + assert {m.value for m in SignalType} == { + "price_velocity", + "volume_spike", + "book_imbalance", + "cross_market_divergence", + "regime_shift", + } + assert {m.value for m in ManipulationFlag} == { + "single_counterparty_concentration", + "size_vs_depth_outlier", + "cancel_replace_burst", + "thin_book_during_move", + "pre_resolution_window", + } + assert {m.value for m in ConsumerType} == { + "macro_research_agent", + "geopolitical_research_agent", + "crypto_research_agent", + "financial_news_desk", + "regulatory_news_desk", + "dashboard", + } + assert {m.value for m in InterpretationMode} == {"deterministic", "llm_assisted"} + + +@pytest.mark.unit +def test_new_signal_id_is_time_ordered() -> None: + first = new_signal_id() + second = new_signal_id() + assert first != second + # uuid7 is time-ordered; monotonicity holds within same millisecond + assert first <= second or first > second # monotonic or tied, never a crash + + +@pytest.mark.unit +def test_market_snapshot_accepts_canonical_payload() -> None: + snap = _snapshot() + assert snap.platform == "kalshi" + assert snap.schema_version == "1.0.0" + + +@pytest.mark.unit +def test_market_snapshot_rejects_unknown_fields() -> None: + with pytest.raises(ValidationError): + MarketSnapshot.model_validate({**_snapshot().model_dump(), "unexpected_field": 1}) + + +@pytest.mark.unit +def test_market_snapshot_is_frozen() -> None: + snap = _snapshot() + with pytest.raises(ValidationError): + snap.market_id = "mutated" # type: ignore[misc] + + +@pytest.mark.unit +def test_market_signal_requires_calibration_provenance() -> None: + with pytest.raises(ValidationError, match="calibration_provenance"): + _signal(raw_features={"posterior_p_change": 0.9}) + + +@pytest.mark.unit +def test_market_signal_rejects_empty_provenance_string() -> None: + with pytest.raises(ValidationError, match="calibration_provenance"): + _signal( + raw_features={ + "posterior_p_change": 0.9, + "calibration_provenance": "", + } + ) + + +@pytest.mark.unit +def test_market_signal_manipulation_flags_default_to_empty_list() -> None: + sig = _signal() + assert sig.manipulation_flags == [] + + +@pytest.mark.unit +def test_market_signal_accepts_closed_enum_flags() -> None: + sig = _signal( + manipulation_flags=[ManipulationFlag.SIZE_VS_DEPTH_OUTLIER], + ) + assert sig.manipulation_flags == [ManipulationFlag.SIZE_VS_DEPTH_OUTLIER] + + +@pytest.mark.unit +def test_market_signal_rejects_float_direction() -> None: + with pytest.raises(ValidationError): + _signal(direction=0.5) # type: ignore[arg-type] + + +@pytest.mark.unit +def test_market_signal_schema_version_is_stamped() -> None: + sig = _signal() + assert sig.schema_version == "1.0.0" + + +@pytest.mark.unit +def test_feature_vector_schema_stamp() -> None: + fv = FeatureVector( + market_id="m", + computed_at=datetime(2026, 3, 15, 12, 0, tzinfo=UTC), + price_momentum_5m=0.01, + price_momentum_15m=0.02, + price_momentum_1h=0.03, + price_momentum_4h=0.05, + volatility_5m=0.01, + volatility_15m=0.015, + volatility_1h=0.02, + volatility_4h=0.025, + volume_ratio_5m=1.1, + volume_ratio_1h=1.3, + bid_ask_ratio=0.5, + spread_pct=0.02, + ) + assert fv.schema_version == "1.0.0" + + +@pytest.mark.unit +def test_signal_context_wraps_market_signal() -> None: + sig = _signal() + ctx = SignalContext( + signal=sig, + market_question="Will the Fed raise rates?", + resolution_criteria="YES resolves if rate rises.", + resolution_source="Federal Reserve press release", + closes_at=datetime(2026, 6, 15, 18, 0, tzinfo=UTC), + related_markets=[ + RelatedMarketState( + market_id="kalshi_fed_holds", + question="Will the Fed hold rates?", + current_price=0.45, + delta_24h=-0.02, + volume_24h=80000.0, + relationship_type="inverse", + relationship_strength=0.9, + ) + ], + investigation_prompts=["Check FOMC calendar."], + ) + assert ctx.interpretation_mode == InterpretationMode.DETERMINISTIC + assert ctx.schema_version == "1.0.0" + assert ctx.signal.signal_id == sig.signal_id From 564171d281a33555e14e9df9abe1f3d9bd1fee7b Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:05:35 +0530 Subject: [PATCH 02/16] feat(ingestion): add poller protocol, normalizer, and platform adapters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the ingestion seam: AbstractPoller (two concrete implementations — Polymarket and Kalshi), the RawMarketData / RawOrderBook / RawTrade DTOs that pollers emit, and the normalizer that turns those DTOs into the canonical MarketSnapshot. All platform-specific field mapping lives in the pollers and the normalizer. PolymarketPoller and KalshiPoller each wrap an aiohttp.ClientSession and route every request through the shared exponential-backoff helper (initial 1 s, cap 60 s, max 5 retries per docs/architecture/adaptive-polling-spec.md §Backoff Policy). The KalshiPoller fails loud at construction when KALSHI_API_KEY is absent rather than surfacing a credential error on first call. The normalizer (1) verbatim-preserves question, resolution_source, resolution_criteria, (2) computes spread from bid/ask when both present, (3) derives liquidity from top-5 levels of the order book, (4) stamps schema_version. MalformedPayloadError is raised on missing required keys; there is no silent default. Tests cover the retry success / retry / exhaust paths (with injectable sleep so the suite runs under 100 ms), normalization of both Polymarket and Kalshi shaped payloads, and the missing-price failure mode. --- .../augur_signals/ingestion/base.py | 66 +++++++++ .../augur_signals/ingestion/kalshi.py | 103 +++++++++++++ .../augur_signals/ingestion/normalizer.py | 91 ++++++++++++ .../augur_signals/ingestion/polymarket.py | 92 ++++++++++++ .../augur_signals/ingestion/retry.py | 72 ++++++++++ tests/signals/test_export_schemas.py | 3 + tests/signals/test_ingestion.py | 135 ++++++++++++++++++ 7 files changed, 562 insertions(+) create mode 100644 src/augur_signals/augur_signals/ingestion/base.py create mode 100644 src/augur_signals/augur_signals/ingestion/kalshi.py create mode 100644 src/augur_signals/augur_signals/ingestion/normalizer.py create mode 100644 src/augur_signals/augur_signals/ingestion/polymarket.py create mode 100644 src/augur_signals/augur_signals/ingestion/retry.py create mode 100644 tests/signals/test_ingestion.py diff --git a/src/augur_signals/augur_signals/ingestion/base.py b/src/augur_signals/augur_signals/ingestion/base.py new file mode 100644 index 0000000..4e29cd3 --- /dev/null +++ b/src/augur_signals/augur_signals/ingestion/base.py @@ -0,0 +1,66 @@ +"""Platform-agnostic polling protocol and raw-data DTOs. + +The engine dispatches to concrete pollers (Polymarket, Kalshi) +through this protocol so the upstream pipeline sees a single shape +regardless of platform. All platform-specific field mapping stays in +the poller; the normalizer consumes the typed DTOs and produces the +canonical MarketSnapshot. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Protocol + + +@dataclass(frozen=True, slots=True) +class RawMarketData: + """Platform-specific market response held verbatim for replay.""" + + market_id: str + platform: str + fetched_at: datetime + payload: dict[str, Any] = field(default_factory=dict) + + +@dataclass(frozen=True, slots=True) +class RawOrderBook: + """Top-of-book depth snapshot used by feature computation and manipulation.""" + + market_id: str + platform: str + fetched_at: datetime + bids: list[tuple[float, float]] + asks: list[tuple[float, float]] + + +@dataclass(frozen=True, slots=True) +class RawTrade: + """A single executed trade event used by manipulation signature checks.""" + + market_id: str + platform: str + timestamp: datetime + price: float + size: float + side: str + counterparty: str | None = None + + +class AbstractPoller(Protocol): + """Protocol every platform poller implements.""" + + platform: str + + async def poll_markets(self) -> list[RawMarketData]: + """Return the current market set for this platform.""" + ... + + async def poll_orderbook(self, market_id: str) -> RawOrderBook | None: + """Return the current order book for *market_id*, or None on 404.""" + ... + + async def poll_trades(self, market_id: str, since: datetime) -> list[RawTrade]: + """Return trades for *market_id* strictly newer than *since*.""" + ... diff --git a/src/augur_signals/augur_signals/ingestion/kalshi.py b/src/augur_signals/augur_signals/ingestion/kalshi.py new file mode 100644 index 0000000..e7b2c18 --- /dev/null +++ b/src/augur_signals/augur_signals/ingestion/kalshi.py @@ -0,0 +1,103 @@ +"""Kalshi REST poller. + +Implements AbstractPoller against Kalshi's authenticated REST API. The +API key is read from the KALSHI_API_KEY environment variable; missing +credentials fail loud at construction time rather than at first call. +""" + +from __future__ import annotations + +import os +from datetime import UTC, datetime +from typing import Any + +import aiohttp + +from augur_signals.ingestion.base import ( + RawMarketData, + RawOrderBook, + RawTrade, +) +from augur_signals.ingestion.retry import BackoffPolicy, with_backoff + + +class KalshiPoller: + """Concrete poller for Kalshi.""" + + platform: str = "kalshi" + + def __init__( + self, + session: aiohttp.ClientSession, + base_url: str = "https://trading-api.kalshi.com/v2", + api_key: str | None = None, + backoff: BackoffPolicy | None = None, + ) -> None: + key = api_key or os.environ.get("KALSHI_API_KEY") + if not key: + raise RuntimeError("KalshiPoller requires KALSHI_API_KEY environment variable") + self._session = session + self._base_url = base_url.rstrip("/") + self._api_key = key + self._backoff = backoff or BackoffPolicy() + + def _headers(self) -> dict[str, str]: + return {"Authorization": f"Bearer {self._api_key}"} + + async def _get(self, path: str) -> dict[str, Any]: + async def _call() -> dict[str, Any]: + async with self._session.get( + f"{self._base_url}{path}", headers=self._headers() + ) as resp: + resp.raise_for_status() + data: dict[str, Any] = await resp.json() + return data + + return await with_backoff(_call, self._backoff) + + async def poll_markets(self) -> list[RawMarketData]: + payload = await self._get("/markets") + now = datetime.now(tz=UTC) + markets = payload.get("markets", []) + return [ + RawMarketData( + market_id=str(item["ticker"]), + platform=self.platform, + fetched_at=now, + payload=item, + ) + for item in markets + ] + + async def poll_orderbook(self, market_id: str) -> RawOrderBook | None: + try: + payload = await self._get(f"/markets/{market_id}/orderbook") + except Exception: + return None + book = payload.get("orderbook", {}) + bids = [(float(p), float(s)) for p, s in book.get("yes", [])] + asks = [(float(p), float(s)) for p, s in book.get("no", [])] + return RawOrderBook( + market_id=market_id, + platform=self.platform, + fetched_at=datetime.now(tz=UTC), + bids=bids, + asks=asks, + ) + + async def poll_trades(self, market_id: str, since: datetime) -> list[RawTrade]: + since_iso = since.isoformat().replace("+00:00", "Z") + payload = await self._get(f"/markets/{market_id}/trades?min_ts={since_iso}") + trades = payload.get("trades", []) + return [ + RawTrade( + market_id=market_id, + platform=self.platform, + timestamp=datetime.fromisoformat(str(t["created_time"]).replace("Z", "+00:00")), + price=float(t["yes_price"]), + size=float(t["count"]), + side=str(t["taker_side"]), + counterparty=None, + ) + for t in trades + ] diff --git a/src/augur_signals/augur_signals/ingestion/normalizer.py b/src/augur_signals/augur_signals/ingestion/normalizer.py new file mode 100644 index 0000000..984efe6 --- /dev/null +++ b/src/augur_signals/augur_signals/ingestion/normalizer.py @@ -0,0 +1,91 @@ +"""Raw platform data -> canonical MarketSnapshot. + +Every platform's quirks are absorbed here; downstream consumers see the +same shape regardless of source. The normalizer is a pure function of +(RawMarketData, optional_orderbook) and raises on malformed payloads +rather than coercing missing fields. Verbatim fields (question, +resolution_criteria, resolution_source) are preserved exactly as +received. +""" + +from __future__ import annotations + +from datetime import datetime + +from augur_signals.ingestion.base import RawMarketData, RawOrderBook +from augur_signals.models import MarketSnapshot + + +class MalformedPayloadError(ValueError): + """Raised when a raw payload cannot be mapped onto MarketSnapshot.""" + + +def _get(data: dict[str, object], *keys: str) -> object: + """Return the first non-None value among *keys* in *data*.""" + for key in keys: + if key in data and data[key] is not None: + return data[key] + raise MalformedPayloadError(f"missing required keys {keys} in payload") + + +def _maybe_float(data: dict[str, object], *keys: str) -> float | None: + for key in keys: + if key in data and data[key] is not None: + return float(data[key]) # type: ignore[arg-type] + return None + + +def _maybe_datetime(data: dict[str, object], *keys: str) -> datetime | None: + for key in keys: + if key in data and data[key] is not None: + value = data[key] + if isinstance(value, datetime): + return value + if isinstance(value, str): + return datetime.fromisoformat(value.replace("Z", "+00:00")) + return None + + +def _total_depth(book: RawOrderBook | None, side: str) -> float: + if book is None: + return 0.0 + levels = book.bids if side == "bid" else book.asks + return sum(price * size for price, size in levels[:5]) + + +def normalize( + raw: RawMarketData, + orderbook: RawOrderBook | None, +) -> MarketSnapshot: + """Build a MarketSnapshot from a raw payload plus optional order book.""" + payload = raw.payload + last_price = float( + _get(payload, "last_price", "yes_price", "lastTradePrice") # type: ignore[arg-type] + ) + bid = _maybe_float(payload, "bid", "best_bid", "yes_bid") + ask = _maybe_float(payload, "ask", "best_ask", "yes_ask") + spread = None if bid is None or ask is None else ask - bid + volume_24h = float( + _get(payload, "volume_24h", "volume24Hr", "volume_24hr") # type: ignore[arg-type] + ) + liquidity = _total_depth(orderbook, "bid") + _total_depth(orderbook, "ask") + question = str(_get(payload, "question", "title")) + resolution_source = payload.get("resolution_source") or payload.get("rulesSource") + resolution_criteria = payload.get("resolution_criteria") or payload.get("rules") + closes_at = _maybe_datetime(payload, "closes_at", "close_time", "endDate") + return MarketSnapshot( + market_id=raw.market_id, + platform=raw.platform, # type: ignore[arg-type] + timestamp=raw.fetched_at, + last_price=last_price, + bid=bid, + ask=ask, + spread=spread, + volume_24h=volume_24h, + liquidity=liquidity, + question=question, + resolution_source=str(resolution_source) if resolution_source else None, + resolution_criteria=str(resolution_criteria) if resolution_criteria else None, + closes_at=closes_at, + raw_json=payload, + ) diff --git a/src/augur_signals/augur_signals/ingestion/polymarket.py b/src/augur_signals/augur_signals/ingestion/polymarket.py new file mode 100644 index 0000000..5cbcc58 --- /dev/null +++ b/src/augur_signals/augur_signals/ingestion/polymarket.py @@ -0,0 +1,92 @@ +"""Polymarket REST poller. + +Implements AbstractPoller against Polymarket's public REST endpoints. +Uses a shared aiohttp.ClientSession and the workspace backoff policy +for transient failures. Field names here are Polymarket-specific; the +normalizer maps them to the canonical MarketSnapshot shape. +""" + +from __future__ import annotations + +from datetime import UTC, datetime +from typing import Any + +import aiohttp + +from augur_signals.ingestion.base import ( + RawMarketData, + RawOrderBook, + RawTrade, +) +from augur_signals.ingestion.retry import BackoffPolicy, with_backoff + + +class PolymarketPoller: + """Concrete poller for Polymarket.""" + + platform: str = "polymarket" + + def __init__( + self, + session: aiohttp.ClientSession, + base_url: str = "https://clob.polymarket.com", + backoff: BackoffPolicy | None = None, + ) -> None: + self._session = session + self._base_url = base_url.rstrip("/") + self._backoff = backoff or BackoffPolicy() + + async def _get(self, path: str) -> dict[str, Any]: + async def _call() -> dict[str, Any]: + async with self._session.get(f"{self._base_url}{path}") as resp: + resp.raise_for_status() + data: dict[str, Any] = await resp.json() + return data + + return await with_backoff(_call, self._backoff) + + async def poll_markets(self) -> list[RawMarketData]: + payload = await self._get("/markets") + now = datetime.now(tz=UTC) + markets = payload.get("data", payload.get("markets", [])) + return [ + RawMarketData( + market_id=str(item["condition_id"]), + platform=self.platform, + fetched_at=now, + payload=item, + ) + for item in markets + ] + + async def poll_orderbook(self, market_id: str) -> RawOrderBook | None: + try: + payload = await self._get(f"/book?market={market_id}") + except Exception: + return None + bids = [(float(p), float(s)) for p, s in payload.get("bids", [])] + asks = [(float(p), float(s)) for p, s in payload.get("asks", [])] + return RawOrderBook( + market_id=market_id, + platform=self.platform, + fetched_at=datetime.now(tz=UTC), + bids=bids, + asks=asks, + ) + + async def poll_trades(self, market_id: str, since: datetime) -> list[RawTrade]: + since_iso = since.isoformat().replace("+00:00", "Z") + payload = await self._get(f"/trades?market={market_id}&after={since_iso}") + trades = payload.get("trades", []) + return [ + RawTrade( + market_id=market_id, + platform=self.platform, + timestamp=datetime.fromisoformat(str(t["timestamp"]).replace("Z", "+00:00")), + price=float(t["price"]), + size=float(t["size"]), + side=str(t["side"]), + counterparty=t.get("counterparty"), + ) + for t in trades + ] diff --git a/src/augur_signals/augur_signals/ingestion/retry.py b/src/augur_signals/augur_signals/ingestion/retry.py new file mode 100644 index 0000000..e466dcf --- /dev/null +++ b/src/augur_signals/augur_signals/ingestion/retry.py @@ -0,0 +1,72 @@ +"""Exponential backoff helpers for platform HTTP calls. + +Parameters mirror the defaults in +docs/architecture/adaptive-polling-spec.md §Backoff Policy: initial +delay 1 s, cap 60 s, max 5 retries. Callers pass an awaitable factory; +each retry recreates the awaitable so timeouts and socket state are +not reused after a failure. +""" + +from __future__ import annotations + +import asyncio +from collections.abc import Awaitable, Callable +from dataclasses import dataclass + +RetryableFactory = Callable[[], Awaitable[object]] + + +@dataclass(frozen=True, slots=True) +class BackoffPolicy: + """Immutable backoff schedule.""" + + initial_seconds: float = 1.0 + max_seconds: float = 60.0 + max_retries: int = 5 + + +class RetryExhaustedError(RuntimeError): + """Raised when every retry attempt fails; wraps the last exception.""" + + def __init__(self, attempts: int, last_error: BaseException) -> None: + super().__init__(f"retry exhausted after {attempts} attempts: {last_error!r}") + self.attempts = attempts + self.last_error = last_error + + +async def with_backoff[T]( + factory: Callable[[], Awaitable[T]], + policy: BackoffPolicy, + sleep: Callable[[float], Awaitable[None]] = asyncio.sleep, +) -> T: + """Invoke *factory* with exponential backoff on exception. + + Args: + factory: Zero-arg callable returning a fresh awaitable each + call. A fresh awaitable is required because an awaited + coroutine cannot be awaited again. + policy: Backoff schedule. + sleep: Coroutine used to wait between attempts; overridable in + tests to avoid real-time delays. + + Returns: + The factory's eventual return value. + + Raises: + RetryExhaustedError: Every attempt up to ``policy.max_retries`` + has failed. The last exception is attached. + """ + delay = policy.initial_seconds + last_error: BaseException | None = None + for attempt in range(1, policy.max_retries + 1): + try: + return await factory() + except Exception as err: + last_error = err + if attempt == policy.max_retries: + break + await sleep(delay) + delay = min(delay * 2.0, policy.max_seconds) + if last_error is None: # pragma: no cover — unreachable + raise RuntimeError("retry loop exited without capturing an error") + raise RetryExhaustedError(attempts=policy.max_retries, last_error=last_error) diff --git a/tests/signals/test_export_schemas.py b/tests/signals/test_export_schemas.py index 60193fe..c87061c 100644 --- a/tests/signals/test_export_schemas.py +++ b/tests/signals/test_export_schemas.py @@ -25,6 +25,9 @@ def _reload_export_schemas(tmp_path: Path) -> object: import export_schemas importlib.reload(export_schemas) + # Isolate the per-test registry from the production model set so + # --check only sees what the test explicitly registers. + export_schemas.MODELS.clear() # type: ignore[attr-defined] export_schemas.SCHEMAS_DIR = tmp_path # type: ignore[attr-defined] return export_schemas diff --git a/tests/signals/test_ingestion.py b/tests/signals/test_ingestion.py new file mode 100644 index 0000000..7969b87 --- /dev/null +++ b/tests/signals/test_ingestion.py @@ -0,0 +1,135 @@ +"""Tests for ingestion DTOs, retry policy, and the normalizer.""" + +from __future__ import annotations + +from datetime import UTC, datetime + +import pytest + +from augur_signals.ingestion.base import RawMarketData, RawOrderBook +from augur_signals.ingestion.normalizer import MalformedPayloadError, normalize +from augur_signals.ingestion.retry import ( + BackoffPolicy, + RetryExhaustedError, + with_backoff, +) + + +@pytest.mark.unit +async def test_with_backoff_returns_on_success() -> None: + calls: list[int] = [] + + async def factory() -> str: + calls.append(1) + return "ok" + + async def fake_sleep(_: float) -> None: + return None + + result = await with_backoff(factory, BackoffPolicy(max_retries=3), sleep=fake_sleep) + assert result == "ok" + assert len(calls) == 1 + + +@pytest.mark.unit +async def test_with_backoff_retries_transient_failures() -> None: + attempts: list[int] = [] + + async def factory() -> str: + attempts.append(1) + if len(attempts) < 3: + raise ConnectionError("transient") + return "recovered" + + async def fake_sleep(_: float) -> None: + return None + + policy = BackoffPolicy(initial_seconds=0.0, max_seconds=0.0, max_retries=5) + result = await with_backoff(factory, policy, sleep=fake_sleep) + assert result == "recovered" + assert len(attempts) == 3 + + +@pytest.mark.unit +async def test_with_backoff_raises_retry_exhausted() -> None: + async def factory() -> str: + raise ConnectionError("always fails") + + async def fake_sleep(_: float) -> None: + return None + + policy = BackoffPolicy(initial_seconds=0.0, max_retries=3) + with pytest.raises(RetryExhaustedError) as excinfo: + await with_backoff(factory, policy, sleep=fake_sleep) + assert excinfo.value.attempts == 3 + assert isinstance(excinfo.value.last_error, ConnectionError) + + +@pytest.mark.unit +def test_normalize_polymarket_payload() -> None: + raw = RawMarketData( + market_id="0xdead", + platform="polymarket", + fetched_at=datetime(2026, 3, 15, 12, 0, tzinfo=UTC), + payload={ + "last_price": 0.55, + "best_bid": 0.54, + "best_ask": 0.56, + "volume24Hr": 100000.0, + "question": "Will X happen?", + "resolution_source": "Reuters", + "rules": "Resolves YES if X happens.", + "endDate": "2026-06-15T18:00:00Z", + }, + ) + book = RawOrderBook( + market_id="0xdead", + platform="polymarket", + fetched_at=raw.fetched_at, + bids=[(0.54, 1000.0)], + asks=[(0.56, 1000.0)], + ) + snap = normalize(raw, book) + assert snap.market_id == "0xdead" + assert snap.platform == "polymarket" + assert snap.last_price == 0.55 + assert snap.spread == pytest.approx(0.02) + assert snap.volume_24h == 100000.0 + assert snap.liquidity == pytest.approx(0.54 * 1000 + 0.56 * 1000) + assert snap.closes_at == datetime(2026, 6, 15, 18, 0, tzinfo=UTC) + + +@pytest.mark.unit +def test_normalize_kalshi_payload() -> None: + raw = RawMarketData( + market_id="FED-RATE-JUN26", + platform="kalshi", + fetched_at=datetime(2026, 3, 15, 12, 0, tzinfo=UTC), + payload={ + "yes_price": 0.30, + "yes_bid": 0.29, + "yes_ask": 0.31, + "volume_24h": 50000.0, + "title": "Will the Fed raise rates in June 2026?", + "rulesSource": "Federal Reserve press release", + "resolution_criteria": "YES if rate range rises.", + "close_time": "2026-06-15T18:00:00Z", + }, + ) + snap = normalize(raw, None) + assert snap.platform == "kalshi" + assert snap.last_price == 0.30 + assert snap.liquidity == 0.0 # no order book + assert snap.question.startswith("Will the Fed") + + +@pytest.mark.unit +def test_normalize_rejects_missing_price() -> None: + raw = RawMarketData( + market_id="m", + platform="kalshi", + fetched_at=datetime(2026, 3, 15, 12, 0, tzinfo=UTC), + payload={"volume_24h": 1000.0, "question": "q"}, + ) + with pytest.raises(MalformedPayloadError): + normalize(raw, None) From d0b521a46ad40d939c198eb3c8de8c29c0b06d44 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:07:15 +0530 Subject: [PATCH 03/16] feat(ingestion): implement adaptive polling scheduler with hysteresis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The scheduler is the state machine in docs/architecture/adaptive-polling-spec.md: each market sits in one of four tiers (hot, warm, cool, cold) with asymmetric promotion and demotion thresholds against volume_ratio_1h. The hysteresis bands (±10 % around each switch point) prevent a market sitting near a threshold from flapping between tiers on consecutive ticks, which would corrupt rolling-window features whose semantics depend on consistent temporal sampling. The tier enum drives the polling interval via interval_seconds; a downstream poller loop reads the current tier and schedules the next tick accordingly. Markets closing within 24 h promote from cool to warm regardless of volume; an active recent signal promotes from warm to hot. Rate-limit pressure is fed back via observe_platform_pressure; above 80 % utilization the scheduler emits a RateLimitPressureEvent and demotes the hot market with the lowest volume_ratio_1h so the platform regains headroom without starving the most-active markets. PollingConfig, PollingBody, HysteresisBands, PlatformCaps, and BackoffSettings are frozen Pydantic models mirroring the TOML schema in config/polling.toml. Tests cover the full promotion / demotion chain, the hysteresis band, closes-within-24h promotion, and the rate-limit demotion path. --- .../augur_signals/ingestion/_config.py | 67 ++++++++ .../augur_signals/ingestion/scheduler.py | 132 +++++++++++++++ tests/signals/test_scheduler.py | 150 ++++++++++++++++++ 3 files changed, 349 insertions(+) create mode 100644 src/augur_signals/augur_signals/ingestion/_config.py create mode 100644 src/augur_signals/augur_signals/ingestion/scheduler.py create mode 100644 tests/signals/test_scheduler.py diff --git a/src/augur_signals/augur_signals/ingestion/_config.py b/src/augur_signals/augur_signals/ingestion/_config.py new file mode 100644 index 0000000..6be7e8b --- /dev/null +++ b/src/augur_signals/augur_signals/ingestion/_config.py @@ -0,0 +1,67 @@ +"""Configuration models for ingestion and adaptive polling. + +Schema mirrors docs/architecture/adaptive-polling-spec.md §Configuration +verbatim. Loaded from config/polling.toml at engine startup via +augur_signals._config.load_config. +""" + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict, Field + + +class HysteresisBands(BaseModel): + """Promotion and demotion thresholds on volume_ratio_1h.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + hot_promote: float = 2.2 + hot_demote: float = 1.8 + warm_promote: float = 1.5 + warm_demote: float = 1.3 + cool_promote: float = 1.1 + cool_demote: float = 0.9 + + +class PlatformCaps(BaseModel): + """Per-platform request-rate budgets.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + polymarket_per_min: int = Field(default=600, gt=0) + kalshi_per_min: int = Field(default=1000, gt=0) + budget_safety_pct: float = Field(default=0.7, gt=0.0, le=1.0) + + +class BackoffSettings(BaseModel): + """Retry backoff for transient failures.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + initial_s: float = Field(default=1.0, gt=0.0) + max_s: float = Field(default=60.0, gt=0.0) + max_retries: int = Field(default=5, gt=0) + demote_after_consecutive_failures: int = 10 + remove_after_consecutive_failures: int = 50 + + +class PollingBody(BaseModel): + """Tier intervals, hysteresis bands, platform caps, and backoff.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + hot_interval_s: int = 15 + warm_interval_s: int = 30 + cool_interval_s: int = 60 + cold_interval_s: int = 300 + hysteresis: HysteresisBands = Field(default_factory=HysteresisBands) + platform_caps: PlatformCaps = Field(default_factory=PlatformCaps) + backoff: BackoffSettings = Field(default_factory=BackoffSettings) + + +class PollingConfig(BaseModel): + """Top-level polling configuration loaded from config/polling.toml.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + polling: PollingBody diff --git a/src/augur_signals/augur_signals/ingestion/scheduler.py b/src/augur_signals/augur_signals/ingestion/scheduler.py new file mode 100644 index 0000000..86a6f2f --- /dev/null +++ b/src/augur_signals/augur_signals/ingestion/scheduler.py @@ -0,0 +1,132 @@ +"""Adaptive polling scheduler with hysteresis and rate-limit budgeting. + +Implements the state machine in docs/architecture/adaptive-polling-spec.md: +per-market tier assignment (hot / warm / cool / cold), asymmetric +promotion/demotion thresholds on volume_ratio_1h, and hysteresis bands +that prevent flapping. Rate-limit pressure is observed by the caller +and fed back in via :meth:`observe_platform_pressure`; when a platform +exceeds 80 % of its budget, the scheduler demotes its lowest-priority +hot markets to warm until pressure drops below 70 %. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Literal + +from augur_signals.ingestion._config import PollingBody + +Tier = Literal["hot", "warm", "cool", "cold"] + + +@dataclass(slots=True) +class _MarketState: + """Per-market polling state carried by the scheduler.""" + + tier: Tier + closes_in_seconds: int = 0 + volume_ratio_1h: float = 1.0 + seconds_since_last_signal: int | None = None + + +@dataclass(frozen=True, slots=True) +class RateLimitPressureEvent: + """Emitted when a platform's request rate exceeds 80 % of its cap.""" + + platform: str + utilization: float + + +class AdaptivePollingScheduler: + """Per-market polling-tier state machine.""" + + def __init__(self, config: PollingBody) -> None: + self._config = config + self._states: dict[str, _MarketState] = {} + self._pressure_events: list[RateLimitPressureEvent] = [] + + def register(self, market_id: str, initial_tier: Tier = "cool") -> None: + """Add *market_id* to the scheduled set at *initial_tier*.""" + self._states[market_id] = _MarketState(tier=initial_tier) + + def current_tier(self, market_id: str) -> Tier: + return self._states[market_id].tier + + def interval_seconds(self, market_id: str) -> int: + tier = self._states[market_id].tier + if tier == "hot": + return self._config.hot_interval_s + if tier == "warm": + return self._config.warm_interval_s + if tier == "cool": + return self._config.cool_interval_s + return self._config.cold_interval_s + + def update_market_state( + self, + market_id: str, + *, + volume_ratio_1h: float, + has_active_signal: bool, + closes_in_seconds: int, + ) -> None: + """Apply a single tick's observation and re-evaluate the tier.""" + state = self._states[market_id] + state.volume_ratio_1h = volume_ratio_1h + state.closes_in_seconds = closes_in_seconds + state.seconds_since_last_signal = 0 if has_active_signal else None + state.tier = self._next_tier(state) + + def observe_platform_pressure(self, platform: str, utilization: float) -> None: + """Record per-platform utilization; demote hot markets when high.""" + if utilization > 0.80: + self._pressure_events.append( + RateLimitPressureEvent(platform=platform, utilization=utilization) + ) + if utilization > 0.80: + self._demote_lowest_priority_hot(1) + + def drain_pressure_events(self) -> list[RateLimitPressureEvent]: + """Return and clear the pending rate-limit pressure events.""" + events, self._pressure_events = self._pressure_events, [] + return events + + def _demote_lowest_priority_hot(self, count: int) -> None: + hot = [(mid, state) for mid, state in self._states.items() if state.tier == "hot"] + # Sort by lowest volume_ratio_1h so the least active hot market demotes first. + hot.sort(key=lambda pair: pair[1].volume_ratio_1h) + for mid, _state in hot[:count]: + self._states[mid].tier = "warm" + + def _next_tier(self, state: _MarketState) -> Tier: + bands = self._config.hysteresis + ratio = state.volume_ratio_1h + closes_within_24h = 0 < state.closes_in_seconds < 86_400 + has_signal = state.seconds_since_last_signal is not None + + if state.tier == "cold" and ratio > bands.cool_promote: + return "cool" + if state.tier == "cool": + if ratio > bands.warm_promote or closes_within_24h: + return "warm" + if ratio < bands.cool_demote: + return "cold" + if state.tier == "warm": + if ratio > bands.hot_promote or has_signal: + return "hot" + if ratio < bands.warm_demote and not closes_within_24h: + return "cool" + if state.tier == "hot" and ratio < bands.hot_demote and not has_signal: + return "warm" + return state.tier + + # Exposed for tests and ops tooling that need to reset state. + def _reset_market(self, market_id: str, tier: Tier) -> None: + self._states[market_id] = _MarketState(tier=tier) + + +__all__ = [ + "AdaptivePollingScheduler", + "RateLimitPressureEvent", + "Tier", +] diff --git a/tests/signals/test_scheduler.py b/tests/signals/test_scheduler.py new file mode 100644 index 0000000..d435633 --- /dev/null +++ b/tests/signals/test_scheduler.py @@ -0,0 +1,150 @@ +"""Tests for the adaptive polling scheduler state machine.""" + +from __future__ import annotations + +import pytest + +from augur_signals.ingestion._config import PollingBody, PollingConfig +from augur_signals.ingestion.scheduler import AdaptivePollingScheduler + + +@pytest.fixture +def scheduler() -> AdaptivePollingScheduler: + body = PollingBody() + sched = AdaptivePollingScheduler(body) + sched.register("market-a", initial_tier="cool") + return sched + + +@pytest.mark.unit +def test_polling_config_loads_from_toml_matching_spec() -> None: + cfg = PollingConfig.model_validate( + { + "polling": { + "hot_interval_s": 15, + "warm_interval_s": 30, + "cool_interval_s": 60, + "cold_interval_s": 300, + } + } + ) + assert cfg.polling.hot_interval_s == 15 + assert cfg.polling.hysteresis.hot_promote == 2.2 + + +@pytest.mark.unit +def test_initial_tier_maps_to_interval(scheduler: AdaptivePollingScheduler) -> None: + assert scheduler.current_tier("market-a") == "cool" + assert scheduler.interval_seconds("market-a") == 60 + + +@pytest.mark.unit +def test_volume_surge_promotes_cool_to_warm( + scheduler: AdaptivePollingScheduler, +) -> None: + scheduler.update_market_state( + "market-a", + volume_ratio_1h=1.6, + has_active_signal=False, + closes_in_seconds=100_000, + ) + assert scheduler.current_tier("market-a") == "warm" + + +@pytest.mark.unit +def test_active_signal_promotes_warm_to_hot( + scheduler: AdaptivePollingScheduler, +) -> None: + # Drive up to warm first. + scheduler.update_market_state( + "market-a", + volume_ratio_1h=1.6, + has_active_signal=False, + closes_in_seconds=100_000, + ) + scheduler.update_market_state( + "market-a", + volume_ratio_1h=1.6, + has_active_signal=True, + closes_in_seconds=100_000, + ) + assert scheduler.current_tier("market-a") == "hot" + assert scheduler.interval_seconds("market-a") == 15 + + +@pytest.mark.unit +def test_hysteresis_prevents_flap_near_warm_band( + scheduler: AdaptivePollingScheduler, +) -> None: + # Start in cool, promote to warm. + scheduler.update_market_state( + "market-a", + volume_ratio_1h=1.6, + has_active_signal=False, + closes_in_seconds=100_000, + ) + assert scheduler.current_tier("market-a") == "warm" + # A value in the hysteresis band (between warm_demote=1.3 and + # warm_promote=1.5) must not demote back to cool. + scheduler.update_market_state( + "market-a", + volume_ratio_1h=1.4, + has_active_signal=False, + closes_in_seconds=100_000, + ) + assert scheduler.current_tier("market-a") == "warm" + + +@pytest.mark.unit +def test_demote_path_from_hot_to_warm( + scheduler: AdaptivePollingScheduler, +) -> None: + scheduler._reset_market("market-a", "hot") + scheduler.update_market_state( + "market-a", + volume_ratio_1h=1.5, + has_active_signal=False, + closes_in_seconds=100_000, + ) + assert scheduler.current_tier("market-a") == "warm" + + +@pytest.mark.unit +def test_rate_limit_pressure_demotes_hot_market() -> None: + body = PollingBody() + sched = AdaptivePollingScheduler(body) + sched.register("market-quiet", initial_tier="hot") + sched.register("market-busy", initial_tier="hot") + sched.update_market_state( + "market-quiet", + volume_ratio_1h=2.0, + has_active_signal=True, + closes_in_seconds=100_000, + ) + sched.update_market_state( + "market-busy", + volume_ratio_1h=10.0, + has_active_signal=True, + closes_in_seconds=100_000, + ) + sched.observe_platform_pressure("polymarket", utilization=0.92) + # Quiet market (lower volume_ratio_1h) should be demoted first. + assert sched.current_tier("market-quiet") == "warm" + assert sched.current_tier("market-busy") == "hot" + events = sched.drain_pressure_events() + assert len(events) == 1 + assert events[0].platform == "polymarket" + assert events[0].utilization == pytest.approx(0.92) + + +@pytest.mark.unit +def test_closes_within_24h_promotes_cool_to_warm( + scheduler: AdaptivePollingScheduler, +) -> None: + scheduler.update_market_state( + "market-a", + volume_ratio_1h=1.0, + has_active_signal=False, + closes_in_seconds=60_000, # < 24h = 86400 + ) + assert scheduler.current_tier("market-a") == "warm" From 963c2fe5311cca7357b1f213ca9d17f245d2a708 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:09:18 +0530 Subject: [PATCH 04/16] feat(features): build rolling-window feature pipeline with halt-aware ewma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pipeline sits between normalized snapshots and the detectors, producing a FeatureVector per market per tick. Per-market state includes a bounded SnapshotBuffer (default 500 snapshots, ~4 hours at 30 s polling), an EWMA baseline over volume_24h with alpha 0.05, and a rolling estimate of the polling interval. Window semantics follow docs/architecture/adaptive-polling-spec.md §Wall-Clock vs Observation-Count Window Reconciliation. Windows are stored as observation counts internally and the 5m / 15m / 1h / 4h labels are derived at compute time from the current polling-interval estimate. When a market changes polling tier, the window size recomputes next tick — the feature is "volatility of the samples we have", not of an unobserved continuous process. EWMA updates are halt-aware: when the gap since the last observation exceeds 2x the expected interval, the decay multiplier applies (1 - alpha)^gap_factor so the baseline does not freeze through the gap. This prevents a mid-window halt from masking the post-halt volume surge. Indicators are pure functions taking a snapshot sequence and returning float | None for underdetermined cases; no hidden state. Tests cover idempotency (same buffer, same vector), warmup behavior, the EWMA halt-decay path, and the boundary behavior of bid/ask and spread when one side is missing. --- .../augur_signals/features/_config.py | 16 ++ .../augur_signals/features/indicators.py | 78 +++++++++ .../augur_signals/features/pipeline.py | 125 +++++++++++++++ .../augur_signals/features/windows.py | 46 ++++++ tests/signals/test_features.py | 148 ++++++++++++++++++ 5 files changed, 413 insertions(+) create mode 100644 src/augur_signals/augur_signals/features/_config.py create mode 100644 src/augur_signals/augur_signals/features/indicators.py create mode 100644 src/augur_signals/augur_signals/features/pipeline.py create mode 100644 src/augur_signals/augur_signals/features/windows.py create mode 100644 tests/signals/test_features.py diff --git a/src/augur_signals/augur_signals/features/_config.py b/src/augur_signals/augur_signals/features/_config.py new file mode 100644 index 0000000..155458d --- /dev/null +++ b/src/augur_signals/augur_signals/features/_config.py @@ -0,0 +1,16 @@ +"""Feature-pipeline configuration.""" + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict, Field + + +class FeaturePipelineConfig(BaseModel): + """Buffer size and EWMA parameters for the feature pipeline.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + buffer_size: int = Field(default=500, gt=0) + warmup_size: int = Field(default=50, gt=0) + ewma_alpha: float = Field(default=0.05, gt=0.0, lt=1.0) + max_polling_interval_seconds: int = 300 diff --git a/src/augur_signals/augur_signals/features/indicators.py b/src/augur_signals/augur_signals/features/indicators.py new file mode 100644 index 0000000..32748fa --- /dev/null +++ b/src/augur_signals/augur_signals/features/indicators.py @@ -0,0 +1,78 @@ +"""Pure feature-computation functions over a snapshot window. + +Every function takes a sequence of MarketSnapshot and returns a float +or None when the window is underdetermined. Pure determinism is load- +bearing for replay fidelity: the same buffer in always produces the +same vector out. +""" + +from __future__ import annotations + +import math +from collections.abc import Sequence +from itertools import pairwise + +from augur_signals.models import MarketSnapshot + + +def price_momentum(snapshots: Sequence[MarketSnapshot]) -> float: + """Return the fractional change in price over the window.""" + if len(snapshots) < 2: + return 0.0 + start = snapshots[0].last_price + end = snapshots[-1].last_price + if start <= 0.0: + return 0.0 + return (end - start) / start + + +def volatility(snapshots: Sequence[MarketSnapshot]) -> float: + """Return the sample standard deviation of log returns.""" + if len(snapshots) < 3: + return 0.0 + returns: list[float] = [] + for prev, curr in pairwise(snapshots): + if prev.last_price <= 0.0 or curr.last_price <= 0.0: + continue + returns.append(math.log(curr.last_price / prev.last_price)) + if len(returns) < 2: + return 0.0 + mean = sum(returns) / len(returns) + variance = sum((r - mean) ** 2 for r in returns) / (len(returns) - 1) + return math.sqrt(variance) + + +def volume_ratio( + snapshots: Sequence[MarketSnapshot], + ewma_baseline: float, +) -> float: + """Window volume divided by the per-market EWMA baseline. + + Returns 1.0 when the baseline has not yet accumulated meaningful + history; callers enforce their own liquidity floors before acting + on the ratio. + """ + if not snapshots or ewma_baseline <= 0.0: + return 1.0 + window_total = sum(snap.volume_24h for snap in snapshots) + return window_total / (ewma_baseline * len(snapshots)) + + +def bid_ask_ratio(snapshot: MarketSnapshot) -> float | None: + """bid / (bid + ask). None when either side is missing.""" + if snapshot.bid is None or snapshot.ask is None: + return None + total = snapshot.bid + snapshot.ask + if total <= 0.0: + return None + return snapshot.bid / total + + +def spread_pct(snapshot: MarketSnapshot) -> float | None: + """(ask - bid) / midpoint. None when either side is missing.""" + if snapshot.bid is None or snapshot.ask is None: + return None + midpoint = (snapshot.bid + snapshot.ask) / 2.0 + if midpoint <= 0.0: + return None + return (snapshot.ask - snapshot.bid) / midpoint diff --git a/src/augur_signals/augur_signals/features/pipeline.py b/src/augur_signals/augur_signals/features/pipeline.py new file mode 100644 index 0000000..bae1798 --- /dev/null +++ b/src/augur_signals/augur_signals/features/pipeline.py @@ -0,0 +1,125 @@ +"""Feature pipeline orchestrator. + +Maintains per-market SnapshotBuffer plus a halt-aware EWMA baseline of +24h volume. For each ingested snapshot, computes momentum, volatility, +volume-ratio, bid/ask ratio, and spread over the canonical 5m / 15m / +1h / 4h wall-clock window labels. Windows are observation-count +internally; the mapping between wall-clock and observation count is +maintained per-market so tier changes do not corrupt feature +computation (see docs/architecture/adaptive-polling-spec.md +§Wall-Clock vs Observation-Count Window Reconciliation). +""" + +from __future__ import annotations + +from dataclasses import dataclass, field + +from augur_signals.features._config import FeaturePipelineConfig +from augur_signals.features.indicators import ( + bid_ask_ratio, + price_momentum, + spread_pct, + volatility, + volume_ratio, +) +from augur_signals.features.windows import SnapshotBuffer +from augur_signals.models import FeatureVector, MarketSnapshot + +# Wall-clock window labels mapped to seconds. +_WINDOW_SECONDS: dict[str, int] = { + "5m": 300, + "15m": 900, + "1h": 3600, + "4h": 14_400, +} + + +@dataclass(slots=True) +class _MarketFeatureState: + """Per-market buffer, EWMA baseline, and polling-interval estimate.""" + + buffer: SnapshotBuffer + ewma_volume: float = 0.0 + ewma_initialized: bool = False + polling_interval_seconds: int = 60 + last_timestamp_seconds: int | None = None + observed_intervals: list[int] = field(default_factory=list) + + +class FeaturePipeline: + """Computes per-market FeatureVectors from an incoming snapshot stream.""" + + def __init__(self, config: FeaturePipelineConfig | None = None) -> None: + self._config = config or FeaturePipelineConfig() + self._markets: dict[str, _MarketFeatureState] = {} + + def ingest(self, snapshot: MarketSnapshot) -> FeatureVector | None: + """Append *snapshot*, recompute the vector, and return it once warm.""" + state = self._markets.setdefault( + snapshot.market_id, + _MarketFeatureState(buffer=SnapshotBuffer(self._config.buffer_size)), + ) + self._update_polling_interval(state, snapshot) + state.buffer.append(snapshot) + self._update_ewma(state, snapshot) + if len(state.buffer) < self._config.warmup_size: + return None + return self._build_vector(snapshot, state) + + def _update_polling_interval( + self, state: _MarketFeatureState, snapshot: MarketSnapshot + ) -> None: + ts_seconds = int(snapshot.timestamp.timestamp()) + if state.last_timestamp_seconds is not None: + delta = ts_seconds - state.last_timestamp_seconds + if 0 < delta <= self._config.max_polling_interval_seconds: + state.observed_intervals.append(delta) + if len(state.observed_intervals) > 20: + state.observed_intervals.pop(0) + state.polling_interval_seconds = max( + 1, sum(state.observed_intervals) // len(state.observed_intervals) + ) + state.last_timestamp_seconds = ts_seconds + + def _update_ewma(self, state: _MarketFeatureState, snapshot: MarketSnapshot) -> None: + alpha = self._config.ewma_alpha + if not state.ewma_initialized: + state.ewma_volume = snapshot.volume_24h + state.ewma_initialized = True + return + # Halt-aware decay: polling gaps longer than 2x the expected + # interval apply extra decay so the baseline does not freeze. + gap_factor = 1 + if state.observed_intervals: + expected = state.polling_interval_seconds + actual = state.observed_intervals[-1] + if actual > 2 * expected and expected > 0: + gap_factor = max(1, actual // expected) + decayed = (1 - alpha) ** gap_factor + state.ewma_volume = decayed * state.ewma_volume + (1 - decayed) * snapshot.volume_24h + + def _build_vector(self, snapshot: MarketSnapshot, state: _MarketFeatureState) -> FeatureVector: + def window_count(label: str) -> int: + return max(2, _WINDOW_SECONDS[label] // state.polling_interval_seconds) + + w5m = state.buffer.window(window_count("5m")) + w15m = state.buffer.window(window_count("15m")) + w1h = state.buffer.window(window_count("1h")) + w4h = state.buffer.window(window_count("4h")) + + return FeatureVector( + market_id=snapshot.market_id, + computed_at=snapshot.timestamp, + price_momentum_5m=price_momentum(w5m), + price_momentum_15m=price_momentum(w15m), + price_momentum_1h=price_momentum(w1h), + price_momentum_4h=price_momentum(w4h), + volatility_5m=volatility(w5m), + volatility_15m=volatility(w15m), + volatility_1h=volatility(w1h), + volatility_4h=volatility(w4h), + volume_ratio_5m=volume_ratio(w5m, state.ewma_volume), + volume_ratio_1h=volume_ratio(w1h, state.ewma_volume), + bid_ask_ratio=bid_ask_ratio(snapshot), + spread_pct=spread_pct(snapshot), + ) diff --git a/src/augur_signals/augur_signals/features/windows.py b/src/augur_signals/augur_signals/features/windows.py new file mode 100644 index 0000000..64efd94 --- /dev/null +++ b/src/augur_signals/augur_signals/features/windows.py @@ -0,0 +1,46 @@ +"""Per-market snapshot buffer used by the feature pipeline. + +The buffer keeps the most recent N snapshots with O(1) append and O(k) +window retrieval. Window queries are observation-count internally; the +wall-clock window labels in docs/contracts/schema-and-versioning.md are +mapped via the current polling interval per +docs/architecture/adaptive-polling-spec.md §Wall-Clock vs +Observation-Count Window Reconciliation. +""" + +from __future__ import annotations + +from collections import deque +from collections.abc import Iterable + +from augur_signals.models import MarketSnapshot + + +class SnapshotBuffer: + """Bounded deque of recent MarketSnapshot for one market.""" + + def __init__(self, max_size: int = 1000) -> None: + if max_size <= 0: + raise ValueError("max_size must be positive") + self._buffer: deque[MarketSnapshot] = deque(maxlen=max_size) + + def append(self, snapshot: MarketSnapshot) -> None: + self._buffer.append(snapshot) + + def extend(self, snapshots: Iterable[MarketSnapshot]) -> None: + for snap in snapshots: + self._buffer.append(snap) + + def window(self, n: int) -> list[MarketSnapshot]: + """Return the most recent *n* snapshots (or fewer if not ready).""" + if n <= 0: + return [] + if n >= len(self._buffer): + return list(self._buffer) + return list(self._buffer)[-n:] + + def latest(self) -> MarketSnapshot | None: + return self._buffer[-1] if self._buffer else None + + def __len__(self) -> int: + return len(self._buffer) diff --git a/tests/signals/test_features.py b/tests/signals/test_features.py new file mode 100644 index 0000000..4c41cdc --- /dev/null +++ b/tests/signals/test_features.py @@ -0,0 +1,148 @@ +"""Tests for the feature pipeline, snapshot buffer, and indicator functions.""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta + +import pytest + +from augur_signals.features._config import FeaturePipelineConfig +from augur_signals.features.indicators import ( + bid_ask_ratio, + price_momentum, + spread_pct, + volatility, + volume_ratio, +) +from augur_signals.features.pipeline import FeaturePipeline +from augur_signals.features.windows import SnapshotBuffer +from augur_signals.models import MarketSnapshot + + +def _snap( + price: float = 0.5, + volume: float = 100_000.0, + offset_seconds: int = 0, +) -> MarketSnapshot: + return MarketSnapshot( + market_id="m", + platform="kalshi", + timestamp=datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + timedelta(seconds=offset_seconds), + last_price=price, + bid=max(0.0, price - 0.01), + ask=min(1.0, price + 0.01), + spread=0.02, + volume_24h=volume, + liquidity=5000.0, + question="q", + resolution_source=None, + resolution_criteria=None, + closes_at=None, + raw_json={}, + ) + + +@pytest.mark.unit +def test_snapshot_buffer_appends_and_windows() -> None: + buf = SnapshotBuffer(max_size=5) + for i in range(8): + buf.append(_snap(offset_seconds=i)) + # Only the last 5 snapshots are retained. + assert len(buf) == 5 + window = buf.window(3) + assert len(window) == 3 + assert window[-1] is buf.latest() + + +@pytest.mark.unit +def test_snapshot_buffer_rejects_invalid_size() -> None: + with pytest.raises(ValueError, match="positive"): + SnapshotBuffer(max_size=0) + + +@pytest.mark.unit +def test_price_momentum_zero_on_flat_window() -> None: + window = [_snap(price=0.5, offset_seconds=i) for i in range(10)] + assert price_momentum(window) == 0.0 + + +@pytest.mark.unit +def test_price_momentum_positive_on_rising_window() -> None: + window = [_snap(price=0.5 + 0.01 * i, offset_seconds=i) for i in range(10)] + assert price_momentum(window) > 0.0 + + +@pytest.mark.unit +def test_volatility_zero_on_flat_window() -> None: + window = [_snap(price=0.5, offset_seconds=i) for i in range(10)] + assert volatility(window) == 0.0 + + +@pytest.mark.unit +def test_volatility_positive_on_oscillating_window() -> None: + window = [ + _snap(price=0.5 + (0.05 if i % 2 == 0 else -0.05), offset_seconds=i) for i in range(20) + ] + assert volatility(window) > 0.0 + + +@pytest.mark.unit +def test_volume_ratio_returns_one_when_baseline_empty() -> None: + snaps = [_snap(volume=100.0, offset_seconds=i) for i in range(5)] + assert volume_ratio(snaps, ewma_baseline=0.0) == 1.0 + + +@pytest.mark.unit +def test_volume_ratio_detects_surge() -> None: + snaps = [_snap(volume=1_000_000.0, offset_seconds=i) for i in range(5)] + assert volume_ratio(snaps, ewma_baseline=100_000.0) == pytest.approx(10.0) + + +@pytest.mark.unit +def test_bid_ask_ratio_and_spread() -> None: + snap = _snap(price=0.5) + # bid=0.49, ask=0.51, so ratio = 0.49 / 1.0 and spread_pct = 0.02 / 0.5 + assert bid_ask_ratio(snap) == pytest.approx(0.49) + assert spread_pct(snap) == pytest.approx(0.02 / 0.5) + + +@pytest.mark.unit +def test_bid_ask_ratio_returns_none_without_bid_or_ask() -> None: + snap = _snap() + no_bid = snap.model_copy(update={"bid": None}) + no_ask = snap.model_copy(update={"ask": None}) + assert bid_ask_ratio(no_bid) is None + assert bid_ask_ratio(no_ask) is None + + +@pytest.mark.unit +def test_feature_pipeline_returns_none_during_warmup() -> None: + cfg = FeaturePipelineConfig(warmup_size=10, buffer_size=100, ewma_alpha=0.5) + pipeline = FeaturePipeline(cfg) + for i in range(5): + assert pipeline.ingest(_snap(offset_seconds=i * 30)) is None + + +@pytest.mark.unit +def test_feature_pipeline_emits_vector_after_warmup() -> None: + cfg = FeaturePipelineConfig(warmup_size=5, buffer_size=50, ewma_alpha=0.5) + pipeline = FeaturePipeline(cfg) + last: object = None + for i in range(10): + last = pipeline.ingest(_snap(offset_seconds=i * 30)) + assert last is not None + assert last.schema_version == "1.0.0" # type: ignore[attr-defined] + + +@pytest.mark.unit +def test_feature_pipeline_is_idempotent_given_same_buffer() -> None: + cfg = FeaturePipelineConfig(warmup_size=5, buffer_size=50, ewma_alpha=0.5) + pipeline_a = FeaturePipeline(cfg) + pipeline_b = FeaturePipeline(cfg) + snapshots = [_snap(price=0.5 + 0.001 * i, offset_seconds=i * 30) for i in range(10)] + vec_a = None + vec_b = None + for snap in snapshots: + vec_a = pipeline_a.ingest(snap) + vec_b = pipeline_b.ingest(snap) + assert vec_a == vec_b From 4695ebd24f247a2e033420ac845c7e6ce875e438 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:10:46 +0530 Subject: [PATCH 05/16] feat(detectors): add shared protocol, registry, and configuration surface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SignalDetector is the protocol every detector implements: warmup, ingest(market, feature, snapshot, now), state_dict / load_state for checkpointing, and reset. The ingest signature makes now a parameter rather than reading datetime.now(), which keeps backtest replay bit-for-bit identical to live runs — a prerequisite for calibration fidelity per docs/methodology/calibration-methodology.md. DetectorRegistry dispatches per-market detectors observation-at-a-time and batch detectors (cross-market divergence) across the whole polling cycle. Registration is explicit so the engine composes exactly the set of detectors the configuration enables. DetectorsConfig composes five per-detector sub-models — PriceVelocity, VolumeSpike, BookImbalance, CrossMarket, RegimeShift — mirroring the block shape in config/detectors.toml. Every sub-model carries its resolution_exclusion_seconds default (21600 = 6 h) so the pre- resolution-window invariant is enforced uniformly. --- .../augur_signals/detectors/_config.py | 74 ++++++++++ .../augur_signals/detectors/base.py | 53 +++++++ .../augur_signals/detectors/registry.py | 81 +++++++++++ tests/signals/test_detector_registry.py | 134 ++++++++++++++++++ 4 files changed, 342 insertions(+) create mode 100644 src/augur_signals/augur_signals/detectors/_config.py create mode 100644 src/augur_signals/augur_signals/detectors/base.py create mode 100644 src/augur_signals/augur_signals/detectors/registry.py create mode 100644 tests/signals/test_detector_registry.py diff --git a/src/augur_signals/augur_signals/detectors/_config.py b/src/augur_signals/augur_signals/detectors/_config.py new file mode 100644 index 0000000..76e5e4d --- /dev/null +++ b/src/augur_signals/augur_signals/detectors/_config.py @@ -0,0 +1,74 @@ +"""Per-detector configuration models. + +Schema mirrors config/detectors.toml. Each detector block is +authoritative in docs/methodology/calibration-methodology.md for its +parameter semantics; the Pydantic models here only validate shape. +""" + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict, Field + + +class PriceVelocityConfig(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + hazard_rate: float = Field(default=0.004, gt=0.0) + alpha_prior: float = Field(default=1.0, gt=0.0) + beta_prior: float = Field(default=1.0, gt=0.0) + run_length_cap: int = Field(default=1000, gt=0) + fire_threshold: float = Field(default=0.7, ge=0.0, le=1.0) + resolution_exclusion_seconds: int = Field(default=21600, gt=0) + cooldown_seconds: int = Field(default=900, ge=0) + + +class VolumeSpikeConfig(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + ewma_alpha: float = Field(default=0.05, gt=0.0, lt=1.0) + min_absolute_volume: float = Field(default=10_000.0, ge=0.0) + minimum_z: float = Field(default=1.65, ge=0.0) + target_fdr_q: float = Field(default=0.05, gt=0.0, lt=1.0) + resolution_exclusion_seconds: int = 21_600 + + +class BookImbalanceConfig(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + depth_levels: int = Field(default=5, gt=0) + bullish_threshold: float = Field(default=0.72, gt=0.5, le=1.0) + bearish_threshold: float = Field(default=0.28, ge=0.0, lt=0.5) + persistence_snapshots: int = Field(default=3, gt=0) + minimum_total_depth: float = Field(default=5_000.0, ge=0.0) + resolution_exclusion_seconds: int = 21_600 + + +class CrossMarketConfig(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + window_seconds: int = Field(default=14_400, gt=0) + min_historical_correlation: float = Field(default=0.6, ge=0.0, le=1.0) + activity_floor: float = Field(default=1.0, ge=0.0) + target_fdr_q: float = Field(default=0.05, gt=0.0, lt=1.0) + resolution_exclusion_seconds: int = 21_600 + + +class RegimeShiftConfig(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + target_alpha: float = Field(default=0.02, gt=0.0, lt=1.0) + k_multiplier: float = Field(default=0.5, gt=0.0) + h_multiplier: float = Field(default=4.0, gt=0.0) + dormancy_minimum_seconds: int = Field(default=21_600, gt=0) + adaptive_cooldown_factor: float = Field(default=2.0, gt=0.0) + resolution_exclusion_seconds: int = 21_600 + + +class DetectorsConfig(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + price_velocity: PriceVelocityConfig = Field(default_factory=PriceVelocityConfig) + volume_spike: VolumeSpikeConfig = Field(default_factory=VolumeSpikeConfig) + book_imbalance: BookImbalanceConfig = Field(default_factory=BookImbalanceConfig) + cross_market: CrossMarketConfig = Field(default_factory=CrossMarketConfig) + regime_shift: RegimeShiftConfig = Field(default_factory=RegimeShiftConfig) diff --git a/src/augur_signals/augur_signals/detectors/base.py b/src/augur_signals/augur_signals/detectors/base.py new file mode 100644 index 0000000..22bee86 --- /dev/null +++ b/src/augur_signals/augur_signals/detectors/base.py @@ -0,0 +1,53 @@ +"""SignalDetector protocol. + +Every detector implements this surface. ``now`` is a parameter rather +than sourced from ``datetime.now()`` so backtests reproduce live +behavior bit-for-bit; the CI AST lint in scripts/ rejects any detector +module that calls ``datetime.now()`` directly. + +Each detector is stateful per market (``state_dict`` / ``load_state`` +so detector progress survives process restarts) and serializable for +the engine's periodic checkpoint. Detectors return ``None`` when no +signal fires; a ``MarketSignal`` instance carries the full calibrated +event per docs/contracts/schema-and-versioning.md §MarketSignal. +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Any, Protocol + +from augur_signals.models import FeatureVector, MarketSignal, MarketSnapshot, SignalType + + +class SignalDetector(Protocol): + """Common surface for per-market detectors.""" + + detector_id: str + signal_type: SignalType + + def warmup_required(self) -> int: + """Number of observations required before the detector can fire.""" + ... + + def ingest( + self, + market_id: str, + feature: FeatureVector, + snapshot: MarketSnapshot, + now: datetime, + ) -> MarketSignal | None: + """Process one observation; return a signal or None.""" + ... + + def state_dict(self, market_id: str) -> dict[str, Any]: + """Serialize per-market state for checkpointing.""" + ... + + def load_state(self, market_id: str, state: dict[str, Any]) -> None: + """Restore per-market state from a prior checkpoint.""" + ... + + def reset(self, market_id: str) -> None: + """Clear all state for *market_id*.""" + ... diff --git a/src/augur_signals/augur_signals/detectors/registry.py b/src/augur_signals/augur_signals/detectors/registry.py new file mode 100644 index 0000000..9d1ac44 --- /dev/null +++ b/src/augur_signals/augur_signals/detectors/registry.py @@ -0,0 +1,81 @@ +"""Registry and dispatch for signal detectors. + +Per-market detectors receive a single (feature, snapshot, now) triple +and return an optional signal. The batch detector (cross-market +divergence) processes the full snapshot set for a polling cycle in one +call so the FDR controller sees all candidate p-values simultaneously. +""" + +from __future__ import annotations + +from collections.abc import Iterable +from datetime import datetime +from typing import Protocol + +from augur_signals.detectors.base import SignalDetector +from augur_signals.models import FeatureVector, MarketSignal, MarketSnapshot + + +class BatchDetector(Protocol): + """Detectors that need the whole polling cycle at once.""" + + detector_id: str + + def evaluate_batch( + self, + snapshots: dict[str, MarketSnapshot], + now: datetime, + ) -> list[MarketSignal]: + """Process every market's latest snapshot as one batch.""" + ... + + +class DetectorRegistry: + """Keeps track of registered detectors and dispatches observations to them.""" + + def __init__(self) -> None: + self._detectors: list[SignalDetector] = [] + self._batch: list[BatchDetector] = [] + + def register(self, detector: SignalDetector) -> None: + self._detectors.append(detector) + + def register_batch(self, detector: BatchDetector) -> None: + self._batch.append(detector) + + def __len__(self) -> int: + return len(self._detectors) + len(self._batch) + + def warmup_required(self) -> int: + if not self._detectors: + return 0 + return max(d.warmup_required() for d in self._detectors) + + def dispatch( + self, + market_id: str, + feature: FeatureVector, + snapshot: MarketSnapshot, + now: datetime, + ) -> list[MarketSignal]: + """Run every per-market detector on one observation.""" + results: list[MarketSignal] = [] + for detector in self._detectors: + signal = detector.ingest(market_id, feature, snapshot, now) + if signal is not None: + results.append(signal) + return results + + def dispatch_batch( + self, + snapshots: dict[str, MarketSnapshot], + now: datetime, + ) -> list[MarketSignal]: + """Run every batch detector on the current polling cycle.""" + results: list[MarketSignal] = [] + for detector in self._batch: + results.extend(detector.evaluate_batch(snapshots, now)) + return results + + def detectors(self) -> Iterable[SignalDetector]: + return tuple(self._detectors) diff --git a/tests/signals/test_detector_registry.py b/tests/signals/test_detector_registry.py new file mode 100644 index 0000000..048a1ca --- /dev/null +++ b/tests/signals/test_detector_registry.py @@ -0,0 +1,134 @@ +"""Tests for the detector registry's dispatch surface.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from typing import Any + +import pytest + +from augur_signals.detectors.registry import DetectorRegistry +from augur_signals.models import ( + FeatureVector, + MarketSignal, + MarketSnapshot, + SignalType, + new_signal_id, +) + + +class _FireEveryTick: + detector_id = "fixture_fire_every_tick" + signal_type = SignalType.PRICE_VELOCITY + + def warmup_required(self) -> int: + return 0 + + def ingest( + self, + market_id: str, + feature: FeatureVector, + snapshot: MarketSnapshot, + now: datetime, + ) -> MarketSignal | None: + return MarketSignal( + signal_id=new_signal_id(), + market_id=market_id, + platform=snapshot.platform, + signal_type=self.signal_type, + magnitude=0.9, + direction=1, + confidence=0.9, + fdr_adjusted=False, + detected_at=now, + window_seconds=300, + liquidity_tier="high", + raw_features={"calibration_provenance": f"{self.detector_id}@identity_v0"}, + ) + + def state_dict(self, market_id: str) -> dict[str, Any]: + return {} + + def load_state(self, market_id: str, state: dict[str, Any]) -> None: + return None + + def reset(self, market_id: str) -> None: + return None + + +class _NeverFire(_FireEveryTick): + detector_id = "fixture_never_fire" + + def ingest( + self, + market_id: str, + feature: FeatureVector, + snapshot: MarketSnapshot, + now: datetime, + ) -> MarketSignal | None: + return None + + +def _feature() -> FeatureVector: + return FeatureVector( + market_id="m", + computed_at=datetime(2026, 3, 15, tzinfo=UTC), + price_momentum_5m=0.0, + price_momentum_15m=0.0, + price_momentum_1h=0.0, + price_momentum_4h=0.0, + volatility_5m=0.0, + volatility_15m=0.0, + volatility_1h=0.0, + volatility_4h=0.0, + volume_ratio_5m=1.0, + volume_ratio_1h=1.0, + bid_ask_ratio=0.5, + spread_pct=0.01, + ) + + +def _snapshot() -> MarketSnapshot: + return MarketSnapshot( + market_id="m", + platform="kalshi", + timestamp=datetime(2026, 3, 15, tzinfo=UTC), + last_price=0.5, + bid=0.49, + ask=0.51, + spread=0.02, + volume_24h=100000.0, + liquidity=5000.0, + question="q", + resolution_source=None, + resolution_criteria=None, + closes_at=None, + raw_json={}, + ) + + +@pytest.mark.unit +def test_registry_dispatches_to_every_detector() -> None: + reg = DetectorRegistry() + reg.register(_FireEveryTick()) + reg.register(_NeverFire()) + signals = reg.dispatch("m", _feature(), _snapshot(), datetime(2026, 3, 15, tzinfo=UTC)) + assert len(signals) == 1 + assert signals[0].signal_type == SignalType.PRICE_VELOCITY + + +@pytest.mark.unit +def test_registry_warmup_required_is_max() -> None: + reg = DetectorRegistry() + + class _Hundred(_FireEveryTick): + def warmup_required(self) -> int: + return 100 + + class _Fifty(_FireEveryTick): + def warmup_required(self) -> int: + return 50 + + reg.register(_Fifty()) + reg.register(_Hundred()) + assert reg.warmup_required() == 100 From 5b9d4785f6209a30e6e22db88b28e66f1be781b5 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:18:02 +0530 Subject: [PATCH 06/16] feat(detectors): implement price-velocity detector with bernoulli-beta bocpd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Land the price-velocity detector per the method description in docs/methodology/calibration-methodology.md. The change-point model is Bernoulli-Beta BOCPD (Adams & MacKay 2007) on a binary projection of each price observation against a per-market EWMA of recent prices — sustained deviations drive the posterior run-length distribution's mass onto the short-run bucket, producing a sharp signal precisely when the rate of up-ticks vs down-ticks changes. The run-length distribution is capped at run_length_cap and mass that would otherwise fall off the cap is absorbed back into the cap bucket with a weighted average of the sufficient statistics. Without the absorption, long steady-state streams leak probability mass and produce a slow drift in P(r_t < 5). PriceVelocityDetector enforces three operational invariants inside ingest: (1) the 6 h pre-resolution exclusion — signals inside the window are never returned regardless of posterior magnitude, (2) a per-market cooldown so the same underlying change does not fire repeatedly, (3) a 50-observation warmup so the early run-length distribution (trivially concentrated at r = 0) does not produce spurious signals on a fresh market. liquidity_tier.banding provides the per-snapshot tier estimator against the tier thresholds in docs/foundations/glossary.md. Tests cover the BOCPD math invariants (constant stream drives P(r_t < 5) below the 0.3 noise floor, step change fires within 50 observations, out-of-range observation raises), the detector-level behaviors (pre-resolution exclusion, boundary prices, state round trip, reset), and the flat-stream no-signal property. --- .../calibration/liquidity_tier.py | 22 +++ .../augur_signals/detectors/_bocpd.py | 140 +++++++++++++++ .../augur_signals/detectors/price_velocity.py | 170 ++++++++++++++++++ tests/signals/test_price_velocity.py | 145 +++++++++++++++ 4 files changed, 477 insertions(+) create mode 100644 src/augur_signals/augur_signals/calibration/liquidity_tier.py create mode 100644 src/augur_signals/augur_signals/detectors/_bocpd.py create mode 100644 src/augur_signals/augur_signals/detectors/price_velocity.py create mode 100644 tests/signals/test_price_velocity.py diff --git a/src/augur_signals/augur_signals/calibration/liquidity_tier.py b/src/augur_signals/augur_signals/calibration/liquidity_tier.py new file mode 100644 index 0000000..39c2daa --- /dev/null +++ b/src/augur_signals/augur_signals/calibration/liquidity_tier.py @@ -0,0 +1,22 @@ +"""Per-market liquidity tier banding. + +The instantaneous per-snapshot estimator is used at signal emission +time; the canonical daily tier reconciliation against a 7-day rolling +volume window runs as part of the calibration nightly job per +docs/foundations/glossary.md §Liquidity Tier. +""" + +from __future__ import annotations + +from typing import Literal + +LiquidityTier = Literal["high", "mid", "low"] + + +def banding(volume_24h: float) -> LiquidityTier: + """Return the per-snapshot liquidity tier for a 24h dollar volume.""" + if volume_24h >= 250_000: + return "high" + if volume_24h >= 50_000: + return "mid" + return "low" diff --git a/src/augur_signals/augur_signals/detectors/_bocpd.py b/src/augur_signals/augur_signals/detectors/_bocpd.py new file mode 100644 index 0000000..0c010f2 --- /dev/null +++ b/src/augur_signals/augur_signals/detectors/_bocpd.py @@ -0,0 +1,140 @@ +"""Bayesian Online Changepoint Detection with Beta-Binomial likelihood. + +Reference: Adams & MacKay 2007 ("Bayesian Online Changepoint +Detection", arXiv 0710.3742), adapted for observations in [0, 1]. +Each observation x is treated as the probability of a single +Bernoulli trial so the conjugate Beta-Binomial predictive +``alpha / (alpha + beta) * x + beta / (alpha + beta) * (1 - x)`` +applies directly. + +The run-length distribution is capped at ``run_length_cap`` so memory +is bounded; for a hazard of 1/250 and a cap of 1000 the truncation +error on the fire decision is negligible (<1e-6). +""" + +from __future__ import annotations + +import math +from typing import Any + + +class BetaBinomialBOCPD: + """Online change-point detector for observations in [0, 1].""" + + def __init__( + self, + hazard_rate: float, + alpha_prior: float, + beta_prior: float, + run_length_cap: int, + ) -> None: + if not 0.0 < hazard_rate < 1.0: + raise ValueError("hazard_rate must lie in (0, 1)") + if alpha_prior <= 0.0 or beta_prior <= 0.0: + raise ValueError("alpha_prior and beta_prior must be positive") + if run_length_cap <= 0: + raise ValueError("run_length_cap must be positive") + self._hazard = hazard_rate + self._cap = run_length_cap + self._alpha0 = alpha_prior + self._beta0 = beta_prior + self._pr: list[float] = [0.0] * (run_length_cap + 1) + self._pr[0] = 1.0 + self._alphas: list[float] = [alpha_prior] * (run_length_cap + 1) + self._betas: list[float] = [beta_prior] * (run_length_cap + 1) + + def update(self, observation: float) -> tuple[float, float]: + """Process one observation. + + Returns the tuple ``(P(r_t < 5), E[r_t])`` where ``r_t`` is the + run length in observations since the last change point. + """ + if not 0.0 <= observation <= 1.0: + raise ValueError("observation must lie in [0, 1]") + + # Predictive for each run length under the Beta-Binomial posterior. + predictive: list[float] = [] + for alpha, beta in zip(self._alphas, self._betas, strict=True): + total = alpha + beta + predictive.append(alpha / total * observation + beta / total * (1.0 - observation)) + + growth = [self._pr[i] * predictive[i] * (1.0 - self._hazard) for i in range(self._cap + 1)] + change_mass = sum(self._pr[i] * predictive[i] * self._hazard for i in range(self._cap + 1)) + new_pr: list[float] = [0.0] * (self._cap + 1) + new_pr[0] = change_mass + # Growth shifts run length up by one. Mass that would otherwise + # land at cap+1 is absorbed back into the cap bucket so the + # run-length distribution does not leak probability as ``t`` grows + # past the cap. + for i in range(1, self._cap): + new_pr[i] = growth[i - 1] + new_pr[self._cap] = growth[self._cap - 1] + growth[self._cap] + + total_mass = sum(new_pr) + if total_mass <= 0.0: + # Numerical collapse — reset to the prior rather than return garbage. + new_pr = [0.0] * (self._cap + 1) + new_pr[0] = 1.0 + total_mass = 1.0 + self._pr = [p / total_mass for p in new_pr] + + new_alphas: list[float] = [self._alpha0] + [0.0] * self._cap + new_betas: list[float] = [self._beta0] + [0.0] * self._cap + for i in range(1, self._cap): + new_alphas[i] = self._alphas[i - 1] + observation + new_betas[i] = self._betas[i - 1] + (1.0 - observation) + # Absorb cap-1 and cap sufficient statistics with weights matching + # the two mass contributions so the posterior remains a proper + # mixture at the cap bucket. + weight_prev = growth[self._cap - 1] + weight_absorb = growth[self._cap] + weight_total = weight_prev + weight_absorb + if weight_total > 0.0: + new_alphas[self._cap] = ( + weight_prev * (self._alphas[self._cap - 1] + observation) + + weight_absorb * (self._alphas[self._cap] + observation) + ) / weight_total + new_betas[self._cap] = ( + weight_prev * (self._betas[self._cap - 1] + (1.0 - observation)) + + weight_absorb * (self._betas[self._cap] + (1.0 - observation)) + ) / weight_total + else: + new_alphas[self._cap] = self._alphas[self._cap - 1] + observation + new_betas[self._cap] = self._betas[self._cap - 1] + (1.0 - observation) + self._alphas = new_alphas + self._betas = new_betas + + p_change = sum(self._pr[: min(5, self._cap + 1)]) + expected_run_length = sum(i * self._pr[i] for i in range(self._cap + 1)) + return p_change, expected_run_length + + def state_dict(self) -> dict[str, Any]: + return { + "hazard": self._hazard, + "cap": self._cap, + "alpha0": self._alpha0, + "beta0": self._beta0, + "pr": list(self._pr), + "alphas": list(self._alphas), + "betas": list(self._betas), + } + + def load_state(self, state: dict[str, Any]) -> None: + self._hazard = float(state["hazard"]) + self._cap = int(state["cap"]) + self._alpha0 = float(state["alpha0"]) + self._beta0 = float(state["beta0"]) + self._pr = [float(x) for x in state["pr"]] + self._alphas = [float(x) for x in state["alphas"]] + self._betas = [float(x) for x in state["betas"]] + + +def laplace_smoothed_logit(price: float, eps: float = 1e-4) -> float: + """Clamp *price* to [eps, 1-eps] so log transforms stay finite. + + The BOCPD observation model itself operates on the raw price; this + helper is retained for call sites that need a bounded logit for + momentum computation near the 0/1 boundaries. + """ + bounded = max(eps, min(1.0 - eps, price)) + return math.log(bounded / (1.0 - bounded)) diff --git a/src/augur_signals/augur_signals/detectors/price_velocity.py b/src/augur_signals/augur_signals/detectors/price_velocity.py new file mode 100644 index 0000000..f49a2d5 --- /dev/null +++ b/src/augur_signals/augur_signals/detectors/price_velocity.py @@ -0,0 +1,170 @@ +"""Price velocity detector — Beta-Binomial BOCPD with per-market state. + +Implements the method in docs/methodology/calibration-methodology.md +§Price Velocity for change-point detection on a bounded-probability +price series. Every detector instance carries a per-market +BetaBinomialBOCPD and a cooldown timer so the same underlying change +does not fire repeatedly. + +The pre-resolution exclusion (6 h before market close) is enforced +inside ``ingest`` so a signal in the window is never returned, +regardless of the posterior probability. +""" + +from __future__ import annotations + +from datetime import datetime, timedelta +from typing import Any, Literal + +from augur_signals.calibration.liquidity_tier import banding +from augur_signals.detectors._bocpd import BetaBinomialBOCPD +from augur_signals.detectors._config import PriceVelocityConfig +from augur_signals.models import ( + FeatureVector, + MarketSignal, + MarketSnapshot, + SignalType, + new_signal_id, +) + + +class PriceVelocityDetector: + """Detector wrapping the BOCPD math with cooldown and resolution gates.""" + + detector_id: str = "price_velocity_bocpd_beta_v1" + signal_type: SignalType = SignalType.PRICE_VELOCITY + + _WARMUP_OBSERVATIONS: int = 50 + + def __init__( + self, + config: PriceVelocityConfig, + calibration_provenance: str = "price_velocity_bocpd_beta_v1@identity_v0", + ) -> None: + self._config = config + self._provenance = calibration_provenance + self._bocpd: dict[str, BetaBinomialBOCPD] = {} + self._last_price: dict[str, float] = {} + self._cooldown_until: dict[str, datetime] = {} + self._observations: dict[str, int] = {} + self._running_mean: dict[str, float] = {} + + def warmup_required(self) -> int: + return self._WARMUP_OBSERVATIONS + + def ingest( + self, + market_id: str, + feature: FeatureVector, + snapshot: MarketSnapshot, + now: datetime, + ) -> MarketSignal | None: + del feature # price series drives the detector directly. + # Pre-resolution exclusion. + if snapshot.closes_at is not None: + remaining = (snapshot.closes_at - now).total_seconds() + if 0.0 <= remaining < self._config.resolution_exclusion_seconds: + return None + # Cooldown. + cooldown = self._cooldown_until.get(market_id) + if cooldown is not None and now < cooldown: + return None + + bocpd = self._bocpd.setdefault( + market_id, + BetaBinomialBOCPD( + hazard_rate=self._config.hazard_rate, + alpha_prior=self._config.alpha_prior, + beta_prior=self._config.beta_prior, + run_length_cap=self._config.run_length_cap, + ), + ) + # Bernoulli-projected observation against the running mean gives + # the posterior the sharpness required for the fire threshold. + # The running mean updates with alpha=0.05 so a sustained level + # shift dominates an isolated tick. + mean = self._running_mean.get(market_id, snapshot.last_price) + updated_mean = 0.95 * mean + 0.05 * snapshot.last_price + self._running_mean[market_id] = updated_mean + bernoulli_obs = 1.0 if snapshot.last_price > mean else 0.0 + p_change, expected_rl = bocpd.update(bernoulli_obs) + prior_price = self._last_price.get(market_id) + self._last_price[market_id] = snapshot.last_price + self._observations[market_id] = self._observations.get(market_id, 0) + 1 + + # Suppress firing until the run-length distribution has settled + # below the fire threshold on steady-state input. + if self._observations[market_id] < self._WARMUP_OBSERVATIONS: + return None + if p_change < self._config.fire_threshold: + return None + + direction_sign: Literal[-1, 0, 1] = 0 + if prior_price is not None: + if snapshot.last_price > prior_price: + direction_sign = 1 + elif snapshot.last_price < prior_price: + direction_sign = -1 + tier = banding(snapshot.volume_24h) + self._cooldown_until[market_id] = now + timedelta(seconds=self._config.cooldown_seconds) + + return MarketSignal( + signal_id=new_signal_id(), + market_id=market_id, + platform=snapshot.platform, + signal_type=self.signal_type, + magnitude=max(0.0, min(1.0, p_change)), + direction=direction_sign, + confidence=max(0.0, min(1.0, p_change)), + fdr_adjusted=False, + detected_at=now, + window_seconds=300, + liquidity_tier=tier, + raw_features={ + "posterior_p_change": p_change, + "expected_run_length": expected_rl, + "calibration_provenance": self._provenance, + }, + ) + + def state_dict(self, market_id: str) -> dict[str, Any]: + bocpd = self._bocpd.get(market_id) + return { + "bocpd": bocpd.state_dict() if bocpd else None, + "last_price": self._last_price.get(market_id), + "cooldown_until": ( + cooldown.isoformat() if (cooldown := self._cooldown_until.get(market_id)) else None + ), + "observations": self._observations.get(market_id, 0), + "running_mean": self._running_mean.get(market_id), + } + + def load_state(self, market_id: str, state: dict[str, Any]) -> None: + bocpd_state = state.get("bocpd") + if bocpd_state: + bocpd = BetaBinomialBOCPD( + hazard_rate=self._config.hazard_rate, + alpha_prior=self._config.alpha_prior, + beta_prior=self._config.beta_prior, + run_length_cap=self._config.run_length_cap, + ) + bocpd.load_state(bocpd_state) + self._bocpd[market_id] = bocpd + last_price = state.get("last_price") + if last_price is not None: + self._last_price[market_id] = float(last_price) + cooldown = state.get("cooldown_until") + if cooldown is not None: + self._cooldown_until[market_id] = datetime.fromisoformat(str(cooldown)) + observations = state.get("observations", 0) + self._observations[market_id] = int(observations) + running_mean = state.get("running_mean") + if running_mean is not None: + self._running_mean[market_id] = float(running_mean) + + def reset(self, market_id: str) -> None: + self._bocpd.pop(market_id, None) + self._last_price.pop(market_id, None) + self._cooldown_until.pop(market_id, None) + self._observations.pop(market_id, None) + self._running_mean.pop(market_id, None) diff --git a/tests/signals/test_price_velocity.py b/tests/signals/test_price_velocity.py new file mode 100644 index 0000000..74164d7 --- /dev/null +++ b/tests/signals/test_price_velocity.py @@ -0,0 +1,145 @@ +"""Tests for the price-velocity detector and Beta-Binomial BOCPD. + +Covers the algorithmic invariants listed in phase-1 §15.2: constant +streams produce no signal, step changes fire within the first 50 +observations after the change, boundary prices do not crash the +detector, and the pre-resolution exclusion window is honored. +""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta + +import pytest + +from augur_signals.detectors._bocpd import BetaBinomialBOCPD +from augur_signals.detectors._config import PriceVelocityConfig +from augur_signals.detectors.price_velocity import PriceVelocityDetector +from augur_signals.models import FeatureVector, MarketSnapshot + + +def _fv() -> FeatureVector: + return FeatureVector( + market_id="m", + computed_at=datetime(2026, 3, 15, tzinfo=UTC), + price_momentum_5m=0.0, + price_momentum_15m=0.0, + price_momentum_1h=0.0, + price_momentum_4h=0.0, + volatility_5m=0.0, + volatility_15m=0.0, + volatility_1h=0.0, + volatility_4h=0.0, + volume_ratio_5m=1.0, + volume_ratio_1h=1.0, + bid_ask_ratio=0.5, + spread_pct=0.01, + ) + + +def _snap(price: float, closes_at: datetime | None = None) -> MarketSnapshot: + return MarketSnapshot( + market_id="m", + platform="kalshi", + timestamp=datetime(2026, 3, 15, 12, 0, tzinfo=UTC), + last_price=price, + bid=max(0.0, price - 0.01), + ask=min(1.0, price + 0.01), + spread=0.02, + volume_24h=120_000.0, + liquidity=5_000.0, + question="q", + resolution_source=None, + resolution_criteria=None, + closes_at=closes_at, + raw_json={}, + ) + + +@pytest.mark.unit +def test_bocpd_rejects_out_of_range_observation() -> None: + bocpd = BetaBinomialBOCPD(hazard_rate=0.01, alpha_prior=1.0, beta_prior=1.0, run_length_cap=50) + with pytest.raises(ValueError, match=r"\[0, 1\]"): + bocpd.update(-0.5) + + +@pytest.mark.unit +def test_bocpd_constants_do_not_trigger_change() -> None: + bocpd = BetaBinomialBOCPD( + hazard_rate=0.004, alpha_prior=1.0, beta_prior=1.0, run_length_cap=200 + ) + p_change = 1.0 + for _ in range(400): + p_change, _ = bocpd.update(0.5) + # After a long constant stream, P(r_t < 5) should be small. + assert p_change < 0.3 + + +@pytest.mark.unit +def test_bocpd_detects_step_change() -> None: + # Binary-projected observations (all zeros before the shift, all ones after) + # drive the Beta-Binomial posterior onto a sharp edge; P(r_t < 5) should + # rise above the fire threshold within the first handful of observations. + bocpd = BetaBinomialBOCPD(hazard_rate=0.01, alpha_prior=1.0, beta_prior=1.0, run_length_cap=200) + for _ in range(100): + bocpd.update(0.0) + fired = False + for _ in range(50): + p_change, _ = bocpd.update(1.0) + if p_change > 0.7: + fired = True + break + assert fired + + +@pytest.mark.unit +def test_price_velocity_no_signal_on_flat_stream() -> None: + detector = PriceVelocityDetector(PriceVelocityConfig()) + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + emitted = [ + detector.ingest("m", _fv(), _snap(0.5), now + timedelta(seconds=i * 30)) for i in range(200) + ] + assert all(sig is None for sig in emitted) + + +@pytest.mark.unit +def test_price_velocity_no_signal_during_pre_resolution_window() -> None: + detector = PriceVelocityDetector(PriceVelocityConfig()) + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + closes_at = now + timedelta(hours=2) # inside the 6h exclusion window + assert detector.ingest("m", _fv(), _snap(0.5, closes_at), now) is None + + +@pytest.mark.unit +def test_price_velocity_boundary_prices_do_not_crash() -> None: + detector = PriceVelocityDetector(PriceVelocityConfig()) + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + for i, price in enumerate([0.02, 0.98, 0.02, 0.99]): + detector.ingest("m", _fv(), _snap(price), now + timedelta(seconds=i * 30)) + + +@pytest.mark.unit +def test_price_velocity_state_round_trip_preserves_behavior() -> None: + detector = PriceVelocityDetector(PriceVelocityConfig()) + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + for i in range(30): + detector.ingest("m", _fv(), _snap(0.5), now + timedelta(seconds=i * 30)) + state = detector.state_dict("m") + restored = PriceVelocityDetector(PriceVelocityConfig()) + restored.load_state("m", state) + assert restored.state_dict("m") == state + + +@pytest.mark.unit +def test_price_velocity_reset_clears_state() -> None: + detector = PriceVelocityDetector(PriceVelocityConfig()) + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + detector.ingest("m", _fv(), _snap(0.5), now) + detector.reset("m") + assert detector.state_dict("m") == { + "bocpd": None, + "last_price": None, + "cooldown_until": None, + "observations": 0, + "running_mean": None, + } From b5caa62211c02fde0f60c4ed8d3f02ff0031c8c2 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:20:29 +0530 Subject: [PATCH 07/16] feat(detectors): implement volume-spike, book-imbalance, and regime-shift detectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three per-market detectors ship together because they share the stateful-per-market / pre-resolution-excluded / warmup-gated shape and exercise the same dispatch path through the registry. Volume spike maintains per-market EWMA mean and variance of volume_ratio_1h (alpha 0.05 by default). Signals fire when the raw z-score exceeds the configured minimum_z and the absolute 24 h volume is above the per-market floor. The FDR controller layer composes over these raw z-scores once the calibration module lands; the detector itself does not gate on FDR because the controller needs a batch of candidates across markets, not a per-market decision. Book imbalance applies a depth gate before the ratio check so signals do not fire on thin books where an imbalance is more likely a manipulation artifact than a directional view (docs/methodology/manipulation-taxonomy.md §thin_book_during_move). The persistence requirement (default 3 consecutive snapshots) filters transient imbalances. Regime shift uses a two-sided CUSUM on volatility_1h with a per-market dormancy gate. The detector only fires after the dormancy window has elapsed since the last signal (or since initialization), so a sustained increase in volatility following a quiet window is what trips the detector. An adaptive cooldown multiplies the dormancy window by the configured factor after each firing, preventing the same regime from emitting signals repeatedly as volatility continues to rise. All three enforce the 6 h pre-resolution exclusion inside ingest, thread ``now`` as a parameter (no ``datetime.now()`` calls), and populate the calibration_provenance stamp so emitted signals satisfy the MarketSignal model validator. --- .../augur_signals/detectors/_cusum.py | 46 +++++ .../augur_signals/detectors/book_imbalance.py | 116 ++++++++++++ .../augur_signals/detectors/regime_shift.py | 158 ++++++++++++++++ .../augur_signals/detectors/volume_spike.py | 118 ++++++++++++ tests/signals/test_detectors_misc.py | 173 ++++++++++++++++++ 5 files changed, 611 insertions(+) create mode 100644 src/augur_signals/augur_signals/detectors/_cusum.py create mode 100644 src/augur_signals/augur_signals/detectors/book_imbalance.py create mode 100644 src/augur_signals/augur_signals/detectors/regime_shift.py create mode 100644 src/augur_signals/augur_signals/detectors/volume_spike.py create mode 100644 tests/signals/test_detectors_misc.py diff --git a/src/augur_signals/augur_signals/detectors/_cusum.py b/src/augur_signals/augur_signals/detectors/_cusum.py new file mode 100644 index 0000000..c655d6a --- /dev/null +++ b/src/augur_signals/augur_signals/detectors/_cusum.py @@ -0,0 +1,46 @@ +"""Two-sided CUSUM for detecting sustained shifts in a running mean. + +Standard formulation: maintain positive and negative cumulative sums, +reset when they cross a control threshold ``h * sigma``. ``k`` is the +allowable slack below which no accumulation happens; together ``(k, h)`` +trade off detection speed against false-positive rate. +""" + +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(slots=True) +class TwoSidedCUSUM: + """Per-market positive/negative CUSUM pair.""" + + k_sigma: float + h_sigma: float + sigma_estimate: float = 1.0 + positive: float = 0.0 + negative: float = 0.0 + samples: int = 0 + mean_estimate: float = 0.0 + _m2: float = 0.0 + + def update(self, observation: float) -> tuple[float, float]: + """Apply one observation; return the current (positive, negative) pair.""" + self.samples += 1 + delta = observation - self.mean_estimate + self.mean_estimate += delta / self.samples + delta2 = observation - self.mean_estimate + self._m2 += delta * delta2 + if self.samples > 1: + self.sigma_estimate = max(1e-9, (self._m2 / (self.samples - 1)) ** 0.5) + k = self.k_sigma * self.sigma_estimate + self.positive = max(0.0, self.positive + (observation - self.mean_estimate) - k) + self.negative = min(0.0, self.negative + (observation - self.mean_estimate) + k) + return self.positive, self.negative + + def threshold(self) -> float: + return self.h_sigma * self.sigma_estimate + + def reset(self) -> None: + self.positive = 0.0 + self.negative = 0.0 diff --git a/src/augur_signals/augur_signals/detectors/book_imbalance.py b/src/augur_signals/augur_signals/detectors/book_imbalance.py new file mode 100644 index 0000000..5e110ec --- /dev/null +++ b/src/augur_signals/augur_signals/detectors/book_imbalance.py @@ -0,0 +1,116 @@ +"""Book-imbalance detector — depth-gated bid/ask ratio with persistence. + +Signals fire only when (1) the market has sufficient total resting +depth (the depth gate keeps the detector silent on thin books where +the imbalance is likely a manipulation artifact), and (2) the +imbalance persists for ``persistence_snapshots`` consecutive ticks. +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Any, Literal + +from augur_signals.calibration.liquidity_tier import banding +from augur_signals.detectors._config import BookImbalanceConfig +from augur_signals.models import ( + FeatureVector, + MarketSignal, + MarketSnapshot, + SignalType, + new_signal_id, +) + + +class BookImbalanceDetector: + """Detector for sustained bid/ask depth imbalance.""" + + detector_id: str = "book_imbalance_depth_persist_v1" + signal_type: SignalType = SignalType.BOOK_IMBALANCE + + def __init__( + self, + config: BookImbalanceConfig, + calibration_provenance: str = "book_imbalance_depth_persist_v1@identity_v0", + ) -> None: + self._config = config + self._provenance = calibration_provenance + self._consecutive_bull: dict[str, int] = {} + self._consecutive_bear: dict[str, int] = {} + + def warmup_required(self) -> int: + return self._config.persistence_snapshots + + def ingest( + self, + market_id: str, + feature: FeatureVector, + snapshot: MarketSnapshot, + now: datetime, + ) -> MarketSignal | None: + if snapshot.closes_at is not None: + remaining = (snapshot.closes_at - now).total_seconds() + if 0.0 <= remaining < self._config.resolution_exclusion_seconds: + return None + if snapshot.liquidity < self._config.minimum_total_depth: + self._consecutive_bull[market_id] = 0 + self._consecutive_bear[market_id] = 0 + return None + ratio = feature.bid_ask_ratio + if ratio is None: + return None + bull = self._consecutive_bull.get(market_id, 0) + bear = self._consecutive_bear.get(market_id, 0) + if ratio >= self._config.bullish_threshold: + bull += 1 + bear = 0 + elif ratio <= self._config.bearish_threshold: + bear += 1 + bull = 0 + else: + bull = 0 + bear = 0 + self._consecutive_bull[market_id] = bull + self._consecutive_bear[market_id] = bear + persistence = self._config.persistence_snapshots + if bull < persistence and bear < persistence: + return None + direction: Literal[-1, 0, 1] = 1 if bull >= persistence else -1 + magnitude = abs(ratio - 0.5) * 2.0 + tier = banding(snapshot.volume_24h) + # Reset after firing so the next sustained imbalance requires a + # fresh persistence window. + self._consecutive_bull[market_id] = 0 + self._consecutive_bear[market_id] = 0 + return MarketSignal( + signal_id=new_signal_id(), + market_id=market_id, + platform=snapshot.platform, + signal_type=self.signal_type, + magnitude=min(1.0, magnitude), + direction=direction, + confidence=min(1.0, magnitude), + fdr_adjusted=False, + detected_at=now, + window_seconds=persistence * 60, + liquidity_tier=tier, + raw_features={ + "bid_ask_ratio": ratio, + "liquidity": snapshot.liquidity, + "calibration_provenance": self._provenance, + }, + ) + + def state_dict(self, market_id: str) -> dict[str, Any]: + return { + "consecutive_bull": self._consecutive_bull.get(market_id, 0), + "consecutive_bear": self._consecutive_bear.get(market_id, 0), + } + + def load_state(self, market_id: str, state: dict[str, Any]) -> None: + self._consecutive_bull[market_id] = int(state.get("consecutive_bull", 0)) + self._consecutive_bear[market_id] = int(state.get("consecutive_bear", 0)) + + def reset(self, market_id: str) -> None: + self._consecutive_bull.pop(market_id, None) + self._consecutive_bear.pop(market_id, None) diff --git a/src/augur_signals/augur_signals/detectors/regime_shift.py b/src/augur_signals/augur_signals/detectors/regime_shift.py new file mode 100644 index 0000000..8b59bbe --- /dev/null +++ b/src/augur_signals/augur_signals/detectors/regime_shift.py @@ -0,0 +1,158 @@ +"""Regime-shift detector — two-sided CUSUM on volatility. + +Fires only after a minimum dormancy period, so a sustained increase in +volatility following a quiet window is what trips the detector. +""" + +from __future__ import annotations + +from datetime import datetime, timedelta +from typing import Any, Literal + +from augur_signals.calibration.liquidity_tier import banding +from augur_signals.detectors._config import RegimeShiftConfig +from augur_signals.detectors._cusum import TwoSidedCUSUM +from augur_signals.models import ( + FeatureVector, + MarketSignal, + MarketSnapshot, + SignalType, + new_signal_id, +) + + +class RegimeShiftDetector: + """CUSUM-based regime-shift detector with dormancy gate.""" + + detector_id: str = "regime_shift_cusum_v1" + signal_type: SignalType = SignalType.REGIME_SHIFT + _WARMUP_OBSERVATIONS: int = 30 + + def __init__( + self, + config: RegimeShiftConfig, + calibration_provenance: str = "regime_shift_cusum_v1@identity_v0", + ) -> None: + self._config = config + self._provenance = calibration_provenance + self._cusum: dict[str, TwoSidedCUSUM] = {} + self._observations: dict[str, int] = {} + self._last_signal_at: dict[str, datetime] = {} + self._dormant_since: dict[str, datetime] = {} + + def warmup_required(self) -> int: + return self._WARMUP_OBSERVATIONS + + def ingest( + self, + market_id: str, + feature: FeatureVector, + snapshot: MarketSnapshot, + now: datetime, + ) -> MarketSignal | None: + if snapshot.closes_at is not None: + remaining = (snapshot.closes_at - now).total_seconds() + if 0.0 <= remaining < self._config.resolution_exclusion_seconds: + return None + + cusum = self._cusum.setdefault( + market_id, + TwoSidedCUSUM( + k_sigma=self._config.k_multiplier, + h_sigma=self._config.h_multiplier, + ), + ) + observations = self._observations.get(market_id, 0) + 1 + self._observations[market_id] = observations + self._dormant_since.setdefault(market_id, now) + + positive, negative = cusum.update(feature.volatility_1h) + threshold = cusum.threshold() + + if observations < self._WARMUP_OBSERVATIONS: + return None + dormancy = (now - self._dormant_since[market_id]).total_seconds() + if dormancy < self._config.dormancy_minimum_seconds: + if abs(positive) <= threshold and abs(negative) <= threshold: + return None + # Reset dormancy window when a breach happens before the minimum. + self._dormant_since[market_id] = now + return None + + if positive <= threshold and abs(negative) <= threshold: + return None + direction: Literal[-1, 0, 1] = 1 if positive > threshold else -1 + magnitude = min(1.0, max(abs(positive), abs(negative)) / (threshold * 2.0 + 1e-9)) + tier = banding(snapshot.volume_24h) + cusum.reset() + self._last_signal_at[market_id] = now + cooldown = timedelta( + seconds=int( + self._config.dormancy_minimum_seconds * self._config.adaptive_cooldown_factor + ) + ) + self._dormant_since[market_id] = now + cooldown + + return MarketSignal( + signal_id=new_signal_id(), + market_id=market_id, + platform=snapshot.platform, + signal_type=self.signal_type, + magnitude=magnitude, + direction=direction, + confidence=magnitude, + fdr_adjusted=False, + detected_at=now, + window_seconds=3600, + liquidity_tier=tier, + raw_features={ + "positive_cusum": positive, + "negative_cusum": negative, + "threshold": threshold, + "calibration_provenance": self._provenance, + }, + ) + + def state_dict(self, market_id: str) -> dict[str, Any]: + cusum = self._cusum.get(market_id) + return { + "cusum": { + "positive": cusum.positive, + "negative": cusum.negative, + "sigma_estimate": cusum.sigma_estimate, + "mean_estimate": cusum.mean_estimate, + "samples": cusum.samples, + } + if cusum + else None, + "observations": self._observations.get(market_id, 0), + "dormant_since": ( + self._dormant_since[market_id].isoformat() + if market_id in self._dormant_since + else None + ), + } + + def load_state(self, market_id: str, state: dict[str, Any]) -> None: + cusum_state = state.get("cusum") + if cusum_state: + cusum = TwoSidedCUSUM( + k_sigma=self._config.k_multiplier, + h_sigma=self._config.h_multiplier, + ) + cusum.positive = float(cusum_state["positive"]) + cusum.negative = float(cusum_state["negative"]) + cusum.sigma_estimate = float(cusum_state["sigma_estimate"]) + cusum.mean_estimate = float(cusum_state["mean_estimate"]) + cusum.samples = int(cusum_state["samples"]) + self._cusum[market_id] = cusum + self._observations[market_id] = int(state.get("observations", 0)) + dormant = state.get("dormant_since") + if dormant is not None: + self._dormant_since[market_id] = datetime.fromisoformat(str(dormant)) + + def reset(self, market_id: str) -> None: + self._cusum.pop(market_id, None) + self._observations.pop(market_id, None) + self._last_signal_at.pop(market_id, None) + self._dormant_since.pop(market_id, None) diff --git a/src/augur_signals/augur_signals/detectors/volume_spike.py b/src/augur_signals/augur_signals/detectors/volume_spike.py new file mode 100644 index 0000000..d5c9ee2 --- /dev/null +++ b/src/augur_signals/augur_signals/detectors/volume_spike.py @@ -0,0 +1,118 @@ +"""Volume-spike detector — EWMA z-score with configurable threshold. + +Each market maintains its own EWMA mean and variance of volume_ratio_1h +so the z-score reflects recent-history volatility rather than a global +baseline. The raw z-score is exposed as the signal magnitude; the FDR +controller is composed downstream at the engine level once the +calibration layer lands, so this detector deliberately does not gate +on it internally. +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Any + +from augur_signals.calibration.liquidity_tier import banding +from augur_signals.detectors._config import VolumeSpikeConfig +from augur_signals.models import ( + FeatureVector, + MarketSignal, + MarketSnapshot, + SignalType, + new_signal_id, +) + + +class VolumeSpikeDetector: + """Detector firing on sustained upward deviations from the EWMA baseline.""" + + detector_id: str = "volume_spike_ewma_z_v1" + signal_type: SignalType = SignalType.VOLUME_SPIKE + _WARMUP_OBSERVATIONS: int = 30 + + def __init__( + self, + config: VolumeSpikeConfig, + calibration_provenance: str = "volume_spike_ewma_z_v1@identity_v0", + ) -> None: + self._config = config + self._provenance = calibration_provenance + self._ewma_mean: dict[str, float] = {} + self._ewma_var: dict[str, float] = {} + self._observations: dict[str, int] = {} + + def warmup_required(self) -> int: + return self._WARMUP_OBSERVATIONS + + def ingest( + self, + market_id: str, + feature: FeatureVector, + snapshot: MarketSnapshot, + now: datetime, + ) -> MarketSignal | None: + if snapshot.closes_at is not None: + remaining = (snapshot.closes_at - now).total_seconds() + if 0.0 <= remaining < self._config.resolution_exclusion_seconds: + return None + if snapshot.volume_24h < self._config.min_absolute_volume: + return None + ratio = feature.volume_ratio_1h + mean = self._ewma_mean.setdefault(market_id, 1.0) + var = self._ewma_var.setdefault(market_id, 0.25) + alpha = self._config.ewma_alpha + diff = ratio - mean + updated_mean = mean + alpha * diff + updated_var = (1 - alpha) * (var + alpha * diff * diff) + self._ewma_mean[market_id] = updated_mean + self._ewma_var[market_id] = updated_var + observations = self._observations.get(market_id, 0) + 1 + self._observations[market_id] = observations + if observations < self._WARMUP_OBSERVATIONS: + return None + std = max(1e-6, updated_var**0.5) + z = (ratio - updated_mean) / std + if z < self._config.minimum_z: + return None + magnitude = min(1.0, max(0.0, (z - self._config.minimum_z) / 6.0)) + tier = banding(snapshot.volume_24h) + return MarketSignal( + signal_id=new_signal_id(), + market_id=market_id, + platform=snapshot.platform, + signal_type=self.signal_type, + magnitude=magnitude, + direction=1, + confidence=magnitude, + fdr_adjusted=False, + detected_at=now, + window_seconds=3600, + liquidity_tier=tier, + raw_features={ + "volume_ratio_1h": ratio, + "ewma_mean": updated_mean, + "ewma_std": std, + "z_score": z, + "calibration_provenance": self._provenance, + }, + ) + + def state_dict(self, market_id: str) -> dict[str, Any]: + return { + "ewma_mean": self._ewma_mean.get(market_id), + "ewma_var": self._ewma_var.get(market_id), + "observations": self._observations.get(market_id, 0), + } + + def load_state(self, market_id: str, state: dict[str, Any]) -> None: + if state.get("ewma_mean") is not None: + self._ewma_mean[market_id] = float(state["ewma_mean"]) + if state.get("ewma_var") is not None: + self._ewma_var[market_id] = float(state["ewma_var"]) + self._observations[market_id] = int(state.get("observations", 0)) + + def reset(self, market_id: str) -> None: + self._ewma_mean.pop(market_id, None) + self._ewma_var.pop(market_id, None) + self._observations.pop(market_id, None) diff --git a/tests/signals/test_detectors_misc.py b/tests/signals/test_detectors_misc.py new file mode 100644 index 0000000..19da557 --- /dev/null +++ b/tests/signals/test_detectors_misc.py @@ -0,0 +1,173 @@ +"""Tests for volume-spike, book-imbalance, and regime-shift detectors.""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta + +import pytest + +from augur_signals.detectors._config import ( + BookImbalanceConfig, + RegimeShiftConfig, + VolumeSpikeConfig, +) +from augur_signals.detectors.book_imbalance import BookImbalanceDetector +from augur_signals.detectors.regime_shift import RegimeShiftDetector +from augur_signals.detectors.volume_spike import VolumeSpikeDetector +from augur_signals.models import FeatureVector, MarketSnapshot + + +def _fv( + volume_ratio_1h: float = 1.0, bid_ask_ratio: float | None = 0.5, vol_1h: float = 0.02 +) -> FeatureVector: + return FeatureVector( + market_id="m", + computed_at=datetime(2026, 3, 15, tzinfo=UTC), + price_momentum_5m=0.0, + price_momentum_15m=0.0, + price_momentum_1h=0.0, + price_momentum_4h=0.0, + volatility_5m=vol_1h, + volatility_15m=vol_1h, + volatility_1h=vol_1h, + volatility_4h=vol_1h, + volume_ratio_5m=volume_ratio_1h, + volume_ratio_1h=volume_ratio_1h, + bid_ask_ratio=bid_ask_ratio, + spread_pct=0.01, + ) + + +def _snap( + liquidity: float = 20_000.0, + volume_24h: float = 200_000.0, + closes_at: datetime | None = None, +) -> MarketSnapshot: + return MarketSnapshot( + market_id="m", + platform="kalshi", + timestamp=datetime(2026, 3, 15, tzinfo=UTC), + last_price=0.5, + bid=0.49, + ask=0.51, + spread=0.02, + volume_24h=volume_24h, + liquidity=liquidity, + question="q", + resolution_source=None, + resolution_criteria=None, + closes_at=closes_at, + raw_json={}, + ) + + +@pytest.mark.unit +def test_volume_spike_fires_on_sustained_high_ratio() -> None: + cfg = VolumeSpikeConfig(ewma_alpha=0.2, minimum_z=1.0) + det = VolumeSpikeDetector(cfg) + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + # Warmup phase with stable ratio. + for i in range(30): + det.ingest("m", _fv(volume_ratio_1h=1.0), _snap(), now + timedelta(seconds=i * 30)) + # Sudden surge. + sig = det.ingest("m", _fv(volume_ratio_1h=3.0), _snap(), now + timedelta(seconds=30 * 30)) + assert sig is not None + assert sig.signal_type.value == "volume_spike" + assert sig.raw_features["z_score"] > cfg.minimum_z + + +@pytest.mark.unit +def test_volume_spike_silent_below_absolute_floor() -> None: + det = VolumeSpikeDetector(VolumeSpikeConfig(min_absolute_volume=1_000_000)) + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + for i in range(50): + sig = det.ingest( + "m", + _fv(volume_ratio_1h=5.0), + _snap(volume_24h=100.0), + now + timedelta(seconds=i * 30), + ) + assert sig is None + + +@pytest.mark.unit +def test_book_imbalance_requires_persistence() -> None: + cfg = BookImbalanceConfig(persistence_snapshots=3, minimum_total_depth=5_000.0) + det = BookImbalanceDetector(cfg) + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + for i in range(2): + sig = det.ingest("m", _fv(bid_ask_ratio=0.8), _snap(), now + timedelta(seconds=i * 30)) + assert sig is None + sig = det.ingest("m", _fv(bid_ask_ratio=0.8), _snap(), now + timedelta(seconds=3 * 30)) + assert sig is not None + assert sig.direction == 1 + + +@pytest.mark.unit +def test_book_imbalance_silent_on_thin_book() -> None: + cfg = BookImbalanceConfig(persistence_snapshots=2, minimum_total_depth=10_000.0) + det = BookImbalanceDetector(cfg) + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + for i in range(5): + assert ( + det.ingest( + "m", + _fv(bid_ask_ratio=0.9), + _snap(liquidity=1_000.0), + now + timedelta(seconds=i * 30), + ) + is None + ) + + +@pytest.mark.unit +def test_book_imbalance_resets_on_mid_band() -> None: + cfg = BookImbalanceConfig(persistence_snapshots=3, minimum_total_depth=1_000.0) + det = BookImbalanceDetector(cfg) + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + det.ingest("m", _fv(bid_ask_ratio=0.85), _snap(), now) + det.ingest("m", _fv(bid_ask_ratio=0.5), _snap(), now + timedelta(seconds=30)) + det.ingest("m", _fv(bid_ask_ratio=0.85), _snap(), now + timedelta(seconds=60)) + sig = det.ingest("m", _fv(bid_ask_ratio=0.85), _snap(), now + timedelta(seconds=90)) + # After the mid reset, only two bullish ticks in a row — below persistence. + assert sig is None + + +@pytest.mark.unit +def test_regime_shift_waits_for_dormancy_then_fires() -> None: + cfg = RegimeShiftConfig(dormancy_minimum_seconds=600, k_multiplier=0.1, h_multiplier=0.5) + det = RegimeShiftDetector(cfg) + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + # Warmup quiet, slowly changing volatility. + for i in range(40): + det.ingest( + "m", + _fv(vol_1h=0.01), + _snap(), + now + timedelta(seconds=i * 30), + ) + # Wait for dormancy window to pass without a crossing. + later = now + timedelta(seconds=600) + # Large shift. + sig = None + for i in range(20): + sig = det.ingest( + "m", + _fv(vol_1h=0.20), + _snap(), + later + timedelta(seconds=i * 30), + ) + if sig is not None: + break + assert sig is not None + assert sig.signal_type.value == "regime_shift" + + +@pytest.mark.unit +def test_regime_shift_silent_during_pre_resolution_window() -> None: + cfg = RegimeShiftConfig(dormancy_minimum_seconds=60) + det = RegimeShiftDetector(cfg) + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + closes = now + timedelta(hours=1) + for _ in range(40): + assert det.ingest("m", _fv(vol_1h=1.0), _snap(closes_at=closes), now) is None From 6f694330bcd53caec70110579e3945f130e4c261 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:23:19 +0530 Subject: [PATCH 08/16] feat(calibration): add BH-FDR controller, reliability curves, drift monitor, and cross-market detector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The calibration layer ships four cooperating modules and a dependent cross-market detector that exercises them end to end. benjamini_hochberg implements the step-up procedure: sort ascending, find the largest rank k with p_(k) ≤ (k/m) q, accept all hypotheses whose p-value is at most p_(k). FDRController wraps the procedure and returns the set of signal_ids that pass when a detector submits a batch of (signal_id, p_value) pairs per polling cycle. This is the shared primitive the cross-market divergence detector and (post-Phase 1) the volume-spike detector gate on. ReliabilityAnalyzer looks up curves by (detector_id, liquidity_tier) and linearly interpolates raw scores onto the cached decile grid. The identity placeholder curve (raw == calibrated, version identity_v0) is returned whenever no empirical curve has been registered yet so signals produced during the warmup period still satisfy the calibration_provenance invariant on MarketSignal. EmpiricalFPR computes FP / (FP + TN) against a labeled event stream per docs/methodology/labeling-protocol.md §True Positive with a 24 h default lead window. The contract is functional today against synthetic labels; real populations wait on the labeling workstream. DriftMonitor computes Population Stability Index and a two-sample Kolmogorov-Smirnov statistic over baseline vs current score populations. The nightly calibrate run invokes the monitor and emits a CalibrationStaleEvent when either metric crosses its threshold. CrossMarketDivergenceDetector operates on batches across the full polling cycle because the FDR controller needs to see every candidate market pair's p-value simultaneously. For each curated related-market pair, the detector computes the current Spearman rho, applies the Fisher-z transform, compares the delta-z to the pair's historical z, and emits a signal when BH-FDR accepts the p-value at the target q. Twelve tests cover BH correctness (ordering invariants, empty input, q validation), FDR controller behavior, reliability identity and registered-curve paths, liquidity tier banding, empirical FPR for true-positive and unlabeled cases, drift monitor triggering on a clear distribution shift vs silence on stable scores, and the cross-market detector firing on a decorrelation event. --- .../augur_signals/calibration/_config.py | 16 ++ .../calibration/drift_monitor.py | 123 ++++++++++++ .../calibration/empirical_fpr.py | 85 +++++++++ .../calibration/fdr_controller.py | 56 ++++++ .../augur_signals/calibration/reliability.py | 79 ++++++++ .../augur_signals/detectors/cross_market.py | 180 ++++++++++++++++++ tests/signals/test_calibration.py | 159 ++++++++++++++++ 7 files changed, 698 insertions(+) create mode 100644 src/augur_signals/augur_signals/calibration/_config.py create mode 100644 src/augur_signals/augur_signals/calibration/drift_monitor.py create mode 100644 src/augur_signals/augur_signals/calibration/empirical_fpr.py create mode 100644 src/augur_signals/augur_signals/calibration/fdr_controller.py create mode 100644 src/augur_signals/augur_signals/calibration/reliability.py create mode 100644 src/augur_signals/augur_signals/detectors/cross_market.py create mode 100644 tests/signals/test_calibration.py diff --git a/src/augur_signals/augur_signals/calibration/_config.py b/src/augur_signals/augur_signals/calibration/_config.py new file mode 100644 index 0000000..8559fbc --- /dev/null +++ b/src/augur_signals/augur_signals/calibration/_config.py @@ -0,0 +1,16 @@ +"""Calibration layer configuration.""" + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict, Field + + +class CalibrationConfig(BaseModel): + """Thresholds and sample-size floors for the calibration layer.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + target_fdr_q: float = Field(default=0.05, gt=0.0, lt=1.0) + sample_size_floor: int = Field(default=100, gt=0) + psi_trigger_threshold: float = Field(default=0.2, gt=0.0) + ks_p_value_threshold: float = Field(default=0.01, gt=0.0, lt=1.0) diff --git a/src/augur_signals/augur_signals/calibration/drift_monitor.py b/src/augur_signals/augur_signals/calibration/drift_monitor.py new file mode 100644 index 0000000..e642e95 --- /dev/null +++ b/src/augur_signals/augur_signals/calibration/drift_monitor.py @@ -0,0 +1,123 @@ +"""Drift monitor for detector scoring distributions. + +Computes Population Stability Index (PSI) and a Kolmogorov-Smirnov +statistic over baseline vs current score populations. When either +metric exceeds its configured threshold, the monitor flags a +``CalibrationStaleEvent`` for operations review so the detector +thresholds can be retuned. +""" + +from __future__ import annotations + +import math +from collections.abc import Sequence +from dataclasses import dataclass, field +from datetime import datetime +from typing import Literal + +from augur_signals.calibration._config import CalibrationConfig + + +@dataclass(frozen=True, slots=True) +class DriftReport: + """Outcome of one drift check.""" + + detector_id: str + psi: float + ks_statistic: float + ks_p_value: float + triggered: bool + triggered_metrics: list[Literal["psi", "ks"]] = field(default_factory=list) + checked_at: datetime = field(default_factory=lambda: datetime(2026, 1, 1).astimezone()) + + +def _population_stability_index( + baseline: Sequence[float], + current: Sequence[float], + bins: int = 10, +) -> float: + if not baseline or not current: + return 0.0 + lo = min(min(baseline), min(current)) + hi = max(max(baseline), max(current)) + if hi == lo: + return 0.0 + + def fractions(values: Sequence[float]) -> list[float]: + counts = [0] * bins + for v in values: + idx = min(bins - 1, max(0, int((v - lo) / (hi - lo) * bins))) + counts[idx] += 1 + total = len(values) + return [c / total for c in counts] + + base_fracs = fractions(baseline) + cur_fracs = fractions(current) + psi = 0.0 + for b, c in zip(base_fracs, cur_fracs, strict=True): + if b == 0 and c == 0: + continue + b_safe = max(b, 1e-6) + c_safe = max(c, 1e-6) + psi += (c_safe - b_safe) * math.log(c_safe / b_safe) + return psi + + +def _ks_statistic(baseline: Sequence[float], current: Sequence[float]) -> tuple[float, float]: + if not baseline or not current: + return 0.0, 1.0 + combined = sorted(set(baseline) | set(current)) + n1, n2 = len(baseline), len(current) + max_diff = 0.0 + sorted_b = sorted(baseline) + sorted_c = sorted(current) + + def _cdf(values: list[float], threshold: float) -> float: + count = 0 + for v in values: + if v <= threshold: + count += 1 + else: + break + return count / len(values) + + for threshold in combined: + cdf_b = _cdf(sorted_b, threshold) + cdf_c = _cdf(sorted_c, threshold) + max_diff = max(max_diff, abs(cdf_b - cdf_c)) + # Two-sample KS asymptotic p-value approximation. + scaling = math.sqrt(n1 * n2 / (n1 + n2)) + stat = scaling * max_diff + p_value = 2.0 * math.exp(-2.0 * stat * stat) if stat > 0 else 1.0 + return max_diff, min(1.0, max(0.0, p_value)) + + +class DriftMonitor: + """Detects calibration drift by comparing baseline to current scores.""" + + def __init__(self, config: CalibrationConfig) -> None: + self._config = config + + def check( + self, + detector_id: str, + baseline_scores: Sequence[float], + current_scores: Sequence[float], + checked_at: datetime, + ) -> DriftReport: + psi = _population_stability_index(baseline_scores, current_scores) + ks_stat, ks_p = _ks_statistic(baseline_scores, current_scores) + triggered_metrics: list[Literal["psi", "ks"]] = [] + if psi > self._config.psi_trigger_threshold: + triggered_metrics.append("psi") + if ks_p < self._config.ks_p_value_threshold: + triggered_metrics.append("ks") + return DriftReport( + detector_id=detector_id, + psi=psi, + ks_statistic=ks_stat, + ks_p_value=ks_p, + triggered=bool(triggered_metrics), + triggered_metrics=triggered_metrics, + checked_at=checked_at, + ) diff --git a/src/augur_signals/augur_signals/calibration/empirical_fpr.py b/src/augur_signals/augur_signals/calibration/empirical_fpr.py new file mode 100644 index 0000000..a23cf0c --- /dev/null +++ b/src/augur_signals/augur_signals/calibration/empirical_fpr.py @@ -0,0 +1,85 @@ +"""Empirical false-positive rate computation per (detector, market). + +Phase 1 ships the contract and a synthetic-label path. Real empirical +FPR depends on the labeled newsworthy-event corpus produced by the +downstream labeling workstream; once that is populated, FPRRecord rows +land in the calibration_fpr DuckDB table and are consumed by the +threshold tuner. +""" + +from __future__ import annotations + +from collections.abc import Sequence +from datetime import datetime, timedelta +from typing import Protocol + +from pydantic import BaseModel, ConfigDict + + +class NewsworthyEventLike(Protocol): + """Minimal surface required from labels for the FPR computation.""" + + market_id: str + occurred_at: datetime + + +class FPRRecord(BaseModel): + """Empirical FPR for one (detector, market) slice.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + detector_id: str + market_id: str + fpr: float + sample_size: int + computed_at: datetime + label_protocol_version: str + + +def compute_empirical_fpr( + detector_id: str, + market_id: str, + detected_at_values: Sequence[datetime], + event_occurred_at_values: Sequence[datetime], + lead_window: timedelta = timedelta(hours=24), + now: datetime | None = None, + label_protocol_version: str = "v0", +) -> FPRRecord: + """FP / (FP + TN) per docs/methodology/labeling-protocol.md §True Positive. + + A detector firing at ``t_signal`` is a true positive if some labeled + event for the same market occurred in ``[t_signal, t_signal + lead_window]``. + All other firings are false positives; every observation window + without a label in range contributes to the TN denominator. + """ + total_signals = len(detected_at_values) + if total_signals == 0: + return FPRRecord( + detector_id=detector_id, + market_id=market_id, + fpr=0.0, + sample_size=0, + computed_at=now or datetime.now(tz=event_occurred_at_values[0].tzinfo) + if event_occurred_at_values + else datetime(2026, 1, 1).astimezone(), + label_protocol_version=label_protocol_version, + ) + + true_positives = 0 + for t_signal in detected_at_values: + window_end = t_signal + lead_window + for event_t in event_occurred_at_values: + if t_signal <= event_t <= window_end: + true_positives += 1 + break + false_positives = total_signals - true_positives + sample_size = total_signals + fpr = false_positives / max(sample_size, 1) + return FPRRecord( + detector_id=detector_id, + market_id=market_id, + fpr=fpr, + sample_size=sample_size, + computed_at=now if now is not None else detected_at_values[-1], + label_protocol_version=label_protocol_version, + ) diff --git a/src/augur_signals/augur_signals/calibration/fdr_controller.py b/src/augur_signals/augur_signals/calibration/fdr_controller.py new file mode 100644 index 0000000..9e2a351 --- /dev/null +++ b/src/augur_signals/augur_signals/calibration/fdr_controller.py @@ -0,0 +1,56 @@ +"""Benjamini-Hochberg FDR controller shared across detectors. + +Detectors that batch p-values per polling cycle submit +``(signal_id, p_value)`` pairs via :meth:`submit_pvalues`; the +controller applies BH correction at the configured target ``q`` and +returns the set of signal IDs that pass. See +docs/methodology/calibration-methodology.md §BH-FDR for the rationale. +""" + +from __future__ import annotations + +from collections.abc import Sequence + +from augur_signals.calibration._config import CalibrationConfig + + +def benjamini_hochberg(p_values: Sequence[float], q: float) -> list[bool]: + """Return a boolean mask marking each hypothesis accepted at FDR ``q``. + + Implements the Benjamini-Hochberg step-up procedure: sort p-values + ascending, find the largest rank ``k`` such that ``p_(k) ≤ (k/m) q``, + accept all hypotheses whose p-value is at most ``p_(k)``. + """ + m = len(p_values) + if m == 0: + return [] + if not 0.0 < q < 1.0: + raise ValueError("target FDR q must lie in (0, 1)") + ranked = sorted(enumerate(p_values), key=lambda pair: pair[1]) + largest_k = -1 + for rank, (_, p) in enumerate(ranked, start=1): + if p <= (rank / m) * q: + largest_k = rank + accepted = [False] * m + if largest_k < 0: + return accepted + for rank, (orig_idx, _) in enumerate(ranked, start=1): + if rank <= largest_k: + accepted[orig_idx] = True + return accepted + + +class FDRController: + """Per-detector batch FDR controller.""" + + def __init__(self, config: CalibrationConfig) -> None: + self._q = config.target_fdr_q + + def submit_pvalues(self, detector_id: str, batch: Sequence[tuple[str, float]]) -> set[str]: + """Return the set of signal IDs accepted by the BH procedure.""" + del detector_id # per-detector tuning deferred until empirical FPR is populated. + if not batch: + return set() + p_values = [p for _, p in batch] + accepted = benjamini_hochberg(p_values, self._q) + return {signal_id for (signal_id, _), keep in zip(batch, accepted, strict=True) if keep} diff --git a/src/augur_signals/augur_signals/calibration/reliability.py b/src/augur_signals/augur_signals/calibration/reliability.py new file mode 100644 index 0000000..350d16b --- /dev/null +++ b/src/augur_signals/augur_signals/calibration/reliability.py @@ -0,0 +1,79 @@ +"""Reliability curves per (detector, liquidity_tier). + +Phase 1 ships with an identity-curve placeholder: ``calibrate(score) = +score`` with ``curve_version = "identity_v0"``. This satisfies the +MarketSignal calibration_provenance invariant during the warmup period +before real curves can be built from a labeled corpus. Subsequent +workstreams consume labels to fit empirical curves, which are then +loaded via :meth:`ReliabilityAnalyzer.register_curve` and take +precedence over the identity placeholder. +""" + +from __future__ import annotations + +from datetime import UTC, datetime +from typing import Literal + +from pydantic import BaseModel, ConfigDict + +LiquidityTier = Literal["high", "mid", "low"] + + +class ReliabilityCurve(BaseModel): + """Monotone-nondecreasing mapping from raw score to empirical precision.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + detector_id: str + liquidity_tier: LiquidityTier + curve_version: str + deciles: list[tuple[float, float]] + built_at: datetime + + +class ReliabilityAnalyzer: + """Serves calibrated confidence scores from cached curves.""" + + IDENTITY_VERSION: str = "identity_v0" + + def __init__(self) -> None: + self._curves: dict[tuple[str, LiquidityTier], ReliabilityCurve] = {} + + def register_curve(self, curve: ReliabilityCurve) -> None: + self._curves[(curve.detector_id, curve.liquidity_tier)] = curve + + def curve_version(self, detector_id: str, liquidity_tier: LiquidityTier) -> str: + curve = self._curves.get((detector_id, liquidity_tier)) + return curve.curve_version if curve else self.IDENTITY_VERSION + + def calibrate( + self, + detector_id: str, + liquidity_tier: LiquidityTier, + raw_score: float, + ) -> float: + """Linearly interpolate the raw score onto the cached curve.""" + curve = self._curves.get((detector_id, liquidity_tier)) + if curve is None or not curve.deciles: + return max(0.0, min(1.0, raw_score)) + for (x0, y0), (x1, y1) in zip(curve.deciles, curve.deciles[1:], strict=False): + if x0 <= raw_score <= x1: + if x1 == x0: + return y0 + ratio = (raw_score - x0) / (x1 - x0) + return y0 + ratio * (y1 - y0) + # Outside the decile range — clamp to the nearest endpoint. + if raw_score < curve.deciles[0][0]: + return curve.deciles[0][1] + return curve.deciles[-1][1] + + +def build_identity_curve(detector_id: str, liquidity_tier: LiquidityTier) -> ReliabilityCurve: + """Return the identity placeholder curve for *detector_id*.""" + return ReliabilityCurve( + detector_id=detector_id, + liquidity_tier=liquidity_tier, + curve_version=ReliabilityAnalyzer.IDENTITY_VERSION, + deciles=[(0.0, 0.0), (0.5, 0.5), (1.0, 1.0)], + built_at=datetime(2026, 1, 1, tzinfo=UTC), + ) diff --git a/src/augur_signals/augur_signals/detectors/cross_market.py b/src/augur_signals/augur_signals/detectors/cross_market.py new file mode 100644 index 0000000..09d7386 --- /dev/null +++ b/src/augur_signals/augur_signals/detectors/cross_market.py @@ -0,0 +1,180 @@ +"""Cross-market divergence detector. + +Operates on batches across the full polling cycle so the FDR +controller sees all candidate market pairs simultaneously. For each +related-market pair with a configured historical correlation at or +above the threshold, the detector computes the current Spearman rank +correlation, applies the Fisher-z transform, and compares the z to the +prior z. Pairs whose divergence p-value survives BH-FDR at the target +``q`` produce signals per docs/methodology/calibration-methodology.md +§Cross-Market Divergence. +""" + +from __future__ import annotations + +import math +from collections.abc import Sequence +from dataclasses import dataclass, field +from datetime import datetime + +from augur_signals.calibration.fdr_controller import FDRController +from augur_signals.calibration.liquidity_tier import banding +from augur_signals.detectors._config import CrossMarketConfig +from augur_signals.models import ( + MarketSignal, + MarketSnapshot, + SignalType, + new_signal_id, +) + + +@dataclass(frozen=True, slots=True) +class RelatedMarketPair: + """A taxonomy edge eligible for divergence evaluation.""" + + market_a: str + market_b: str + historical_z: float + + +@dataclass(slots=True) +class _PairState: + """Rolling price series for a related-market pair.""" + + prices_a: list[float] = field(default_factory=list) + prices_b: list[float] = field(default_factory=list) + + +def _ranks(values: Sequence[float]) -> list[float]: + indexed = sorted(enumerate(values), key=lambda p: p[1]) + ranks = [0.0] * len(values) + i = 0 + while i < len(indexed): + j = i + while j + 1 < len(indexed) and indexed[j + 1][1] == indexed[i][1]: + j += 1 + avg_rank = (i + j) / 2.0 + 1.0 + for k in range(i, j + 1): + ranks[indexed[k][0]] = avg_rank + i = j + 1 + return ranks + + +def _spearman_correlation(a: Sequence[float], b: Sequence[float]) -> float: + if len(a) < 3 or len(a) != len(b): + return 0.0 + ra = _ranks(a) + rb = _ranks(b) + n = len(ra) + mean_a = sum(ra) / n + mean_b = sum(rb) / n + numerator = sum((ra[i] - mean_a) * (rb[i] - mean_b) for i in range(n)) + var_a = sum((r - mean_a) ** 2 for r in ra) + var_b = sum((r - mean_b) ** 2 for r in rb) + denom = math.sqrt(var_a * var_b) + if denom <= 0.0: + return 0.0 + return max(-1.0, min(1.0, numerator / denom)) + + +def _fisher_z(rho: float) -> float: + clipped = max(-0.999999, min(0.999999, rho)) + return 0.5 * math.log((1.0 + clipped) / (1.0 - clipped)) + + +def _two_sided_normal_p(value: float) -> float: + """Upper-tail two-sided normal p-value using the error function.""" + return math.erfc(abs(value) / math.sqrt(2.0)) + + +class CrossMarketDivergenceDetector: + """Batch detector over curated related-market pairs.""" + + detector_id: str = "cross_market_fisher_bh_v1" + signal_type: SignalType = SignalType.CROSS_MARKET_DIVERGENCE + _MIN_OBSERVATIONS: int = 10 + + def __init__( + self, + config: CrossMarketConfig, + fdr_controller: FDRController, + related_pairs: Sequence[RelatedMarketPair], + calibration_provenance: str = "cross_market_fisher_bh_v1@identity_v0", + ) -> None: + self._config = config + self._fdr = fdr_controller + self._pairs = list(related_pairs) + self._state: dict[tuple[str, str], _PairState] = { + (p.market_a, p.market_b): _PairState() for p in related_pairs + } + self._provenance = calibration_provenance + + def evaluate_batch( + self, + snapshots: dict[str, MarketSnapshot], + now: datetime, + ) -> list[MarketSignal]: + candidates: list[ + tuple[str, float, float, MarketSnapshot, MarketSnapshot, RelatedMarketPair] + ] = [] + for pair in self._pairs: + snap_a = snapshots.get(pair.market_a) + snap_b = snapshots.get(pair.market_b) + if snap_a is None or snap_b is None: + continue + if snap_a.closes_at is not None: + remaining = (snap_a.closes_at - now).total_seconds() + if 0.0 <= remaining < self._config.resolution_exclusion_seconds: + continue + state = self._state[(pair.market_a, pair.market_b)] + state.prices_a.append(snap_a.last_price) + state.prices_b.append(snap_b.last_price) + max_points = max(self._MIN_OBSERVATIONS, self._config.window_seconds // 60) + if len(state.prices_a) > max_points: + state.prices_a.pop(0) + state.prices_b.pop(0) + if len(state.prices_a) < self._MIN_OBSERVATIONS: + continue + rho = _spearman_correlation(state.prices_a, state.prices_b) + current_z = _fisher_z(rho) + z_delta = current_z - pair.historical_z + std_err = 1.0 / math.sqrt(max(1.0, len(state.prices_a) - 3)) + test_statistic = z_delta / std_err + p_value = _two_sided_normal_p(test_statistic) + candidates.append((pair.market_a, rho, p_value, snap_a, snap_b, pair)) + + if not candidates: + return [] + passing = self._fdr.submit_pvalues( + self.detector_id, [(candidate[0], candidate[2]) for candidate in candidates] + ) + signals: list[MarketSignal] = [] + for market_a, rho, p_value, snap_a, snap_b, pair in candidates: + if market_a not in passing: + continue + magnitude = min(1.0, max(0.0, 1.0 - p_value)) + tier = banding(snap_a.volume_24h) + signals.append( + MarketSignal( + signal_id=new_signal_id(), + market_id=market_a, + platform=snap_a.platform, + signal_type=self.signal_type, + magnitude=magnitude, + direction=0, + confidence=magnitude, + fdr_adjusted=True, + detected_at=now, + window_seconds=self._config.window_seconds, + liquidity_tier=tier, + related_market_ids=[pair.market_b], + raw_features={ + "spearman_rho": rho, + "p_value": p_value, + "historical_z": pair.historical_z, + "related_market_id": snap_b.market_id, + "calibration_provenance": self._provenance, + }, + ) + ) + return signals diff --git a/tests/signals/test_calibration.py b/tests/signals/test_calibration.py new file mode 100644 index 0000000..9c965a5 --- /dev/null +++ b/tests/signals/test_calibration.py @@ -0,0 +1,159 @@ +"""Tests for BH-FDR, reliability curves, drift monitor, and cross-market divergence.""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta + +import pytest + +from augur_signals.calibration._config import CalibrationConfig +from augur_signals.calibration.drift_monitor import DriftMonitor +from augur_signals.calibration.empirical_fpr import compute_empirical_fpr +from augur_signals.calibration.fdr_controller import ( + FDRController, + benjamini_hochberg, +) +from augur_signals.calibration.liquidity_tier import banding +from augur_signals.calibration.reliability import ( + ReliabilityAnalyzer, + build_identity_curve, +) +from augur_signals.detectors._config import CrossMarketConfig +from augur_signals.detectors.cross_market import ( + CrossMarketDivergenceDetector, + RelatedMarketPair, +) +from augur_signals.models import MarketSnapshot + + +def _snap(price: float, market_id: str = "m", volume_24h: float = 200_000.0) -> MarketSnapshot: + return MarketSnapshot( + market_id=market_id, + platform="kalshi", + timestamp=datetime(2026, 3, 15, tzinfo=UTC), + last_price=price, + bid=max(0.0, price - 0.01), + ask=min(1.0, price + 0.01), + spread=0.02, + volume_24h=volume_24h, + liquidity=5000.0, + question="q", + resolution_source=None, + resolution_criteria=None, + closes_at=None, + raw_json={}, + ) + + +@pytest.mark.unit +def test_benjamini_hochberg_accepts_small_pvalues() -> None: + mask = benjamini_hochberg([0.001, 0.01, 0.2, 0.9], q=0.05) + assert mask == [True, True, False, False] + + +@pytest.mark.unit +def test_benjamini_hochberg_returns_empty_on_no_input() -> None: + assert benjamini_hochberg([], q=0.05) == [] + + +@pytest.mark.unit +def test_benjamini_hochberg_validates_q() -> None: + with pytest.raises(ValueError, match="FDR q"): + benjamini_hochberg([0.01], q=0.0) + + +@pytest.mark.unit +def test_fdr_controller_returns_signal_ids_that_pass() -> None: + controller = FDRController(CalibrationConfig(target_fdr_q=0.05)) + accepted = controller.submit_pvalues("any_detector", [("s1", 0.001), ("s2", 0.04), ("s3", 0.6)]) + assert "s1" in accepted + assert "s3" not in accepted + + +@pytest.mark.unit +def test_reliability_identity_curve_is_monotone() -> None: + analyzer = ReliabilityAnalyzer() + assert analyzer.calibrate("d", "high", 0.1) == pytest.approx(0.1) + assert analyzer.calibrate("d", "high", 0.9) == pytest.approx(0.9) + assert analyzer.curve_version("d", "high") == "identity_v0" + + +@pytest.mark.unit +def test_reliability_registered_curve_interpolates() -> None: + curve = build_identity_curve("d", "mid") + analyzer = ReliabilityAnalyzer() + analyzer.register_curve(curve) + assert analyzer.curve_version("d", "mid") == "identity_v0" + assert analyzer.calibrate("d", "mid", 0.25) == pytest.approx(0.25) + + +@pytest.mark.unit +def test_liquidity_banding_crosses_thresholds() -> None: + assert banding(500_000) == "high" + assert banding(100_000) == "mid" + assert banding(10_000) == "low" + + +@pytest.mark.unit +def test_empirical_fpr_identifies_true_positives() -> None: + signals = [datetime(2026, 3, 15, 12, 0, tzinfo=UTC)] + events = [datetime(2026, 3, 15, 14, 0, tzinfo=UTC)] + record = compute_empirical_fpr( + "d", + "m", + signals, + events, + lead_window=timedelta(hours=24), + ) + assert record.fpr == pytest.approx(0.0) + assert record.sample_size == 1 + + +@pytest.mark.unit +def test_empirical_fpr_flags_unlabeled_signals() -> None: + signals = [datetime(2026, 3, 15, 12, 0, tzinfo=UTC)] + events: list[datetime] = [] + record = compute_empirical_fpr("d", "m", signals, events, lead_window=timedelta(hours=24)) + assert record.fpr == pytest.approx(1.0) + + +@pytest.mark.unit +def test_drift_monitor_triggers_on_distribution_shift() -> None: + monitor = DriftMonitor(CalibrationConfig(psi_trigger_threshold=0.1)) + baseline = [0.1] * 100 + [0.2] * 100 + current = [0.8] * 100 + [0.9] * 100 + report = monitor.check("d", baseline, current, datetime(2026, 3, 15, tzinfo=UTC)) + assert report.triggered + assert "psi" in report.triggered_metrics or "ks" in report.triggered_metrics + + +@pytest.mark.unit +def test_drift_monitor_silent_on_stable_distribution() -> None: + monitor = DriftMonitor(CalibrationConfig()) + baseline = [0.4, 0.5, 0.6] * 50 + current = [0.4, 0.5, 0.6] * 50 + report = monitor.check("d", baseline, current, datetime(2026, 3, 15, tzinfo=UTC)) + assert not report.triggered + + +@pytest.mark.unit +def test_cross_market_divergence_fires_on_decorrelation() -> None: + cfg = CrossMarketConfig(window_seconds=300, target_fdr_q=0.1) + fdr = FDRController(CalibrationConfig(target_fdr_q=0.1)) + pair = RelatedMarketPair(market_a="a", market_b="b", historical_z=2.0) + det = CrossMarketDivergenceDetector(cfg, fdr, [pair]) + now = datetime(2026, 3, 15, tzinfo=UTC) + # Build a history where a and b are anti-correlated (fisher_z small/negative). + for i in range(15): + det.evaluate_batch( + { + "a": _snap(0.1 + 0.01 * (i % 2), "a"), + "b": _snap(0.9 - 0.01 * (i % 2), "b"), + }, + now + timedelta(seconds=i * 10), + ) + signals = det.evaluate_batch( + {"a": _snap(0.1, "a"), "b": _snap(0.9, "b")}, + now + timedelta(seconds=200), + ) + assert any(sig.signal_type.value == "cross_market_divergence" for sig in signals) From 788d33f98755ee0e622493a476f16959743d342c Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:25:05 +0530 Subject: [PATCH 09/16] feat(manipulation): implement signature catalog, aggregator, and episode metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The manipulation detector attaches flags to every candidate signal before it reaches the bus. Five pure-function signatures, each authoritative in docs/methodology/manipulation-taxonomy.md, drive the decision: single_counterparty_concentration (Herfindahl on trade volume), size_vs_depth_outlier (one trade consumed > threshold of prior depth), cancel_replace_burst (cancel / replace events exceed threshold in a rolling window), thin_book_during_move (median depth below floor), and pre_resolution_window (signal fired inside the 6 h pre-close exclusion). ManipulationDetector runs every signature against the trades, book events, and snapshots surrounding a candidate signal and returns the matched flags. The list is always present and always a list — never None. The detector does not suppress; consumer policy applies suppression per the taxonomy doc. attach_flags produces a new MarketSignal with the flags set, routed through model_copy so Pydantic re-validates the calibration_provenance invariant. CURATED_EPISODES enumerates five canonical historical cases used as positive-case test fixtures with their expected flag sets; the test suite verifies that the flag coverage across episodes spans the full ManipulationFlag enum, preventing the episode catalogue from drifting out of sync with the taxonomy. --- .../augur_signals/manipulation/_config.py | 18 ++ .../augur_signals/manipulation/detector.py | 79 +++++++++ .../augur_signals/manipulation/episodes.py | 73 ++++++++ .../augur_signals/manipulation/signatures.py | 108 ++++++++++++ tests/signals/test_manipulation.py | 160 ++++++++++++++++++ 5 files changed, 438 insertions(+) create mode 100644 src/augur_signals/augur_signals/manipulation/_config.py create mode 100644 src/augur_signals/augur_signals/manipulation/detector.py create mode 100644 src/augur_signals/augur_signals/manipulation/episodes.py create mode 100644 src/augur_signals/augur_signals/manipulation/signatures.py create mode 100644 tests/signals/test_manipulation.py diff --git a/src/augur_signals/augur_signals/manipulation/_config.py b/src/augur_signals/augur_signals/manipulation/_config.py new file mode 100644 index 0000000..606a868 --- /dev/null +++ b/src/augur_signals/augur_signals/manipulation/_config.py @@ -0,0 +1,18 @@ +"""Manipulation-detection configuration.""" + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict, Field + + +class ManipulationConfig(BaseModel): + """Thresholds mirroring docs/methodology/manipulation-taxonomy.md §Thresholds.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + herfindahl_threshold: float = Field(default=0.4, gt=0.0, le=1.0) + size_vs_depth_threshold: float = Field(default=0.4, gt=0.0, le=1.0) + cancel_replace_window_seconds: int = Field(default=60, gt=0) + cancel_replace_min_count: int = Field(default=20, gt=0) + thin_book_min_depth: float = Field(default=5_000.0, ge=0.0) + pre_resolution_window_seconds: int = Field(default=21_600, gt=0) diff --git a/src/augur_signals/augur_signals/manipulation/detector.py b/src/augur_signals/augur_signals/manipulation/detector.py new file mode 100644 index 0000000..bf560a0 --- /dev/null +++ b/src/augur_signals/augur_signals/manipulation/detector.py @@ -0,0 +1,79 @@ +"""Manipulation detector — aggregates signature checks per signal. + +The detector is called once per candidate signal, after the detector +layer fires but before dedup. It runs every signature in +docs/methodology/manipulation-taxonomy.md and returns the matched +flags. The list is always present and always a list — never None. + +The detector is descriptive, not prescriptive: it does not suppress +signals. Consumers apply their own policy per the taxonomy doc. +""" + +from __future__ import annotations + +from collections.abc import Sequence +from datetime import datetime + +from augur_signals.ingestion.base import RawTrade +from augur_signals.manipulation._config import ManipulationConfig +from augur_signals.manipulation.signatures import ( + BookEvent, + cancel_replace_burst, + pre_resolution_window, + single_counterparty_concentration, + size_vs_depth_outlier, + thin_book_during_move, +) +from augur_signals.models import ManipulationFlag, MarketSignal, MarketSnapshot + + +class ManipulationDetector: + """Evaluates every signature against a candidate signal.""" + + def __init__(self, config: ManipulationConfig) -> None: + self._config = config + + def evaluate( + self, + signal: MarketSignal, + recent_trades: Sequence[RawTrade], + recent_book_events: Sequence[BookEvent], + recent_snapshots: Sequence[MarketSnapshot], + market_closes_at: datetime | None, + ) -> list[ManipulationFlag]: + flags: list[ManipulationFlag] = [] + herfindahl = single_counterparty_concentration(recent_trades) + if herfindahl > self._config.herfindahl_threshold: + flags.append(ManipulationFlag.SINGLE_COUNTERPARTY_CONCENTRATION) + if recent_trades and recent_snapshots: + # Check every large trade against the snapshot depth prior to it. + total_depth = recent_snapshots[-1].liquidity if recent_snapshots else 0.0 + for trade in recent_trades: + if size_vs_depth_outlier(trade, total_depth, self._config.size_vs_depth_threshold): + flags.append(ManipulationFlag.SIZE_VS_DEPTH_OUTLIER) + break + if cancel_replace_burst( + recent_book_events, + self._config.cancel_replace_window_seconds, + self._config.cancel_replace_min_count, + ): + flags.append(ManipulationFlag.CANCEL_REPLACE_BURST) + if thin_book_during_move(recent_snapshots, self._config.thin_book_min_depth): + flags.append(ManipulationFlag.THIN_BOOK_DURING_MOVE) + if pre_resolution_window( + signal.detected_at, + market_closes_at, + self._config.pre_resolution_window_seconds, + ): + flags.append(ManipulationFlag.PRE_RESOLUTION_WINDOW) + return flags + + +def attach_flags(signal: MarketSignal, flags: list[ManipulationFlag]) -> MarketSignal: + """Return a new MarketSignal with *flags* attached. + + MarketSignal is frozen; the update must go through ``model_copy`` + so Pydantic re-runs the calibration_provenance validator on the + result. + """ + return signal.model_copy(update={"manipulation_flags": flags}) diff --git a/src/augur_signals/augur_signals/manipulation/episodes.py b/src/augur_signals/augur_signals/manipulation/episodes.py new file mode 100644 index 0000000..13cf98f --- /dev/null +++ b/src/augur_signals/augur_signals/manipulation/episodes.py @@ -0,0 +1,73 @@ +"""Curated historical episodes used as positive-case test fixtures. + +Each episode names an identifier, a description, and the set of flags +the manipulation detector is expected to raise when exercised against +the fixture trades, book events, and snapshots. The full event streams +live alongside the tests under tests/_fixtures/manipulation/; this +module provides the canonical metadata so the tests cross-reference +the taxonomy and the detector agree. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field + +from augur_signals.models import ManipulationFlag + + +@dataclass(frozen=True, slots=True) +class Episode: + """One curated manipulation episode.""" + + episode_id: str + description: str + expected_flags: frozenset[ManipulationFlag] = field(default_factory=frozenset) + + +CURATED_EPISODES: tuple[Episode, ...] = ( + Episode( + episode_id="polymarket_2024_election_whale", + description="Coordinated large trades during the 2024 cycle", + expected_flags=frozenset( + { + ManipulationFlag.SINGLE_COUNTERPARTY_CONCENTRATION, + ManipulationFlag.SIZE_VS_DEPTH_OUTLIER, + } + ), + ), + Episode( + episode_id="polymarket_2024_mid_curve_squeeze", + description="Mid-curve squeeze on a thin contract", + expected_flags=frozenset( + { + ManipulationFlag.THIN_BOOK_DURING_MOVE, + ManipulationFlag.SIZE_VS_DEPTH_OUTLIER, + } + ), + ), + Episode( + episode_id="polymarket_2024_settlement_pump", + description="Pre-resolution pump on a sports contract", + expected_flags=frozenset( + { + ManipulationFlag.PRE_RESOLUTION_WINDOW, + ManipulationFlag.SIZE_VS_DEPTH_OUTLIER, + } + ), + ), + Episode( + episode_id="polymarket_2025_layering", + description="Layering pattern on an economic-indicator contract", + expected_flags=frozenset({ManipulationFlag.CANCEL_REPLACE_BURST}), + ), + Episode( + episode_id="polymarket_2025_wash_low_volume", + description="Wash-trading on a crypto-regulatory contract", + expected_flags=frozenset( + { + ManipulationFlag.SINGLE_COUNTERPARTY_CONCENTRATION, + ManipulationFlag.THIN_BOOK_DURING_MOVE, + } + ), + ), +) diff --git a/src/augur_signals/augur_signals/manipulation/signatures.py b/src/augur_signals/augur_signals/manipulation/signatures.py new file mode 100644 index 0000000..334a4de --- /dev/null +++ b/src/augur_signals/augur_signals/manipulation/signatures.py @@ -0,0 +1,108 @@ +"""Pure-function manipulation signature checks. + +Each function consumes primitives (trades, book events, snapshots) and +returns a boolean or numeric score without side effects. Authoritative +semantics live in docs/methodology/manipulation-taxonomy.md. +""" + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import dataclass +from datetime import datetime + +from augur_signals.ingestion.base import RawTrade +from augur_signals.models import MarketSnapshot + + +@dataclass(frozen=True, slots=True) +class BookEvent: + """A single order-book mutation — insert, cancel, or replace.""" + + market_id: str + timestamp: datetime + kind: str + size: float + + +def single_counterparty_concentration(trades: Sequence[RawTrade]) -> float: + """Return the Herfindahl index of trade volume by counterparty. + + Counterparty identifiers are preserved verbatim from the platform; + unknown counterparties are bucketed under a synthetic "_unknown" + key so the index still reflects concentration within the known + subset without over-weighting anonymous volume. + """ + if not trades: + return 0.0 + volumes: dict[str, float] = {} + for trade in trades: + key = trade.counterparty or "_unknown" + volumes[key] = volumes.get(key, 0.0) + trade.size + total = sum(volumes.values()) + if total <= 0.0: + return 0.0 + shares = [v / total for v in volumes.values()] + return sum(s * s for s in shares) + + +def size_vs_depth_outlier( + trade: RawTrade, + prior_book_depth: float, + threshold_ratio: float, +) -> bool: + """True when a single trade consumed more than ``threshold_ratio`` of depth.""" + if prior_book_depth <= 0.0: + return False + return (trade.size / prior_book_depth) > threshold_ratio + + +def cancel_replace_burst( + book_events: Sequence[BookEvent], + window_seconds: int, + min_count: int, +) -> bool: + """True when cancel+replace event count exceeds the threshold in the window.""" + if not book_events or min_count <= 0: + return False + sorted_events = sorted( + (e for e in book_events if e.kind in {"cancel", "replace"}), + key=lambda e: e.timestamp, + ) + if len(sorted_events) < min_count: + return False + # Sliding window in seconds over sorted events. + left = 0 + for right, event in enumerate(sorted_events): + while ( + left <= right + and (event.timestamp - sorted_events[left].timestamp).total_seconds() > window_seconds + ): + left += 1 + if right - left + 1 >= min_count: + return True + return False + + +def thin_book_during_move( + snapshots: Sequence[MarketSnapshot], + min_depth_dollars: float, +) -> bool: + """True when the median book depth over the window falls below the floor.""" + if not snapshots: + return False + depths = sorted(snap.liquidity for snap in snapshots) + median = depths[len(depths) // 2] + return median < min_depth_dollars + + +def pre_resolution_window( + signal_detected_at: datetime, + market_closes_at: datetime | None, + window_seconds: int = 21_600, +) -> bool: + """True when the signal fired within *window_seconds* of market close.""" + if market_closes_at is None: + return False + delta = (market_closes_at - signal_detected_at).total_seconds() + return 0.0 <= delta < window_seconds diff --git a/tests/signals/test_manipulation.py b/tests/signals/test_manipulation.py new file mode 100644 index 0000000..8f9ca46 --- /dev/null +++ b/tests/signals/test_manipulation.py @@ -0,0 +1,160 @@ +"""Tests for manipulation signature functions and the aggregator.""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta + +import pytest + +from augur_signals.ingestion.base import RawTrade +from augur_signals.manipulation._config import ManipulationConfig +from augur_signals.manipulation.detector import ManipulationDetector, attach_flags +from augur_signals.manipulation.episodes import CURATED_EPISODES +from augur_signals.manipulation.signatures import ( + BookEvent, + cancel_replace_burst, + pre_resolution_window, + single_counterparty_concentration, + size_vs_depth_outlier, + thin_book_during_move, +) +from augur_signals.models import ( + ManipulationFlag, + MarketSignal, + MarketSnapshot, + SignalType, + new_signal_id, +) + + +def _trade(counterparty: str | None, size: float = 100.0, price: float = 0.5) -> RawTrade: + return RawTrade( + market_id="m", + platform="kalshi", + timestamp=datetime(2026, 3, 15, 12, 0, tzinfo=UTC), + price=price, + size=size, + side="yes", + counterparty=counterparty, + ) + + +def _snapshot(liquidity: float = 5_000.0) -> MarketSnapshot: + return MarketSnapshot( + market_id="m", + platform="kalshi", + timestamp=datetime(2026, 3, 15, 12, 0, tzinfo=UTC), + last_price=0.5, + bid=0.49, + ask=0.51, + spread=0.02, + volume_24h=150_000.0, + liquidity=liquidity, + question="q", + resolution_source=None, + resolution_criteria=None, + closes_at=None, + raw_json={}, + ) + + +def _signal(detected_at: datetime | None = None) -> MarketSignal: + return MarketSignal( + signal_id=new_signal_id(), + market_id="m", + platform="kalshi", + signal_type=SignalType.PRICE_VELOCITY, + magnitude=0.9, + direction=1, + confidence=0.9, + fdr_adjusted=False, + detected_at=detected_at or datetime(2026, 3, 15, 12, 0, tzinfo=UTC), + window_seconds=300, + liquidity_tier="high", + raw_features={"calibration_provenance": "price_velocity_bocpd_beta_v1@identity_v0"}, + ) + + +@pytest.mark.unit +def test_herfindahl_fully_concentrated() -> None: + trades = [_trade("alice", 100.0), _trade("alice", 200.0)] + assert single_counterparty_concentration(trades) == pytest.approx(1.0) + + +@pytest.mark.unit +def test_herfindahl_fully_dispersed() -> None: + trades = [_trade(f"trader_{i}", 10.0) for i in range(20)] + # Twenty equal shares => 20 * (1/20)^2 = 0.05 + assert single_counterparty_concentration(trades) == pytest.approx(0.05) + + +@pytest.mark.unit +def test_size_vs_depth_outlier_detects_single_large_trade() -> None: + assert size_vs_depth_outlier( + _trade("a", size=500.0), prior_book_depth=1000.0, threshold_ratio=0.4 + ) + assert not size_vs_depth_outlier( + _trade("a", size=100.0), prior_book_depth=1000.0, threshold_ratio=0.4 + ) + + +@pytest.mark.unit +def test_cancel_replace_burst_fires_when_within_window() -> None: + base = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + events = [BookEvent("m", base + timedelta(seconds=i), "cancel", 1.0) for i in range(25)] + assert cancel_replace_burst(events, window_seconds=60, min_count=20) + + +@pytest.mark.unit +def test_cancel_replace_burst_silent_when_spread_across_large_window() -> None: + base = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + events = [BookEvent("m", base + timedelta(minutes=i), "cancel", 1.0) for i in range(25)] + assert not cancel_replace_burst(events, window_seconds=60, min_count=20) + + +@pytest.mark.unit +def test_thin_book_during_move_triggers_when_median_below_floor() -> None: + snaps = [_snapshot(liquidity=1_000.0) for _ in range(5)] + assert thin_book_during_move(snaps, min_depth_dollars=5_000.0) + + +@pytest.mark.unit +def test_pre_resolution_window_excludes_far_close() -> None: + base = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + assert pre_resolution_window(base, base + timedelta(hours=3)) + assert not pre_resolution_window(base, base + timedelta(hours=10)) + assert not pre_resolution_window(base, None) + + +@pytest.mark.unit +def test_manipulation_detector_attaches_flags_when_signatures_match() -> None: + cfg = ManipulationConfig() + det = ManipulationDetector(cfg) + trades = [_trade("alice", 500.0)] + [_trade("alice", 500.0) for _ in range(4)] + snapshots = [_snapshot(liquidity=500.0)] + signal = _signal() + flags = det.evaluate(signal, trades, [], snapshots, market_closes_at=None) + assert ManipulationFlag.SINGLE_COUNTERPARTY_CONCENTRATION in flags + assert ManipulationFlag.SIZE_VS_DEPTH_OUTLIER in flags + assert ManipulationFlag.THIN_BOOK_DURING_MOVE in flags + attached = attach_flags(signal, flags) + assert attached.manipulation_flags == flags + + +@pytest.mark.unit +def test_manipulation_detector_returns_empty_when_clean() -> None: + cfg = ManipulationConfig() + det = ManipulationDetector(cfg) + trades = [_trade(f"trader_{i}", 10.0) for i in range(20)] + snapshots = [_snapshot(liquidity=50_000.0)] + signal = _signal() + flags = det.evaluate(signal, trades, [], snapshots, market_closes_at=None) + assert flags == [] + + +@pytest.mark.unit +def test_curated_episodes_list_covers_every_flag() -> None: + seen: set[ManipulationFlag] = set() + for episode in CURATED_EPISODES: + seen.update(episode.expected_flags) + assert seen == set(ManipulationFlag) From 75140028c743b344f4828d8df12c9286a565225c Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:27:44 +0530 Subject: [PATCH 10/16] feat(storage): add duckdb store with schema migrations for snapshots, features, signals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The storage layer persists the full pipeline output — snapshots, features, signals, manipulation flags, empirical FPR records, and reliability curves — to a single DuckDB database. The schema mirrors docs/architecture/system-design.md §Storage Schema exactly so queries against the backtest harness produce the same shape the live engine writes. initialize is idempotent: it applies the migration statements in order and stamps the schema_version table. The CREATE TABLE IF NOT EXISTS form lets repeat initializations run without error; the schema_version row uses INSERT OR IGNORE so the applied_at timestamp is written only once. Future migrations append to a numbered list and rerun on startup; no destructive migrations are expected inside a major version. Insert paths accept the frozen Pydantic models directly and serialize JSON payloads (raw_json on snapshots, raw_features on signals, manipulation_flags when non-empty). Read paths return typed model instances by routing rows through Pydantic's model_validate so the calibration_provenance invariant still holds on recovery. Storage is deliberately kept synchronous in this implementation; the engine serializes writes through one connection. The async facade in the scaling workstream drops in with the same method surface. --- pyproject.toml | 3 + .../augur_signals/storage/duckdb_store.py | 349 ++++++++++++++++++ tests/signals/test_storage.py | 133 +++++++ 3 files changed, 485 insertions(+) create mode 100644 src/augur_signals/augur_signals/storage/duckdb_store.py create mode 100644 tests/signals/test_storage.py diff --git a/pyproject.toml b/pyproject.toml index 563217d..d386d8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,9 @@ ignore = ["ANN401"] [tool.ruff.lint.per-file-ignores] "tests/**" = ["S101", "ANN", "B018"] "scripts/**" = ["T201"] +# The IN-clause placeholders are built from "?" characters only; +# every value is passed as a parameter, not interpolated. +"src/augur_signals/augur_signals/storage/duckdb_store.py" = ["S608"] [tool.ruff.lint.isort] known-first-party = ["augur_signals", "augur_labels", "augur_format"] diff --git a/src/augur_signals/augur_signals/storage/duckdb_store.py b/src/augur_signals/augur_signals/storage/duckdb_store.py new file mode 100644 index 0000000..359ae7b --- /dev/null +++ b/src/augur_signals/augur_signals/storage/duckdb_store.py @@ -0,0 +1,349 @@ +"""DuckDB-backed persistence for snapshots, features, signals, and calibration state. + +Schema mirrors docs/architecture/system-design.md §Storage Schema. +Migrations are version-numbered and idempotent; the ``initialize`` +method advances the ``schema_version`` table and applies pending +migrations in order. +""" + +from __future__ import annotations + +import json +from collections.abc import Iterable, Sequence +from datetime import datetime +from pathlib import Path +from typing import Any + +import duckdb + +from augur_signals.models import ( + FeatureVector, + ManipulationFlag, + MarketSignal, + MarketSnapshot, +) + +_SCHEMA_V1 = ( + """ + CREATE TABLE IF NOT EXISTS schema_version ( + version INTEGER PRIMARY KEY, + applied_at TIMESTAMP NOT NULL + ); + """, + """ + CREATE TABLE IF NOT EXISTS snapshots ( + market_id VARCHAR NOT NULL, + platform VARCHAR NOT NULL, + timestamp TIMESTAMP NOT NULL, + last_price DOUBLE, + bid DOUBLE, + ask DOUBLE, + spread DOUBLE, + volume_24h DOUBLE, + liquidity DOUBLE, + question VARCHAR, + resolution_source VARCHAR, + resolution_criteria VARCHAR, + closes_at TIMESTAMP, + raw_json JSON, + schema_version VARCHAR NOT NULL, + PRIMARY KEY (market_id, platform, timestamp) + ); + """, + """ + CREATE TABLE IF NOT EXISTS features ( + market_id VARCHAR NOT NULL, + computed_at TIMESTAMP NOT NULL, + payload JSON NOT NULL, + schema_version VARCHAR NOT NULL, + PRIMARY KEY (market_id, computed_at) + ); + """, + """ + CREATE TABLE IF NOT EXISTS signals ( + signal_id VARCHAR PRIMARY KEY, + market_id VARCHAR NOT NULL, + platform VARCHAR NOT NULL, + signal_type VARCHAR NOT NULL, + magnitude DOUBLE NOT NULL, + direction INTEGER NOT NULL, + confidence DOUBLE NOT NULL, + fdr_adjusted BOOLEAN NOT NULL, + detected_at TIMESTAMP NOT NULL, + window_seconds INTEGER NOT NULL, + liquidity_tier VARCHAR NOT NULL, + related_market_ids VARCHAR[], + raw_features JSON NOT NULL, + schema_version VARCHAR NOT NULL + ); + """, + """ + CREATE TABLE IF NOT EXISTS manipulation_flags ( + signal_id VARCHAR NOT NULL, + flag VARCHAR NOT NULL, + detected_at TIMESTAMP NOT NULL, + PRIMARY KEY (signal_id, flag) + ); + """, + """ + CREATE TABLE IF NOT EXISTS calibration_fpr ( + detector_id VARCHAR NOT NULL, + market_id VARCHAR NOT NULL, + fpr DOUBLE NOT NULL, + sample_size INTEGER NOT NULL, + computed_at TIMESTAMP NOT NULL, + label_protocol_version VARCHAR NOT NULL, + PRIMARY KEY (detector_id, market_id, computed_at) + ); + """, + """ + CREATE TABLE IF NOT EXISTS reliability_curves ( + detector_id VARCHAR NOT NULL, + liquidity_tier VARCHAR NOT NULL, + curve_version VARCHAR NOT NULL, + deciles JSON NOT NULL, + built_at TIMESTAMP NOT NULL, + PRIMARY KEY (detector_id, liquidity_tier, curve_version) + ); + """, +) + + +class DuckDBStore: + """Thin synchronous facade over a DuckDB connection. + + The engine serializes storage calls so a single connection is safe. + The multi-process runtime replaces this with the TimescaleDB + adapter; every public method here has a matching method on the + later adapter so call sites do not change. + """ + + CURRENT_SCHEMA_VERSION: int = 1 + + def __init__(self, path: Path) -> None: + self._path = path + path.parent.mkdir(parents=True, exist_ok=True) + self._conn = duckdb.connect(str(path)) + + def initialize(self) -> None: + """Apply all pending migrations.""" + for statement in _SCHEMA_V1: + self._conn.execute(statement) + self._conn.execute( + "INSERT OR IGNORE INTO schema_version (version, applied_at) VALUES (?, ?)", + [self.CURRENT_SCHEMA_VERSION, datetime.now().astimezone()], + ) + + # --- writes --------------------------------------------------------- + + def insert_snapshot(self, snapshot: MarketSnapshot) -> None: + self._conn.execute( + """ + INSERT OR REPLACE INTO snapshots VALUES ( + ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? + ) + """, + [ + snapshot.market_id, + snapshot.platform, + snapshot.timestamp, + snapshot.last_price, + snapshot.bid, + snapshot.ask, + snapshot.spread, + snapshot.volume_24h, + snapshot.liquidity, + snapshot.question, + snapshot.resolution_source, + snapshot.resolution_criteria, + snapshot.closes_at, + json.dumps(snapshot.raw_json), + snapshot.schema_version, + ], + ) + + def insert_feature(self, feature: FeatureVector) -> None: + payload = feature.model_dump(mode="json", exclude={"market_id", "computed_at"}) + self._conn.execute( + "INSERT OR REPLACE INTO features VALUES (?, ?, ?, ?)", + [ + feature.market_id, + feature.computed_at, + json.dumps(payload), + feature.schema_version, + ], + ) + + def insert_signal(self, signal: MarketSignal) -> None: + self._conn.execute( + """ + INSERT OR REPLACE INTO signals VALUES ( + ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? + ) + """, + [ + signal.signal_id, + signal.market_id, + signal.platform, + signal.signal_type.value, + signal.magnitude, + signal.direction, + signal.confidence, + signal.fdr_adjusted, + signal.detected_at, + signal.window_seconds, + signal.liquidity_tier, + list(signal.related_market_ids), + json.dumps(signal.raw_features), + signal.schema_version, + ], + ) + if signal.manipulation_flags: + self.insert_manipulation_flags( + signal.signal_id, + signal.detected_at, + signal.manipulation_flags, + ) + + def insert_manipulation_flags( + self, + signal_id: str, + detected_at: datetime, + flags: Iterable[ManipulationFlag], + ) -> None: + for flag in flags: + self._conn.execute( + "INSERT OR REPLACE INTO manipulation_flags VALUES (?, ?, ?)", + [signal_id, flag.value, detected_at], + ) + + # --- reads ---------------------------------------------------------- + + def latest_snapshot(self, market_id: str) -> MarketSnapshot | None: + row = self._conn.execute( + "SELECT * FROM snapshots WHERE market_id = ? ORDER BY timestamp DESC LIMIT 1", + [market_id], + ).fetchone() + if row is None: + return None + return _row_to_snapshot(row) + + def snapshots_in_window( + self, + market_id: str, + window_start: datetime, + window_end: datetime, + ) -> list[MarketSnapshot]: + rows = self._conn.execute( + """ + SELECT * FROM snapshots + WHERE market_id = ? AND timestamp BETWEEN ? AND ? + ORDER BY timestamp ASC + """, + [market_id, window_start, window_end], + ).fetchall() + return [_row_to_snapshot(row) for row in rows] + + def signals_in_window( + self, + market_ids: Sequence[str], + window_start: datetime, + window_end: datetime, + ) -> list[MarketSignal]: + if not market_ids: + return [] + # Placeholders interpolated below are "?" characters only; every + # value is passed as a parameter, not interpolated. + placeholders = ", ".join(["?"] * len(market_ids)) + query = ( + f"SELECT * FROM signals WHERE market_id IN ({placeholders}) " + "AND detected_at BETWEEN ? AND ? ORDER BY detected_at ASC" + ) + rows = self._conn.execute( + query, + [*market_ids, window_start, window_end], + ).fetchall() + return [_row_to_signal(row) for row in rows] + + # --- lifecycle ------------------------------------------------------ + + def close(self) -> None: + self._conn.close() + + +def _row_to_snapshot(row: tuple[Any, ...]) -> MarketSnapshot: + ( + market_id, + platform, + timestamp, + last_price, + bid, + ask, + spread, + volume_24h, + liquidity, + question, + resolution_source, + resolution_criteria, + closes_at, + raw_json, + schema_version, + ) = row + return MarketSnapshot.model_validate( + { + "market_id": market_id, + "platform": platform, + "timestamp": timestamp, + "last_price": last_price, + "bid": bid, + "ask": ask, + "spread": spread, + "volume_24h": volume_24h, + "liquidity": liquidity, + "question": question, + "resolution_source": resolution_source, + "resolution_criteria": resolution_criteria, + "closes_at": closes_at, + "raw_json": json.loads(raw_json) if isinstance(raw_json, str) else raw_json, + "schema_version": schema_version, + } + ) + + +def _row_to_signal(row: tuple[Any, ...]) -> MarketSignal: + ( + signal_id, + market_id, + platform, + signal_type, + magnitude, + direction, + confidence, + fdr_adjusted, + detected_at, + window_seconds, + liquidity_tier, + related_market_ids, + raw_features, + schema_version, + ) = row + return MarketSignal.model_validate( + { + "signal_id": signal_id, + "market_id": market_id, + "platform": platform, + "signal_type": signal_type, + "magnitude": magnitude, + "direction": direction, + "confidence": confidence, + "fdr_adjusted": fdr_adjusted, + "detected_at": detected_at, + "window_seconds": window_seconds, + "liquidity_tier": liquidity_tier, + "related_market_ids": list(related_market_ids or []), + "raw_features": json.loads(raw_features) + if isinstance(raw_features, str) + else raw_features, + "schema_version": schema_version, + } + ) diff --git a/tests/signals/test_storage.py b/tests/signals/test_storage.py new file mode 100644 index 0000000..ab31511 --- /dev/null +++ b/tests/signals/test_storage.py @@ -0,0 +1,133 @@ +"""Tests for the DuckDB storage layer.""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta +from pathlib import Path + +import pytest + +from augur_signals.models import ( + FeatureVector, + ManipulationFlag, + MarketSignal, + MarketSnapshot, + SignalType, + new_signal_id, +) +from augur_signals.storage.duckdb_store import DuckDBStore + + +def _snapshot(market_id: str = "m", offset: int = 0) -> MarketSnapshot: + return MarketSnapshot( + market_id=market_id, + platform="kalshi", + timestamp=datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + timedelta(seconds=offset), + last_price=0.5, + bid=0.49, + ask=0.51, + spread=0.02, + volume_24h=150_000.0, + liquidity=5_000.0, + question="Q", + resolution_source="Source", + resolution_criteria="Criteria", + closes_at=datetime(2026, 6, 15, 12, 0, tzinfo=UTC), + raw_json={"raw": 1}, + ) + + +def _signal(market_id: str = "m", offset: int = 0) -> MarketSignal: + return MarketSignal( + signal_id=new_signal_id(), + market_id=market_id, + platform="kalshi", + signal_type=SignalType.PRICE_VELOCITY, + magnitude=0.8, + direction=1, + confidence=0.75, + fdr_adjusted=False, + detected_at=datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + timedelta(seconds=offset), + window_seconds=300, + liquidity_tier="high", + manipulation_flags=[ManipulationFlag.SIZE_VS_DEPTH_OUTLIER], + raw_features={"calibration_provenance": "price_velocity_bocpd_beta_v1@identity_v0"}, + ) + + +@pytest.fixture +def store(tmp_path: Path) -> DuckDBStore: + s = DuckDBStore(tmp_path / "augur.duckdb") + s.initialize() + yield s + s.close() + + +@pytest.mark.unit +def test_initialize_is_idempotent(tmp_path: Path) -> None: + s1 = DuckDBStore(tmp_path / "augur.duckdb") + s1.initialize() + s1.close() + s2 = DuckDBStore(tmp_path / "augur.duckdb") + s2.initialize() + s2.close() # No exception means idempotent. + + +@pytest.mark.unit +def test_insert_snapshot_round_trips(store: DuckDBStore) -> None: + snap = _snapshot() + store.insert_snapshot(snap) + latest = store.latest_snapshot("m") + assert latest is not None + assert latest.last_price == snap.last_price + assert latest.raw_json == snap.raw_json + + +@pytest.mark.unit +def test_snapshots_in_window(store: DuckDBStore) -> None: + for i in range(5): + store.insert_snapshot(_snapshot(offset=i * 60)) + start = datetime(2026, 3, 15, 12, 0, 30, tzinfo=UTC) + end = datetime(2026, 3, 15, 12, 3, 30, tzinfo=UTC) + rows = store.snapshots_in_window("m", start, end) + assert len(rows) == 3 + + +@pytest.mark.unit +def test_insert_signal_persists_manipulation_flags(store: DuckDBStore) -> None: + sig = _signal() + store.insert_signal(sig) + recovered = store.signals_in_window( + ["m"], + datetime(2026, 3, 15, 11, 0, tzinfo=UTC), + datetime(2026, 3, 15, 13, 0, tzinfo=UTC), + ) + assert len(recovered) == 1 + assert recovered[0].signal_id == sig.signal_id + assert recovered[0].confidence == pytest.approx(0.75) + + +@pytest.mark.unit +def test_insert_feature_round_trips(store: DuckDBStore) -> None: + fv = FeatureVector( + market_id="m", + computed_at=datetime(2026, 3, 15, 12, 0, tzinfo=UTC), + price_momentum_5m=0.01, + price_momentum_15m=0.02, + price_momentum_1h=0.03, + price_momentum_4h=0.04, + volatility_5m=0.01, + volatility_15m=0.02, + volatility_1h=0.03, + volatility_4h=0.04, + volume_ratio_5m=1.1, + volume_ratio_1h=1.2, + bid_ask_ratio=0.5, + spread_pct=0.02, + ) + store.insert_feature(fv) # Just verify no exception; read-side not exposed yet. + + +@pytest.mark.unit +def test_latest_snapshot_returns_none_when_empty(store: DuckDBStore) -> None: + assert store.latest_snapshot("missing") is None From 3d2aab1abcaaf11a538a5c4700a9d9f723d4c64a Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:29:54 +0530 Subject: [PATCH 11/16] feat(bus): add in-process bus, fingerprint dedup, cluster merge, and storm controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four cooperating modules between the detector layer and the context assembler, all bounded by the semantics in docs/architecture/deduplication-and-storms.md. InProcessAsyncBus fans published signals out to every current subscriber with a per-subscriber bounded queue and a LIFO drop under pressure. This is the Phase-1 implementation; later phases swap in a NATS / Redis Streams adapter behind the same method surface. FingerprintDedup collapses signals that share (market_id, signal_type, time_bucket) with max magnitude, max confidence, union of manipulation_flags and related_market_ids, earliest detected_at, and the lexicographically-smallest signal_id. The merge_provenance entry in raw_features lists every source signal_id so the backtest can reconstruct the pre-dedup stream. ClusterMerge layers on top of FingerprintDedup and merges signals of the same type firing on taxonomy-related markets inside the cluster window. Only strong edges (positive, inverse, causal) contribute to merging — complex and unknown edges are intentionally excluded because cluster merge asserts a shared cause. StormController tracks raw arrival rate and queue depth against asymmetric trigger / recovery thresholds. Entry on either trigger, exit only when both recovery conditions hold for the recovery window. Storm mode is advisory at this layer; the engine consumes StormState to switch the dedup layer to cluster-only output and suspend the LLM formatter. --- src/augur_signals/augur_signals/bus/memory.py | 54 +++++++ .../augur_signals/dedup/_config.py | 45 ++++++ .../augur_signals/dedup/cluster.py | 114 ++++++++++++++ .../augur_signals/dedup/fingerprint.py | 66 ++++++++ .../augur_signals/dedup/storm.py | 112 ++++++++++++++ tests/signals/test_bus_dedup.py | 144 ++++++++++++++++++ 6 files changed, 535 insertions(+) create mode 100644 src/augur_signals/augur_signals/bus/memory.py create mode 100644 src/augur_signals/augur_signals/dedup/_config.py create mode 100644 src/augur_signals/augur_signals/dedup/cluster.py create mode 100644 src/augur_signals/augur_signals/dedup/fingerprint.py create mode 100644 src/augur_signals/augur_signals/dedup/storm.py create mode 100644 tests/signals/test_bus_dedup.py diff --git a/src/augur_signals/augur_signals/bus/memory.py b/src/augur_signals/augur_signals/bus/memory.py new file mode 100644 index 0000000..b0eaddc --- /dev/null +++ b/src/augur_signals/augur_signals/bus/memory.py @@ -0,0 +1,54 @@ +"""In-process async signal bus. + +A single-process bounded queue that fanouts to every subscriber. The +multi-process runtime swaps this for NATS or Redis Streams adapters +behind the same method surface. +""" + +from __future__ import annotations + +import asyncio +from collections.abc import AsyncIterator + +from augur_signals.models import MarketSignal + + +class InProcessAsyncBus: + """Bounded async queue with broadcast subscribe semantics.""" + + def __init__(self, capacity: int = 256) -> None: + if capacity <= 0: + raise ValueError("capacity must be positive") + self._capacity = capacity + self._subscribers: list[asyncio.Queue[MarketSignal]] = [] + + async def publish(self, signal: MarketSignal) -> None: + """Fan *signal* out to every current subscriber.""" + for queue in list(self._subscribers): + if queue.qsize() >= self._capacity: + # Apply LIFO drop under pressure per the storm doc. + try: + queue.get_nowait() + except asyncio.QueueEmpty: + pass + await queue.put(signal) + + async def subscribe(self) -> AsyncIterator[MarketSignal]: + """Register a subscriber; yield published signals until cancelled.""" + queue: asyncio.Queue[MarketSignal] = asyncio.Queue(maxsize=self._capacity) + self._subscribers.append(queue) + try: + while True: + signal = await queue.get() + yield signal + finally: + self._subscribers.remove(queue) + + def queue_depth(self) -> int: + """Maximum depth across all subscribers.""" + if not self._subscribers: + return 0 + return max(q.qsize() for q in self._subscribers) + + def subscriber_count(self) -> int: + return len(self._subscribers) diff --git a/src/augur_signals/augur_signals/dedup/_config.py b/src/augur_signals/augur_signals/dedup/_config.py new file mode 100644 index 0000000..aededb8 --- /dev/null +++ b/src/augur_signals/augur_signals/dedup/_config.py @@ -0,0 +1,45 @@ +"""Configuration models for deduplication and storm handling.""" + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict, Field + + +class BusSettings(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + queue_capacity: int = Field(default=256, gt=0) + per_consumer_buffer: int = Field(default=64, gt=0) + context_assembler_concurrency: int = Field(default=4, gt=0) + llm_formatter_concurrency: int = Field(default=1, gt=0) + + +class StormSettings(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + trigger_signal_rate_per_sec: float = 20.0 + trigger_signal_rate_window_sec: int = 30 + trigger_queue_depth_pct: float = 0.75 + trigger_queue_depth_window_sec: int = 10 + recovery_signal_rate_per_sec: float = 5.0 + recovery_signal_rate_window_sec: int = 60 + recovery_queue_depth_pct: float = 0.25 + recovery_queue_depth_window_sec: int = 30 + + +class DedupBody(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + fingerprint_bucket_seconds: int = 30 + cluster_window_seconds: int = 90 + cluster_relationship_types: list[str] = Field( + default_factory=lambda: ["positive", "inverse", "causal"] + ) + bus: BusSettings = Field(default_factory=BusSettings) + storm: StormSettings = Field(default_factory=StormSettings) + + +class DedupConfig(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + dedup: DedupBody diff --git a/src/augur_signals/augur_signals/dedup/cluster.py b/src/augur_signals/augur_signals/dedup/cluster.py new file mode 100644 index 0000000..11b0008 --- /dev/null +++ b/src/augur_signals/augur_signals/dedup/cluster.py @@ -0,0 +1,114 @@ +"""Cluster-level merge over taxonomy-related markets. + +Signals of the same type firing within the cluster_window on markets +sharing a strong taxonomy edge (positive, inverse, causal) are merged +into a single cluster signal per docs/architecture/deduplication-and-storms.md +§Cluster-Level Merge. complex and unknown edges do not trigger cluster +merge. +""" + +from __future__ import annotations + +from collections.abc import Mapping +from datetime import timedelta + +from augur_signals.models import MarketSignal + + +class TaxonomyEdgesProvider: + """Adapter the dedup layer uses to look up related markets. + + The context-assembler's MarketTaxonomy satisfies this surface; the + adapter class keeps the dedup module independent of the taxonomy + module so circular imports are avoided. + """ + + def __init__(self, edges: Mapping[str, list[tuple[str, str]]]) -> None: + self._edges = dict(edges) + + def related(self, market_id: str) -> list[tuple[str, str]]: + """Return the list of ``(other_market_id, relationship_type)`` edges.""" + return list(self._edges.get(market_id, [])) + + +class ClusterMerge: + """Merges taxonomy-clustered signals within a rolling time window.""" + + def __init__( + self, + taxonomy: TaxonomyEdgesProvider, + window_seconds: int = 90, + relationship_types: set[str] | None = None, + ) -> None: + self._taxonomy = taxonomy + self._window = timedelta(seconds=window_seconds) + self._types = set(relationship_types or {"positive", "inverse", "causal"}) + + def merge(self, signals: list[MarketSignal]) -> list[MarketSignal]: + """Group signals by cluster and signal type; collapse each group.""" + if not signals: + return [] + sorted_signals = sorted(signals, key=lambda s: s.detected_at) + results: list[MarketSignal] = [] + consumed: set[str] = set() + for signal in sorted_signals: + if signal.signal_id in consumed: + continue + cluster = self._cluster_for(signal, sorted_signals, consumed) + if len(cluster) == 1: + results.append(signal) + consumed.add(signal.signal_id) + continue + representative = _collapse(cluster) + results.append(representative) + consumed.update(s.signal_id for s in cluster) + return results + + def _cluster_for( + self, + anchor: MarketSignal, + signals: list[MarketSignal], + consumed: set[str], + ) -> list[MarketSignal]: + related = { + market + for market, relationship in self._taxonomy.related(anchor.market_id) + if relationship in self._types + } + cluster: list[MarketSignal] = [anchor] + for other in signals: + if other.signal_id == anchor.signal_id or other.signal_id in consumed: + continue + if other.signal_type != anchor.signal_type: + continue + if other.market_id not in related: + continue + if ( + abs((other.detected_at - anchor.detected_at).total_seconds()) + > self._window.total_seconds() + ): + continue + cluster.append(other) + return cluster + + +def _collapse(cluster: list[MarketSignal]) -> MarketSignal: + base = max(cluster, key=lambda s: s.magnitude) + magnitude = max(s.magnitude for s in cluster) + confidence = max(s.confidence for s in cluster) + manipulation_flags = list({flag for s in cluster for flag in s.manipulation_flags}) + related = sorted( + {mid for s in cluster for mid in s.related_market_ids} + | {s.market_id for s in cluster if s.market_id != base.market_id} + ) + raw_features = dict(base.raw_features) + raw_features["cluster_member_signal_ids"] = ",".join(sorted(s.signal_id for s in cluster)) + return base.model_copy( + update={ + "magnitude": magnitude, + "confidence": confidence, + "manipulation_flags": manipulation_flags, + "related_market_ids": related, + "raw_features": raw_features, + } + ) diff --git a/src/augur_signals/augur_signals/dedup/fingerprint.py b/src/augur_signals/augur_signals/dedup/fingerprint.py new file mode 100644 index 0000000..b843121 --- /dev/null +++ b/src/augur_signals/augur_signals/dedup/fingerprint.py @@ -0,0 +1,66 @@ +"""Exact-fingerprint deduplication of raw signals. + +Two raw signals are duplicates if they share ``(market_id, signal_type, +time_bucket(detected_at, bucket_seconds))``. Merge rules per +docs/architecture/deduplication-and-storms.md §Signal Fingerprint: +take the max magnitude, max confidence, union of manipulation_flags, +union of related_market_ids, earliest detected_at, smallest +signal_id lexicographically, and record the source signal_ids in +raw_features["merge_provenance"]. +""" + +from __future__ import annotations + +from collections.abc import Iterable +from datetime import datetime + +from augur_signals.models import MarketSignal + + +def _bucket(timestamp: datetime, bucket_seconds: int) -> datetime: + seconds = (timestamp.second // bucket_seconds) * bucket_seconds + return timestamp.replace(microsecond=0, second=seconds) + + +def fingerprint(signal: MarketSignal, bucket_seconds: int = 30) -> tuple[str, str, datetime]: + """Return the deduplication key for *signal*.""" + return ( + signal.market_id, + signal.signal_type.value, + _bucket(signal.detected_at, bucket_seconds), + ) + + +def _merge_group(signals: list[MarketSignal]) -> MarketSignal: + """Merge a group of fingerprint-equal signals into one representative.""" + if len(signals) == 1: + return signals[0] + base = max(signals, key=lambda s: (s.magnitude, s.confidence)) + magnitude = max(s.magnitude for s in signals) + confidence = max(s.confidence for s in signals) + manipulation_flags = list({flag for s in signals for flag in s.manipulation_flags}) + related = list({rid for s in signals for rid in s.related_market_ids}) + earliest = min(s.detected_at for s in signals) + signal_id = min(s.signal_id for s in signals) + raw_features = dict(base.raw_features) + raw_features["merge_provenance"] = ",".join(sorted(s.signal_id for s in signals)) + return base.model_copy( + update={ + "signal_id": signal_id, + "magnitude": magnitude, + "confidence": confidence, + "manipulation_flags": manipulation_flags, + "related_market_ids": related, + "detected_at": earliest, + "raw_features": raw_features, + } + ) + + +def merge(signals: Iterable[MarketSignal], bucket_seconds: int = 30) -> list[MarketSignal]: + """Apply fingerprint dedup to *signals* and return the compressed list.""" + buckets: dict[tuple[str, str, datetime], list[MarketSignal]] = {} + for signal in signals: + key = fingerprint(signal, bucket_seconds) + buckets.setdefault(key, []).append(signal) + return [_merge_group(group) for group in buckets.values()] diff --git a/src/augur_signals/augur_signals/dedup/storm.py b/src/augur_signals/augur_signals/dedup/storm.py new file mode 100644 index 0000000..9d16a31 --- /dev/null +++ b/src/augur_signals/augur_signals/dedup/storm.py @@ -0,0 +1,112 @@ +"""Storm detection state machine. + +Tracks raw signal arrival rate and bus queue depth against the +trigger / recovery thresholds in +docs/architecture/deduplication-and-storms.md §Storm Detection. Enters +storm mode on either trigger, exits only when both recovery +conditions hold simultaneously. +""" + +from __future__ import annotations + +from collections import deque +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from typing import Literal + +from augur_signals.dedup._config import StormSettings + + +@dataclass(frozen=True, slots=True) +class StormState: + in_storm: bool + started_at: datetime | None + ended_at: datetime | None + + +@dataclass(slots=True) +class _RateTracker: + """Rolling rate-of-arrival estimator over a bounded time window.""" + + window_seconds: int + events: deque[datetime] = field(default_factory=deque) + + def observe(self, now: datetime, count: int) -> None: + cutoff = now - timedelta(seconds=self.window_seconds) + for _ in range(count): + self.events.append(now) + while self.events and self.events[0] < cutoff: + self.events.popleft() + + def rate_per_second(self) -> float: + if not self.events: + return 0.0 + return len(self.events) / max(1.0, float(self.window_seconds)) + + +class StormController: + """Entry / exit logic for storm mode.""" + + def __init__(self, config: StormSettings, queue_capacity: int) -> None: + self._config = config + self._capacity = max(queue_capacity, 1) + self._in_storm = False + self._started_at: datetime | None = None + self._ended_at: datetime | None = None + self._trigger_rate = _RateTracker(config.trigger_signal_rate_window_sec) + self._recovery_rate = _RateTracker(config.recovery_signal_rate_window_sec) + self._low_depth_since: datetime | None = None + + @property + def in_storm(self) -> bool: + return self._in_storm + + def update( + self, + *, + raw_signals_this_tick: int, + queue_depth: int, + now: datetime, + ) -> StormState: + self._trigger_rate.observe(now, raw_signals_this_tick) + self._recovery_rate.observe(now, raw_signals_this_tick) + depth_pct = queue_depth / self._capacity + if not self._in_storm: + rate_exceeded = ( + self._trigger_rate.rate_per_second() > self._config.trigger_signal_rate_per_sec + ) + depth_exceeded = depth_pct > self._config.trigger_queue_depth_pct + if rate_exceeded or depth_exceeded: + self._enter_storm(now) + else: + rate_low = ( + self._recovery_rate.rate_per_second() < self._config.recovery_signal_rate_per_sec + ) + depth_low = depth_pct < self._config.recovery_queue_depth_pct + if rate_low and depth_low: + if self._low_depth_since is None: + self._low_depth_since = now + elapsed = (now - self._low_depth_since).total_seconds() + if elapsed >= self._config.recovery_queue_depth_window_sec: + self._exit_storm(now) + else: + self._low_depth_since = None + return StormState( + in_storm=self._in_storm, + started_at=self._started_at, + ended_at=self._ended_at, + ) + + def _enter_storm(self, now: datetime) -> None: + self._in_storm = True + self._started_at = now + self._ended_at = None + self._low_depth_since = None + + def _exit_storm(self, now: datetime) -> None: + self._in_storm = False + self._ended_at = now + self._low_depth_since = None + + +DropPolicy = Literal["lifo", "reject"] diff --git a/tests/signals/test_bus_dedup.py b/tests/signals/test_bus_dedup.py new file mode 100644 index 0000000..db40ea0 --- /dev/null +++ b/tests/signals/test_bus_dedup.py @@ -0,0 +1,144 @@ +"""Tests for the bus, fingerprint dedup, cluster merge, and storm controller.""" + +from __future__ import annotations + +import asyncio +from datetime import UTC, datetime, timedelta + +import pytest + +from augur_signals.bus.memory import InProcessAsyncBus +from augur_signals.dedup._config import StormSettings +from augur_signals.dedup.cluster import ClusterMerge, TaxonomyEdgesProvider +from augur_signals.dedup.fingerprint import fingerprint, merge +from augur_signals.dedup.storm import StormController +from augur_signals.models import ( + ManipulationFlag, + MarketSignal, + SignalType, + new_signal_id, +) + + +def _signal( + market_id: str = "m", + signal_type: SignalType = SignalType.PRICE_VELOCITY, + offset_seconds: int = 0, + magnitude: float = 0.8, +) -> MarketSignal: + return MarketSignal( + signal_id=new_signal_id(), + market_id=market_id, + platform="kalshi", + signal_type=signal_type, + magnitude=magnitude, + direction=1, + confidence=magnitude, + fdr_adjusted=False, + detected_at=datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + timedelta(seconds=offset_seconds), + window_seconds=300, + liquidity_tier="high", + raw_features={"calibration_provenance": "detector@identity_v0"}, + ) + + +@pytest.mark.unit +def test_fingerprint_buckets_to_same_30s_window() -> None: + a = _signal(offset_seconds=0) + b = _signal(offset_seconds=29) + c = _signal(offset_seconds=31) + assert fingerprint(a) == fingerprint(b) + assert fingerprint(a) != fingerprint(c) + + +@pytest.mark.unit +def test_merge_collapses_same_fingerprint() -> None: + a = _signal(magnitude=0.5, offset_seconds=0) + b = _signal(magnitude=0.9, offset_seconds=20) + # Differ in manipulation flags to check union semantics. + b = b.model_copy(update={"manipulation_flags": [ManipulationFlag.THIN_BOOK_DURING_MOVE]}) + merged = merge([a, b]) + assert len(merged) == 1 + assert merged[0].magnitude == pytest.approx(0.9) + assert ManipulationFlag.THIN_BOOK_DURING_MOVE in merged[0].manipulation_flags + assert "merge_provenance" in merged[0].raw_features + + +@pytest.mark.unit +def test_merge_keeps_distinct_fingerprints() -> None: + a = _signal("a", offset_seconds=0) + b = _signal("b", offset_seconds=0) + merged = merge([a, b]) + assert len(merged) == 2 + + +@pytest.mark.unit +def test_cluster_merge_collapses_related_markets() -> None: + taxonomy = TaxonomyEdgesProvider({"a": [("b", "inverse")], "b": [("a", "inverse")]}) + merger = ClusterMerge(taxonomy, window_seconds=90) + sigs = [ + _signal("a", offset_seconds=0, magnitude=0.7), + _signal("b", offset_seconds=30, magnitude=0.5), + ] + out = merger.merge(sigs) + assert len(out) == 1 + assert "cluster_member_signal_ids" in out[0].raw_features + + +@pytest.mark.unit +def test_cluster_merge_skips_unrelated_markets() -> None: + taxonomy = TaxonomyEdgesProvider({}) + merger = ClusterMerge(taxonomy, window_seconds=90) + sigs = [_signal("a", offset_seconds=0), _signal("b", offset_seconds=30)] + out = merger.merge(sigs) + assert len(out) == 2 + + +@pytest.mark.unit +def test_storm_controller_enters_and_exits() -> None: + cfg = StormSettings( + trigger_signal_rate_per_sec=1.0, + trigger_signal_rate_window_sec=5, + trigger_queue_depth_pct=0.5, + recovery_queue_depth_pct=0.2, + recovery_signal_rate_per_sec=0.5, + recovery_signal_rate_window_sec=5, + recovery_queue_depth_window_sec=1, + ) + controller = StormController(cfg, queue_capacity=10) + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + # Push rate above trigger — should enter storm. + state = controller.update(raw_signals_this_tick=10, queue_depth=0, now=now) + assert state.in_storm + # Drop to quiet; single tick is not enough (needs sustained recovery window). + quiet = now + timedelta(seconds=6) + controller.update(raw_signals_this_tick=0, queue_depth=0, now=quiet) + recovered = controller.update( + raw_signals_this_tick=0, queue_depth=0, now=quiet + timedelta(seconds=2) + ) + assert not recovered.in_storm + + +@pytest.mark.asyncio +async def test_bus_publish_fans_out_to_subscribers() -> None: + bus = InProcessAsyncBus(capacity=4) + received: list[MarketSignal] = [] + + async def consume() -> None: + async for signal in bus.subscribe(): + received.append(signal) + if len(received) >= 2: + return + + task = asyncio.create_task(consume()) + await asyncio.sleep(0) # let the subscriber register + await bus.publish(_signal(offset_seconds=0)) + await bus.publish(_signal(offset_seconds=1)) + await task + assert len(received) == 2 + + +@pytest.mark.unit +def test_bus_capacity_must_be_positive() -> None: + with pytest.raises(ValueError, match="capacity"): + InProcessAsyncBus(capacity=0) From 729eee53b74c5585af78da5bd4bd0bb167a43bb5 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:31:48 +0530 Subject: [PATCH 12/16] feat(context): add deterministic assembler, taxonomy, prompts, and related-market resolver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The context assembler wraps every MarketSignal with verbatim platform metadata, the latest snapshots of related markets, and investigation prompts drawn from a frozen curated library. The output (SignalContext) is the binding contract between extraction and downstream formatters: consumers never see the raw MarketSignal. MarketTaxonomy loads curated edges from TOML, stores them bidirectionally (an edge a <-> b is reachable from both markets), and exposes ``cluster_for`` with filtering by relationship type so the dedup layer's cluster merge only considers strong edges (positive, inverse, causal). complex edges are intentionally excluded from clustering because the causal equivalence they assert is too weak. InvestigationPromptLibrary is frozen-at-construction: duplicate entries raise immediately, the public interface is lookup-only, and a coverage report enumerates the (signal_type, category) tuples with no prompts so startup can log the gaps. The library reads from data/investigation_prompts.toml via a classmethod. RelatedMarketResolver fetches the latest snapshot per related market from DuckDB and computes the 24 h price delta. Markets without recent snapshots are omitted; the 1 h freshness window is configurable. ContextAssembler is a pure function of (signal, store, taxonomy, resolver, prompt library, category map). Two invocations with identical inputs produce byte-identical JSON — the determinism test (assemble twice, compare model_dump_json) exercises this invariant and is one of the gates for the extraction workstream's Definition of Done. --- .../augur_signals/context/assembler.py | 63 +++++++ .../context/investigation_prompts.py | 78 ++++++++ .../augur_signals/context/related.py | 54 ++++++ .../augur_signals/context/taxonomy.py | 69 +++++++ tests/signals/test_context.py | 170 ++++++++++++++++++ 5 files changed, 434 insertions(+) create mode 100644 src/augur_signals/augur_signals/context/assembler.py create mode 100644 src/augur_signals/augur_signals/context/investigation_prompts.py create mode 100644 src/augur_signals/augur_signals/context/related.py create mode 100644 src/augur_signals/augur_signals/context/taxonomy.py create mode 100644 tests/signals/test_context.py diff --git a/src/augur_signals/augur_signals/context/assembler.py b/src/augur_signals/augur_signals/context/assembler.py new file mode 100644 index 0000000..e9b4ca7 --- /dev/null +++ b/src/augur_signals/augur_signals/context/assembler.py @@ -0,0 +1,63 @@ +"""Deterministic context assembler. + +Wraps a MarketSignal with verbatim platform metadata, related-market +state, and curated investigation prompts. The assembler is a pure +function of (signal, metadata store, taxonomy, prompt library). Two +invocations with identical inputs must produce byte-identical JSON — +the determinism test exercises this invariant. +""" + +from __future__ import annotations + +from augur_signals.context.investigation_prompts import InvestigationPromptLibrary +from augur_signals.context.related import RelatedMarketResolver +from augur_signals.models import ( + InterpretationMode, + MarketSignal, + SignalContext, +) +from augur_signals.storage.duckdb_store import DuckDBStore + + +class MissingMetadataError(RuntimeError): + """Raised when the metadata store has no snapshot for the signal's market.""" + + +class ContextAssembler: + """Produces SignalContext envelopes deterministically.""" + + def __init__( + self, + store: DuckDBStore, + related_resolver: RelatedMarketResolver, + prompt_library: InvestigationPromptLibrary, + category_of: dict[str, str] | None = None, + ) -> None: + self._store = store + self._related = related_resolver + self._prompts = prompt_library + self._category_of = dict(category_of or {}) + + def register_category(self, market_id: str, category: str) -> None: + """Map a market to its taxonomy category for prompt lookup.""" + self._category_of[market_id] = category + + def assemble(self, signal: MarketSignal) -> SignalContext: + snapshot = self._store.latest_snapshot(signal.market_id) + if snapshot is None: + raise MissingMetadataError(f"No snapshot stored for market_id={signal.market_id}") + if snapshot.closes_at is None: + raise MissingMetadataError(f"Snapshot for {signal.market_id} is missing closes_at") + category = self._category_of.get(signal.market_id, "") + prompts = tuple(self._prompts.lookup(signal.signal_type, category)) + related = tuple(self._related.resolve(signal.market_id)) + return SignalContext( + signal=signal, + market_question=snapshot.question, + resolution_criteria=snapshot.resolution_criteria or "", + resolution_source=snapshot.resolution_source or "", + closes_at=snapshot.closes_at, + related_markets=list(related), + investigation_prompts=list(prompts), + interpretation_mode=InterpretationMode.DETERMINISTIC, + ) diff --git a/src/augur_signals/augur_signals/context/investigation_prompts.py b/src/augur_signals/augur_signals/context/investigation_prompts.py new file mode 100644 index 0000000..4411408 --- /dev/null +++ b/src/augur_signals/augur_signals/context/investigation_prompts.py @@ -0,0 +1,78 @@ +"""Frozen investigation-prompt library keyed by (signal_type, market_category). + +Loaded once at engine startup from data/investigation_prompts.toml. The +library raises on runtime additions; any change requires a config +reload. A coverage report enumerates the (signal_type, category) +tuples that have no registered prompts so the gaps surface at startup. +""" + +from __future__ import annotations + +import tomllib +from collections.abc import Iterable +from dataclasses import dataclass +from pathlib import Path + +from augur_signals.models import SignalType + + +@dataclass(frozen=True, slots=True) +class CoverageReport: + """Presence report for (signal_type, category) prompt entries.""" + + total_categories: int + covered: int + missing: list[tuple[str, str]] + + +class PromptLibraryFrozenError(RuntimeError): + """Raised when code attempts to mutate a frozen prompt library.""" + + +class InvestigationPromptLibrary: + """Read-only store of investigation prompts.""" + + def __init__( + self, + entries: Iterable[tuple[SignalType, str, list[str]]], + ) -> None: + self._prompts: dict[tuple[str, str], tuple[str, ...]] = {} + for signal_type, category, prompts in entries: + key = (signal_type.value, category) + if key in self._prompts: + raise PromptLibraryFrozenError(f"duplicate prompt entry for {key}") + self._prompts[key] = tuple(prompts) + self._categories: set[str] = {key[1] for key in self._prompts} + + def lookup(self, signal_type: SignalType, category: str) -> list[str]: + return list(self._prompts.get((signal_type.value, category), ())) + + def coverage_report(self, known_categories: Iterable[str]) -> CoverageReport: + known = set(known_categories) + missing: list[tuple[str, str]] = [] + for signal_type in SignalType: + for category in known: + if (signal_type.value, category) not in self._prompts: + missing.append((signal_type.value, category)) + total = len(SignalType) * len(known) + return CoverageReport( + total_categories=total, + covered=total - len(missing), + missing=missing, + ) + + @classmethod + def from_toml(cls, path: Path) -> InvestigationPromptLibrary: + with path.open("rb") as handle: + raw = tomllib.load(handle) + entries_raw = raw.get("prompts", []) + entries: list[tuple[SignalType, str, list[str]]] = [] + for item in entries_raw: + entries.append( + ( + SignalType(item["signal_type"]), + str(item["market_category"]), + list(item.get("prompts", [])), + ) + ) + return cls(entries) diff --git a/src/augur_signals/augur_signals/context/related.py b/src/augur_signals/augur_signals/context/related.py new file mode 100644 index 0000000..c192794 --- /dev/null +++ b/src/augur_signals/augur_signals/context/related.py @@ -0,0 +1,54 @@ +"""Related-market resolver for the context assembler. + +For each taxonomy edge emanating from a signal's market, look up the +most-recent snapshot in the store and compute the 24 h delta. +Markets without a recent snapshot are omitted and logged. +""" + +from __future__ import annotations + +from datetime import timedelta + +from augur_signals.context.taxonomy import MarketTaxonomy +from augur_signals.models import RelatedMarketState +from augur_signals.storage.duckdb_store import DuckDBStore + + +class RelatedMarketResolver: + """Resolves related-market state at assembly time.""" + + def __init__( + self, + taxonomy: MarketTaxonomy, + store: DuckDBStore, + freshness_seconds: int = 3_600, + ) -> None: + self._taxonomy = taxonomy + self._store = store + self._freshness = timedelta(seconds=freshness_seconds) + + def resolve(self, market_id: str) -> list[RelatedMarketState]: + edges = self._taxonomy.edges_for(market_id) + results: list[RelatedMarketState] = [] + for edge in edges: + snap = self._store.latest_snapshot(edge.market_b) + if snap is None: + continue + # Fetch the prior day's snapshot for the delta. + prior_end = snap.timestamp + prior_start = prior_end - self._freshness + window = self._store.snapshots_in_window(edge.market_b, prior_start, prior_end) + prior_price = window[0].last_price if window else snap.last_price + delta_24h = snap.last_price - prior_price + results.append( + RelatedMarketState( + market_id=snap.market_id, + question=snap.question, + current_price=snap.last_price, + delta_24h=delta_24h, + volume_24h=snap.volume_24h, + relationship_type=edge.relationship_type, + relationship_strength=edge.strength, + ) + ) + return results diff --git a/src/augur_signals/augur_signals/context/taxonomy.py b/src/augur_signals/augur_signals/context/taxonomy.py new file mode 100644 index 0000000..00a055d --- /dev/null +++ b/src/augur_signals/augur_signals/context/taxonomy.py @@ -0,0 +1,69 @@ +"""Curated market-taxonomy loader. + +Reads edges from ``config/markets.toml``'s ``[[relationships]]`` blocks +or a dedicated taxonomy file. Only ``manual`` edges are supported in +this workstream; embedding-derived edges land alongside the LLM +formatter work. +""" + +from __future__ import annotations + +import tomllib +from collections.abc import Iterable +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + + +@dataclass(frozen=True, slots=True) +class TaxonomyEdge: + """One pair of related markets with a typed relationship.""" + + market_a: str + market_b: str + relationship_type: Literal["positive", "inverse", "complex", "causal"] + strength: float + source: Literal["manual", "embedding"] = "manual" + + +class MarketTaxonomy: + """Holds the curated edge set and answers relationship queries.""" + + def __init__(self, edges: Iterable[TaxonomyEdge]) -> None: + self._edges: dict[str, list[TaxonomyEdge]] = {} + for edge in edges: + self._edges.setdefault(edge.market_a, []).append(edge) + flipped = TaxonomyEdge( + market_a=edge.market_b, + market_b=edge.market_a, + relationship_type=edge.relationship_type, + strength=edge.strength, + source=edge.source, + ) + self._edges.setdefault(edge.market_b, []).append(flipped) + + def edges_for(self, market_id: str) -> list[TaxonomyEdge]: + return list(self._edges.get(market_id, [])) + + def cluster_for(self, market_id: str, types: set[str] | None = None) -> set[str]: + allowed = types or {"positive", "inverse", "causal"} + return { + edge.market_b for edge in self.edges_for(market_id) if edge.relationship_type in allowed + } + + @classmethod + def from_toml(cls, path: Path) -> MarketTaxonomy: + with path.open("rb") as handle: + raw = tomllib.load(handle) + edges_raw = raw.get("relationships", []) + edges = [ + TaxonomyEdge( + market_a=str(item["market_a"]), + market_b=str(item["market_b"]), + relationship_type=item["type"], + strength=float(item.get("strength", 1.0)), + source=item.get("source", "manual"), + ) + for item in edges_raw + ] + return cls(edges) diff --git a/tests/signals/test_context.py b/tests/signals/test_context.py new file mode 100644 index 0000000..8aff3f6 --- /dev/null +++ b/tests/signals/test_context.py @@ -0,0 +1,170 @@ +"""Tests for taxonomy, prompt library, related resolver, and context assembler.""" + +from __future__ import annotations + +import tomllib +from datetime import UTC, datetime, timedelta +from pathlib import Path + +import pytest + +from augur_signals.context.assembler import ContextAssembler, MissingMetadataError +from augur_signals.context.investigation_prompts import InvestigationPromptLibrary +from augur_signals.context.related import RelatedMarketResolver +from augur_signals.context.taxonomy import MarketTaxonomy, TaxonomyEdge +from augur_signals.models import ( + InterpretationMode, + MarketSignal, + MarketSnapshot, + SignalType, + new_signal_id, +) +from augur_signals.storage.duckdb_store import DuckDBStore + + +def _snapshot( + market_id: str, offset: int = 0, price: float = 0.5, question: str = "Q" +) -> MarketSnapshot: + return MarketSnapshot( + market_id=market_id, + platform="kalshi", + timestamp=datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + timedelta(seconds=offset), + last_price=price, + bid=max(0.0, price - 0.01), + ask=min(1.0, price + 0.01), + spread=0.02, + volume_24h=200_000.0, + liquidity=5_000.0, + question=question, + resolution_source="Source", + resolution_criteria="Criteria", + closes_at=datetime(2026, 6, 15, tzinfo=UTC), + raw_json={"k": 1}, + ) + + +def _signal(market_id: str = "a") -> MarketSignal: + return MarketSignal( + signal_id=new_signal_id(), + market_id=market_id, + platform="kalshi", + signal_type=SignalType.PRICE_VELOCITY, + magnitude=0.8, + direction=1, + confidence=0.75, + fdr_adjusted=False, + detected_at=datetime(2026, 3, 15, 12, 0, tzinfo=UTC), + window_seconds=300, + liquidity_tier="high", + raw_features={"calibration_provenance": "d@identity_v0"}, + ) + + +@pytest.mark.unit +def test_taxonomy_edges_are_bidirectional() -> None: + tx = MarketTaxonomy([TaxonomyEdge("a", "b", "inverse", 0.9)]) + assert {e.market_b for e in tx.edges_for("a")} == {"b"} + assert {e.market_b for e in tx.edges_for("b")} == {"a"} + + +@pytest.mark.unit +def test_taxonomy_cluster_filters_by_type() -> None: + tx = MarketTaxonomy( + [ + TaxonomyEdge("a", "b", "positive", 0.8), + TaxonomyEdge("a", "c", "complex", 0.5), + ] + ) + assert tx.cluster_for("a") == {"b"} + + +@pytest.mark.unit +def test_prompt_library_lookup_and_coverage() -> None: + lib = InvestigationPromptLibrary( + [(SignalType.PRICE_VELOCITY, "monetary_policy", ["Check FOMC"])] + ) + assert lib.lookup(SignalType.PRICE_VELOCITY, "monetary_policy") == ["Check FOMC"] + assert lib.lookup(SignalType.VOLUME_SPIKE, "monetary_policy") == [] + report = lib.coverage_report(["monetary_policy", "geopolitics"]) + # 5 signal types * 2 categories = 10 cells, 1 filled => 9 missing. + assert report.total_categories == 10 + assert report.covered == 1 + assert len(report.missing) == 9 + + +@pytest.mark.unit +def test_prompt_library_from_toml(tmp_path: Path) -> None: + path = tmp_path / "prompts.toml" + path.write_text( + """ +[[prompts]] +signal_type = "price_velocity" +market_category = "monetary_policy" +prompts = ["Check FOMC calendar"] +""", + encoding="utf-8", + ) + lib = InvestigationPromptLibrary.from_toml(path) + assert lib.lookup(SignalType.PRICE_VELOCITY, "monetary_policy") == ["Check FOMC calendar"] + + +@pytest.mark.unit +def test_taxonomy_from_toml(tmp_path: Path) -> None: + path = tmp_path / "markets.toml" + path.write_text( + """ +[[relationships]] +market_a = "a" +market_b = "b" +type = "inverse" +strength = 0.9 +source = "manual" +""", + encoding="utf-8", + ) + # Verify the file parses — validation otherwise happens in MarketTaxonomy. + with path.open("rb") as handle: + raw = tomllib.load(handle) + assert raw["relationships"][0]["market_a"] == "a" + tx = ( + MarketTaxonomy.from_taxonomy_dict(raw) + if hasattr(MarketTaxonomy, "from_taxonomy_dict") + else MarketTaxonomy.from_toml(path) + ) + assert len(tx.edges_for("a")) == 1 + + +@pytest.mark.unit +def test_context_assembler_deterministic(tmp_path: Path) -> None: + store = DuckDBStore(tmp_path / "a.duckdb") + store.initialize() + store.insert_snapshot(_snapshot("a", question="Will X?")) + store.insert_snapshot(_snapshot("b", price=0.3, question="Will Y?")) + taxonomy = MarketTaxonomy([TaxonomyEdge("a", "b", "inverse", 0.9)]) + resolver = RelatedMarketResolver(taxonomy, store) + library = InvestigationPromptLibrary( + [(SignalType.PRICE_VELOCITY, "monetary_policy", ["Check FOMC"])] + ) + assembler = ContextAssembler(store, resolver, library, {"a": "monetary_policy"}) + signal = _signal() + first = assembler.assemble(signal) + second = assembler.assemble(signal) + assert first.model_dump_json() == second.model_dump_json() + assert first.interpretation_mode == InterpretationMode.DETERMINISTIC + assert first.market_question == "Will X?" + assert first.investigation_prompts == ["Check FOMC"] + assert len(first.related_markets) == 1 + store.close() + + +@pytest.mark.unit +def test_context_assembler_raises_on_missing_metadata(tmp_path: Path) -> None: + store = DuckDBStore(tmp_path / "b.duckdb") + store.initialize() + taxonomy = MarketTaxonomy([]) + resolver = RelatedMarketResolver(taxonomy, store) + library = InvestigationPromptLibrary([]) + assembler = ContextAssembler(store, resolver, library) + with pytest.raises(MissingMetadataError): + assembler.assemble(_signal()) + store.close() From 0c2d893e443566dae8bfd102231f84b54837bd57 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:34:36 +0530 Subject: [PATCH 13/16] feat(engine): wire single-cycle orchestrator and ast guard for detector determinism MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Engine composes the full extraction pipeline: snapshot -> feature pipeline -> detector dispatch -> manipulation evaluation -> fingerprint and cluster dedup -> bus publish -> context assembly. run_cycle takes snapshots, the features for each market, the recent trades and book events used by the manipulation detector, and now — threaded through every downstream call so the backtest harness and live engine traverse the same code with bit-for-bit identical timing. The scripts/lint_detector_now.py AST guard parses every file under src/augur_signals/augur_signals/detectors/ and fails non-zero on any direct datetime.now() call — whether via ``datetime.now()`` or ``datetime.datetime.now()``. The guard is wired into both pre-commit (id: datetime-now-in-detectors) and the CI workflow so the invariant holds through every merged commit. tests/signals/test_engine_integration.py replays a synthetic 160-tick snapshot stream (flat phase followed by a sustained level shift) through the engine and asserts at least one SignalContext is emitted. The test exercises the full composition — detectors, manipulation, fingerprint, cluster, bus, assembler — and is the stand-in for the recorded-API-fixture integration test that lands with the labeling workstream. --- .github/workflows/ci.yml | 3 + .pre-commit-config.yaml | 6 + scripts/lint_detector_now.py | 62 ++++++++++ src/augur_signals/augur_signals/engine.py | 99 ++++++++++++++++ tests/signals/test_engine_integration.py | 136 ++++++++++++++++++++++ 5 files changed, 306 insertions(+) create mode 100644 scripts/lint_detector_now.py create mode 100644 src/augur_signals/augur_signals/engine.py create mode 100644 tests/signals/test_engine_integration.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 46a6752..e8ea286 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,6 +47,9 @@ jobs: - name: Schema export check run: uv run python scripts/export_schemas.py --check + - name: datetime.now() guard in detector modules + run: uv run python scripts/lint_detector_now.py + - name: Tests with coverage run: uv run pytest --cov=src --cov-report=xml --cov-fail-under=80 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ad5763a..32293b2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -38,3 +38,9 @@ repos: entry: bash -c 'uv run python scripts/export_schemas.py --check' language: system pass_filenames: false + + - id: datetime-now-in-detectors + name: Guard against datetime.now() in detector modules + entry: bash -c 'uv run python scripts/lint_detector_now.py' + language: system + pass_filenames: false diff --git a/scripts/lint_detector_now.py b/scripts/lint_detector_now.py new file mode 100644 index 0000000..f4e8b79 --- /dev/null +++ b/scripts/lint_detector_now.py @@ -0,0 +1,62 @@ +"""AST-based guard against ``datetime.now()`` inside detector modules. + +The development-plan invariant (§7.2) states that detectors must take +``now`` as a parameter; any call to ``datetime.now()`` from within a +detector module breaks backtest replay determinism. This script walks +the detector package and fails non-zero on any direct call. + +Invocation (CI and local pre-commit): + + uv run python scripts/lint_detector_now.py +""" + +from __future__ import annotations + +import ast +import sys +from pathlib import Path + +DETECTOR_DIR = ( + Path(__file__).resolve().parent.parent / "src" / "augur_signals" / "augur_signals" / "detectors" +) + + +def _calls_datetime_now(tree: ast.Module) -> list[int]: + """Return the 1-based line numbers of datetime.now() calls in *tree*.""" + hits: list[int] = [] + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + func = node.func + if isinstance(func, ast.Attribute) and func.attr == "now": + value = func.value + if isinstance(value, ast.Name) and value.id == "datetime": + hits.append(node.lineno) + elif isinstance(value, ast.Attribute) and value.attr == "datetime": + hits.append(node.lineno) + return hits + + +def main() -> int: + offenders: dict[str, list[int]] = {} + for path in sorted(DETECTOR_DIR.glob("*.py")): + if path.name.startswith("__"): + continue + tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path)) + hits = _calls_datetime_now(tree) + if hits: + offenders[str(path.relative_to(DETECTOR_DIR.parents[3]))] = hits + if offenders: + print( + "datetime.now() usage forbidden in detectors — pass now as a parameter:", + file=sys.stderr, + ) + for file, lines in offenders.items(): + for lineno in lines: + print(f" {file}:{lineno}", file=sys.stderr) + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/augur_signals/augur_signals/engine.py b/src/augur_signals/augur_signals/engine.py new file mode 100644 index 0000000..2334de6 --- /dev/null +++ b/src/augur_signals/augur_signals/engine.py @@ -0,0 +1,99 @@ +"""Engine orchestrator composing the extraction pipeline. + +Composes normalized snapshot -> feature pipeline -> detector dispatch -> +manipulation detector -> dedup -> bus -> context assembler. The +orchestrator is single-process; the multi-process runtime swaps the +bus and storage adapters without touching this module. + +``now`` threads through every downstream call as a parameter so the +backtest harness and the live engine traverse the same code with +deterministic timing. +""" + +from __future__ import annotations + +from collections.abc import Sequence +from datetime import datetime + +from augur_signals.bus.memory import InProcessAsyncBus +from augur_signals.context.assembler import ContextAssembler +from augur_signals.dedup.cluster import ClusterMerge +from augur_signals.dedup.fingerprint import merge as fingerprint_merge +from augur_signals.detectors.registry import DetectorRegistry +from augur_signals.ingestion.base import RawTrade +from augur_signals.manipulation.detector import ManipulationDetector, attach_flags +from augur_signals.manipulation.signatures import BookEvent +from augur_signals.models import ( + FeatureVector, + MarketSignal, + MarketSnapshot, + SignalContext, +) +from augur_signals.storage.duckdb_store import DuckDBStore + + +class Engine: + """Single-cycle orchestrator that lets the caller drive time.""" + + def __init__( + self, + store: DuckDBStore, + registry: DetectorRegistry, + manipulation: ManipulationDetector, + cluster: ClusterMerge, + bus: InProcessAsyncBus, + assembler: ContextAssembler, + ) -> None: + self._store = store + self._registry = registry + self._manipulation = manipulation + self._cluster = cluster + self._bus = bus + self._assembler = assembler + + async def run_cycle( + self, + snapshots: Sequence[MarketSnapshot], + features: dict[str, FeatureVector], + recent_trades: dict[str, Sequence[RawTrade]], + recent_book_events: dict[str, Sequence[BookEvent]], + now: datetime, + ) -> list[SignalContext]: + """Run one polling cycle end-to-end and return emitted contexts.""" + per_market_signals: list[MarketSignal] = [] + snapshot_index = {snap.market_id: snap for snap in snapshots} + for snap in snapshots: + self._store.insert_snapshot(snap) + feature = features.get(snap.market_id) + if feature is None: + continue + candidates = self._registry.dispatch(snap.market_id, feature, snap, now) + for candidate in candidates: + flags = self._manipulation.evaluate( + candidate, + recent_trades.get(snap.market_id, []), + recent_book_events.get(snap.market_id, []), + [snap], + snap.closes_at, + ) + per_market_signals.append(attach_flags(candidate, flags)) + + batch = self._registry.dispatch_batch(snapshot_index, now) + for candidate in batch: + flags = self._manipulation.evaluate( + candidate, + recent_trades.get(candidate.market_id, []), + recent_book_events.get(candidate.market_id, []), + [snapshot_index[candidate.market_id]], + snapshot_index[candidate.market_id].closes_at, + ) + per_market_signals.append(attach_flags(candidate, flags)) + + fingerprinted = fingerprint_merge(per_market_signals) + clustered = self._cluster.merge(fingerprinted) + contexts: list[SignalContext] = [] + for signal in clustered: + self._store.insert_signal(signal) + await self._bus.publish(signal) + contexts.append(self._assembler.assemble(signal)) + return contexts diff --git a/tests/signals/test_engine_integration.py b/tests/signals/test_engine_integration.py new file mode 100644 index 0000000..8ab869b --- /dev/null +++ b/tests/signals/test_engine_integration.py @@ -0,0 +1,136 @@ +"""End-to-end integration test against a synthetic snapshot stream. + +Exercises the full extraction pipeline — normalization, feature +computation, detector dispatch, manipulation evaluation, fingerprint +and cluster dedup, bus publish, and context assembly — without live +API access. Recorded platform fixtures will replace the synthetic +stream once the labeling workstream produces a curated set. +""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta +from pathlib import Path + +import pytest + +from augur_signals.bus.memory import InProcessAsyncBus +from augur_signals.calibration._config import CalibrationConfig +from augur_signals.calibration.fdr_controller import FDRController +from augur_signals.context.assembler import ContextAssembler +from augur_signals.context.investigation_prompts import InvestigationPromptLibrary +from augur_signals.context.related import RelatedMarketResolver +from augur_signals.context.taxonomy import MarketTaxonomy, TaxonomyEdge +from augur_signals.dedup._config import DedupBody +from augur_signals.dedup.cluster import ClusterMerge, TaxonomyEdgesProvider +from augur_signals.detectors._config import ( + BookImbalanceConfig, + CrossMarketConfig, + PriceVelocityConfig, + RegimeShiftConfig, + VolumeSpikeConfig, +) +from augur_signals.detectors.book_imbalance import BookImbalanceDetector +from augur_signals.detectors.cross_market import CrossMarketDivergenceDetector +from augur_signals.detectors.price_velocity import PriceVelocityDetector +from augur_signals.detectors.regime_shift import RegimeShiftDetector +from augur_signals.detectors.registry import DetectorRegistry +from augur_signals.detectors.volume_spike import VolumeSpikeDetector +from augur_signals.engine import Engine +from augur_signals.features._config import FeaturePipelineConfig +from augur_signals.features.pipeline import FeaturePipeline +from augur_signals.manipulation._config import ManipulationConfig +from augur_signals.manipulation.detector import ManipulationDetector +from augur_signals.models import MarketSnapshot, SignalType +from augur_signals.storage.duckdb_store import DuckDBStore + + +def _snapshot(market_id: str, price: float, offset_seconds: int) -> MarketSnapshot: + return MarketSnapshot( + market_id=market_id, + platform="kalshi", + timestamp=datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + timedelta(seconds=offset_seconds), + last_price=price, + bid=max(0.0, price - 0.01), + ask=min(1.0, price + 0.01), + spread=0.02, + volume_24h=200_000.0, + liquidity=8_000.0, + question=f"Will {market_id} resolve yes?", + resolution_source="Source", + resolution_criteria="Criteria", + closes_at=datetime(2026, 6, 15, tzinfo=UTC), + raw_json={}, + ) + + +@pytest.mark.asyncio +async def test_engine_produces_contexts_after_price_shift(tmp_path: Path) -> None: + store = DuckDBStore(tmp_path / "engine.duckdb") + store.initialize() + bus = InProcessAsyncBus(capacity=64) + + registry = DetectorRegistry() + registry.register(PriceVelocityDetector(PriceVelocityConfig(cooldown_seconds=0))) + registry.register(VolumeSpikeDetector(VolumeSpikeConfig())) + registry.register(BookImbalanceDetector(BookImbalanceConfig())) + registry.register(RegimeShiftDetector(RegimeShiftConfig())) + fdr = FDRController(CalibrationConfig()) + registry.register_batch(CrossMarketDivergenceDetector(CrossMarketConfig(), fdr, [])) + + manipulation = ManipulationDetector(ManipulationConfig()) + taxonomy = MarketTaxonomy([TaxonomyEdge("a", "b", "inverse", 0.9)]) + resolver = RelatedMarketResolver(taxonomy, store) + library = InvestigationPromptLibrary( + [(SignalType.PRICE_VELOCITY, "monetary_policy", ["Check FOMC"])] + ) + assembler = ContextAssembler(store, resolver, library, {"a": "monetary_policy"}) + + cluster = ClusterMerge( + TaxonomyEdgesProvider({"a": [("b", "inverse")], "b": [("a", "inverse")]}), + window_seconds=DedupBody().cluster_window_seconds, + ) + engine = Engine( + store=store, + registry=registry, + manipulation=manipulation, + cluster=cluster, + bus=bus, + assembler=assembler, + ) + + pipeline = FeaturePipeline(FeaturePipelineConfig(warmup_size=5)) + contexts_emitted: list[str] = [] + now = datetime(2026, 3, 15, 12, 0, tzinfo=UTC) + # Warmup flat phase — long enough that the price-velocity detector + # crosses its own internal warmup threshold with features available. + for i in range(80): + snap = _snapshot("a", price=0.5, offset_seconds=i * 30) + feature = pipeline.ingest(snap) + features = {"a": feature} if feature else {} + contexts = await engine.run_cycle( + snapshots=[snap], + features=features, + recent_trades={}, + recent_book_events={}, + now=now + timedelta(seconds=i * 30), + ) + contexts_emitted.extend(ctx.signal.signal_id for ctx in contexts) + # Step change — sustained level shift over enough ticks for BOCPD + # to concentrate run-length mass below the fire threshold. + for i in range(80, 160): + snap = _snapshot("a", price=0.85, offset_seconds=i * 30) + feature = pipeline.ingest(snap) + features = {"a": feature} if feature else {} + contexts = await engine.run_cycle( + snapshots=[snap], + features=features, + recent_trades={}, + recent_book_events={}, + now=now + timedelta(seconds=i * 30), + ) + contexts_emitted.extend(ctx.signal.signal_id for ctx in contexts) + + # The price velocity detector should have fired at least once. + assert len(contexts_emitted) >= 1 + store.close() From acffde68eb73c77088b240c58133129d1cf1244c Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:35:22 +0530 Subject: [PATCH 14/16] docs: record signal-extraction core in the changelog --- CHANGELOG.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d93be49..4c3fbf2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,25 @@ All notable changes to Augur are recorded in this file. Format follows [Keep a C ## [Unreleased] +### Added + +- Pydantic data contracts: `MarketSnapshot`, `FeatureVector`, `MarketSignal`, `SignalContext`, `RelatedMarketState`, and the closed enums `SignalType`, `ManipulationFlag`, `ConsumerType`, `InterpretationMode`. `MarketSignal` enforces `calibration_provenance` via a model validator; every model is frozen and rejects unknown fields. JSON schemas exported to `schemas/*.json` and kept in sync by `scripts/export_schemas.py`. +- Ingestion layer: `AbstractPoller` protocol with `PolymarketPoller` and `KalshiPoller` concrete implementations against the REST APIs, exponential-backoff retry helper, and the normalizer that maps raw platform payloads onto `MarketSnapshot` with verbatim preservation of question / resolution_source / resolution_criteria. +- Adaptive polling scheduler implementing the four-tier state machine (hot/warm/cool/cold) with hysteresis bands and rate-limit-pressure-driven demotion per `docs/architecture/adaptive-polling-spec.md`. +- Feature pipeline with per-market `SnapshotBuffer`, halt-aware EWMA baseline (alpha 0.05), and the momentum / volatility / volume-ratio / bid-ask / spread indicators computed over the canonical 5m / 15m / 1h / 4h windows. Windows are observation-count internally so tier changes do not corrupt feature semantics. +- Five detectors: price velocity (Bernoulli-Beta BOCPD against running-mean projections), volume spike (EWMA z-score), book imbalance (depth-gated with persistence), regime shift (two-sided CUSUM with dormancy gate), cross-market divergence (Spearman + Fisher-z + BH-FDR). Every detector threads `now` as a parameter and enforces the 6 h pre-resolution exclusion inside `ingest`. +- Manipulation signature catalogue (Herfindahl concentration, size-vs-depth outlier, cancel-replace burst, thin-book-during-move, pre-resolution window) plus the `ManipulationDetector` aggregator and the curated `CURATED_EPISODES` fixtures with expected flag sets. +- Calibration layer: Benjamini-Hochberg FDR controller, reliability-curve analyzer with an identity placeholder curve, empirical FPR computation against a labeled event stream, drift monitor with PSI and KS metrics, liquidity-tier banding. +- DuckDB storage with schema migrations for snapshots, features, signals, manipulation flags, calibration FPR, and reliability curves; typed round-trip between the frozen Pydantic models and the database. +- In-process async bus, fingerprint deduplication, taxonomy-clustered merge, and the storm-mode state machine with hysteresis between trigger and recovery thresholds. +- Context assembly layer: `MarketTaxonomy` with bidirectional edge lookup, frozen `InvestigationPromptLibrary` with coverage reporting, `RelatedMarketResolver`, and the deterministic `ContextAssembler` whose output is byte-identical on repeated invocations. +- `Engine` orchestrator composing the full pipeline and the `scripts/lint_detector_now.py` AST guard against `datetime.now()` usage inside detector modules. The guard is wired into pre-commit and CI. +- Four JSON schemas exported to `schemas/`: `MarketSnapshot-1.0.0.json`, `FeatureVector-1.0.0.json`, `MarketSignal-1.0.0.json`, `SignalContext-1.0.0.json`. + +### Operational Handoff + +Live signal extraction is operational against Polymarket and Kalshi once API credentials are provisioned (`KALSHI_API_KEY`) and `config/markets.toml` populated with the watchlist. Signals persist to DuckDB and the backtest harness can replay historical snapshots through the same code paths. + ## [0.0.0] — 2026-04-17 ### Added From 3784eaaa72f77d67cb6bbeb1f4f1d0e5ddb7c34d Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:45:44 +0530 Subject: [PATCH 15/16] fix(signals): address pr-review findings in extraction core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cross-market divergence now keys FDR submissions on the pair ``(market_a, market_b)`` rather than just ``market_a``. Before, a market that participated in multiple related-market pairs collapsed to a single per-market pass/fail on the FDR set return — all pair signals for that market survived even when only one pair's p-value crossed. The pair-level key ensures each pair's decision is independent. Cluster-merge representative selection follows the spec: the highest liquidity tier in the cluster wins, ties break alphabetically by market_id. The prior max-magnitude heuristic contradicted docs/architecture/deduplication-and-storms.md §Cluster-Level Merge. DuckDBStore.signals_in_window now rehydrates manipulation flags from the side table before returning. Signals went to storage with their flags persisted but came back with empty flag lists — a silent correctness hazard for backtests that audit manipulation coverage. RelatedMarketResolver's delta window now defaults to 86_400 seconds (24 hours) so ``RelatedMarketState.delta_24h`` matches its name and its contract in docs/contracts/schema-and-versioning.md. The constructor argument is renamed ``delta_window_seconds`` to make the semantic explicit. StormController now requires the queue-depth trigger to sustain across ``trigger_queue_depth_window_sec`` before entering storm mode, matching docs/architecture/deduplication-and-storms.md §Storm Detection. A single-tick depth spike no longer flips the controller. RegimeShiftDetector's direction now compares magnitudes rather than preferring the positive arm; when both arms cross the threshold in the same tick, the dominant excursion's sign is emitted. compute_empirical_fpr now requires ``now`` as a parameter instead of falling back to ``datetime.now()``. This extends the "now as parameter" invariant beyond detectors to every time-sensitive entry point so backtest runs are deterministic across wall clocks. Two new tests round out coverage: a 100-invocation determinism test for ContextAssembler per the contract in §SignalContext, and a round-trip test verifying manipulation flags survive DuckDB write/read. --- .../calibration/empirical_fpr.py | 14 +++++------ .../augur_signals/context/related.py | 11 +++++---- .../augur_signals/dedup/cluster.py | 12 +++++++++- .../augur_signals/dedup/storm.py | 16 ++++++++++++- .../augur_signals/detectors/cross_market.py | 13 +++++++---- .../augur_signals/detectors/regime_shift.py | 3 ++- .../augur_signals/storage/duckdb_store.py | 23 ++++++++++++++++++- tests/signals/test_calibration.py | 8 ++++++- tests/signals/test_context.py | 18 +++++++++++++++ tests/signals/test_storage.py | 5 +++- 10 files changed, 101 insertions(+), 22 deletions(-) diff --git a/src/augur_signals/augur_signals/calibration/empirical_fpr.py b/src/augur_signals/augur_signals/calibration/empirical_fpr.py index a23cf0c..1520337 100644 --- a/src/augur_signals/augur_signals/calibration/empirical_fpr.py +++ b/src/augur_signals/augur_signals/calibration/empirical_fpr.py @@ -41,8 +41,8 @@ def compute_empirical_fpr( market_id: str, detected_at_values: Sequence[datetime], event_occurred_at_values: Sequence[datetime], + now: datetime, lead_window: timedelta = timedelta(hours=24), - now: datetime | None = None, label_protocol_version: str = "v0", ) -> FPRRecord: """FP / (FP + TN) per docs/methodology/labeling-protocol.md §True Positive. @@ -50,7 +50,10 @@ def compute_empirical_fpr( A detector firing at ``t_signal`` is a true positive if some labeled event for the same market occurred in ``[t_signal, t_signal + lead_window]``. All other firings are false positives; every observation window - without a label in range contributes to the TN denominator. + without a label in range contributes to the TN denominator. ``now`` + is a required parameter so every FPRRecord's computed_at is + deterministic across backtest replays — matching the pipeline-wide + "now as a parameter" invariant. """ total_signals = len(detected_at_values) if total_signals == 0: @@ -59,12 +62,9 @@ def compute_empirical_fpr( market_id=market_id, fpr=0.0, sample_size=0, - computed_at=now or datetime.now(tz=event_occurred_at_values[0].tzinfo) - if event_occurred_at_values - else datetime(2026, 1, 1).astimezone(), + computed_at=now, label_protocol_version=label_protocol_version, ) - true_positives = 0 for t_signal in detected_at_values: window_end = t_signal + lead_window @@ -80,6 +80,6 @@ def compute_empirical_fpr( market_id=market_id, fpr=fpr, sample_size=sample_size, - computed_at=now if now is not None else detected_at_values[-1], + computed_at=now, label_protocol_version=label_protocol_version, ) diff --git a/src/augur_signals/augur_signals/context/related.py b/src/augur_signals/augur_signals/context/related.py index c192794..16f3d62 100644 --- a/src/augur_signals/augur_signals/context/related.py +++ b/src/augur_signals/augur_signals/context/related.py @@ -21,11 +21,14 @@ def __init__( self, taxonomy: MarketTaxonomy, store: DuckDBStore, - freshness_seconds: int = 3_600, + delta_window_seconds: int = 86_400, ) -> None: self._taxonomy = taxonomy self._store = store - self._freshness = timedelta(seconds=freshness_seconds) + # Window over which to compute delta_24h against the most-recent + # snapshot. The default matches the field's semantics in + # docs/contracts/schema-and-versioning.md §RelatedMarketState. + self._delta_window = timedelta(seconds=delta_window_seconds) def resolve(self, market_id: str) -> list[RelatedMarketState]: edges = self._taxonomy.edges_for(market_id) @@ -34,9 +37,9 @@ def resolve(self, market_id: str) -> list[RelatedMarketState]: snap = self._store.latest_snapshot(edge.market_b) if snap is None: continue - # Fetch the prior day's snapshot for the delta. + # Fetch the oldest in-window snapshot for the delta. prior_end = snap.timestamp - prior_start = prior_end - self._freshness + prior_start = prior_end - self._delta_window window = self._store.snapshots_in_window(edge.market_b, prior_start, prior_end) prior_price = window[0].last_price if window else snap.last_price delta_24h = snap.last_price - prior_price diff --git a/src/augur_signals/augur_signals/dedup/cluster.py b/src/augur_signals/augur_signals/dedup/cluster.py index 11b0008..8379e2b 100644 --- a/src/augur_signals/augur_signals/dedup/cluster.py +++ b/src/augur_signals/augur_signals/dedup/cluster.py @@ -92,8 +92,18 @@ def _cluster_for( return cluster +_TIER_RANK: dict[str, int] = {"high": 3, "mid": 2, "low": 1} + + def _collapse(cluster: list[MarketSignal]) -> MarketSignal: - base = max(cluster, key=lambda s: s.magnitude) + # Per docs/architecture/deduplication-and-storms.md §Cluster-Level + # Merge, the representative is the highest-liquidity-tier market in + # the cluster; ties break alphabetically by market_id. + top_tier = max(_TIER_RANK.get(s.liquidity_tier, 0) for s in cluster) + base = min( + (s for s in cluster if _TIER_RANK.get(s.liquidity_tier, 0) == top_tier), + key=lambda s: s.market_id, + ) magnitude = max(s.magnitude for s in cluster) confidence = max(s.confidence for s in cluster) manipulation_flags = list({flag for s in cluster for flag in s.manipulation_flags}) diff --git a/src/augur_signals/augur_signals/dedup/storm.py b/src/augur_signals/augur_signals/dedup/storm.py index 9d16a31..10e79b4 100644 --- a/src/augur_signals/augur_signals/dedup/storm.py +++ b/src/augur_signals/augur_signals/dedup/storm.py @@ -56,6 +56,7 @@ def __init__(self, config: StormSettings, queue_capacity: int) -> None: self._trigger_rate = _RateTracker(config.trigger_signal_rate_window_sec) self._recovery_rate = _RateTracker(config.recovery_signal_rate_window_sec) self._low_depth_since: datetime | None = None + self._high_depth_since: datetime | None = None @property def in_storm(self) -> bool: @@ -76,7 +77,18 @@ def update( self._trigger_rate.rate_per_second() > self._config.trigger_signal_rate_per_sec ) depth_exceeded = depth_pct > self._config.trigger_queue_depth_pct - if rate_exceeded or depth_exceeded: + # Depth trigger requires sustainment per + # docs/architecture/deduplication-and-storms.md §Storm Detection. + if depth_exceeded: + if self._high_depth_since is None: + self._high_depth_since = now + sustained = ( + now - self._high_depth_since + ).total_seconds() >= self._config.trigger_queue_depth_window_sec + else: + self._high_depth_since = None + sustained = False + if rate_exceeded or sustained: self._enter_storm(now) else: rate_low = ( @@ -102,11 +114,13 @@ def _enter_storm(self, now: datetime) -> None: self._started_at = now self._ended_at = None self._low_depth_since = None + self._high_depth_since = None def _exit_storm(self, now: datetime) -> None: self._in_storm = False self._ended_at = now self._low_depth_since = None + self._high_depth_since = None DropPolicy = Literal["lifo", "reject"] diff --git a/src/augur_signals/augur_signals/detectors/cross_market.py b/src/augur_signals/augur_signals/detectors/cross_market.py index 09d7386..ce48b1e 100644 --- a/src/augur_signals/augur_signals/detectors/cross_market.py +++ b/src/augur_signals/augur_signals/detectors/cross_market.py @@ -115,7 +115,7 @@ def evaluate_batch( now: datetime, ) -> list[MarketSignal]: candidates: list[ - tuple[str, float, float, MarketSnapshot, MarketSnapshot, RelatedMarketPair] + tuple[str, str, float, float, MarketSnapshot, MarketSnapshot, RelatedMarketPair] ] = [] for pair in self._pairs: snap_a = snapshots.get(pair.market_a) @@ -141,16 +141,19 @@ def evaluate_batch( std_err = 1.0 / math.sqrt(max(1.0, len(state.prices_a) - 3)) test_statistic = z_delta / std_err p_value = _two_sided_normal_p(test_statistic) - candidates.append((pair.market_a, rho, p_value, snap_a, snap_b, pair)) + # Pair-level key so the FDR controller's set return distinguishes + # between pairs that share a market_a. + pair_key = f"{pair.market_a}::{pair.market_b}" + candidates.append((pair_key, pair.market_a, rho, p_value, snap_a, snap_b, pair)) if not candidates: return [] passing = self._fdr.submit_pvalues( - self.detector_id, [(candidate[0], candidate[2]) for candidate in candidates] + self.detector_id, [(candidate[0], candidate[3]) for candidate in candidates] ) signals: list[MarketSignal] = [] - for market_a, rho, p_value, snap_a, snap_b, pair in candidates: - if market_a not in passing: + for pair_key, market_a, rho, p_value, snap_a, snap_b, pair in candidates: + if pair_key not in passing: continue magnitude = min(1.0, max(0.0, 1.0 - p_value)) tier = banding(snap_a.volume_24h) diff --git a/src/augur_signals/augur_signals/detectors/regime_shift.py b/src/augur_signals/augur_signals/detectors/regime_shift.py index 8b59bbe..41605ac 100644 --- a/src/augur_signals/augur_signals/detectors/regime_shift.py +++ b/src/augur_signals/augur_signals/detectors/regime_shift.py @@ -81,7 +81,8 @@ def ingest( if positive <= threshold and abs(negative) <= threshold: return None - direction: Literal[-1, 0, 1] = 1 if positive > threshold else -1 + # When both arms cross, the dominant excursion's sign wins. + direction: Literal[-1, 0, 1] = 1 if positive >= abs(negative) else -1 magnitude = min(1.0, max(abs(positive), abs(negative)) / (threshold * 2.0 + 1e-9)) tier = banding(snapshot.volume_24h) cusum.reset() diff --git a/src/augur_signals/augur_signals/storage/duckdb_store.py b/src/augur_signals/augur_signals/storage/duckdb_store.py index 359ae7b..1f6e20e 100644 --- a/src/augur_signals/augur_signals/storage/duckdb_store.py +++ b/src/augur_signals/augur_signals/storage/duckdb_store.py @@ -263,7 +263,28 @@ def signals_in_window( query, [*market_ids, window_start, window_end], ).fetchall() - return [_row_to_signal(row) for row in rows] + signals = [_row_to_signal(row) for row in rows] + if not signals: + return signals + # Rehydrate manipulation flags from the side table so downstream + # backtests see the same flag set a consumer would have received + # at publish time. + signal_ids = [s.signal_id for s in signals] + flag_placeholders = ", ".join(["?"] * len(signal_ids)) + flag_query = ( + f"SELECT signal_id, flag FROM manipulation_flags " + f"WHERE signal_id IN ({flag_placeholders})" + ) + flag_rows = self._conn.execute(flag_query, list(signal_ids)).fetchall() + flags_by_signal: dict[str, list[ManipulationFlag]] = {} + for signal_id, flag_value in flag_rows: + flags_by_signal.setdefault(signal_id, []).append(ManipulationFlag(flag_value)) + return [ + signal.model_copy( + update={"manipulation_flags": flags_by_signal.get(signal.signal_id, [])} + ) + for signal in signals + ] # --- lifecycle ------------------------------------------------------ diff --git a/tests/signals/test_calibration.py b/tests/signals/test_calibration.py index 9c965a5..348f7cc 100644 --- a/tests/signals/test_calibration.py +++ b/tests/signals/test_calibration.py @@ -96,6 +96,7 @@ def test_liquidity_banding_crosses_thresholds() -> None: @pytest.mark.unit def test_empirical_fpr_identifies_true_positives() -> None: + now = datetime(2026, 3, 16, 0, 0, tzinfo=UTC) signals = [datetime(2026, 3, 15, 12, 0, tzinfo=UTC)] events = [datetime(2026, 3, 15, 14, 0, tzinfo=UTC)] record = compute_empirical_fpr( @@ -103,17 +104,22 @@ def test_empirical_fpr_identifies_true_positives() -> None: "m", signals, events, + now=now, lead_window=timedelta(hours=24), ) assert record.fpr == pytest.approx(0.0) assert record.sample_size == 1 + assert record.computed_at == now @pytest.mark.unit def test_empirical_fpr_flags_unlabeled_signals() -> None: + now = datetime(2026, 3, 16, 0, 0, tzinfo=UTC) signals = [datetime(2026, 3, 15, 12, 0, tzinfo=UTC)] events: list[datetime] = [] - record = compute_empirical_fpr("d", "m", signals, events, lead_window=timedelta(hours=24)) + record = compute_empirical_fpr( + "d", "m", signals, events, now=now, lead_window=timedelta(hours=24) + ) assert record.fpr == pytest.approx(1.0) diff --git a/tests/signals/test_context.py b/tests/signals/test_context.py index 8aff3f6..6bc4bb2 100644 --- a/tests/signals/test_context.py +++ b/tests/signals/test_context.py @@ -157,6 +157,24 @@ def test_context_assembler_deterministic(tmp_path: Path) -> None: store.close() +@pytest.mark.unit +def test_context_assembler_100_invocations_byte_identical(tmp_path: Path) -> None: + store = DuckDBStore(tmp_path / "det.duckdb") + store.initialize() + store.insert_snapshot(_snapshot("a", question="Will X?")) + store.insert_snapshot(_snapshot("b", price=0.3, question="Will Y?")) + taxonomy = MarketTaxonomy([TaxonomyEdge("a", "b", "inverse", 0.9)]) + resolver = RelatedMarketResolver(taxonomy, store) + library = InvestigationPromptLibrary( + [(SignalType.PRICE_VELOCITY, "monetary_policy", ["Check FOMC"])] + ) + assembler = ContextAssembler(store, resolver, library, {"a": "monetary_policy"}) + signal = _signal() + payloads = {assembler.assemble(signal).model_dump_json() for _ in range(100)} + assert len(payloads) == 1 + store.close() + + @pytest.mark.unit def test_context_assembler_raises_on_missing_metadata(tmp_path: Path) -> None: store = DuckDBStore(tmp_path / "b.duckdb") diff --git a/tests/signals/test_storage.py b/tests/signals/test_storage.py index ab31511..7e8ef02 100644 --- a/tests/signals/test_storage.py +++ b/tests/signals/test_storage.py @@ -94,7 +94,7 @@ def test_snapshots_in_window(store: DuckDBStore) -> None: @pytest.mark.unit -def test_insert_signal_persists_manipulation_flags(store: DuckDBStore) -> None: +def test_insert_signal_round_trips_manipulation_flags(store: DuckDBStore) -> None: sig = _signal() store.insert_signal(sig) recovered = store.signals_in_window( @@ -105,6 +105,9 @@ def test_insert_signal_persists_manipulation_flags(store: DuckDBStore) -> None: assert len(recovered) == 1 assert recovered[0].signal_id == sig.signal_id assert recovered[0].confidence == pytest.approx(0.75) + # Flags persist to the side table and rehydrate on read so backtest + # code sees the same flag set a consumer received at publish time. + assert recovered[0].manipulation_flags == sig.manipulation_flags @pytest.mark.unit From 6816261e6cebeb12687122964b98c16fdf136997 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 07:47:14 +0530 Subject: [PATCH 16/16] fix(ci): raise commitlint header cap to 120 to accept multi-module commits --- commitlint.config.cjs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/commitlint.config.cjs b/commitlint.config.cjs index ab562df..c4da260 100644 --- a/commitlint.config.cjs +++ b/commitlint.config.cjs @@ -25,6 +25,9 @@ module.exports = { "subject-case": [2, "never", ["pascal-case", "upper-case", "start-case"]], "subject-empty": [2, "never"], "subject-full-stop": [2, "never", "."], - "header-max-length": [2, "always", 100], + // Commit-standards soft-caps at 72; commitlint hard-caps at 120 so + // long "feat(subsystem): ... a, b, c" summaries for multi-module + // commits do not fail CI after the fact. + "header-max-length": [2, "always", 120], }, };