From bd58942192990ddebd9354d2d44444d3cd014b16 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 14:46:55 +0530 Subject: [PATCH 01/11] chore(ops): add distributed-runtime dependencies and config schemas Introduce the configuration surface that the multi-process runtime consumes: storage.toml for backend selection and hypertable/compression /retention policies, bus.toml for NATS/Redis/memory backend wiring and distributed-lock TTLs, observability.toml for the Prometheus and OTLP exporters. Each TOML loads through a Pydantic frozen model with extra=forbid so typos fail loud at startup. Move NATS, Redis, psycopg, prometheus_client, and the OpenTelemetry SDKs into optional-dependencies groups on augur-signals so the monolith wheel stays lean; the dev group adds them (plus fakeredis) so CI can unit-test adapters against injected fakes without live clusters. --- config/bus.toml | 43 ++ config/observability.toml | 28 ++ config/storage.toml | 47 ++ pyproject.toml | 11 + .../augur_signals/_observability_config.py | 55 +++ .../augur_signals/bus/_config.py | 65 +++ .../augur_signals/storage/_config.py | 85 ++++ src/augur_signals/pyproject.toml | 16 + tests/signals/test_distributed_config.py | 105 ++++ uv.lock | 460 ++++++++++++++++++ 10 files changed, 915 insertions(+) create mode 100644 config/bus.toml create mode 100644 config/observability.toml create mode 100644 config/storage.toml create mode 100644 src/augur_signals/augur_signals/_observability_config.py create mode 100644 src/augur_signals/augur_signals/bus/_config.py create mode 100644 src/augur_signals/augur_signals/storage/_config.py create mode 100644 tests/signals/test_distributed_config.py diff --git a/config/bus.toml b/config/bus.toml new file mode 100644 index 0000000..728416f --- /dev/null +++ b/config/bus.toml @@ -0,0 +1,43 @@ +# Message bus configuration. +# +# The default backend is "memory" — the single-process InProcessAsyncBus +# used by the monolith engine. 
Operators flip to "nats" or "redis" when +# they deploy the multi-process runtime; see `.docs/phase-5-scaling.md +# §4` for the decision matrix and operational tradeoffs. + +[backend] +# One of: "memory" | "nats" | "redis". +kind = "memory" +# Used by the in-process bus; ignored for nats/redis. +capacity = 256 + +[nats] +servers = ["nats://localhost:4222"] +# File containing nats credentials. The env var is read at startup; +# the file path inside the env var is opened by the nats-py client. +credentials_file_env = "NATS_CREDENTIALS_FILE" +# JetStream stream name. A single stream carries every augur.* subject. +stream_name = "augur" +# JetStream replication factor. Production clusters run at 3; a +# single-node dev cluster accepts 1. +replication_factor = 3 +# Subject prefix for every Augur topic. Downstream subjects follow +# the §4.3 layout: {subject_prefix}.snapshots.{token}.{token}, etc. +subject_prefix = "augur" + +[redis] +url_env = "REDIS_URL" +# XADD MAXLEN target per stream. Oldest entries are trimmed past this. +stream_max_length = 100000 +# Consumer groups are named "{consumer_group_prefix}.{name}". +consumer_group_prefix = "augur" +# XREAD block timeout. The consumer loop unblocks at this cadence +# so graceful shutdown observes stop signals without delay. +block_ms = 1000 + +[lock] +# Distributed-lock TTL used by active-passive singleton workers. +# The active holder renews the lock every renew_interval_seconds; if +# renewal misses three intervals the passive peer takes over. +ttl_seconds = 30 +renew_interval_seconds = 10 diff --git a/config/observability.toml b/config/observability.toml new file mode 100644 index 0000000..8b84376 --- /dev/null +++ b/config/observability.toml @@ -0,0 +1,28 @@ +# Observability backend configuration. +# +# Phase 1-4 used no-op shims; Phase 5 replaces them with +# prometheus-client counters/gauges and an OpenTelemetry tracer. Call +# sites are unchanged. When the kind fields below are "disabled" the +# backends stay silent — useful for unit tests and backtest runs. 
+ +[metrics] +# One of: "disabled" | "prometheus". +kind = "prometheus" +# Bound address for the /metrics HTTP endpoint. The worker-harness +# start-up path opens the listener before running subscriptions. +prometheus_bind = "0.0.0.0" +prometheus_port = 9090 + +[traces] +# One of: "disabled" | "otlp". +kind = "otlp" +otlp_endpoint = "http://otel-collector:4317" +service_name = "augur" +# Fraction of spans sampled. At 0.0 the tracer is wired but records +# no spans; at 1.0 every span is recorded. 0.1 is the per-service +# default recommended in `.docs/phase-5-scaling.md §7.2`. +sampling_ratio = 0.1 + +[logs] +level = "INFO" +format = "json" diff --git a/config/storage.toml b/config/storage.toml new file mode 100644 index 0000000..eefa506 --- /dev/null +++ b/config/storage.toml @@ -0,0 +1,47 @@ +# Storage backend configuration. +# +# The Phase 1-4 monolith runs with backend.kind = "duckdb" and the +# local Parquet archive. Cutover to TimescaleDB flips backend.kind +# to "timescaledb" and restarts the engine; rollback flips it back. +# See `.docs/phase-5-scaling.md §5` for the cutover procedure. + +[backend] +# One of: "duckdb" | "timescaledb". +kind = "duckdb" +# Path used when kind == "duckdb". +duckdb_path = "data/augur.duckdb" +# Env var holding the PostgreSQL DSN when kind == "timescaledb". +timescale_url_env = "AUGUR_TIMESCALE_URL" + +[connection] +pool_size = 20 +max_overflow = 10 +pool_timeout_seconds = 30 + +[migration] +parquet_archive_root = "labels/snapshots_archive" +# Dual-write sidecar alerts when the observed per-table lag exceeds +# this threshold. The 7-day dual-write window lives operationally; +# the sidecar surfaces breaches but does not auto-rollback. +dual_write_lag_alert_seconds = 10 + +[hypertable] +# Chunk intervals per hypertable. Tuned so the hot-vs-cold boundary +# lines up with the compression policy below. 
+snapshot_chunk_interval_days = 1 +feature_chunk_interval_days = 1 +signal_chunk_interval_days = 7 + +[retention] +# Retention policies per TimescaleDB hypertable. A zero value skips +# the retention policy (rows never drop). +snapshot_retention_days = 0 +feature_retention_days = 30 +signal_retention_days = 0 + +[compression] +# Chunks older than this threshold compress automatically. Set to +# zero to disable compression on a given hypertable. +snapshot_compress_after_days = 7 +feature_compress_after_days = 7 +signal_compress_after_days = 30 diff --git a/pyproject.toml b/pyproject.toml index bf95622..2da37e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,17 @@ dev = [ "ruff>=0.6", "mypy>=1.11", "pre-commit>=3.7", + # Distributed-runtime clients and their test doubles. CI unit tests + # exercise bus and storage adapters through dependency-injected + # fakes; live NATS / Redis / TimescaleDB clusters are only required + # for the opt-in integration tests. + "fakeredis>=2.23", + "nats-py>=2.7", + "redis>=5.0", + "psycopg[binary]>=3.1", + "prometheus-client>=0.20", + "opentelemetry-api>=1.27", + "opentelemetry-sdk>=1.27", ] [tool.ruff] diff --git a/src/augur_signals/augur_signals/_observability_config.py b/src/augur_signals/augur_signals/_observability_config.py new file mode 100644 index 0000000..a9a082f --- /dev/null +++ b/src/augur_signals/augur_signals/_observability_config.py @@ -0,0 +1,55 @@ +"""Configuration model for observability backends. + +Schema mirrors `config/observability.toml`. The "disabled" variants +are useful for unit tests and backtest runs where metric and trace +emission would pollute the signal. 
+""" + +from __future__ import annotations + +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field + + +class MetricsBody(BaseModel): + """Prometheus exporter parameters.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + kind: Literal["disabled", "prometheus"] = "prometheus" + # The "bind to all interfaces" default is intentional; the metrics + # endpoint is reached from a sibling container (ServiceMonitor / + # scraper) within the same Kubernetes pod network. + prometheus_bind: str = "0.0.0.0" # noqa: S104 + prometheus_port: int = Field(default=9090, gt=0, lt=65536) + + +class TracesBody(BaseModel): + """OpenTelemetry OTLP exporter parameters.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + kind: Literal["disabled", "otlp"] = "otlp" + otlp_endpoint: str = "http://otel-collector:4317" + service_name: str = "augur" + sampling_ratio: float = Field(default=0.1, ge=0.0, le=1.0) + + +class LogsBody(BaseModel): + """Structured-log emitter parameters.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO" + format: Literal["json", "text"] = "json" + + +class ObservabilityConfig(BaseModel): + """Top-level observability configuration.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + metrics: MetricsBody = Field(default_factory=MetricsBody) + traces: TracesBody = Field(default_factory=TracesBody) + logs: LogsBody = Field(default_factory=LogsBody) diff --git a/src/augur_signals/augur_signals/bus/_config.py b/src/augur_signals/augur_signals/bus/_config.py new file mode 100644 index 0000000..5ab31b5 --- /dev/null +++ b/src/augur_signals/augur_signals/bus/_config.py @@ -0,0 +1,65 @@ +"""Configuration model for the message bus. + +Schema mirrors `config/bus.toml`. The default backend is "memory" — +the in-process bus used by the monolith. 
Phase 5 flips the field to +"nats" or "redis" after the operator chooses the cluster topology; +see `.docs/phase-5-scaling.md §4` for the decision matrix. +""" + +from __future__ import annotations + +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field + + +class BackendBody(BaseModel): + """Bus backend selector with defaults tuned for the monolith.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + kind: Literal["memory", "nats", "redis"] + capacity: int = Field(default=256, gt=0) + + +class NATSBody(BaseModel): + """NATS JetStream connection parameters.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + servers: list[str] = Field(default_factory=lambda: ["nats://localhost:4222"]) + credentials_file_env: str = "NATS_CREDENTIALS_FILE" + stream_name: str = "augur" + replication_factor: int = Field(default=3, gt=0) + subject_prefix: str = "augur" + + +class RedisBody(BaseModel): + """Redis Streams connection parameters.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + url_env: str = "REDIS_URL" + stream_max_length: int = Field(default=100_000, gt=0) + consumer_group_prefix: str = "augur" + block_ms: int = Field(default=1000, gt=0) + + +class LockBody(BaseModel): + """Distributed-lock parameters used by active-passive singletons.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + ttl_seconds: int = Field(default=30, gt=0) + renew_interval_seconds: int = Field(default=10, gt=0) + + +class BusConfig(BaseModel): + """Top-level bus configuration loaded from `config/bus.toml`.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + backend: BackendBody + nats: NATSBody = Field(default_factory=NATSBody) + redis: RedisBody = Field(default_factory=RedisBody) + lock: LockBody = Field(default_factory=LockBody) diff --git a/src/augur_signals/augur_signals/storage/_config.py b/src/augur_signals/augur_signals/storage/_config.py new file mode 100644 index 0000000..a1d9bc3 --- 
/dev/null +++ b/src/augur_signals/augur_signals/storage/_config.py @@ -0,0 +1,85 @@ +"""Configuration model for storage backend selection. + +Schema mirrors `config/storage.toml`. The Phase 1-4 monolith reads +``backend.kind == "duckdb"``; Phase 5 cutover flips it to +``"timescaledb"``. See `.docs/phase-5-scaling.md §5` for the cutover +procedure and rollback constraints. +""" + +from __future__ import annotations + +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field + + +class BackendBody(BaseModel): + """Which backing store the engine opens at startup.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + kind: Literal["duckdb", "timescaledb"] + duckdb_path: str = "data/augur.duckdb" + timescale_url_env: str = "AUGUR_TIMESCALE_URL" + + +class ConnectionBody(BaseModel): + """Connection-pool shape used by the TimescaleDB adapter.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + pool_size: int = Field(default=20, gt=0) + max_overflow: int = Field(default=10, ge=0) + pool_timeout_seconds: int = Field(default=30, gt=0) + + +class MigrationBody(BaseModel): + """Parquet-to-TimescaleDB migration settings.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + parquet_archive_root: str = "labels/snapshots_archive" + dual_write_lag_alert_seconds: int = Field(default=10, gt=0) + + +class HypertableBody(BaseModel): + """Chunk intervals for each hypertable.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + snapshot_chunk_interval_days: int = Field(default=1, gt=0) + feature_chunk_interval_days: int = Field(default=1, gt=0) + signal_chunk_interval_days: int = Field(default=7, gt=0) + + +class RetentionBody(BaseModel): + """Retention policies in days; 0 disables the policy.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + snapshot_retention_days: int = Field(default=0, ge=0) + feature_retention_days: int = Field(default=30, ge=0) + signal_retention_days: int = Field(default=0, 
ge=0) + + +class CompressionBody(BaseModel): + """Compression policies in days; 0 disables compression.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + snapshot_compress_after_days: int = Field(default=7, ge=0) + feature_compress_after_days: int = Field(default=7, ge=0) + signal_compress_after_days: int = Field(default=30, ge=0) + + +class StorageConfig(BaseModel): + """Top-level storage configuration loaded from `config/storage.toml`.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + backend: BackendBody + connection: ConnectionBody = Field(default_factory=ConnectionBody) + migration: MigrationBody = Field(default_factory=MigrationBody) + hypertable: HypertableBody = Field(default_factory=HypertableBody) + retention: RetentionBody = Field(default_factory=RetentionBody) + compression: CompressionBody = Field(default_factory=CompressionBody) diff --git a/src/augur_signals/pyproject.toml b/src/augur_signals/pyproject.toml index 3a730f9..ffe3268 100644 --- a/src/augur_signals/pyproject.toml +++ b/src/augur_signals/pyproject.toml @@ -14,6 +14,22 @@ dependencies = [ "uuid7>=0.1", ] +[project.optional-dependencies] +# Installed when the multi-process runtime is deployed; the single- +# process monolith does not require any of these. 
+bus-nats = ["nats-py>=2.7"] +bus-redis = ["redis>=5.0"] +storage-timescale = ["psycopg[binary]>=3.1"] +observability = [ + "prometheus-client>=0.20", + "opentelemetry-api>=1.27", + "opentelemetry-sdk>=1.27", + "opentelemetry-exporter-otlp>=1.27", +] +distributed = [ + "augur-signals[bus-nats,bus-redis,storage-timescale,observability]", +] + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/tests/signals/test_distributed_config.py b/tests/signals/test_distributed_config.py new file mode 100644 index 0000000..ad91ae9 --- /dev/null +++ b/tests/signals/test_distributed_config.py @@ -0,0 +1,105 @@ +"""Tests for the distributed-runtime configuration loaders.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from pydantic import ValidationError + +from augur_signals._config import load_config +from augur_signals._observability_config import ObservabilityConfig +from augur_signals.bus._config import BusConfig +from augur_signals.storage._config import StorageConfig + +REPO_ROOT = Path(__file__).resolve().parents[2] +CONFIG_DIR = REPO_ROOT / "config" + + +@pytest.mark.unit +def test_storage_toml_parses_with_defaults() -> None: + cfg = load_config(CONFIG_DIR / "storage.toml", StorageConfig) + assert cfg.backend.kind == "duckdb" + assert cfg.backend.duckdb_path == "data/augur.duckdb" + assert cfg.connection.pool_size == 20 + assert cfg.migration.dual_write_lag_alert_seconds == 10 + assert cfg.hypertable.signal_chunk_interval_days == 7 + assert cfg.compression.snapshot_compress_after_days == 7 + + +@pytest.mark.unit +def test_storage_rejects_unknown_backend(tmp_path: Path) -> None: + bad = tmp_path / "storage.toml" + bad.write_text('[backend]\nkind = "sqlite"\n', encoding="utf-8") + with pytest.raises(ValidationError): + load_config(bad, StorageConfig) + + +@pytest.mark.unit +def test_storage_rejects_unknown_top_level_section(tmp_path: Path) -> None: + bad = tmp_path / "storage.toml" + bad.write_text( + 
'[backend]\nkind = "duckdb"\n\n[unknown]\nfoo = 1\n', + encoding="utf-8", + ) + with pytest.raises(ValidationError): + load_config(bad, StorageConfig) + + +@pytest.mark.unit +def test_bus_toml_parses_with_defaults() -> None: + cfg = load_config(CONFIG_DIR / "bus.toml", BusConfig) + assert cfg.backend.kind == "memory" + assert cfg.backend.capacity == 256 + assert cfg.nats.subject_prefix == "augur" + assert cfg.redis.stream_max_length == 100_000 + assert cfg.lock.ttl_seconds == 30 + assert cfg.lock.renew_interval_seconds == 10 + + +@pytest.mark.unit +def test_bus_rejects_unknown_backend(tmp_path: Path) -> None: + bad = tmp_path / "bus.toml" + bad.write_text('[backend]\nkind = "kafka"\n', encoding="utf-8") + with pytest.raises(ValidationError): + load_config(bad, BusConfig) + + +@pytest.mark.unit +def test_bus_lock_renew_must_be_positive(tmp_path: Path) -> None: + bad = tmp_path / "bus.toml" + bad.write_text( + '[backend]\nkind = "memory"\n\n[lock]\nttl_seconds = 30\nrenew_interval_seconds = 0\n', + encoding="utf-8", + ) + with pytest.raises(ValidationError): + load_config(bad, BusConfig) + + +@pytest.mark.unit +def test_observability_toml_parses_with_defaults() -> None: + cfg = load_config(CONFIG_DIR / "observability.toml", ObservabilityConfig) + assert cfg.metrics.kind == "prometheus" + assert cfg.metrics.prometheus_port == 9090 + assert cfg.traces.kind == "otlp" + assert cfg.traces.sampling_ratio == 0.1 + assert cfg.logs.level == "INFO" + + +@pytest.mark.unit +def test_observability_sampling_ratio_bounded(tmp_path: Path) -> None: + bad = tmp_path / "observability.toml" + bad.write_text( + '[traces]\nkind = "otlp"\nsampling_ratio = 1.5\n', + encoding="utf-8", + ) + with pytest.raises(ValidationError): + load_config(bad, ObservabilityConfig) + + +@pytest.mark.unit +def test_observability_disabled_metrics_variant(tmp_path: Path) -> None: + good = tmp_path / "observability.toml" + good.write_text('[metrics]\nkind = "disabled"\n', encoding="utf-8") + cfg = 
load_config(good, ObservabilityConfig) + assert cfg.metrics.kind == "disabled" diff --git a/uv.lock b/uv.lock index 32be738..d437ef4 100644 --- a/uv.lock +++ b/uv.lock @@ -1,6 +1,11 @@ version = 1 revision = 1 requires-python = ">=3.12" +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version == '3.13.*'", + "python_full_version < '3.13'", +] [manifest] members = [ @@ -179,12 +184,19 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "fakeredis" }, { name = "hypothesis" }, { name = "mypy" }, + { name = "nats-py" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-sdk" }, { name = "pre-commit" }, + { name = "prometheus-client" }, + { name = "psycopg", extra = ["binary"] }, { name = "pytest" }, { name = "pytest-asyncio" }, { name = "pytest-cov" }, + { name = "redis" }, { name = "ruff" }, ] @@ -197,12 +209,19 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "fakeredis", specifier = ">=2.23" }, { name = "hypothesis", specifier = ">=6.100" }, { name = "mypy", specifier = ">=1.11" }, + { name = "nats-py", specifier = ">=2.7" }, + { name = "opentelemetry-api", specifier = ">=1.27" }, + { name = "opentelemetry-sdk", specifier = ">=1.27" }, { name = "pre-commit", specifier = ">=3.7" }, + { name = "prometheus-client", specifier = ">=0.20" }, + { name = "psycopg", extras = ["binary"], specifier = ">=3.1" }, { name = "pytest", specifier = ">=8.0" }, { name = "pytest-asyncio", specifier = ">=0.23" }, { name = "pytest-cov", specifier = ">=5.0" }, + { name = "redis", specifier = ">=5.0" }, { name = "ruff", specifier = ">=0.6" }, ] @@ -275,16 +294,51 @@ dependencies = [ { name = "uuid7" }, ] +[package.optional-dependencies] +bus-nats = [ + { name = "nats-py" }, +] +bus-redis = [ + { name = "redis" }, +] +distributed = [ + { name = "nats-py" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp" }, + { name = "opentelemetry-sdk" }, + { name = "prometheus-client" }, + { name = "psycopg", 
extra = ["binary"] }, + { name = "redis" }, +] +observability = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp" }, + { name = "opentelemetry-sdk" }, + { name = "prometheus-client" }, +] +storage-timescale = [ + { name = "psycopg", extra = ["binary"] }, +] + [package.metadata] requires-dist = [ { name = "aiohttp", specifier = ">=3.9" }, + { name = "augur-signals", extras = ["bus-nats", "bus-redis", "storage-timescale", "observability"], marker = "extra == 'distributed'", editable = "src/augur_signals" }, { name = "duckdb", specifier = ">=1.0" }, + { name = "nats-py", marker = "extra == 'bus-nats'", specifier = ">=2.7" }, { name = "numpy", specifier = ">=2.0" }, + { name = "opentelemetry-api", marker = "extra == 'observability'", specifier = ">=1.27" }, + { name = "opentelemetry-exporter-otlp", marker = "extra == 'observability'", specifier = ">=1.27" }, + { name = "opentelemetry-sdk", marker = "extra == 'observability'", specifier = ">=1.27" }, + { name = "prometheus-client", marker = "extra == 'observability'", specifier = ">=0.20" }, + { name = "psycopg", extras = ["binary"], marker = "extra == 'storage-timescale'", specifier = ">=3.1" }, { name = "pydantic", specifier = ">=2.7" }, + { name = "redis", marker = "extra == 'bus-redis'", specifier = ">=5.0" }, { name = "scipy", specifier = ">=1.13" }, { name = "structlog", specifier = ">=24.0" }, { name = "uuid7", specifier = ">=0.1" }, ] +provides-extras = ["bus-nats", "bus-redis", "storage-timescale", "observability", "distributed"] [[package]] name = "certifi" @@ -304,6 +358,79 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445 }, ] +[[package]] +name = "charset-normalizer" +version = "3.4.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/e7/a1/67fe25fac3c7642725500a3f6cfe5821ad557c3abb11c9d20d12c7008d3e/charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5", size = 144271 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/eb/4fc8d0a7110eb5fc9cc161723a34a8a6c200ce3b4fbf681bc86feee22308/charset_normalizer-3.4.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:eca9705049ad3c7345d574e3510665cb2cf844c2f2dcfe675332677f081cbd46", size = 311328 }, + { url = "https://files.pythonhosted.org/packages/f8/e3/0fadc706008ac9d7b9b5be6dc767c05f9d3e5df51744ce4cc9605de7b9f4/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6178f72c5508bfc5fd446a5905e698c6212932f25bcdd4b47a757a50605a90e2", size = 208061 }, + { url = "https://files.pythonhosted.org/packages/42/f0/3dd1045c47f4a4604df85ec18ad093912ae1344ac706993aff91d38773a2/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1421b502d83040e6d7fb2fb18dff63957f720da3d77b2fbd3187ceb63755d7b", size = 229031 }, + { url = "https://files.pythonhosted.org/packages/dc/67/675a46eb016118a2fbde5a277a5d15f4f69d5f3f5f338e5ee2f8948fcf43/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:edac0f1ab77644605be2cbba52e6b7f630731fc42b34cb0f634be1a6eface56a", size = 225239 }, + { url = "https://files.pythonhosted.org/packages/4b/f8/d0118a2f5f23b02cd166fa385c60f9b0d4f9194f574e2b31cef350ad7223/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5649fd1c7bade02f320a462fdefd0b4bd3ce036065836d4f42e0de958038e116", size = 216589 }, + { url = 
"https://files.pythonhosted.org/packages/b1/f1/6d2b0b261b6c4ceef0fcb0d17a01cc5bc53586c2d4796fa04b5c540bc13d/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:203104ed3e428044fd943bc4bf45fa73c0730391f9621e37fe39ecf477b128cb", size = 202733 }, + { url = "https://files.pythonhosted.org/packages/6f/c0/7b1f943f7e87cc3db9626ba17807d042c38645f0a1d4415c7a14afb5591f/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:298930cec56029e05497a76988377cbd7457ba864beeea92ad7e844fe74cd1f1", size = 212652 }, + { url = "https://files.pythonhosted.org/packages/38/dd/5a9ab159fe45c6e72079398f277b7d2b523e7f716acc489726115a910097/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:708838739abf24b2ceb208d0e22403dd018faeef86ddac04319a62ae884c4f15", size = 211229 }, + { url = "https://files.pythonhosted.org/packages/d5/ff/531a1cad5ca855d1c1a8b69cb71abfd6d85c0291580146fda7c82857caa1/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:0f7eb884681e3938906ed0434f20c63046eacd0111c4ba96f27b76084cd679f5", size = 203552 }, + { url = "https://files.pythonhosted.org/packages/c1/4c/a5fb52d528a8ca41f7598cb619409ece30a169fbdf9cdce592e53b46c3a6/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4dc1e73c36828f982bfe79fadf5919923f8a6f4df2860804db9a98c48824ce8d", size = 230806 }, + { url = "https://files.pythonhosted.org/packages/59/7a/071feed8124111a32b316b33ae4de83d36923039ef8cf48120266844285b/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:aed52fea0513bac0ccde438c188c8a471c4e0f457c2dd20cdbf6ea7a450046c7", size = 212316 }, + { url = "https://files.pythonhosted.org/packages/fd/35/f7dba3994312d7ba508e041eaac39a36b120f32d4c8662b8814dab876431/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fea24543955a6a729c45a73fe90e08c743f0b3334bbf3201e6c4bc1b0c7fa464", size = 227274 }, + { url = 
"https://files.pythonhosted.org/packages/8a/2d/a572df5c9204ab7688ec1edc895a73ebded3b023bb07364710b05dd1c9be/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb6d88045545b26da47aa879dd4a89a71d1dce0f0e549b1abcb31dfe4a8eac49", size = 218468 }, + { url = "https://files.pythonhosted.org/packages/86/eb/890922a8b03a568ca2f336c36585a4713c55d4d67bf0f0c78924be6315ca/charset_normalizer-3.4.7-cp312-cp312-win32.whl", hash = "sha256:2257141f39fe65a3fdf38aeccae4b953e5f3b3324f4ff0daf9f15b8518666a2c", size = 148460 }, + { url = "https://files.pythonhosted.org/packages/35/d9/0e7dffa06c5ab081f75b1b786f0aefc88365825dfcd0ac544bdb7b2b6853/charset_normalizer-3.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:5ed6ab538499c8644b8a3e18debabcd7ce684f3fa91cf867521a7a0279cab2d6", size = 159330 }, + { url = "https://files.pythonhosted.org/packages/9e/5d/481bcc2a7c88ea6b0878c299547843b2521ccbc40980cb406267088bc701/charset_normalizer-3.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:56be790f86bfb2c98fb742ce566dfb4816e5a83384616ab59c49e0604d49c51d", size = 147828 }, + { url = "https://files.pythonhosted.org/packages/c1/3b/66777e39d3ae1ddc77ee606be4ec6d8cbd4c801f65e5a1b6f2b11b8346dd/charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063", size = 309627 }, + { url = "https://files.pythonhosted.org/packages/2e/4e/b7f84e617b4854ade48a1b7915c8ccfadeba444d2a18c291f696e37f0d3b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c", size = 207008 }, + { url = "https://files.pythonhosted.org/packages/c4/bb/ec73c0257c9e11b268f018f068f5d00aa0ef8c8b09f7753ebd5f2880e248/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66", 
size = 228303 }, + { url = "https://files.pythonhosted.org/packages/85/fb/32d1f5033484494619f701e719429c69b766bfc4dbc61aa9e9c8c166528b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18", size = 224282 }, + { url = "https://files.pythonhosted.org/packages/fa/07/330e3a0dda4c404d6da83b327270906e9654a24f6c546dc886a0eb0ffb23/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd", size = 215595 }, + { url = "https://files.pythonhosted.org/packages/e3/7c/fc890655786e423f02556e0216d4b8c6bcb6bdfa890160dc66bf52dee468/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215", size = 201986 }, + { url = "https://files.pythonhosted.org/packages/d8/97/bfb18b3db2aed3b90cf54dc292ad79fdd5ad65c4eae454099475cbeadd0d/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859", size = 211711 }, + { url = "https://files.pythonhosted.org/packages/6f/a5/a581c13798546a7fd557c82614a5c65a13df2157e9ad6373166d2a3e645d/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8", size = 210036 }, + { url = "https://files.pythonhosted.org/packages/8c/bf/b3ab5bcb478e4193d517644b0fb2bf5497fbceeaa7a1bc0f4d5b50953861/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5", size = 202998 }, + { url = "https://files.pythonhosted.org/packages/e7/4e/23efd79b65d314fa320ec6017b4b5834d5c12a58ba4610aa353af2e2f577/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", 
hash = "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832", size = 230056 }, + { url = "https://files.pythonhosted.org/packages/b9/9f/1e1941bc3f0e01df116e68dc37a55c4d249df5e6fa77f008841aef68264f/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6", size = 211537 }, + { url = "https://files.pythonhosted.org/packages/80/0f/088cbb3020d44428964a6c97fe1edfb1b9550396bf6d278330281e8b709c/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48", size = 226176 }, + { url = "https://files.pythonhosted.org/packages/6a/9f/130394f9bbe06f4f63e22641d32fc9b202b7e251c9aef4db044324dac493/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a", size = 217723 }, + { url = "https://files.pythonhosted.org/packages/73/55/c469897448a06e49f8fa03f6caae97074fde823f432a98f979cc42b90e69/charset_normalizer-3.4.7-cp313-cp313-win32.whl", hash = "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e", size = 148085 }, + { url = "https://files.pythonhosted.org/packages/5d/78/1b74c5bbb3f99b77a1715c91b3e0b5bdb6fe302d95ace4f5b1bec37b0167/charset_normalizer-3.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110", size = 158819 }, + { url = "https://files.pythonhosted.org/packages/68/86/46bd42279d323deb8687c4a5a811fd548cb7d1de10cf6535d099877a9a9f/charset_normalizer-3.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b", size = 147915 }, + { url = "https://files.pythonhosted.org/packages/97/c8/c67cb8c70e19ef1960b97b22ed2a1567711de46c4ddf19799923adc836c2/charset_normalizer-3.4.7-cp314-cp314-macosx_10_15_universal2.whl", hash = 
"sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0", size = 309234 }, + { url = "https://files.pythonhosted.org/packages/99/85/c091fdee33f20de70d6c8b522743b6f831a2f1cd3ff86de4c6a827c48a76/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a", size = 208042 }, + { url = "https://files.pythonhosted.org/packages/87/1c/ab2ce611b984d2fd5d86a5a8a19c1ae26acac6bad967da4967562c75114d/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b", size = 228706 }, + { url = "https://files.pythonhosted.org/packages/a8/29/2b1d2cb00bf085f59d29eb773ce58ec2d325430f8c216804a0a5cd83cbca/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41", size = 224727 }, + { url = "https://files.pythonhosted.org/packages/47/5c/032c2d5a07fe4d4855fea851209cca2b6f03ebeb6d4e3afdb3358386a684/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e", size = 215882 }, + { url = "https://files.pythonhosted.org/packages/2c/c2/356065d5a8b78ed04499cae5f339f091946a6a74f91e03476c33f0ab7100/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae", size = 200860 }, + { url = "https://files.pythonhosted.org/packages/0c/cd/a32a84217ced5039f53b29f460962abb2d4420def55afabe45b1c3c7483d/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18", size = 211564 }, + { url = 
"https://files.pythonhosted.org/packages/44/86/58e6f13ce26cc3b8f4a36b94a0f22ae2f00a72534520f4ae6857c4b81f89/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b", size = 211276 }, + { url = "https://files.pythonhosted.org/packages/8f/fe/d17c32dc72e17e155e06883efa84514ca375f8a528ba2546bee73fc4df81/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356", size = 201238 }, + { url = "https://files.pythonhosted.org/packages/6a/29/f33daa50b06525a237451cdb6c69da366c381a3dadcd833fa5676bc468b3/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab", size = 230189 }, + { url = "https://files.pythonhosted.org/packages/b6/6e/52c84015394a6a0bdcd435210a7e944c5f94ea1055f5cc5d56c5fe368e7b/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46", size = 211352 }, + { url = "https://files.pythonhosted.org/packages/8c/d7/4353be581b373033fb9198bf1da3cf8f09c1082561e8e922aa7b39bf9fe8/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44", size = 227024 }, + { url = "https://files.pythonhosted.org/packages/30/45/99d18aa925bd1740098ccd3060e238e21115fffbfdcb8f3ece837d0ace6c/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72", size = 217869 }, + { url = "https://files.pythonhosted.org/packages/5c/05/5ee478aa53f4bb7996482153d4bfe1b89e0f087f0ab6b294fcf92d595873/charset_normalizer-3.4.7-cp314-cp314-win32.whl", hash = "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10", size = 148541 }, + { url = 
"https://files.pythonhosted.org/packages/48/77/72dcb0921b2ce86420b2d79d454c7022bf5be40202a2a07906b9f2a35c97/charset_normalizer-3.4.7-cp314-cp314-win_amd64.whl", hash = "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f", size = 159634 }, + { url = "https://files.pythonhosted.org/packages/c6/a3/c2369911cd72f02386e4e340770f6e158c7980267da16af8f668217abaa0/charset_normalizer-3.4.7-cp314-cp314-win_arm64.whl", hash = "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246", size = 148384 }, + { url = "https://files.pythonhosted.org/packages/94/09/7e8a7f73d24dba1f0035fbbf014d2c36828fc1bf9c88f84093e57d315935/charset_normalizer-3.4.7-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24", size = 330133 }, + { url = "https://files.pythonhosted.org/packages/8d/da/96975ddb11f8e977f706f45cddd8540fd8242f71ecdb5d18a80723dcf62c/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79", size = 216257 }, + { url = "https://files.pythonhosted.org/packages/e5/e8/1d63bf8ef2d388e95c64b2098f45f84758f6d102a087552da1485912637b/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960", size = 234851 }, + { url = "https://files.pythonhosted.org/packages/9b/40/e5ff04233e70da2681fa43969ad6f66ca5611d7e669be0246c4c7aaf6dc8/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4", size = 233393 }, + { url = 
"https://files.pythonhosted.org/packages/be/c1/06c6c49d5a5450f76899992f1ee40b41d076aee9279b49cf9974d2f313d5/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e", size = 223251 }, + { url = "https://files.pythonhosted.org/packages/2b/9f/f2ff16fb050946169e3e1f82134d107e5d4ae72647ec8a1b1446c148480f/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1", size = 206609 }, + { url = "https://files.pythonhosted.org/packages/69/d5/a527c0cd8d64d2eab7459784fb4169a0ac76e5a6fc5237337982fd61347e/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44", size = 220014 }, + { url = "https://files.pythonhosted.org/packages/7e/80/8a7b8104a3e203074dc9aa2c613d4b726c0e136bad1cc734594b02867972/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e", size = 218979 }, + { url = "https://files.pythonhosted.org/packages/02/9a/b759b503d507f375b2b5c153e4d2ee0a75aa215b7f2489cf314f4541f2c0/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3", size = 209238 }, + { url = "https://files.pythonhosted.org/packages/c2/4e/0f3f5d47b86bdb79256e7290b26ac847a2832d9a4033f7eb2cd4bcf4bb5b/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0", size = 236110 }, + { url = "https://files.pythonhosted.org/packages/96/23/bce28734eb3ed2c91dcf93abeb8a5cf393a7b2749725030bb630e554fdd8/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = 
"sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e", size = 219824 }, + { url = "https://files.pythonhosted.org/packages/2c/6f/6e897c6984cc4d41af319b077f2f600fc8214eb2fe2d6bcb79141b882400/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb", size = 233103 }, + { url = "https://files.pythonhosted.org/packages/76/22/ef7bd0fe480a0ae9b656189ec00744b60933f68b4f42a7bb06589f6f576a/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe", size = 225194 }, + { url = "https://files.pythonhosted.org/packages/c5/a7/0e0ab3e0b5bc1219bd80a6a0d4d72ca74d9250cb2382b7c699c147e06017/charset_normalizer-3.4.7-cp314-cp314t-win32.whl", hash = "sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0", size = 159827 }, + { url = "https://files.pythonhosted.org/packages/7a/1d/29d32e0fb40864b1f878c7f5a0b343ae676c6e2b271a2d55cc3a152391da/charset_normalizer-3.4.7-cp314-cp314t-win_amd64.whl", hash = "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c", size = 174168 }, + { url = "https://files.pythonhosted.org/packages/de/32/d92444ad05c7a6e41fb2036749777c163baf7a0301a040cb672d6b2b1ae9/charset_normalizer-3.4.7-cp314-cp314t-win_arm64.whl", hash = "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d", size = 153018 }, + { url = "https://files.pythonhosted.org/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958 }, +] + [[package]] name = "click" version = "8.3.2" @@ -465,6 +592,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7a/e3/9d34173ec068631faea3ea6e73050700729363e7e33306a9a3218e5cdc61/duckdb-1.5.2-cp314-cp314-win_arm64.whl", hash = 
"sha256:c9f3e0b71b8a50fccfb42794899285d9d318ce2503782b9dd54868e5ecd0ad31", size = 14402513 }, ] +[[package]] +name = "fakeredis" +version = "2.35.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "redis" }, + { name = "sortedcontainers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/50/b748233c02fa77e5105238190cc9bb58b852eb1c8b1d0763230d3a5b745a/fakeredis-2.35.1.tar.gz", hash = "sha256:5bae5eba7b9d93cb968944ac40936373cf2397ff71667d4b595df65c3d2e413f", size = 189118 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/27/b8b057a23f7777177e92d3a602fd866751b6b45014964548997e92e048fd/fakeredis-2.35.1-py3-none-any.whl", hash = "sha256:67d97e11f562b7870e11e5c30cf182270bfb2dd37f6707dba47cc6d91628d1b9", size = 129678 }, +] + [[package]] name = "filelock" version = "3.28.0" @@ -563,6 +703,59 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409 }, ] +[[package]] +name = "googleapis-common-protos" +version = "1.74.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/20/18/a746c8344152d368a5aac738d4c857012f2c5d1fd2eac7e17b647a7861bd/googleapis_common_protos-1.74.0.tar.gz", hash = "sha256:57971e4eeeba6aad1163c1f0fc88543f965bb49129b8bb55b2b7b26ecab084f1", size = 151254 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/b0/be5d3329badb9230b765de6eea66b73abd5944bdeb5afb3562ddcd80ae84/googleapis_common_protos-1.74.0-py3-none-any.whl", hash = "sha256:702216f78610bb510e3f12ac3cafd281b7ac45cc5d86e90ad87e4d301a3426b5", size = 300743 }, +] + +[[package]] +name = "grpcio" +version = "1.80.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] 
+sdist = { url = "https://files.pythonhosted.org/packages/b7/48/af6173dbca4454f4637a4678b67f52ca7e0c1ed7d5894d89d434fecede05/grpcio-1.80.0.tar.gz", hash = "sha256:29aca15edd0688c22ba01d7cc01cb000d72b2033f4a3c72a81a19b56fd143257", size = 12978905 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/e8/a2b749265eb3415abc94f2e619bbd9e9707bebdda787e61c593004ec927a/grpcio-1.80.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:c624cc9f1008361014378c9d776de7182b11fe8b2e5a81bc69f23a295f2a1ad0", size = 6015616 }, + { url = "https://files.pythonhosted.org/packages/3e/97/b1282161a15d699d1e90c360df18d19165a045ce1c343c7f313f5e8a0b77/grpcio-1.80.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:f49eddcac43c3bf350c0385366a58f36bed8cc2c0ec35ef7b74b49e56552c0c2", size = 12014204 }, + { url = "https://files.pythonhosted.org/packages/6e/5e/d319c6e997b50c155ac5a8cb12f5173d5b42677510e886d250d50264949d/grpcio-1.80.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d334591df610ab94714048e0d5b4f3dd5ad1bee74dfec11eee344220077a79de", size = 6563866 }, + { url = "https://files.pythonhosted.org/packages/ae/f6/fdd975a2cb4d78eb67769a7b3b3830970bfa2e919f1decf724ae4445f42c/grpcio-1.80.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0cb517eb1d0d0aaf1d87af7cc5b801d686557c1d88b2619f5e31fab3c2315921", size = 7273060 }, + { url = "https://files.pythonhosted.org/packages/db/f0/a3deb5feba60d9538a962913e37bd2e69a195f1c3376a3dd44fe0427e996/grpcio-1.80.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e78c4ac0d97dc2e569b2f4bcbbb447491167cb358d1a389fc4af71ab6f70411", size = 6782121 }, + { url = "https://files.pythonhosted.org/packages/ca/84/36c6dcfddc093e108141f757c407902a05085e0c328007cb090d56646cdf/grpcio-1.80.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2ed770b4c06984f3b47eb0517b1c69ad0b84ef3f40128f51448433be904634cd", size = 7383811 }, + { url = 
"https://files.pythonhosted.org/packages/7c/ef/f3a77e3dc5b471a0ec86c564c98d6adfa3510d38f8ee99010410858d591e/grpcio-1.80.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:256507e2f524092f1473071a05e65a5b10d84b82e3ff24c5b571513cfaa61e2f", size = 8393860 }, + { url = "https://files.pythonhosted.org/packages/9b/8d/9d4d27ed7f33d109c50d6b5ce578a9914aa68edab75d65869a17e630a8d1/grpcio-1.80.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a6284a5d907c37db53350645567c522be314bac859a64a7a5ca63b77bb7958f", size = 7830132 }, + { url = "https://files.pythonhosted.org/packages/14/e4/9990b41c6d7a44e1e9dee8ac11d7a9802ba1378b40d77468a7761d1ad288/grpcio-1.80.0-cp312-cp312-win32.whl", hash = "sha256:c71309cfce2f22be26aa4a847357c502db6c621f1a49825ae98aa0907595b193", size = 4140904 }, + { url = "https://files.pythonhosted.org/packages/2f/2c/296f6138caca1f4b92a31ace4ae1b87dab692fc16a7a3417af3bb3c805bf/grpcio-1.80.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe648599c0e37594c4809d81a9e77bd138cc82eb8baa71b6a86af65426723ff", size = 4880944 }, + { url = "https://files.pythonhosted.org/packages/2f/3a/7c3c25789e3f069e581dc342e03613c5b1cb012c4e8c7d9d5cf960a75856/grpcio-1.80.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:e9e408fc016dffd20661f0126c53d8a31c2821b5c13c5d67a0f5ed5de93319ad", size = 6017243 }, + { url = "https://files.pythonhosted.org/packages/04/19/21a9806eb8240e174fd1ab0cd5b9aa948bb0e05c2f2f55f9d5d7405e6d08/grpcio-1.80.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:92d787312e613754d4d8b9ca6d3297e69994a7912a32fa38c4c4e01c272974b0", size = 12010840 }, + { url = "https://files.pythonhosted.org/packages/18/3a/23347d35f76f639e807fb7a36fad3068aed100996849a33809591f26eca6/grpcio-1.80.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac393b58aa16991a2f1144ec578084d544038c12242da3a215966b512904d0f", size = 6567644 }, + { url = 
"https://files.pythonhosted.org/packages/ff/40/96e07ecb604a6a67ae6ab151e3e35b132875d98bc68ec65f3e5ab3e781d7/grpcio-1.80.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:68e5851ac4b9afe07e7f84483803ad167852570d65326b34d54ca560bfa53fb6", size = 7277830 }, + { url = "https://files.pythonhosted.org/packages/9b/e2/da1506ecea1f34a5e365964644b35edef53803052b763ca214ba3870c856/grpcio-1.80.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:873ff5d17d68992ef6605330127425d2fc4e77e612fa3c3e0ed4e668685e3140", size = 6783216 }, + { url = "https://files.pythonhosted.org/packages/44/83/3b20ff58d0c3b7f6caaa3af9a4174d4023701df40a3f39f7f1c8e7c48f9d/grpcio-1.80.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2bea16af2750fd0a899bf1abd9022244418b55d1f37da2202249ba4ba673838d", size = 7385866 }, + { url = "https://files.pythonhosted.org/packages/47/45/55c507599c5520416de5eefecc927d6a0d7af55e91cfffb2e410607e5744/grpcio-1.80.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba0db34f7e1d803a878284cd70e4c63cb6ae2510ba51937bf8f45ba997cefcf7", size = 8391602 }, + { url = "https://files.pythonhosted.org/packages/10/bb/dd06f4c24c01db9cf11341b547d0a016b2c90ed7dbbb086a5710df7dd1d7/grpcio-1.80.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8eb613f02d34721f1acf3626dfdb3545bd3c8505b0e52bf8b5710a28d02e8aa7", size = 7826752 }, + { url = "https://files.pythonhosted.org/packages/f9/1e/9d67992ba23371fd63d4527096eb8c6b76d74d52b500df992a3343fd7251/grpcio-1.80.0-cp313-cp313-win32.whl", hash = "sha256:93b6f823810720912fd131f561f91f5fed0fda372b6b7028a2681b8194d5d294", size = 4142310 }, + { url = "https://files.pythonhosted.org/packages/cf/e6/283326a27da9e2c3038bc93eeea36fb118ce0b2d03922a9cda6688f53c5b/grpcio-1.80.0-cp313-cp313-win_amd64.whl", hash = "sha256:e172cf795a3ba5246d3529e4d34c53db70e888fa582a8ffebd2e6e48bc0cba50", size = 4882833 }, + { url = 
"https://files.pythonhosted.org/packages/c5/6d/e65307ce20f5a09244ba9e9d8476e99fb039de7154f37fb85f26978b59c3/grpcio-1.80.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:3d4147a97c8344d065d01bbf8b6acec2cf86fb0400d40696c8bdad34a64ffc0e", size = 6017376 }, + { url = "https://files.pythonhosted.org/packages/69/10/9cef5d9650c72625a699c549940f0abb3c4bfdb5ed45a5ce431f92f31806/grpcio-1.80.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8e11f167935b3eb089ac9038e1a063e6d7dbe995c0bb4a661e614583352e76f", size = 12018133 }, + { url = "https://files.pythonhosted.org/packages/04/82/983aabaad82ba26113caceeb9091706a0696b25da004fe3defb5b346e15b/grpcio-1.80.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f14b618fc30de822681ee986cfdcc2d9327229dc4c98aed16896761cacd468b9", size = 6574748 }, + { url = "https://files.pythonhosted.org/packages/07/d7/031666ef155aa0bf399ed7e19439656c38bbd143779ae0861b038ce82abd/grpcio-1.80.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4ed39fbdcf9b87370f6e8df4e39ca7b38b3e5e9d1b0013c7b6be9639d6578d14", size = 7277711 }, + { url = "https://files.pythonhosted.org/packages/e8/43/f437a78f7f4f1d311804189e8f11fb311a01049b2e08557c1068d470cb2e/grpcio-1.80.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dcc70e9f0ba987526e8e8603a610fb4f460e42899e74e7a518bf3c68fe1bf05", size = 6785372 }, + { url = "https://files.pythonhosted.org/packages/93/3d/f6558e9c6296cb4227faa5c43c54a34c68d32654b829f53288313d16a86e/grpcio-1.80.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:448c884b668b868562b1bda833c5fce6272d26e1926ec46747cda05741d302c1", size = 7395268 }, + { url = "https://files.pythonhosted.org/packages/06/21/0fdd77e84720b08843c371a2efa6f2e19dbebf56adc72df73d891f5506f0/grpcio-1.80.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a1dc80fe55685b4a543555e6eef975303b36c8db1023b1599b094b92aa77965f", size = 8392000 }, + { url = 
"https://files.pythonhosted.org/packages/f5/68/67f4947ed55d2e69f2cc199ab9fd85e0a0034d813bbeef84df6d2ba4d4b7/grpcio-1.80.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:31b9ac4ad1aa28ffee5503821fafd09e4da0a261ce1c1281c6c8da0423c83b6e", size = 7828477 }, + { url = "https://files.pythonhosted.org/packages/44/b6/8d4096691b2e385e8271911a0de4f35f0a6c7d05aff7098e296c3de86939/grpcio-1.80.0-cp314-cp314-win32.whl", hash = "sha256:367ce30ba67d05e0592470428f0ec1c31714cab9ef19b8f2e37be1f4c7d32fae", size = 4218563 }, + { url = "https://files.pythonhosted.org/packages/e5/8c/bbe6baf2557262834f2070cf668515fa308b2d38a4bbf771f8f7872a7036/grpcio-1.80.0-cp314-cp314-win_amd64.whl", hash = "sha256:3b01e1f5464c583d2f567b2e46ff0d516ef979978f72091fd81f5ab7fa6e2e7f", size = 5019457 }, +] + [[package]] name = "h11" version = "0.16.0" @@ -630,6 +823,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008 }, ] +[[package]] +name = "importlib-metadata" +version = "8.7.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "zipp" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/49/3b30cad09e7771a4982d9975a8cbf64f00d4a1ececb53297f1d9a7be1b10/importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb", size = 57107 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865 }, +] + [[package]] name = "iniconfig" version = "2.3.0" @@ -997,6 +1202,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963 }, ] +[[package]] +name = "nats-py" +version = "2.14.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/f8/b956c4621ba88748ed707c52e69f95b7a50c8914e750edca59a5bef84a76/nats_py-2.14.0.tar.gz", hash = "sha256:4ed02cb8e3b55c68074a063aa2687087115d805d1513297da90cb2068fb07bed", size = 120751 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/39/0e87753df1072254bac190b33ed34b264f28f6aa9bea0f01b7e818071756/nats_py-2.14.0-py3-none-any.whl", hash = "sha256:4116f5d2233ce16e63c3d5538fa40a5e207f75fcf42a741773929ddf1e29d19d", size = 82259 }, +] + [[package]] name = "nodeenv" version = "1.10.0" @@ -1080,6 +1294,119 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/4f/4a617ee93d8208d2bcf26b2d8b9402ceaed03e3853c754940e2290fed063/ollama-0.6.1-py3-none-any.whl", hash = "sha256:fc4c984b345735c5486faeee67d8a265214a31cbb828167782dc642ce0a2bf8c", size = 14354 }, ] +[[package]] +name = "opentelemetry-api" +version = "1.41.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/47/8e/3778a7e87801d994869a9396b9fc2a289e5f9be91ff54a27d41eace494b0/opentelemetry_api-1.41.0.tar.gz", hash = "sha256:9421d911326ec12dee8bc933f7839090cad7a3f13fcfb0f9e82f8174dc003c09", size = 71416 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/ee/99ab786653b3bda9c37ade7e24a7b607a1b1f696063172768417539d876d/opentelemetry_api-1.41.0-py3-none-any.whl", hash = "sha256:0e77c806e6a89c9e4f8d372034622f3e1418a11bdbe1c80a50b3d3397ad0fa4f", size = 69007 }, +] + +[[package]] +name = "opentelemetry-exporter-otlp" +version = 
"1.41.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-exporter-otlp-proto-grpc" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/65/b7/845565a2ab5d22c1486bc7729a06b05cd0964c61539d766e1f107c9eea0c/opentelemetry_exporter_otlp-1.41.0.tar.gz", hash = "sha256:97ff847321f8d4c919032a67d20d3137fb7b34eac0c47f13f71112858927fc5b", size = 6152 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f2/f1076fff152858773f22cda146713f9ae3661795af6bacd411a76f2151ac/opentelemetry_exporter_otlp-1.41.0-py3-none-any.whl", hash = "sha256:443b6a45c990ae4c55e147f97049a86c5f5b704f3d78b48b44a073a886ec4d6e", size = 7022 }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-common" +version = "1.41.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-proto" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8c/28/e8eca94966fe9a1465f6094dc5ddc5398473682180279c94020bc23b4906/opentelemetry_exporter_otlp_proto_common-1.41.0.tar.gz", hash = "sha256:966bbce537e9edb166154779a7c4f8ab6b8654a03a28024aeaf1a3eacb07d6ee", size = 20411 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/c4/78b9bf2d9c1d5e494f44932988d9d91c51a66b9a7b48adf99b62f7c65318/opentelemetry_exporter_otlp_proto_common-1.41.0-py3-none-any.whl", hash = "sha256:7a99177bf61f85f4f9ed2072f54d676364719c066f6d11f515acc6c745c7acf0", size = 18366 }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-grpc" +version = "1.41.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "grpcio" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/42/46/d75a3f8c91915f2e58f61d0a2e4ada63891e7c7a37a20ff7949ba184a6b2/opentelemetry_exporter_otlp_proto_grpc-1.41.0.tar.gz", hash = "sha256:f704201251c6f65772b11bddea1c948000554459101bdbb0116e0a01b70592f6", size = 25754 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/f6/b09e2e0c9f0b5750cebc6eaf31527b910821453cef40a5a0fe93550422b2/opentelemetry_exporter_otlp_proto_grpc-1.41.0-py3-none-any.whl", hash = "sha256:3a1a86bd24806ccf136ec9737dbfa4c09b069f9130ff66b0acb014f9c5255fd1", size = 20299 }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-http" +version = "1.41.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/63/d9f43cd75f3fabb7e01148c89cfa9491fc18f6580a6764c554ff7c953c46/opentelemetry_exporter_otlp_proto_http-1.41.0.tar.gz", hash = "sha256:dcd6e0686f56277db4eecbadd5262124e8f2cc739cadbc3fae3d08a12c976cf5", size = 24139 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/b5/a214cd907eedc17699d1c2d602288ae17cb775526df04db3a3b3585329d2/opentelemetry_exporter_otlp_proto_http-1.41.0-py3-none-any.whl", hash = "sha256:a9c4ee69cce9c3f4d7ee736ad1b44e3c9654002c0816900abbafd9f3cf289751", size = 22673 }, +] + +[[package]] +name = "opentelemetry-proto" +version = "1.41.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e0/d9/08e3dc6156878713e8c811682bc76151f5fe1a3cb7f3abda3966fd56e71e/opentelemetry_proto-1.41.0.tar.gz", hash = "sha256:95d2e576f9fb1800473a3e4cfcca054295d06bdb869fda4dc9f4f779dc68f7b6", size = 45669 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/49/8c/65ef7a9383a363864772022e822b5d5c6988e6f9dabeebb9278f5b86ebc3/opentelemetry_proto-1.41.0-py3-none-any.whl", hash = "sha256:b970ab537309f9eed296be482c3e7cca05d8aca8165346e929f658dbe153b247", size = 72074 }, +] + +[[package]] +name = "opentelemetry-sdk" +version = "1.41.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f8/0e/a586df1186f9f56b5a0879d52653effc40357b8e88fc50fe300038c3c08b/opentelemetry_sdk-1.41.0.tar.gz", hash = "sha256:7bddf3961131b318fc2d158947971a8e37e38b1cd23470cfb72b624e7cc108bd", size = 230181 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/13/a7825118208cb32e6a4edcd0a99f925cbef81e77b3b0aedfd9125583c543/opentelemetry_sdk-1.41.0-py3-none-any.whl", hash = "sha256:a596f5687964a3e0d7f8edfdcf5b79cbca9c93c7025ebf5fb00f398a9443b0bd", size = 180214 }, +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.62b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/b0/c14f723e86c049b7bf8ff431160d982519b97a7be2857ed2247377397a24/opentelemetry_semantic_conventions-0.62b0.tar.gz", hash = "sha256:cbfb3c8fc259575cf68a6e1b94083cc35adc4a6b06e8cf431efa0d62606c0097", size = 145753 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/6c/5e86fa1759a525ef91c2d8b79d668574760ff3f900d114297765eb8786cb/opentelemetry_semantic_conventions-0.62b0-py3-none-any.whl", hash = "sha256:0ddac1ce59eaf1a827d9987ab60d9315fb27aea23304144242d1fcad9e16b489", size = 231619 }, +] + [[package]] name = "packaging" version = "26.1" @@ -1132,6 +1459,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437 }, ] +[[package]] +name = "prometheus-client" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/fb/d9aa83ffe43ce1f19e557c0971d04b90561b0cfd50762aafb01968285553/prometheus_client-0.25.0.tar.gz", hash = "sha256:5e373b75c31afb3c86f1a52fa1ad470c9aace18082d39ec0d2f918d11cc9ba28", size = 86035 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/9b/d4b1e644385499c8346fa9b622a3f030dce14cd6ef8a1871c221a17a67e7/prometheus_client-0.25.0-py3-none-any.whl", hash = "sha256:d5aec89e349a6ec230805d0df882f3807f74fd6c1a2fa86864e3c2279059fed1", size = 64154 }, +] + [[package]] name = "propcache" version = "0.4.1" @@ -1216,6 +1552,79 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305 }, ] +[[package]] +name = "protobuf" +version = "6.33.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/70/e908e9c5e52ef7c3a6c7902c9dfbb34c7e29c25d2f81ade3856445fd5c94/protobuf-6.33.6.tar.gz", hash = "sha256:a6768d25248312c297558af96a9f9c929e8c4cee0659cb07e780731095f38135", size = 444531 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/9f/2f509339e89cfa6f6a4c4ff50438db9ca488dec341f7e454adad60150b00/protobuf-6.33.6-cp310-abi3-win32.whl", hash = "sha256:7d29d9b65f8afef196f8334e80d6bc1d5d4adedb449971fefd3723824e6e77d3", size = 425739 }, + { url = 
"https://files.pythonhosted.org/packages/76/5d/683efcd4798e0030c1bab27374fd13a89f7c2515fb1f3123efdfaa5eab57/protobuf-6.33.6-cp310-abi3-win_amd64.whl", hash = "sha256:0cd27b587afca21b7cfa59a74dcbd48a50f0a6400cfb59391340ad729d91d326", size = 437089 }, + { url = "https://files.pythonhosted.org/packages/5c/01/a3c3ed5cd186f39e7880f8303cc51385a198a81469d53d0fdecf1f64d929/protobuf-6.33.6-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:9720e6961b251bde64edfdab7d500725a2af5280f3f4c87e57c0208376aa8c3a", size = 427737 }, + { url = "https://files.pythonhosted.org/packages/ee/90/b3c01fdec7d2f627b3a6884243ba328c1217ed2d978def5c12dc50d328a3/protobuf-6.33.6-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:e2afbae9b8e1825e3529f88d514754e094278bb95eadc0e199751cdd9a2e82a2", size = 324610 }, + { url = "https://files.pythonhosted.org/packages/9b/ca/25afc144934014700c52e05103c2421997482d561f3101ff352e1292fb81/protobuf-6.33.6-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:c96c37eec15086b79762ed265d59ab204dabc53056e3443e702d2681f4b39ce3", size = 339381 }, + { url = "https://files.pythonhosted.org/packages/16/92/d1e32e3e0d894fe00b15ce28ad4944ab692713f2e7f0a99787405e43533a/protobuf-6.33.6-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:e9db7e292e0ab79dd108d7f1a94fe31601ce1ee3f7b79e0692043423020b0593", size = 323436 }, + { url = "https://files.pythonhosted.org/packages/c4/72/02445137af02769918a93807b2b7890047c32bfb9f90371cbc12688819eb/protobuf-6.33.6-py3-none-any.whl", hash = "sha256:77179e006c476e69bf8e8ce866640091ec42e1beb80b213c3900006ecfba6901", size = 170656 }, +] + +[[package]] +name = "psycopg" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d3/b6/379d0a960f8f435ec78720462fd94c4863e7a31237cf81bf76d0af5883bf/psycopg-3.3.3.tar.gz", hash = 
"sha256:5e9a47458b3c1583326513b2556a2a9473a1001a56c9efe9e587245b43148dd9", size = 165624 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/5b/181e2e3becb7672b502f0ed7f16ed7352aca7c109cfb94cf3878a9186db9/psycopg-3.3.3-py3-none-any.whl", hash = "sha256:f96525a72bcfade6584ab17e89de415ff360748c766f0106959144dcbb38c698", size = 212768 }, +] + +[package.optional-dependencies] +binary = [ + { name = "psycopg-binary", marker = "implementation_name != 'pypy'" }, +] + +[[package]] +name = "psycopg-binary" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/15/021be5c0cbc5b7c1ab46e91cc3434eb42569f79a0592e67b8d25e66d844d/psycopg_binary-3.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6698dbab5bcef8fdb570fc9d35fd9ac52041771bfcfe6fd0fc5f5c4e36f1e99d", size = 4591170 }, + { url = "https://files.pythonhosted.org/packages/f1/54/a60211c346c9a2f8c6b272b5f2bbe21f6e11800ce7f61e99ba75cf8b63e1/psycopg_binary-3.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:329ff393441e75f10b673ae99ab45276887993d49e65f141da20d915c05aafd8", size = 4670009 }, + { url = "https://files.pythonhosted.org/packages/c1/53/ac7c18671347c553362aadbf65f92786eef9540676ca24114cc02f5be405/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:eb072949b8ebf4082ae24289a2b0fd724da9adc8f22743409d6fd718ddb379df", size = 5469735 }, + { url = "https://files.pythonhosted.org/packages/7f/c3/4f4e040902b82a344eff1c736cde2f2720f127fe939c7e7565706f96dd44/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:263a24f39f26e19ed7fc982d7859a36f17841b05bebad3eb47bb9cd2dd785351", size = 5152919 }, + { url = "https://files.pythonhosted.org/packages/0c/e7/d929679c6a5c212bcf738806c7c89f5b3d0919f2e1685a0e08d6ff877945/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:5152d50798c2fa5bd9b68ec68eb68a1b71b95126c1d70adaa1a08cd5eefdc23d", size = 6738785 }, + { url = "https://files.pythonhosted.org/packages/69/b0/09703aeb69a9443d232d7b5318d58742e8ca51ff79f90ffe6b88f1db45e7/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9d6a1e56dd267848edb824dbeb08cf5bac649e02ee0b03ba883ba3f4f0bd54f2", size = 4979008 }, + { url = "https://files.pythonhosted.org/packages/cc/a6/e662558b793c6e13a7473b970fee327d635270e41eded3090ef14045a6a5/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73eaaf4bb04709f545606c1db2f65f4000e8a04cdbf3e00d165a23004692093e", size = 4508255 }, + { url = "https://files.pythonhosted.org/packages/5f/7f/0f8b2e1d5e0093921b6f324a948a5c740c1447fbb45e97acaf50241d0f39/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:162e5675efb4704192411eaf8e00d07f7960b679cd3306e7efb120bb8d9456cc", size = 4189166 }, + { url = "https://files.pythonhosted.org/packages/92/ec/ce2e91c33bc8d10b00c87e2f6b0fb570641a6a60042d6a9ae35658a3a797/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:fab6b5e37715885c69f5d091f6ff229be71e235f272ebaa35158d5a46fd548a0", size = 3924544 }, + { url = "https://files.pythonhosted.org/packages/c5/2f/7718141485f73a924205af60041c392938852aa447a94c8cbd222ff389a1/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a4aab31bd6d1057f287c96c0effca3a25584eb9cc702f282ecb96ded7814e830", size = 4235297 }, + { url = "https://files.pythonhosted.org/packages/57/f9/1add717e2643a003bbde31b1b220172e64fbc0cb09f06429820c9173f7fc/psycopg_binary-3.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:59aa31fe11a0e1d1bcc2ce37ed35fe2ac84cd65bb9036d049b1a1c39064d0f14", size = 3547659 }, + { url = "https://files.pythonhosted.org/packages/03/0a/cac9fdf1df16a269ba0e5f0f06cac61f826c94cadb39df028cdfe19d3a33/psycopg_binary-3.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:05f32239aec25c5fb15f7948cffdc2dc0dac098e48b80a140e4ba32b572a2e7d", size = 4590414 }, + { url = "https://files.pythonhosted.org/packages/9c/c0/d8f8508fbf440edbc0099b1abff33003cd80c9e66eb3a1e78834e3fb4fb9/psycopg_binary-3.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7c84f9d214f2d1de2fafebc17fa68ac3f6561a59e291553dfc45ad299f4898c1", size = 4669021 }, + { url = "https://files.pythonhosted.org/packages/04/05/097016b77e343b4568feddf12c72171fc513acef9a4214d21b9478569068/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:e77957d2ba17cada11be09a5066d93026cdb61ada7c8893101d7fe1c6e1f3925", size = 5467453 }, + { url = "https://files.pythonhosted.org/packages/91/23/73244e5feb55b5ca109cede6e97f32ef45189f0fdac4c80d75c99862729d/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:42961609ac07c232a427da7c87a468d3c82fee6762c220f38e37cfdacb2b178d", size = 5151135 }, + { url = "https://files.pythonhosted.org/packages/11/49/5309473b9803b207682095201d8708bbc7842ddf3f192488a69204e36455/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae07a3114313dd91fce686cab2f4c44af094398519af0e0f854bc707e1aeedf1", size = 6737315 }, + { url = "https://files.pythonhosted.org/packages/d4/5d/03abe74ef34d460b33c4d9662bf6ec1dd38888324323c1a1752133c10377/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d257c58d7b36a621dcce1d01476ad8b60f12d80eb1406aee4cf796f88b2ae482", size = 4979783 }, + { url = "https://files.pythonhosted.org/packages/f0/6c/3fbf8e604e15f2f3752900434046c00c90bb8764305a1b81112bff30ba24/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:07c7211f9327d522c9c47560cae00a4ecf6687f4e02d779d035dd3177b41cb12", size = 4509023 }, + { url = 
"https://files.pythonhosted.org/packages/9c/6b/1a06b43b7c7af756c80b67eac8bfaa51d77e68635a8a8d246e4f0bb7604a/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8e7e9eca9b363dbedeceeadd8be97149d2499081f3c52d141d7cd1f395a91f83", size = 4185874 }, + { url = "https://files.pythonhosted.org/packages/2b/d3/bf49e3dcaadba510170c8d111e5e69e5ae3f981c1554c5bb71c75ce354bb/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:cb85b1d5702877c16f28d7b92ba030c1f49ebcc9b87d03d8c10bf45a2f1c7508", size = 3925668 }, + { url = "https://files.pythonhosted.org/packages/f8/92/0aac830ed6a944fe334404e1687a074e4215630725753f0e3e9a9a595b62/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4d4606c84d04b80f9138d72f1e28c6c02dc5ae0c7b8f3f8aaf89c681ce1cd1b1", size = 4234973 }, + { url = "https://files.pythonhosted.org/packages/2e/96/102244653ee5a143ece5afe33f00f52fe64e389dfce8dbc87580c6d70d3d/psycopg_binary-3.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:74eae563166ebf74e8d950ff359be037b85723d99ca83f57d9b244a871d6c13b", size = 3551342 }, + { url = "https://files.pythonhosted.org/packages/a2/71/7a57e5b12275fe7e7d84d54113f0226080423a869118419c9106c083a21c/psycopg_binary-3.3.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:497852c5eaf1f0c2d88ab74a64a8097c099deac0c71de1cbcf18659a8a04a4b2", size = 4607368 }, + { url = "https://files.pythonhosted.org/packages/c7/04/cb834f120f2b2c10d4003515ef9ca9d688115b9431735e3936ae48549af8/psycopg_binary-3.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:258d1ea53464d29768bf25930f43291949f4c7becc706f6e220c515a63a24edd", size = 4687047 }, + { url = "https://files.pythonhosted.org/packages/40/e9/47a69692d3da9704468041aa5ed3ad6fc7f6bb1a5ae788d261a26bbca6c7/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:111c59897a452196116db12e7f608da472fbff000693a21040e35fc978b23430", size = 5487096 }, + { url = 
"https://files.pythonhosted.org/packages/0b/b6/0e0dd6a2f802864a4ae3dbadf4ec620f05e3904c7842b326aafc43e5f464/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:17bb6600e2455993946385249a3c3d0af52cd70c1c1cdbf712e9d696d0b0bf1b", size = 5168720 }, + { url = "https://files.pythonhosted.org/packages/6f/0d/977af38ac19a6b55d22dff508bd743fd7c1901e1b73657e7937c7cccb0a3/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:642050398583d61c9856210568eb09a8e4f2fe8224bf3be21b67a370e677eead", size = 6762076 }, + { url = "https://files.pythonhosted.org/packages/34/40/912a39d48322cf86895c0eaf2d5b95cb899402443faefd4b09abbba6b6e1/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:533efe6dc3a7cba5e2a84e38970786bb966306863e45f3db152007e9f48638a6", size = 4997623 }, + { url = "https://files.pythonhosted.org/packages/98/0c/c14d0e259c65dc7be854d926993f151077887391d5a081118907a9d89603/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5958dbf28b77ce2033482f6cb9ef04d43f5d8f4b7636e6963d5626f000efb23e", size = 4532096 }, + { url = "https://files.pythonhosted.org/packages/39/21/8b7c50a194cfca6ea0fd4d1f276158307785775426e90700ab2eba5cd623/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a6af77b6626ce92b5817bf294b4d45ec1a6161dba80fc2d82cdffdd6814fd023", size = 4208884 }, + { url = "https://files.pythonhosted.org/packages/c7/2c/a4981bf42cf30ebba0424971d7ce70a222ae9b82594c42fc3f2105d7b525/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:47f06fcbe8542b4d96d7392c476a74ada521c5aebdb41c3c0155f6595fc14c8d", size = 3944542 }, + { url = "https://files.pythonhosted.org/packages/60/e9/b7c29b56aa0b85a4e0c4d89db691c1ceef08f46a356369144430c155a2f5/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e7800e6c6b5dc4b0ca7cc7370f770f53ac83886b76afda0848065a674231e856", size = 
4254339 }, + { url = "https://files.pythonhosted.org/packages/98/5a/291d89f44d3820fffb7a04ebc8f3ef5dda4f542f44a5daea0c55a84abf45/psycopg_binary-3.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:165f22ab5a9513a3d7425ffb7fcc7955ed8ccaeef6d37e369d6cc1dff1582383", size = 3652796 }, +] + [[package]] name = "pyarrow" version = "23.0.1" @@ -1460,6 +1869,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341 }, ] +[[package]] +name = "redis" +version = "7.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/7f/3759b1d0d72b7c92f0d70ffd9dc962b7b7b5ee74e135f9d7d8ab06b8a318/redis-7.4.0.tar.gz", hash = "sha256:64a6ea7bf567ad43c964d2c30d82853f8df927c5c9017766c55a1d1ed95d18ad", size = 4943913 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/3a/95deec7db1eb53979973ebd156f3369a72732208d1391cd2e5d127062a32/redis-7.4.0-py3-none-any.whl", hash = "sha256:a9c74a5c893a5ef8455a5adb793a31bb70feb821c86eccb62eebef5a19c429ec", size = 409772 }, +] + +[[package]] +name = "requests" +version = "2.33.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5f/a4/98b9c7c6428a668bf7e42ebb7c79d576a1c3c1e3ae2d47e674b468388871/requests-2.33.1.tar.gz", hash = "sha256:18817f8c57c6263968bc123d237e3b8b08ac046f5456bd1e307ee8f4250d3517", size = 134120 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/8e/7540e8a2036f79a125c1d2ebadf69ed7901608859186c856fa0388ef4197/requests-2.33.1-py3-none-any.whl", hash = "sha256:4e6d1ef462f3626a1f0a0a9c42dd93c63bad33f9f1c1937509b8c5c8718ab56a", size = 64947 }, +] + [[package]] name = 
"ruff" version = "0.15.11" @@ -1594,6 +2027,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611 }, ] +[[package]] +name = "tzdata" +version = "2026.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/f5/cd531b2d15a671a40c0f66cf06bc3570a12cd56eef98960068ebbad1bf5a/tzdata-2026.1.tar.gz", hash = "sha256:67658a1903c75917309e753fdc349ac0efd8c27db7a0cb406a25be4840f87f98", size = 197639 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/70/d460bd685a170790ec89317e9bd33047988e4bce507b831f5db771e142de/tzdata-2026.1-py2.py3-none-any.whl", hash = "sha256:4b1d2be7ac37ceafd7327b961aa3a54e467efbdb563a23655fbfe0d39cfc42a9", size = 348952 }, +] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584 }, +] + [[package]] name = "uuid7" version = "0.1.0" @@ -1766,3 +2217,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/47/3fa2286c3cb162c71cdb34c4224d5745a1ceceb391b2bd9b19b668a8d724/yarl-1.23.0-cp314-cp314t-win_arm64.whl", hash = "sha256:44bb7bef4ea409384e3f8bc36c063d77ea1b8d4a5b2706956c0d6695f07dcc25", size = 86041 }, { url = 
"https://files.pythonhosted.org/packages/69/68/c8739671f5699c7dc470580a4f821ef37c32c4cb0b047ce223a7f115757f/yarl-1.23.0-py3-none-any.whl", hash = "sha256:a2df6afe50dea8ae15fa34c9f824a3ee958d785fd5d089063d960bae1daa0a3f", size = 48288 }, ] + +[[package]] +name = "zipp" +version = "3.23.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/30/21/093488dfc7cc8964ded15ab726fad40f25fd3d788fd741cc1c5a17d78ee8/zipp-3.23.1.tar.gz", hash = "sha256:32120e378d32cd9714ad503c1d024619063ec28aad2248dc6672ad13edfa5110", size = 25965 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/8a/0861bec20485572fbddf3dfba2910e38fe249796cb73ecdeb74e07eeb8d3/zipp-3.23.1-py3-none-any.whl", hash = "sha256:0b3596c50a5c700c9cb40ba8d86d9f2cc4807e9bedb06bcdf7fac85633e444dc", size = 10378 }, +] From 572cb0bf3978efe4b23be33e1cdf21beeac9dd94 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 15:17:53 +0530 Subject: [PATCH 02/11] feat(obs): prometheus and opentelemetry backends behind stable shims MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the Phase 1 no-op MetricCounter / MetricGauge / trace_span shims with a pluggable backend selected by ObservabilityConfig. Call sites do not change: instrumented code keeps building instances by name+labels and invoking inc / set / span; configure_observability swaps the concrete registration target behind them. Counters and gauges cache by metric name so repeated construction returns the same registered collector. The backend accepts an explicit CollectorRegistry for test isolation — production uses the default module-level registry while tests pass a fresh CollectorRegistry per case to avoid "already registered" collisions. Tracing wires through TracerProvider with a TraceIdRatioBased sampler and the OTLP gRPC exporter; start_metrics_server opens the scrape endpoint only when metrics.kind is "prometheus". 
--- pyproject.toml | 1 + .../augur_signals/_observability.py | 250 ++++++++++++++++-- tests/signals/test_observability.py | 95 ++++++- uv.lock | 2 + 4 files changed, 318 insertions(+), 30 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2da37e7..441ae7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ dev = [ "prometheus-client>=0.20", "opentelemetry-api>=1.27", "opentelemetry-sdk>=1.27", + "opentelemetry-exporter-otlp>=1.41.0", ] [tool.ruff] diff --git a/src/augur_signals/augur_signals/_observability.py b/src/augur_signals/augur_signals/_observability.py index 7514ebf..de5bd40 100644 --- a/src/augur_signals/augur_signals/_observability.py +++ b/src/augur_signals/augur_signals/_observability.py @@ -1,64 +1,258 @@ """Observability primitives: metric counters, gauges, and trace spans. -The implementations here are deliberate no-ops. Call sites instrument -code with counters, gauges, and spans against these shims; the -multi-process runtime replaces the shims with real Prometheus and -OpenTelemetry adapters without any call-site edits. - -The shim approach keeps signal-extraction, labeling, and formatter code -free of a hard dependency on the observability backend during early -development and testing, while still exercising the instrumentation -surface end-to-end. +This module exposes ``MetricCounter``, ``MetricGauge``, and +``trace_span``. Call sites build an instance by name+labels and invoke +``inc`` / ``set`` / ``with trace_span(...)``; the concrete backend is +swapped via ``configure_observability`` without touching instrumented +code. Three backend combinations are supported: + +* disabled — no-op shims. The Phase 1 default; suitable for unit tests + and backtest runs where metric emission would pollute signal. +* prometheus + otlp — the Phase 5 deployment. Metrics land in the + prometheus_client default registry and a /metrics HTTP endpoint is + started via ``start_metrics_server``. 
Traces route through an + OpenTelemetry ``TracerProvider`` with OTLP export. +* mixed — independent knobs per surface (metrics disabled, traces on; + or vice versa) for incremental rollout. + +The backend is a module-global singleton because prometheus_client and +the OpenTelemetry SDK both maintain their own global state. Calling +``configure_observability`` a second time rebuilds the backend and +replaces previously-registered collectors; this is only safe in tests. """ from __future__ import annotations +import threading from collections.abc import Generator from contextlib import contextmanager -from typing import Any +from typing import TYPE_CHECKING, Any, Protocol + +from augur_signals._observability_config import ObservabilityConfig + +if TYPE_CHECKING: + from prometheus_client import CollectorRegistry + + +class _CounterBackend(Protocol): + def inc(self, value: float, label_values: dict[str, str]) -> None: ... + + +class _GaugeBackend(Protocol): + def set(self, value: float, label_values: dict[str, str]) -> None: ... + + +class _TracerBackend(Protocol): + @contextmanager + def span(self, name: str, attributes: dict[str, Any]) -> Generator[None, None, None]: ... 
+ + +class _NoOpCounter: + def inc(self, value: float, label_values: dict[str, str]) -> None: + _ = value, label_values + + +class _NoOpGauge: + def set(self, value: float, label_values: dict[str, str]) -> None: + _ = value, label_values + + +class _NoOpTracer: + @contextmanager + def span(self, name: str, attributes: dict[str, Any]) -> Generator[None, None, None]: + _ = name, attributes + yield + + +class _PromCounter: + def __init__(self, name: str, labels: list[str], registry: CollectorRegistry | None) -> None: + from prometheus_client import Counter + + self._counter = Counter(name, name, labels, registry=registry) + self._labels = labels + + def inc(self, value: float, label_values: dict[str, str]) -> None: + if self._labels: + self._counter.labels(**{k: label_values.get(k, "") for k in self._labels}).inc(value) + else: + self._counter.inc(value) + + +class _PromGauge: + def __init__(self, name: str, labels: list[str], registry: CollectorRegistry | None) -> None: + from prometheus_client import Gauge + + self._gauge = Gauge(name, name, labels, registry=registry) + self._labels = labels + + def set(self, value: float, label_values: dict[str, str]) -> None: + if self._labels: + self._gauge.labels(**{k: label_values.get(k, "") for k in self._labels}).set(value) + else: + self._gauge.set(value) + + +class _OTelTracer: + def __init__(self, service_name: str, endpoint: str, sampling_ratio: float) -> None: + from opentelemetry import trace + from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter + from opentelemetry.sdk.resources import SERVICE_NAME, Resource + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import BatchSpanProcessor + from opentelemetry.sdk.trace.sampling import TraceIdRatioBased + + resource = Resource.create({SERVICE_NAME: service_name}) + provider = TracerProvider(resource=resource, sampler=TraceIdRatioBased(sampling_ratio)) + 
provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(endpoint=endpoint))) + trace.set_tracer_provider(provider) + self._tracer = trace.get_tracer("augur") + + @contextmanager + def span(self, name: str, attributes: dict[str, Any]) -> Generator[None, None, None]: + with self._tracer.start_as_current_span(name, attributes=attributes): + yield + + +class _Backend: + """Module-level backend selector. + + Holds factory callables so ``MetricCounter("foo", [...])`` can be + built after configuration without rebuilding the class hierarchy. + """ + + def __init__(self) -> None: + self._metrics_kind: str = "disabled" + self._registry: CollectorRegistry | None = None + self._tracer: _TracerBackend = _NoOpTracer() + self._lock = threading.Lock() + self._counters: dict[str, _CounterBackend] = {} + self._gauges: dict[str, _GaugeBackend] = {} + + def configure( + self, + config: ObservabilityConfig, + registry: CollectorRegistry | None = None, + ) -> None: + with self._lock: + self._metrics_kind = config.metrics.kind + self._registry = registry + self._counters.clear() + self._gauges.clear() + if config.traces.kind == "otlp": + self._tracer = _OTelTracer( + config.traces.service_name, + config.traces.otlp_endpoint, + config.traces.sampling_ratio, + ) + else: + self._tracer = _NoOpTracer() + + def counter(self, name: str, labels: list[str]) -> _CounterBackend: + with self._lock: + existing = self._counters.get(name) + if existing is not None: + return existing + backend: _CounterBackend = ( + _PromCounter(name, labels, self._registry) + if self._metrics_kind == "prometheus" + else _NoOpCounter() + ) + self._counters[name] = backend + return backend + + def gauge(self, name: str, labels: list[str]) -> _GaugeBackend: + with self._lock: + existing = self._gauges.get(name) + if existing is not None: + return existing + backend: _GaugeBackend = ( + _PromGauge(name, labels, self._registry) + if self._metrics_kind == "prometheus" + else _NoOpGauge() + ) + self._gauges[name] = 
backend + return backend + + def tracer(self) -> _TracerBackend: + return self._tracer + + +_BACKEND = _Backend() + + +def configure_observability( + config: ObservabilityConfig, + registry: CollectorRegistry | None = None, +) -> None: + """Activate real backends per *config*. + + *registry* is the prometheus_client ``CollectorRegistry`` the + backend registers counters and gauges with. Production leaves it + ``None`` so the default module-level registry is used; tests pass + a fresh ``CollectorRegistry()`` to isolate collectors between + cases. + + Leaves counters and gauges unregistered until their first + ``MetricCounter(name, labels)`` / ``MetricGauge(name, labels)`` call + so test suites can re-configure without colliding on the shared + prometheus_client registry. + """ + _BACKEND.configure(config, registry) + + +def start_metrics_server(config: ObservabilityConfig) -> None: + """Start a /metrics HTTP listener on the configured bind/port. + + Separate from ``configure_observability`` because the backtest + harness configures the backend without ever binding a port. + """ + if config.metrics.kind != "prometheus": + return + from prometheus_client import start_http_server + + start_http_server(config.metrics.prometheus_port, addr=config.metrics.prometheus_bind) class MetricCounter: - """No-op counter shim. + """Monotonic counter. Call ``inc`` to increment. Attributes: - name: Metric name in the Prometheus namespace. - labels: Label-key list; values are supplied at ``inc`` time. + name: Metric name exposed to the scraper. + labels: Ordered list of label keys; values are provided at + ``inc`` time via keyword arguments. """ def __init__(self, name: str, labels: list[str]) -> None: self.name = name self.labels = list(labels) + self._backend = _BACKEND.counter(name, self.labels) def inc(self, value: float = 1.0, **label_values: str | int | float) -> None: - """Increment the counter. 
No-op in the shim implementation.""" - _ = value, label_values + """Increment by *value*; label values are stringified on the way in.""" + self._backend.inc(value, {k: str(v) for k, v in label_values.items()}) class MetricGauge: - """No-op gauge shim. + """Instantaneous value. Call ``set`` to overwrite. Attributes: - name: Metric name in the Prometheus namespace. - labels: Label-key list; values are supplied at ``set`` time. + name: Metric name exposed to the scraper. + labels: Ordered list of label keys; values are provided at + ``set`` time via keyword arguments. """ def __init__(self, name: str, labels: list[str]) -> None: self.name = name self.labels = list(labels) + self._backend = _BACKEND.gauge(name, self.labels) def set(self, value: float, **label_values: str | int | float) -> None: - """Set the gauge. No-op in the shim implementation.""" - _ = value, label_values + """Set the gauge to *value*; label values are stringified.""" + self._backend.set(value, {k: str(v) for k, v in label_values.items()}) @contextmanager def trace_span(name: str, **attributes: Any) -> Generator[None, None, None]: - """No-op trace-span shim. - - The real implementation will open an OpenTelemetry span, attach - *attributes*, and close it on context exit. For now, the call site - is exercised but no data is recorded. 
- """ - _ = name, attributes - yield + """Open a trace span named *name* with *attributes*; auto-close on exit.""" + with _BACKEND.tracer().span(name, dict(attributes)): + yield diff --git a/tests/signals/test_observability.py b/tests/signals/test_observability.py index 1f9f492..84631fa 100644 --- a/tests/signals/test_observability.py +++ b/tests/signals/test_observability.py @@ -1,10 +1,49 @@ -"""Tests for the observability shim primitives.""" +"""Tests for the observability primitives and backend wiring.""" from __future__ import annotations import pytest +from prometheus_client import CollectorRegistry, generate_latest -from augur_signals._observability import MetricCounter, MetricGauge, trace_span +from augur_signals._observability import ( + MetricCounter, + MetricGauge, + configure_observability, + trace_span, +) +from augur_signals._observability_config import ( + LogsBody, + MetricsBody, + ObservabilityConfig, + TracesBody, +) + + +@pytest.fixture +def registry() -> CollectorRegistry: + """Per-test registry so counters do not collide across cases.""" + reg = CollectorRegistry() + configure_observability( + ObservabilityConfig( + metrics=MetricsBody(kind="disabled"), + traces=TracesBody(kind="disabled"), + logs=LogsBody(), + ), + reg, + ) + return reg + + +@pytest.fixture(autouse=True) +def _reset_default() -> None: + """Reset to disabled backend when a test does not claim the registry.""" + configure_observability( + ObservabilityConfig( + metrics=MetricsBody(kind="disabled"), + traces=TracesBody(kind="disabled"), + logs=LogsBody(), + ) + ) @pytest.mark.unit @@ -36,3 +75,55 @@ def test_trace_span_is_a_context_manager() -> None: def test_trace_span_with_no_attributes() -> None: with trace_span("noop"): pass + + +@pytest.mark.unit +def test_prometheus_backend_records_increments(registry: CollectorRegistry) -> None: + configure_observability( + ObservabilityConfig( + metrics=MetricsBody(kind="prometheus"), + traces=TracesBody(kind="disabled"), + 
logs=LogsBody(), + ), + registry, + ) + counter = MetricCounter("augur_worker_processed_total", ["worker_kind"]) + counter.inc(3.0, worker_kind="feature") + counter.inc(worker_kind="feature") + payload = generate_latest(registry).decode("utf-8") + assert 'augur_worker_processed_total{worker_kind="feature"} 4.0' in payload + + +@pytest.mark.unit +def test_prometheus_gauge_overwrites_value(registry: CollectorRegistry) -> None: + configure_observability( + ObservabilityConfig( + metrics=MetricsBody(kind="prometheus"), + traces=TracesBody(kind="disabled"), + logs=LogsBody(), + ), + registry, + ) + gauge = MetricGauge("augur_bus_queue_depth", ["topic"]) + gauge.set(10.0, topic="augur.signals") + gauge.set(3.0, topic="augur.signals") + payload = generate_latest(registry).decode("utf-8") + assert 'augur_bus_queue_depth{topic="augur.signals"} 3.0' in payload + + +@pytest.mark.unit +def test_counter_singleton_across_instantiations(registry: CollectorRegistry) -> None: + configure_observability( + ObservabilityConfig( + metrics=MetricsBody(kind="prometheus"), + traces=TracesBody(kind="disabled"), + logs=LogsBody(), + ), + registry, + ) + first = MetricCounter("augur_failover_total", ["singleton_kind"]) + second = MetricCounter("augur_failover_total", ["singleton_kind"]) + first.inc(singleton_kind="dedup") + second.inc(singleton_kind="dedup") + payload = generate_latest(registry).decode("utf-8") + assert 'augur_failover_total{singleton_kind="dedup"} 2.0' in payload diff --git a/uv.lock b/uv.lock index d437ef4..b0d31b9 100644 --- a/uv.lock +++ b/uv.lock @@ -189,6 +189,7 @@ dev = [ { name = "mypy" }, { name = "nats-py" }, { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp" }, { name = "opentelemetry-sdk" }, { name = "pre-commit" }, { name = "prometheus-client" }, @@ -214,6 +215,7 @@ dev = [ { name = "mypy", specifier = ">=1.11" }, { name = "nats-py", specifier = ">=2.7" }, { name = "opentelemetry-api", specifier = ">=1.27" }, + { name = 
"opentelemetry-exporter-otlp", specifier = ">=1.41.0" }, { name = "opentelemetry-sdk", specifier = ">=1.27" }, { name = "pre-commit", specifier = ">=3.7" }, { name = "prometheus-client", specifier = ">=0.20" }, From a1cc65f5768ffb2c1291f1b1ba61dde49f78ef21 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 15:41:46 +0530 Subject: [PATCH 03/11] feat(bus): nats and redis adapters plus distributed lock primitives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the byte-level EventBus protocol used by multi-process workers. Two adapters implement it: * NATSBus — one JetStream stream per subject prefix, pull consumers keyed by (pattern, consumer_group). Publish via js.publish with optional headers; subscribe yields BusMessage envelopes and acks only after the consumer iterates past each message so unfinished processing triggers JetStream redelivery on restart. * RedisStreamsBus — one stream per literal subject, XADD with MAXLEN trim for hot-retention, XREADGROUP/XACK for at-least-once delivery. make_event_bus selects between them from BusConfig. The memory kind is intentionally rejected and callers are redirected to the monolith's InProcessAsyncBus. DistributedLock protocol plus three backends: * InMemoryLock with an injectable monotonic clock for unit tests. * NATSKVLock using the JetStream KV bucket's create/put/delete semantics as a CAS primitive. * RedisLock using SET NX EX for acquire and WATCH+MULTI+EXEC for renew/release, keeping the adapter usable against fakeredis (which does not ship a Lua interpreter) and against real Redis clusters without relying on the script cache. Adapters accept an injected client so unit tests exercise them via fakeredis.aioredis and a fake NATS JetStream without any live cluster. 
--- schemas/IntelligenceBrief-1.0.0.json | 2 +- scripts/backtest.py | 2 +- scripts/export_schemas.py | 8 +- scripts/label.py | 4 +- scripts/lint_detector_now.py | 4 +- src/augur_format/augur_format/_config.py | 2 +- .../augur_format/deterministic/json_feed.py | 2 +- .../augur_format/deterministic/markdown.py | 4 +- .../augur_format/deterministic/severity.py | 4 +- .../augur_format/llm/backends/anthropic.py | 4 +- .../augur_format/llm/backends/base.py | 4 +- .../augur_format/llm/backends/ollama.py | 4 +- .../augur_format/llm/interpreter.py | 4 +- src/augur_format/augur_format/llm/models.py | 6 +- .../augur_format/llm/prompts/builder.py | 2 +- .../augur_format/llm/provenance/stamp.py | 6 +- .../augur_format/llm/routing/consumer_gate.py | 2 +- .../augur_format/routing/consumer_registry.py | 4 +- .../augur_format/routing/router.py | 2 +- .../augur_format/transport/retry.py | 2 +- .../augur_format/transport/webhook.py | 2 +- .../augur_format/transport/websocket.py | 10 +- .../augur_format/validate/enum_check.py | 6 +- .../augur_format/validate/schema_check.py | 4 +- src/augur_labels/augur_labels/_protocol.py | 2 +- .../augur_labels/annotator/agreement.py | 4 +- .../augur_labels/annotator/candidate_queue.py | 2 +- .../augur_labels/models/agreement.py | 4 +- .../augur_labels/models/annotation.py | 2 +- .../augur_labels/sources/_http.py | 2 +- src/augur_labels/augur_labels/sources/base.py | 2 +- .../augur_labels/storage/parquet_writer.py | 14 +- src/augur_signals/augur_signals/_logging.py | 22 +- .../augur_signals/_observability.py | 34 +-- src/augur_signals/augur_signals/bus/_lock.py | 128 ++++++++++ src/augur_signals/augur_signals/bus/base.py | 84 +++++++ .../augur_signals/bus/factory.py | 43 ++++ src/augur_signals/augur_signals/bus/nats.py | 145 +++++++++++ .../augur_signals/bus/redis_streams.py | 226 ++++++++++++++++++ .../calibration/drift_monitor.py | 2 +- .../calibration/empirical_fpr.py | 6 +- .../calibration/fdr_controller.py | 10 +- 
.../augur_signals/calibration/reliability.py | 4 +- .../augur_signals/context/taxonomy.py | 4 +- .../augur_signals/dedup/cluster.py | 2 +- .../augur_signals/dedup/fingerprint.py | 4 +- .../augur_signals/detectors/_bocpd.py | 8 +- .../augur_signals/detectors/_cusum.py | 4 +- .../augur_signals/detectors/base.py | 12 +- .../augur_signals/detectors/book_imbalance.py | 2 +- .../augur_signals/detectors/cross_market.py | 2 +- .../augur_signals/detectors/price_velocity.py | 2 +- src/augur_signals/augur_signals/engine.py | 2 +- .../augur_signals/ingestion/retry.py | 2 +- .../augur_signals/manipulation/detector.py | 2 +- .../augur_signals/manipulation/signatures.py | 2 +- .../augur_signals/models/_identifiers.py | 2 +- .../augur_signals/storage/_config.py | 4 +- .../augur_signals/storage/duckdb_store.py | 4 +- tests/conftest.py | 2 +- tests/signals/test_bus_factory.py | 38 +++ tests/signals/test_bus_lock.py | 89 +++++++ tests/signals/test_bus_nats.py | 177 ++++++++++++++ tests/signals/test_bus_redis.py | 122 ++++++++++ tests/signals/test_llm_isolation.py | 10 +- 65 files changed, 1189 insertions(+), 137 deletions(-) create mode 100644 src/augur_signals/augur_signals/bus/_lock.py create mode 100644 src/augur_signals/augur_signals/bus/base.py create mode 100644 src/augur_signals/augur_signals/bus/factory.py create mode 100644 src/augur_signals/augur_signals/bus/nats.py create mode 100644 src/augur_signals/augur_signals/bus/redis_streams.py create mode 100644 tests/signals/test_bus_factory.py create mode 100644 tests/signals/test_bus_lock.py create mode 100644 tests/signals/test_bus_nats.py create mode 100644 tests/signals/test_bus_redis.py diff --git a/schemas/IntelligenceBrief-1.0.0.json b/schemas/IntelligenceBrief-1.0.0.json index 3a769d9..d5633a3 100644 --- a/schemas/IntelligenceBrief-1.0.0.json +++ b/schemas/IntelligenceBrief-1.0.0.json @@ -15,7 +15,7 @@ } }, "additionalProperties": false, - "description": "Gated LLM formatter output contract.\n\nStructural invariants are 
enforced by Pydantic at construction:\nthe headline is capped at 90 characters so it fits a Slack header,\nbody_markdown is capped at 800 characters so it stays readable on\na dashboard card, ``actionable_for`` is typed as list[ConsumerType]\nso unknown consumers fail immediately, and ``interpretation_mode``\nplus ``forbidden_token_check`` are Literal singletons \u2014 any\nconstruction path that bypasses the linter or the deterministic-\nmode check would have to forge the literal, which is caught in\ncode review.", + "description": "Gated LLM formatter output contract.\n\nStructural invariants are enforced by Pydantic at construction:\nthe headline is capped at 90 characters so it fits a Slack header,\nbody_markdown is capped at 800 characters so it stays readable on\na dashboard card, `actionable_for` is typed as list[ConsumerType]\nso unknown consumers fail immediately, and `interpretation_mode`\nplus `forbidden_token_check` are Literal singletons \u2014 any\nconstruction path that bypasses the linter or the deterministic-\nmode check would have to forge the literal, which is caught in\ncode review.", "properties": { "actionable_for": { "items": { diff --git a/scripts/backtest.py b/scripts/backtest.py index c568b55..39c86a7 100644 --- a/scripts/backtest.py +++ b/scripts/backtest.py @@ -1,7 +1,7 @@ """Backtest harness entrypoint. Replays historical snapshots from DuckDB through the live signal -pipeline with ``now`` threaded deterministically, then computes +pipeline with now threaded deterministically, then computes precision / recall / lead-time distributions per detector and liquidity tier against the labeled corpus per docs/methodology/labeling-protocol.md. 
diff --git a/scripts/export_schemas.py b/scripts/export_schemas.py index c6cb45d..0574bcc 100644 --- a/scripts/export_schemas.py +++ b/scripts/export_schemas.py @@ -6,14 +6,14 @@ The command is intentionally narrow: it serializes every registered Pydantic model to a deterministic JSON document at -``schemas/-.json``. ``--check`` compares the on-disk +schemas/-.json. --check compares the on-disk snapshot byte-for-byte against the model's current schema and exits non-zero on drift; this is the CI gate that enforces schema-contract discipline per docs/contracts/schema-and-versioning.md. The model registry below is empty while the Pydantic models live in future commits. Each model is registered by importing it here and -appending ``(ModelClass, "1.0.0")`` to ``MODELS``. +appending (ModelClass, "1.0.0") to MODELS. """ from __future__ import annotations @@ -66,8 +66,8 @@ def export_schema(model_cls: type[BaseModel], version: str) -> None: def check_schema(model_cls: type[BaseModel], version: str) -> tuple[bool, bool]: """Compare on-disk schema to the current model. - Returns a ``(exists, matches)`` pair. ``exists`` is False when the - schema file is missing on disk; ``matches`` is True only when the + Returns a (exists, matches) pair. exists is False when the + schema file is missing on disk; matches is True only when the file exists and its contents are byte-for-byte identical to the serialized model. The split lets the caller distinguish a missing file from a content drift and report them separately. diff --git a/scripts/label.py b/scripts/label.py index 0041cef..bee62e0 100644 --- a/scripts/label.py +++ b/scripts/label.py @@ -2,8 +2,8 @@ Launches the augur-label click CLI over the newsworthy-event candidate queue and the append-only parquet corpus. Available commands are -implemented in augur_labels.annotator.cli; run ``python scripts/label.py ---help`` to discover them. 
+implemented in augur_labels.annotator.cli; run `python scripts/label.py +--help` to discover them. """ from __future__ import annotations diff --git a/scripts/lint_detector_now.py b/scripts/lint_detector_now.py index f4e8b79..f64f283 100644 --- a/scripts/lint_detector_now.py +++ b/scripts/lint_detector_now.py @@ -1,7 +1,7 @@ -"""AST-based guard against ``datetime.now()`` inside detector modules. +"""AST-based guard against `datetime.now()` inside detector modules. The development-plan invariant (§7.2) states that detectors must take -``now`` as a parameter; any call to ``datetime.now()`` from within a +`now` as a parameter; any call to `datetime.now()` from within a detector module breaks backtest replay determinism. This script walks the detector package and fails non-zero on any direct call. diff --git a/src/augur_format/augur_format/_config.py b/src/augur_format/augur_format/_config.py index 9bfa314..7d2f0fd 100644 --- a/src/augur_format/augur_format/_config.py +++ b/src/augur_format/augur_format/_config.py @@ -60,7 +60,7 @@ class FormatterConfig(BaseModel): model_config = ConfigDict(frozen=True, extra="forbid", populate_by_name=True) # Field aliased so the TOML block is [json] per the documented - # schema, while the Python attribute is ``canonical_json`` to avoid + # schema, while the Python attribute is `canonical_json` to avoid # shadowing BaseModel.json. canonical_json: JsonConfig = Field(default_factory=JsonConfig, alias="json") markdown: MarkdownConfig = Field(default_factory=MarkdownConfig) diff --git a/src/augur_format/augur_format/deterministic/json_feed.py b/src/augur_format/augur_format/deterministic/json_feed.py index 29e6eca..f0e9b9f 100644 --- a/src/augur_format/augur_format/deterministic/json_feed.py +++ b/src/augur_format/augur_format/deterministic/json_feed.py @@ -1,7 +1,7 @@ """Canonical JSON formatter for SignalContext. Serializes a SignalContext with stable key ordering, float rounding, -and ISO-8601 UTC timestamps with a ``Z`` suffix. 
The determinism +and ISO-8601 UTC timestamps with a `Z` suffix. The determinism contract: same SignalContext in, byte-identical JSON out across any number of invocations. Consumers can hash the bytes and rely on stable equality. diff --git a/src/augur_format/augur_format/deterministic/markdown.py b/src/augur_format/augur_format/deterministic/markdown.py index f11dabc..b6fb492 100644 --- a/src/augur_format/augur_format/deterministic/markdown.py +++ b/src/augur_format/augur_format/deterministic/markdown.py @@ -1,7 +1,7 @@ """Jinja2 Markdown renderer. -Templates live alongside this module at ``templates/``; one per -signal type plus a shared ``_base.md.j2``. The renderer is +Templates live alongside this module at `templates/`; one per +signal type plus a shared `_base.md.j2`. The renderer is deterministic given identical inputs and template files. The templates are committed, so any rendering drift surfaces as a test failure rather than silent variation. diff --git a/src/augur_format/augur_format/deterministic/severity.py b/src/augur_format/augur_format/deterministic/severity.py index b47364a..2a9f496 100644 --- a/src/augur_format/augur_format/deterministic/severity.py +++ b/src/augur_format/augur_format/deterministic/severity.py @@ -1,6 +1,6 @@ """Deterministic severity derivation. -Severity is ``magnitude * confidence`` scored against per-tier +Severity is `magnitude * confidence` scored against per-tier thresholds. The formula is pure code (not configuration) so every consumer can reproduce the mapping locally without a network round trip. Changing the thresholds requires a schema-version bump on the @@ -31,7 +31,7 @@ def derive_severity(signal: MarketSignal) -> Severity: """Return the deterministic severity label for *signal*. - The score is ``magnitude * confidence`` (both in [0, 1]); the + The score is `magnitude * confidence` (both in [0, 1]); the threshold applied depends on the liquidity tier. 
Low-tier markets always emit "low" severity — the sample size on low-tier reliability curves is too thin to justify higher confidence in a human channel. diff --git a/src/augur_format/augur_format/llm/backends/anthropic.py b/src/augur_format/augur_format/llm/backends/anthropic.py index 5c43f6d..1d0a256 100644 --- a/src/augur_format/augur_format/llm/backends/anthropic.py +++ b/src/augur_format/augur_format/llm/backends/anthropic.py @@ -1,9 +1,9 @@ """Anthropic backend adapter. -Imports the anthropic SDK lazily via ``importlib.import_module`` so +Imports the anthropic SDK lazily via `importlib.import_module` so that the llm-isolation test continues to assert anthropic is NOT importable in the default environment. Operators install anthropic -via the ``augur-format[llm-cloud]`` extra before enabling the +via the `augur-format[llm-cloud]` extra before enabling the backend. """ diff --git a/src/augur_format/augur_format/llm/backends/base.py b/src/augur_format/augur_format/llm/backends/base.py index 462d811..f3930c3 100644 --- a/src/augur_format/augur_format/llm/backends/base.py +++ b/src/augur_format/augur_format/llm/backends/base.py @@ -1,7 +1,7 @@ """AbstractLLMBackend protocol and completion result model. Concrete adapters (Ollama, Anthropic) implement the same async -``complete`` surface so the interpreter dispatches uniformly. The +`complete` surface so the interpreter dispatches uniformly. The completion result exposes only the fields downstream actually needs: the raw text, token counts for observability, and the duration in milliseconds for the generation-latency SLO. @@ -45,7 +45,7 @@ async def complete( ... def model_id(self) -> str: - """Return the active model identifier (e.g. ``gemma2:27b``).""" + """Return the active model identifier (e.g. `gemma2:27b`).""" ... 
async def health_check(self) -> bool: diff --git a/src/augur_format/augur_format/llm/backends/ollama.py b/src/augur_format/augur_format/llm/backends/ollama.py index 421f9c3..c86cd81 100644 --- a/src/augur_format/augur_format/llm/backends/ollama.py +++ b/src/augur_format/augur_format/llm/backends/ollama.py @@ -1,8 +1,8 @@ """Ollama backend adapter. Uses plain httpx against the local Ollama daemon (default -``http://localhost:11434``) so the adapter has no hard dependency on -the ``ollama`` Python client. The adapter retries twice on connection +`http://localhost:11434`) so the adapter has no hard dependency on +the `ollama` Python client. The adapter retries twice on connection failures; local daemon outages should surface quickly, not retry for a minute. """ diff --git a/src/augur_format/augur_format/llm/interpreter.py b/src/augur_format/augur_format/llm/interpreter.py index 1687b75..5cdf865 100644 --- a/src/augur_format/augur_format/llm/interpreter.py +++ b/src/augur_format/augur_format/llm/interpreter.py @@ -2,7 +2,7 @@ Composes the backend, prompt builder, forbidden-token linter, schema validator, consumer gate, and provenance stamp into a single -``interpret`` call per SignalContext. Any failure (backend error, +`interpret` call per SignalContext. Any failure (backend error, forbidden token, invalid JSON, schema violation, storm suspension) returns None; the deterministic pipeline proceeds unaffected. @@ -62,7 +62,7 @@ def suspended(self) -> bool: def set_suspended(self, suspended: bool) -> None: """Toggle storm-mode suspension. - When True, ``interpret`` returns None without calling the + When True, `interpret` returns None without calling the backend, matching phase-4 §11 coordination with the dedup layer's StormController. 
""" diff --git a/src/augur_format/augur_format/llm/models.py b/src/augur_format/augur_format/llm/models.py index e887760..a825603 100644 --- a/src/augur_format/augur_format/llm/models.py +++ b/src/augur_format/augur_format/llm/models.py @@ -24,9 +24,9 @@ class IntelligenceBrief(BaseModel): Structural invariants are enforced by Pydantic at construction: the headline is capped at 90 characters so it fits a Slack header, body_markdown is capped at 800 characters so it stays readable on - a dashboard card, ``actionable_for`` is typed as list[ConsumerType] - so unknown consumers fail immediately, and ``interpretation_mode`` - plus ``forbidden_token_check`` are Literal singletons — any + a dashboard card, `actionable_for` is typed as list[ConsumerType] + so unknown consumers fail immediately, and `interpretation_mode` + plus `forbidden_token_check` are Literal singletons — any construction path that bypasses the linter or the deterministic- mode check would have to forge the literal, which is caught in code review. diff --git a/src/augur_format/augur_format/llm/prompts/builder.py b/src/augur_format/augur_format/llm/prompts/builder.py index 010ecd2..c6adc14 100644 --- a/src/augur_format/augur_format/llm/prompts/builder.py +++ b/src/augur_format/augur_format/llm/prompts/builder.py @@ -9,7 +9,7 @@ The builder is deterministic: identical SignalContext + identical forbidden-phrase list + identical template files always produce identical prompt strings. The prompt hash used for provenance is -the SHA-256 of ``system + "\\n\\n" + user``. +the SHA-256 of `system + "\\n\\n" + user`. """ from __future__ import annotations diff --git a/src/augur_format/augur_format/llm/provenance/stamp.py b/src/augur_format/augur_format/llm/provenance/stamp.py index 891ba40..014580c 100644 --- a/src/augur_format/augur_format/llm/provenance/stamp.py +++ b/src/augur_format/augur_format/llm/provenance/stamp.py @@ -1,9 +1,9 @@ """Provenance stamping for LLM-generated briefs. 
-``stamp`` returns a ProvenanceStamp whose ``prompt_hash`` is the -SHA-256 of ``system + "\\n\\n" + user``. Auditors recompute the hash +`stamp` returns a ProvenanceStamp whose `prompt_hash` is the +SHA-256 of `system + "\\n\\n" + user`. Auditors recompute the hash from the deterministic prompt builder to confirm the model saw -exactly what the record claims; ``formatter_version`` is read from +exactly what the record claims; `formatter_version` is read from the installed package metadata so downgrades / upgrades are visible in the record. """ diff --git a/src/augur_format/augur_format/llm/routing/consumer_gate.py b/src/augur_format/augur_format/llm/routing/consumer_gate.py index 2439387..aa126c8 100644 --- a/src/augur_format/augur_format/llm/routing/consumer_gate.py +++ b/src/augur_format/augur_format/llm/routing/consumer_gate.py @@ -1,7 +1,7 @@ """Consumer gate enforcing opt-in for llm_assisted briefs. Per docs/contracts/consumer-registry.md, only consumers whose -configuration sets ``accepts_llm_assisted = true`` receive LLM- +configuration sets `accepts_llm_assisted = true` receive LLM- rendered briefs. The deterministic JSON and Markdown briefs from Phase 3 still reach every consumer; the gate only filters the LLM output. diff --git a/src/augur_format/augur_format/routing/consumer_registry.py b/src/augur_format/augur_format/routing/consumer_registry.py index 0511e71..de452f2 100644 --- a/src/augur_format/augur_format/routing/consumer_registry.py +++ b/src/augur_format/augur_format/routing/consumer_registry.py @@ -1,6 +1,6 @@ """Consumer registry loader. -Reads ``config/consumers.toml`` (seeded in the workspace bootstrap) +Reads `config/consumers.toml` (seeded in the workspace bootstrap) and exposes the per-category consumer routing plus per-consumer transport configuration. The router consumes the registry to decide which consumers should receive a given signal. 
@@ -32,7 +32,7 @@ def __init__(self, routing: dict[str, tuple[ConsumerType, ...]]) -> None: def consumers_for_category(self, category: str) -> tuple[ConsumerType, ...]: """Return the default consumers for *category*. - Unknown categories fall through to ``default`` — matching the + Unknown categories fall through to `default` — matching the Routing Table in docs/contracts/consumer-registry.md. """ if category in self._routing: diff --git a/src/augur_format/augur_format/routing/router.py b/src/augur_format/augur_format/routing/router.py index 2653fe4..50c89b7 100644 --- a/src/augur_format/augur_format/routing/router.py +++ b/src/augur_format/augur_format/routing/router.py @@ -51,7 +51,7 @@ def route(self, context: SignalContext) -> RoutingDecision: """Return the consumer set for *context*. Consumers whose subscription excludes the context's - interpretation_mode are reported under ``suppressed`` so + interpretation_mode are reported under `suppressed` so operational metrics can count the drops. """ category = self._market_categories.get(context.signal.market_id, "default") diff --git a/src/augur_format/augur_format/transport/retry.py b/src/augur_format/augur_format/transport/retry.py index e4cfa2f..ab56de9 100644 --- a/src/augur_format/augur_format/transport/retry.py +++ b/src/augur_format/augur_format/transport/retry.py @@ -37,7 +37,7 @@ async def deliver_with_backoff[T]( ) -> tuple[T, int]: """Invoke *factory* with exponential backoff. - Returns ``(result, attempts)`` where ``attempts`` is the 1-based + Returns `(result, attempts)` where `attempts` is the 1-based count of attempts up to and including the successful call so the caller can surface the actual attempt count in operational telemetry rather than hardcoding policy.max_retries. 
diff --git a/src/augur_format/augur_format/transport/webhook.py b/src/augur_format/augur_format/transport/webhook.py index f722c48..8fae0ae 100644 --- a/src/augur_format/augur_format/transport/webhook.py +++ b/src/augur_format/augur_format/transport/webhook.py @@ -38,7 +38,7 @@ class WebhookTarget(BaseModel): Consumer-type gating and LLM-assisted opt-in live on the SignalRouter and the LLM formatter gate respectively; neither belongs on the delivery target, where there is no call site. - Phase-4 re-introduces ``accepts_llm_assisted`` when the gated + Phase-4 re-introduces `accepts_llm_assisted` when the gated formatter needs per-target opt-in. """ diff --git a/src/augur_format/augur_format/transport/websocket.py b/src/augur_format/augur_format/transport/websocket.py index 3fd69f1..660b77c 100644 --- a/src/augur_format/augur_format/transport/websocket.py +++ b/src/augur_format/augur_format/transport/websocket.py @@ -89,10 +89,10 @@ class ClientSubscription: class WebSocketBroadcaster: """In-process broadcaster; adapts to a real websockets server easily. - The broadcaster manages per-client queues. A ``publish`` call + The broadcaster manages per-client queues. A `publish` call enqueues the frame for every subscriber whose consumer_type matches (or whose subscription is unfiltered). Queues are bounded - by ``per_connection_buffer``; enqueue on a full queue drops the + by `per_connection_buffer`; enqueue on a full queue drops the oldest frame to preserve timeliness, matching the dedup/storm doc's rationale for LIFO under pressure. """ @@ -151,9 +151,9 @@ async def stream(self, subscription: ClientSubscription) -> AsyncIterator[WebSoc class HeartbeatScheduler: """Answers "should a heartbeat emit now?" against caller-supplied time. - The scheduler is mutable by design — ``record`` tracks the last - emission so ``should_emit`` can gate the next one. 
Engine code - owns the outer loop and passes ``now`` explicitly so the scheduler + The scheduler is mutable by design — `record` tracks the last + emission so `should_emit` can gate the next one. Engine code + owns the outer loop and passes `now` explicitly so the scheduler stays backtest-deterministic. """ diff --git a/src/augur_format/augur_format/validate/enum_check.py b/src/augur_format/augur_format/validate/enum_check.py index a6dc0aa..f75bfd5 100644 --- a/src/augur_format/augur_format/validate/enum_check.py +++ b/src/augur_format/augur_format/validate/enum_check.py @@ -1,7 +1,7 @@ """Closed-enum validators for the formatter boundary. Briefs emitted by any formatter (deterministic today, LLM in the -gated secondary layer) carry an ``actionable_for`` list that must +gated secondary layer) carry an `actionable_for` list that must contain only values from the ConsumerType registry in docs/contracts/consumer-registry.md. Validation runs at the formatter boundary; briefs with unknown values are dropped loudly, never @@ -38,7 +38,7 @@ def validate_consumer_types(values: Sequence[str]) -> list[str]: class ConsumerEnumValidator: """Validator callable used at the formatter boundary. - The ``strict`` parameter is retained for the secondary LLM + The `strict` parameter is retained for the secondary LLM formatter, which may want to downgrade to a warning-and-drop during backfill; production deterministic output always runs in strict mode. 
@@ -52,7 +52,7 @@ def strict(self) -> bool: return self._strict def validate_actionable_for(self, values: Sequence[str]) -> ValidationResult: - """Check an ``actionable_for`` list against the ConsumerType registry.""" + """Check an `actionable_for` list against the ConsumerType registry.""" offending = validate_consumer_types(values) return ValidationResult(valid=not offending, offending_values=offending) diff --git a/src/augur_format/augur_format/validate/schema_check.py b/src/augur_format/augur_format/validate/schema_check.py index 16dfbdd..b874dfa 100644 --- a/src/augur_format/augur_format/validate/schema_check.py +++ b/src/augur_format/augur_format/validate/schema_check.py @@ -2,7 +2,7 @@ Runs in debug builds and integration tests; production skips schema validation for throughput per the pattern in phase-3 §8.2. The -validator reads exported JSON schemas from ``schemas/`` so producers +validator reads exported JSON schemas from `schemas/` so producers and consumers share the same contract snapshot. """ @@ -23,7 +23,7 @@ def load_schema( version: str, root: Path | None = None, ) -> dict[str, object]: - """Load ``schemas/-.json``. + """Load `schemas/-.json`. Missing schemas raise SchemaNotFoundError rather than returning a permissive empty dict; a missing schema indicates the export step diff --git a/src/augur_labels/augur_labels/_protocol.py b/src/augur_labels/augur_labels/_protocol.py index 105a2f5..8376ea1 100644 --- a/src/augur_labels/augur_labels/_protocol.py +++ b/src/augur_labels/augur_labels/_protocol.py @@ -1,7 +1,7 @@ """Labeling-protocol constants shared across modules. The protocol version is the single source of truth for -``label_protocol_version`` on every produced NewsworthyEvent and +`label_protocol_version` on every produced NewsworthyEvent and SignalLabel. Bumping this constant triggers recomputation of any calibration metric derived from the affected labels per docs/methodology/labeling-protocol.md §Versioning. 
diff --git a/src/augur_labels/augur_labels/annotator/agreement.py b/src/augur_labels/augur_labels/annotator/agreement.py index 5e2f340..9800a27 100644 --- a/src/augur_labels/augur_labels/annotator/agreement.py +++ b/src/augur_labels/augur_labels/annotator/agreement.py @@ -4,7 +4,7 @@ Jaccard overlap of market-association sets per the targets in docs/methodology/labeling-protocol.md §Inter-Annotator Agreement. -Paired decisions are matched by ``candidate_id``; decisions on +Paired decisions are matched by `candidate_id`; decisions on candidates only one annotator reviewed are excluded from the report. """ @@ -66,7 +66,7 @@ def _pair_decisions( """Return (paired_decisions, unpaired_count). Unpaired decisions (candidate reviewed by only one annotator) are - surfaced so ``compute_agreement`` can report them without silently + surfaced so `compute_agreement` can report them without silently dropping from the denominator. """ by_candidate_a = {d.candidate_id: d for d in decisions_a} diff --git a/src/augur_labels/augur_labels/annotator/candidate_queue.py b/src/augur_labels/augur_labels/annotator/candidate_queue.py index 0bd07c6..d5e614e 100644 --- a/src/augur_labels/augur_labels/annotator/candidate_queue.py +++ b/src/augur_labels/augur_labels/annotator/candidate_queue.py @@ -13,7 +13,7 @@ class CandidateQueue: - """In-memory candidate store indexed by ``candidate_id``.""" + """In-memory candidate store indexed by `candidate_id`.""" def __init__(self) -> None: self._candidates: dict[str, EventCandidate] = {} diff --git a/src/augur_labels/augur_labels/models/agreement.py b/src/augur_labels/augur_labels/models/agreement.py index 0be4122..2f0353b 100644 --- a/src/augur_labels/augur_labels/models/agreement.py +++ b/src/augur_labels/augur_labels/models/agreement.py @@ -1,9 +1,9 @@ """AgreementReport — inter-annotator agreement metrics. Produced by the workflow enforcer before candidate promotion and by -the agreement CLI command for retrospective analysis. 
The ``targets`` +the agreement CLI command for retrospective analysis. The `targets` in docs/methodology/labeling-protocol.md §Inter-Annotator Agreement -are the thresholds that ``meets_targets`` checks. +are the thresholds that `meets_targets` checks. """ from __future__ import annotations diff --git a/src/augur_labels/augur_labels/models/annotation.py b/src/augur_labels/augur_labels/models/annotation.py index 86098b2..2a76770 100644 --- a/src/augur_labels/augur_labels/models/annotation.py +++ b/src/augur_labels/augur_labels/models/annotation.py @@ -24,7 +24,7 @@ class AnnotatorIdentity(BaseModel): class LabelDecision(BaseModel): """One annotator's call on one candidate. - Fields marked ``required if qualifies`` are enforced by a + Fields marked `required if qualifies` are enforced by a model_validator on promotion rather than at construction so an annotator can record "does not qualify" decisions without supplying event metadata. diff --git a/src/augur_labels/augur_labels/sources/_http.py b/src/augur_labels/augur_labels/sources/_http.py index e04c028..8784fa1 100644 --- a/src/augur_labels/augur_labels/sources/_http.py +++ b/src/augur_labels/augur_labels/sources/_http.py @@ -1,6 +1,6 @@ """Shared httpx client helpers with exponential backoff. -Every source adapter routes its calls through ``request_with_backoff`` +Every source adapter routes its calls through `request_with_backoff` so retry semantics stay consistent: 1 s initial delay, doubling to a 60 s cap, 5-retry max on any exception. The helper is parameterized over the request factory so the session's headers, auth, and URL diff --git a/src/augur_labels/augur_labels/sources/base.py b/src/augur_labels/augur_labels/sources/base.py index 65bb236..312def6 100644 --- a/src/augur_labels/augur_labels/sources/base.py +++ b/src/augur_labels/augur_labels/sources/base.py @@ -1,7 +1,7 @@ """AbstractSourceAdapter protocol. 
Every concrete wire-service adapter implements this surface so the -annotator CLI's ``discover`` command can fetch publications across +annotator CLI's `discover` command can fetch publications across sources uniformly. Source-specific auth, rate-limiting, and response- shape handling stay in the concrete adapter; callers see only SourcePublication. diff --git a/src/augur_labels/augur_labels/storage/parquet_writer.py b/src/augur_labels/augur_labels/storage/parquet_writer.py index b2c9a38..c6c79c4 100644 --- a/src/augur_labels/augur_labels/storage/parquet_writer.py +++ b/src/augur_labels/augur_labels/storage/parquet_writer.py @@ -1,21 +1,21 @@ """Append-only Parquet writer with per-partition file locking. -Events are partitioned by the date of ``ground_truth_timestamp``. Each -partition lives at ``/date=YYYY-MM-DD/events.parquet``. The +Events are partitioned by the date of `ground_truth_timestamp`. Each +partition lives at `/date=YYYY-MM-DD/events.parquet`. The writer acquires a filelock on the partition before every read-modify- write so concurrent annotator processes do not corrupt the file. Operational ceiling ------------------- -Each ``append`` re-reads the partition, concats, and rewrites under +Each `append` re-reads the partition, concats, and rewrites under the per-partition lock. For dense labeling days (dozens of events) this is O(n²) I/O; the ceiling is several hundred events per day before the 30 s default lock timeout becomes a bottleneck. Once the corpus approaches that volume, migrate to a sibling-file layout -(``/events-.parquet``) read via -``pq.ParquetDataset`` so each append writes only the new rows. -``supersede`` similarly scans every partition sequentially; an -``event_id -> partition_date`` index lets it jump directly to the +(`/events-.parquet`) read via +`pq.ParquetDataset` so each append writes only the new rows. 
+`supersede` similarly scans every partition sequentially; an +`event_id -> partition_date` index lets it jump directly to the partition at scale. """ diff --git a/src/augur_signals/augur_signals/_logging.py b/src/augur_signals/augur_signals/_logging.py index 7ff2d36..810043a 100644 --- a/src/augur_signals/augur_signals/_logging.py +++ b/src/augur_signals/augur_signals/_logging.py @@ -3,14 +3,14 @@ All logs serialize to single-line JSON and write to stdout. A downstream log shipper routes stdout into the centralized store once the multi-process runtime is operational. Module callers obtain bound -loggers via ``get_logger(__name__)`` and add per-request or per-signal -context with ``structlog.contextvars.bind_contextvars``. +loggers via `get_logger(__name__)` and add per-request or per-signal +context with `structlog.contextvars.bind_contextvars`. Conventions ----------- - Log keys are snake_case. -- Every entry carries ``signal_id`` and ``market_id`` when available, - bound via ``bind_contextvars`` at the point where the identity is +- Every entry carries `signal_id` and `market_id` when available, + bound via `bind_contextvars` at the point where the identity is established. - Log values never contain PII or secrets. """ @@ -28,13 +28,13 @@ def configure_logging(level: str = "INFO") -> None: """Configure structlog to emit UTC-stamped JSON records to stdout. - Idempotent across calls that precede any ``get_logger`` invocation + Idempotent across calls that precede any `get_logger` invocation on the process. Because structlog caches the wrapper class on first - logger retrieval, a call to ``configure_logging`` that follows an - earlier ``get_logger`` affects subsequent loggers only; previously + logger retrieval, a call to `configure_logging` that follows an + earlier `get_logger` affects subsequent loggers only; previously returned loggers retain their original filtering level. 
Production code configures once at engine startup before any module-level - ``get_logger`` runs; tests that change the level re-retrieve their + `get_logger` runs; tests that change the level re-retrieve their logger after reconfiguring. """ level_number = logging.getLevelNamesMapping()[level] @@ -57,9 +57,9 @@ def configure_logging(level: str = "INFO") -> None: def get_logger(name: str) -> BoundLogger: """Return a bound logger for *name*. Call once per module. - Structlog's own ``get_logger`` is typed ``Any`` because the concrete - wrapper depends on the configured ``wrapper_class``. This wrapper - casts to the stdlib-compatible ``BoundLogger`` so call sites get + Structlog's own `get_logger` is typed `Any` because the concrete + wrapper depends on the configured `wrapper_class`. This wrapper + casts to the stdlib-compatible `BoundLogger` so call sites get typed method access. """ return cast(BoundLogger, structlog.get_logger(name)) diff --git a/src/augur_signals/augur_signals/_observability.py b/src/augur_signals/augur_signals/_observability.py index de5bd40..ca22d38 100644 --- a/src/augur_signals/augur_signals/_observability.py +++ b/src/augur_signals/augur_signals/_observability.py @@ -1,23 +1,23 @@ """Observability primitives: metric counters, gauges, and trace spans. -This module exposes ``MetricCounter``, ``MetricGauge``, and -``trace_span``. Call sites build an instance by name+labels and invoke -``inc`` / ``set`` / ``with trace_span(...)``; the concrete backend is -swapped via ``configure_observability`` without touching instrumented +This module exposes `MetricCounter`, `MetricGauge`, and +`trace_span`. Call sites build an instance by name+labels and invoke +`inc` / `set` / `with trace_span(...)`; the concrete backend is +swapped via `configure_observability` without touching instrumented code. Three backend combinations are supported: * disabled — no-op shims. 
The Phase 1 default; suitable for unit tests and backtest runs where metric emission would pollute signal. * prometheus + otlp — the Phase 5 deployment. Metrics land in the prometheus_client default registry and a /metrics HTTP endpoint is - started via ``start_metrics_server``. Traces route through an - OpenTelemetry ``TracerProvider`` with OTLP export. + started via `start_metrics_server`. Traces route through an + OpenTelemetry `TracerProvider` with OTLP export. * mixed — independent knobs per surface (metrics disabled, traces on; or vice versa) for incremental rollout. The backend is a module-global singleton because prometheus_client and the OpenTelemetry SDK both maintain their own global state. Calling -``configure_observability`` a second time rebuilds the backend and +`configure_observability` a second time rebuilds the backend and replaces previously-registered collectors; this is only safe in tests. """ @@ -116,7 +116,7 @@ def span(self, name: str, attributes: dict[str, Any]) -> Generator[None, None, N class _Backend: """Module-level backend selector. - Holds factory callables so ``MetricCounter("foo", [...])`` can be + Holds factory callables so `MetricCounter("foo", [...])` can be built after configuration without rebuilding the class hierarchy. """ @@ -186,14 +186,14 @@ def configure_observability( ) -> None: """Activate real backends per *config*. - *registry* is the prometheus_client ``CollectorRegistry`` the + *registry* is the prometheus_client `CollectorRegistry` the backend registers counters and gauges with. Production leaves it - ``None`` so the default module-level registry is used; tests pass - a fresh ``CollectorRegistry()`` to isolate collectors between + `None` so the default module-level registry is used; tests pass + a fresh `CollectorRegistry()` to isolate collectors between cases. 
Leaves counters and gauges unregistered until their first - ``MetricCounter(name, labels)`` / ``MetricGauge(name, labels)`` call + `MetricCounter(name, labels)` / `MetricGauge(name, labels)` call so test suites can re-configure without colliding on the shared prometheus_client registry. """ @@ -203,7 +203,7 @@ def configure_observability( def start_metrics_server(config: ObservabilityConfig) -> None: """Start a /metrics HTTP listener on the configured bind/port. - Separate from ``configure_observability`` because the backtest + Separate from `configure_observability` because the backtest harness configures the backend without ever binding a port. """ if config.metrics.kind != "prometheus": @@ -214,12 +214,12 @@ def start_metrics_server(config: ObservabilityConfig) -> None: class MetricCounter: - """Monotonic counter. Call ``inc`` to increment. + """Monotonic counter. Call `inc` to increment. Attributes: name: Metric name exposed to the scraper. labels: Ordered list of label keys; values are provided at - ``inc`` time via keyword arguments. + `inc` time via keyword arguments. """ def __init__(self, name: str, labels: list[str]) -> None: @@ -233,12 +233,12 @@ def inc(self, value: float = 1.0, **label_values: str | int | float) -> None: class MetricGauge: - """Instantaneous value. Call ``set`` to overwrite. + """Instantaneous value. Call `set` to overwrite. Attributes: name: Metric name exposed to the scraper. labels: Ordered list of label keys; values are provided at - ``set`` time via keyword arguments. + `set` time via keyword arguments. """ def __init__(self, name: str, labels: list[str]) -> None: diff --git a/src/augur_signals/augur_signals/bus/_lock.py b/src/augur_signals/augur_signals/bus/_lock.py new file mode 100644 index 0000000..7ac20b5 --- /dev/null +++ b/src/augur_signals/augur_signals/bus/_lock.py @@ -0,0 +1,128 @@ +"""Distributed lock primitives for active-passive singleton workers. 
+ +Dedup and the LLM formatter cannot shard; Phase 5 runs each as an +active instance with one passive peer. The pair coordinates via a +named lock stored in the message bus's metadata store: + +* NATS: JetStream KV bucket (`NATSKVLock` acquires with `kv.create` + and renews by rewriting the key; the TTL is a bucket-level setting). +* Redis: `SET key value NX EX ttl` for acquire; a `WATCH`/`MULTI` + transaction for renew/release to avoid racing another holder. + +The protocol is minimal: `acquire` returns True on success, `renew` +extends the TTL, `release` drops the key only if the caller still +holds it. A single per-bus-backend implementation is registered with +`_BACKEND` at engine startup; unit tests inject `InMemoryLock`. +""" + +from __future__ import annotations + +import asyncio +from dataclasses import dataclass, field +from typing import Protocol, runtime_checkable + + +class LockError(RuntimeError): + """Raised when the lock backend rejects an operation terminally.""" + + +@runtime_checkable +class DistributedLock(Protocol): + """Coordinates active-passive singleton ownership across processes. + + The lock identity is `(name, holder_id)`: `name` identifies the + singleton role (`"dedup"` or `"llm_formatter"`) and + `holder_id` identifies the replica attempting to hold it. Each + replica generates its own `holder_id` at process start; the + surviving peer on failover observes the abandoned lock TTL expire + and acquires on the next attempt. + """ + + async def acquire(self, name: str, holder_id: str, ttl_seconds: int) -> bool: + """Try to acquire *name* for *holder_id*; return True on success.""" + ... + + async def renew(self, name: str, holder_id: str, ttl_seconds: int) -> bool: + """Extend the TTL; return True if *holder_id* still owns *name*.""" + ... + + async def release(self, name: str, holder_id: str) -> None: + """Release *name* iff owned by *holder_id*. No-op otherwise.""" + ... + + async def holder(self, name: str) -> str | None: + """Return the current holder, or None if the lock is free.""" + ...
+ + +@dataclass(slots=True) +class _LockState: + holder: str + expires_at: float + + +@dataclass(slots=True) +class InMemoryLock: + """Single-process reference lock. + + Used by tests and by single-process deployments that still exercise + the active-passive pair code paths (for example, during local + smoke tests where both the active and passive live in the same + process). The lock honours TTLs against an injected clock so tests + can simulate failover without real time passing. + """ + + _locks: dict[str, _LockState] = field(default_factory=dict) + _mutex: asyncio.Lock = field(default_factory=asyncio.Lock) + _clock: _Clock | None = None + + def __post_init__(self) -> None: + if self._clock is None: + self._clock = _WallClock() + + async def acquire(self, name: str, holder_id: str, ttl_seconds: int) -> bool: + async with self._mutex: + now = self._now() + state = self._locks.get(name) + if state is not None and state.expires_at > now and state.holder != holder_id: + return False + self._locks[name] = _LockState(holder=holder_id, expires_at=now + float(ttl_seconds)) + return True + + async def renew(self, name: str, holder_id: str, ttl_seconds: int) -> bool: + async with self._mutex: + state = self._locks.get(name) + if state is None or state.holder != holder_id: + return False + state.expires_at = self._now() + float(ttl_seconds) + return True + + async def release(self, name: str, holder_id: str) -> None: + async with self._mutex: + state = self._locks.get(name) + if state is not None and state.holder == holder_id: + del self._locks[name] + + async def holder(self, name: str) -> str | None: + async with self._mutex: + state = self._locks.get(name) + if state is None or state.expires_at <= self._now(): + return None + return state.holder + + def _now(self) -> float: + clock = self._clock + assert clock is not None # noqa: S101 — init guarantees non-None + return clock.now() + + +@runtime_checkable +class _Clock(Protocol): + def now(self) -> float: ... 
+ + +class _WallClock: + def now(self) -> float: + import time + + return time.monotonic() diff --git a/src/augur_signals/augur_signals/bus/base.py b/src/augur_signals/augur_signals/bus/base.py new file mode 100644 index 0000000..d628c4c --- /dev/null +++ b/src/augur_signals/augur_signals/bus/base.py @@ -0,0 +1,84 @@ +"""Generic subject-addressed async event bus protocol. + +The Phase 1 InProcessAsyncBus in memory.py moves typed +MarketSignal objects and remains the monolith's transport. Phase 5 +adds a separate byte-level protocol, EventBus, that workers use to +publish to and subscribe from named subjects. Serialization lives at +the worker boundary; the bus itself is agnostic. + +Every adapter (NATS, Redis Streams, in-process for tests) implements +the same protocol so make_bus selects at startup and the workers +stay backend-agnostic. The subject naming scheme matches +`.docs/phase-5-scaling.md §4.3`: + +* augur.snapshots.. +* augur.features. +* augur.candidates. +* augur.flagged_signals +* augur.calibrated_signals +* augur.signals +* augur.contexts +* augur.briefs. +* augur.ops.events +""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from dataclasses import dataclass +from typing import Protocol, runtime_checkable + + +@dataclass(frozen=True, slots=True) +class BusMessage: + """One envelope on the wire. + + Attributes: + subject: Full subject the publisher routed to. + payload: Raw body; producers serialize before publish and + consumers deserialize after subscribe. + headers: Optional small key/value metadata. NATS supports this + natively; the Redis adapter encodes it as hash fields on + the stream entry. + """ + + subject: str + payload: bytes + headers: dict[str, str] | None = None + + +@runtime_checkable +class EventBus(Protocol): + """Byte-level pub/sub transport used by multi-process workers. 
+ + Implementations are at-least-once: a consumer that crashes before + acknowledging a message will see it redelivered on restart. Order + is preserved per subject for a single subscriber; no global order + guarantee. + """ + + async def connect(self) -> None: + """Open connections, declare streams, and attach consumer groups.""" + ... + + async def close(self) -> None: + """Flush pending publishes and close connections.""" + ... + + async def publish(self, message: BusMessage) -> None: + """Publish *message* to its subject.""" + ... + + def subscribe(self, subject_pattern: str, consumer_group: str) -> AsyncIterator[BusMessage]: + """Yield messages matching *subject_pattern* on *consumer_group*. + + The iterator is async and terminates when the bus closes + or the caller cancels the underlying task. The adapter + acknowledges each message after the consumer's async for + body returns without raising. + """ + ... + + +class BusError(RuntimeError): + """Raised by adapter code when a bus operation fails terminally.""" diff --git a/src/augur_signals/augur_signals/bus/factory.py b/src/augur_signals/augur_signals/bus/factory.py new file mode 100644 index 0000000..27f42e2 --- /dev/null +++ b/src/augur_signals/augur_signals/bus/factory.py @@ -0,0 +1,43 @@ +"""Factory that selects an EventBus implementation from ``BusConfig``. + +Call from the worker startup path: + + from augur_signals._config import load_config + from augur_signals.bus._config import BusConfig + from augur_signals.bus.factory import make_event_bus + + bus_config = load_config(Path("config/bus.toml"), BusConfig) + bus = make_event_bus(bus_config) + await bus.connect() + +The monolith engine does not use this factory; it instantiates +``InProcessAsyncBus`` directly with its native ``MarketSignal`` +interface. Phase 5 workers use the byte-level ``EventBus`` protocol +and select a backend via this factory at startup. 
+""" + +from __future__ import annotations + +from augur_signals.bus._config import BusConfig +from augur_signals.bus.base import BusError, EventBus +from augur_signals.bus.nats import NATSBus +from augur_signals.bus.redis_streams import RedisStreamsBus + + +def make_event_bus(config: BusConfig) -> EventBus: + """Return an ``EventBus`` implementation selected by *config*. + + The ``"memory"`` variant of ``BusConfig`` is reserved for the + monolith engine's in-process bus and is not served by this + factory; callers that pass it receive ``BusError`` because they + should reach for ``InProcessAsyncBus`` in ``bus/memory.py`` + directly. + """ + if config.backend.kind == "nats": + return NATSBus(config.nats) + if config.backend.kind == "redis": + return RedisStreamsBus(config.redis) + raise BusError( + f"EventBus factory does not serve backend {config.backend.kind!r}; " + "use InProcessAsyncBus for the single-process engine." + ) diff --git a/src/augur_signals/augur_signals/bus/nats.py b/src/augur_signals/augur_signals/bus/nats.py new file mode 100644 index 0000000..500031e --- /dev/null +++ b/src/augur_signals/augur_signals/bus/nats.py @@ -0,0 +1,145 @@ +"""NATS JetStream adapter for the EventBus protocol. + +The adapter treats one JetStream stream as the transport for every +Augur subject. Producers publish via `js.publish`; consumers create +durable pull consumers keyed by `(subject_pattern, consumer_group)` +and pull messages in a bounded loop. + +`nats-py` is imported lazily because `augur-signals` keeps it as +an optional dependency — a memory-backed deployment should not pull +the protobuf stack just to start. Unit tests inject a fake client via +`NATSBus(client=...)` so they exercise the adapter without a live +JetStream cluster. 
+""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from typing import TYPE_CHECKING, Any + +from augur_signals.bus._config import NATSBody +from augur_signals.bus._lock import DistributedLock, LockError +from augur_signals.bus.base import BusError, BusMessage, EventBus + +if TYPE_CHECKING: + from nats.aio.client import Client as NATSClient + + +class NATSBus(EventBus): + """EventBus backed by NATS JetStream. + + Attributes: + config: Validated `NATSBody` loaded from `config/bus.toml`. + """ + + def __init__(self, config: NATSBody, *, client: NATSClient | None = None) -> None: + self.config = config + self._client = client + self._js: Any | None = None + self._connected = self._client is not None + + async def connect(self) -> None: + if self._connected and self._js is not None: + return + if self._client is None: + import nats + + self._client = await nats.connect(servers=list(self.config.servers)) + self._js = self._client.jetstream() + await self._js.add_stream( + name=self.config.stream_name, + subjects=[f"{self.config.subject_prefix}.>"], + num_replicas=self.config.replication_factor, + ) + self._connected = True + + async def close(self) -> None: + if self._client is not None: + await self._client.drain() + self._connected = False + + async def publish(self, message: BusMessage) -> None: + if self._js is None: + raise BusError("NATSBus.connect() must be called before publish()") + headers = message.headers or None + await self._js.publish(message.subject, message.payload, headers=headers) + + async def subscribe( + self, subject_pattern: str, consumer_group: str + ) -> AsyncIterator[BusMessage]: + if self._js is None: + raise BusError("NATSBus.connect() must be called before subscribe()") + sub = await self._js.pull_subscribe(subject_pattern, durable=consumer_group) + import asyncio as _asyncio + + try: + while True: + msgs = await sub.fetch(batch=1, timeout=1) + if not msgs: + # Yield control so an outer cancellation or 
break can + # observe the generator between empty-fetch polls. + await _asyncio.sleep(0) + continue + for msg in msgs: + yield BusMessage( + subject=msg.subject, + payload=msg.data, + headers=dict(msg.headers) if msg.headers else None, + ) + await msg.ack() + finally: + await sub.unsubscribe() + + +class NATSKVLock(DistributedLock): + """Distributed lock backed by a NATS JetStream KV bucket.""" + + def __init__(self, bucket_name: str, *, client: NATSClient | None = None) -> None: + self._bucket_name = bucket_name + self._client = client + self._kv: Any | None = None + + async def connect(self) -> None: + if self._client is None: + import nats + + self._client = await nats.connect() + js = self._client.jetstream() + self._kv = await js.create_key_value(bucket=self._bucket_name) + + async def acquire(self, name: str, holder_id: str, ttl_seconds: int) -> bool: + _ = ttl_seconds # TTL is configured on the bucket at create_key_value time. + if self._kv is None: + raise LockError("NATSKVLock.connect() must be called before acquire()") + try: + await self._kv.create(name, holder_id.encode("utf-8")) + except Exception: + return False + return True + + async def renew(self, name: str, holder_id: str, ttl_seconds: int) -> bool: + _ = ttl_seconds + if self._kv is None: + raise LockError("NATSKVLock.connect() must be called before renew()") + entry = await self._kv.get(name) + if entry is None or entry.value.decode("utf-8") != holder_id: + return False + await self._kv.put(name, holder_id.encode("utf-8")) + return True + + async def release(self, name: str, holder_id: str) -> None: + if self._kv is None: + raise LockError("NATSKVLock.connect() must be called before release()") + entry = await self._kv.get(name) + if entry is None or entry.value.decode("utf-8") != holder_id: + return + await self._kv.delete(name) + + async def holder(self, name: str) -> str | None: + if self._kv is None: + raise LockError("NATSKVLock.connect() must be called before holder()") + entry = await 
self._kv.get(name) + if entry is None: + return None + value: str = entry.value.decode("utf-8") + return value diff --git a/src/augur_signals/augur_signals/bus/redis_streams.py b/src/augur_signals/augur_signals/bus/redis_streams.py new file mode 100644 index 0000000..98a2c8e --- /dev/null +++ b/src/augur_signals/augur_signals/bus/redis_streams.py @@ -0,0 +1,226 @@ +"""Redis Streams adapter for the EventBus protocol. + +The adapter maps each Augur subject to one Redis stream and uses +consumer groups for at-least-once semantics. `XADD` writes the +payload as a hash field (`p`) with optional headers under `h.*`; +`XREADGROUP` pulls entries and `XACK` acknowledges on successful +processing. + +Redis supports subject *patterns* only through multi-stream watches +(`XREAD` against many streams). The adapter takes the literal +subject for now since the Phase 5 subject layout uses a static set of +stream names (`augur.snapshots..` becomes a +single stream keyed by full subject). A fan-in pattern needs a broker +upgrade — out of scope for Phase 5. 
+""" + +from __future__ import annotations + +import asyncio +from collections.abc import AsyncIterator +from typing import TYPE_CHECKING, Any + +from augur_signals.bus._config import RedisBody +from augur_signals.bus._lock import DistributedLock, LockError +from augur_signals.bus.base import BusError, BusMessage, EventBus + +if TYPE_CHECKING: + from redis.asyncio import Redis + + +def _encode_message(message: BusMessage) -> dict[str | bytes, str | bytes]: + fields: dict[str | bytes, str | bytes] = {"p": message.payload} + if message.headers: + for key, value in message.headers.items(): + fields[f"h.{key}"] = value.encode("utf-8") + return fields + + +def _decode_message(subject: str, fields: dict[bytes | str, bytes | str]) -> BusMessage: + payload = _coerce_bytes(fields[b"p"] if b"p" in fields else fields["p"]) + headers: dict[str, str] = {} + for key, value in fields.items(): + key_str = key.decode("utf-8") if isinstance(key, bytes) else key + if key_str.startswith("h."): + headers[key_str[2:]] = value.decode("utf-8") if isinstance(value, bytes) else value + return BusMessage( + subject=subject, + payload=payload, + headers=headers or None, + ) + + +def _coerce_bytes(value: bytes | str) -> bytes: + return value if isinstance(value, bytes) else value.encode("utf-8") + + +class RedisStreamsBus(EventBus): + """EventBus backed by Redis Streams. + + Attributes: + config: Validated `RedisBody` loaded from `config/bus.toml`. 
+ """ + + def __init__(self, config: RedisBody, *, client: Redis | None = None) -> None: + self.config = config + self._client = client + self._connected = client is not None + + async def connect(self) -> None: + if self._connected: + return + if self._client is None: + import os + + import redis.asyncio as redis_asyncio + + url = os.environ[self.config.url_env] + self._client = redis_asyncio.from_url(url) + self._connected = True + + async def close(self) -> None: + if self._client is not None: + await self._client.aclose() + self._connected = False + + async def publish(self, message: BusMessage) -> None: + if self._client is None: + raise BusError("RedisStreamsBus.connect() must be called before publish()") + # redis-py's FieldT / EncodableT TypeVars pin to a broader union + # than our helper returns; cast through Any so the adapter stays + # generic over str|bytes keys without duplicating the union. + fields: Any = _encode_message(message) + await self._client.xadd( + message.subject, + fields, + maxlen=self.config.stream_max_length, + approximate=True, + ) + + async def subscribe( + self, subject_pattern: str, consumer_group: str + ) -> AsyncIterator[BusMessage]: + if self._client is None: + raise BusError("RedisStreamsBus.connect() must be called before subscribe()") + group = f"{self.config.consumer_group_prefix}.{consumer_group}" + consumer = f"{group}-consumer" + try: + await self._client.xgroup_create(subject_pattern, group, id="0", mkstream=True) + except Exception as exc: + if "BUSYGROUP" not in str(exc): + raise BusError(f"Failed to create consumer group {group}") from exc + # Redis consumer groups persist across restarts; nothing to + # tear down in a finally block. The loop exits on cancellation + # propagated from the caller's async-for. 
+ while True: + entries = await self._client.xreadgroup( + groupname=group, + consumername=consumer, + streams={subject_pattern: ">"}, + count=1, + block=self.config.block_ms, + ) + if not entries: + # Yield control so an outer cancellation can fire. + await asyncio.sleep(0) + continue + for _stream, messages in entries: + for msg_id, fields in messages: + message = _decode_message(subject_pattern, fields) + yield message + await self._client.xack(subject_pattern, group, msg_id) + + +class RedisLock(DistributedLock): + """Distributed lock backed by Redis `SET NX EX` with CAS renew/release. + + `acquire` uses `SET key value NX EX ttl` which is atomic at the + server. `renew` and `release` use `WATCH` + `MULTI` / EXEC + so the current holder check and the mutating command commit + together; a concurrent owner swap invalidates the transaction and + the operation is aborted without side effects. Using WATCH rather + than `EVAL` keeps the adapter compatible with Redis deployments + that restrict scripting (and with in-memory fakes that do not ship + a Lua interpreter). + """ + + def __init__(self, *, client: Redis, key_prefix: str = "augur.lock.") -> None: + self._client = client + self._key_prefix = key_prefix + + def _key(self, name: str) -> str: + return f"{self._key_prefix}{name}" + + @staticmethod + def _matches(current: bytes | str | None, holder_id: str) -> bool: + if current is None: + return False + value = current.decode("utf-8") if isinstance(current, bytes) else current + return value == holder_id + + async def acquire(self, name: str, holder_id: str, ttl_seconds: int) -> bool: + result = await self._client.set(self._key(name), holder_id, nx=True, ex=ttl_seconds) + return bool(result) + + async def renew(self, name: str, holder_id: str, ttl_seconds: int) -> bool: + # redis-py's pipeline helpers are untyped; cast through Any so the + # CAS retry stays readable without per-call mypy suppressions. 
+ pipe: Any + async with self._client.pipeline() as pipe: + key = self._key(name) + while True: + try: + await pipe.watch(key) + current = await pipe.get(key) + if not self._matches(current, holder_id): + await pipe.unwatch() + return False + pipe.multi() + pipe.pexpire(key, ttl_seconds * 1000) + result = await pipe.execute() + return bool(result and result[0]) + except Exception as exc: + if "WatchError" in type(exc).__name__: + continue + raise + + async def release(self, name: str, holder_id: str) -> None: + pipe: Any + async with self._client.pipeline() as pipe: + key = self._key(name) + while True: + try: + await pipe.watch(key) + current = await pipe.get(key) + if not self._matches(current, holder_id): + await pipe.unwatch() + return + pipe.multi() + pipe.delete(key) + await pipe.execute() + return + except Exception as exc: + if "WatchError" in type(exc).__name__: + continue + raise + + async def holder(self, name: str) -> str | None: + value = await self._client.get(f"{self._key_prefix}{name}") + if value is None: + return None + if isinstance(value, bytes): + return value.decode("utf-8") + result: str = value + return result + + +def make_redis_lock(client: Redis, key_prefix: str = "augur.lock.") -> RedisLock: + """Construct a `RedisLock` bound to *client*. + + Kept separate from the constructor so tests can thread the same + fakeredis instance into both the bus and the lock without tripping + protocol-variance checks. 
+ """ + if client is None: + raise LockError("RedisLock requires an explicit redis client") + return RedisLock(client=client, key_prefix=key_prefix) diff --git a/src/augur_signals/augur_signals/calibration/drift_monitor.py b/src/augur_signals/augur_signals/calibration/drift_monitor.py index e642e95..7212caf 100644 --- a/src/augur_signals/augur_signals/calibration/drift_monitor.py +++ b/src/augur_signals/augur_signals/calibration/drift_monitor.py @@ -3,7 +3,7 @@ Computes Population Stability Index (PSI) and a Kolmogorov-Smirnov statistic over baseline vs current score populations. When either metric exceeds its configured threshold, the monitor flags a -``CalibrationStaleEvent`` for operations review so the detector +`CalibrationStaleEvent` for operations review so the detector thresholds can be retuned. """ diff --git a/src/augur_signals/augur_signals/calibration/empirical_fpr.py b/src/augur_signals/augur_signals/calibration/empirical_fpr.py index 1520337..4189729 100644 --- a/src/augur_signals/augur_signals/calibration/empirical_fpr.py +++ b/src/augur_signals/augur_signals/calibration/empirical_fpr.py @@ -47,10 +47,10 @@ def compute_empirical_fpr( ) -> FPRRecord: """FP / (FP + TN) per docs/methodology/labeling-protocol.md §True Positive. - A detector firing at ``t_signal`` is a true positive if some labeled - event for the same market occurred in ``[t_signal, t_signal + lead_window]``. + A detector firing at `t_signal` is a true positive if some labeled + event for the same market occurred in `[t_signal, t_signal + lead_window]`. All other firings are false positives; every observation window - without a label in range contributes to the TN denominator. ``now`` + without a label in range contributes to the TN denominator. `now` is a required parameter so every FPRRecord's computed_at is deterministic across backtest replays — matching the pipeline-wide "now as a parameter" invariant. 
diff --git a/src/augur_signals/augur_signals/calibration/fdr_controller.py b/src/augur_signals/augur_signals/calibration/fdr_controller.py index 9e2a351..ec1c401 100644 --- a/src/augur_signals/augur_signals/calibration/fdr_controller.py +++ b/src/augur_signals/augur_signals/calibration/fdr_controller.py @@ -1,8 +1,8 @@ """Benjamini-Hochberg FDR controller shared across detectors. Detectors that batch p-values per polling cycle submit -``(signal_id, p_value)`` pairs via :meth:`submit_pvalues`; the -controller applies BH correction at the configured target ``q`` and +`(signal_id, p_value)` pairs via :meth:`submit_pvalues`; the +controller applies BH correction at the configured target `q` and returns the set of signal IDs that pass. See docs/methodology/calibration-methodology.md §BH-FDR for the rationale. """ @@ -15,11 +15,11 @@ def benjamini_hochberg(p_values: Sequence[float], q: float) -> list[bool]: - """Return a boolean mask marking each hypothesis accepted at FDR ``q``. + """Return a boolean mask marking each hypothesis accepted at FDR `q`. Implements the Benjamini-Hochberg step-up procedure: sort p-values - ascending, find the largest rank ``k`` such that ``p_(k) ≤ (k/m) q``, - accept all hypotheses whose p-value is at most ``p_(k)``. + ascending, find the largest rank `k` such that `p_(k) ≤ (k/m) q`, + accept all hypotheses whose p-value is at most `p_(k)`. """ m = len(p_values) if m == 0: diff --git a/src/augur_signals/augur_signals/calibration/reliability.py b/src/augur_signals/augur_signals/calibration/reliability.py index 350d16b..cf4d8f3 100644 --- a/src/augur_signals/augur_signals/calibration/reliability.py +++ b/src/augur_signals/augur_signals/calibration/reliability.py @@ -1,7 +1,7 @@ """Reliability curves per (detector, liquidity_tier). -Phase 1 ships with an identity-curve placeholder: ``calibrate(score) = -score`` with ``curve_version = "identity_v0"``. 
This satisfies the +Phase 1 ships with an identity-curve placeholder: `calibrate(score) = +score` with `curve_version = "identity_v0"`. This satisfies the MarketSignal calibration_provenance invariant during the warmup period before real curves can be built from a labeled corpus. Subsequent workstreams consume labels to fit empirical curves, which are then diff --git a/src/augur_signals/augur_signals/context/taxonomy.py b/src/augur_signals/augur_signals/context/taxonomy.py index 00a055d..e22ae8a 100644 --- a/src/augur_signals/augur_signals/context/taxonomy.py +++ b/src/augur_signals/augur_signals/context/taxonomy.py @@ -1,7 +1,7 @@ """Curated market-taxonomy loader. -Reads edges from ``config/markets.toml``'s ``[[relationships]]`` blocks -or a dedicated taxonomy file. Only ``manual`` edges are supported in +Reads edges from `config/markets.toml`'s `[[relationships]]` blocks +or a dedicated taxonomy file. Only `manual` edges are supported in this workstream; embedding-derived edges land alongside the LLM formatter work. """ diff --git a/src/augur_signals/augur_signals/dedup/cluster.py b/src/augur_signals/augur_signals/dedup/cluster.py index 8379e2b..e40f4a4 100644 --- a/src/augur_signals/augur_signals/dedup/cluster.py +++ b/src/augur_signals/augur_signals/dedup/cluster.py @@ -27,7 +27,7 @@ def __init__(self, edges: Mapping[str, list[tuple[str, str]]]) -> None: self._edges = dict(edges) def related(self, market_id: str) -> list[tuple[str, str]]: - """Return the list of ``(other_market_id, relationship_type)`` edges.""" + """Return the list of `(other_market_id, relationship_type)` edges.""" return list(self._edges.get(market_id, [])) diff --git a/src/augur_signals/augur_signals/dedup/fingerprint.py b/src/augur_signals/augur_signals/dedup/fingerprint.py index b843121..deee915 100644 --- a/src/augur_signals/augur_signals/dedup/fingerprint.py +++ b/src/augur_signals/augur_signals/dedup/fingerprint.py @@ -1,7 +1,7 @@ """Exact-fingerprint deduplication of raw signals. 
-Two raw signals are duplicates if they share ``(market_id, signal_type, -time_bucket(detected_at, bucket_seconds))``. Merge rules per +Two raw signals are duplicates if they share `(market_id, signal_type, +time_bucket(detected_at, bucket_seconds))`. Merge rules per docs/architecture/deduplication-and-storms.md §Signal Fingerprint: take the max magnitude, max confidence, union of manipulation_flags, union of related_market_ids, earliest detected_at, smallest diff --git a/src/augur_signals/augur_signals/detectors/_bocpd.py b/src/augur_signals/augur_signals/detectors/_bocpd.py index 0c010f2..a5d17f0 100644 --- a/src/augur_signals/augur_signals/detectors/_bocpd.py +++ b/src/augur_signals/augur_signals/detectors/_bocpd.py @@ -4,10 +4,10 @@ Detection", arXiv 0710.3742), adapted for observations in [0, 1]. Each observation x is treated as the probability of a single Bernoulli trial so the conjugate Beta-Binomial predictive -``alpha / (alpha + beta) * x + beta / (alpha + beta) * (1 - x)`` +`alpha / (alpha + beta) * x + beta / (alpha + beta) * (1 - x)` applies directly. -The run-length distribution is capped at ``run_length_cap`` so memory +The run-length distribution is capped at `run_length_cap` so memory is bounded; for a hazard of 1/250 and a cap of 1000 the truncation error on the fire decision is negligible (<1e-6). """ @@ -46,7 +46,7 @@ def __init__( def update(self, observation: float) -> tuple[float, float]: """Process one observation. - Returns the tuple ``(P(r_t < 5), E[r_t])`` where ``r_t`` is the + Returns the tuple `(P(r_t < 5), E[r_t])` where `r_t` is the run length in observations since the last change point. """ if not 0.0 <= observation <= 1.0: @@ -64,7 +64,7 @@ def update(self, observation: float) -> tuple[float, float]: new_pr[0] = change_mass # Growth shifts run length up by one. 
Mass that would otherwise # land at cap+1 is absorbed back into the cap bucket so the - # run-length distribution does not leak probability as ``t`` grows + # run-length distribution does not leak probability as `t` grows # past the cap. for i in range(1, self._cap): new_pr[i] = growth[i - 1] diff --git a/src/augur_signals/augur_signals/detectors/_cusum.py b/src/augur_signals/augur_signals/detectors/_cusum.py index c655d6a..bb3479a 100644 --- a/src/augur_signals/augur_signals/detectors/_cusum.py +++ b/src/augur_signals/augur_signals/detectors/_cusum.py @@ -1,8 +1,8 @@ """Two-sided CUSUM for detecting sustained shifts in a running mean. Standard formulation: maintain positive and negative cumulative sums, -reset when they cross a control threshold ``h * sigma``. ``k`` is the -allowable slack below which no accumulation happens; together ``(k, h)`` +reset when they cross a control threshold `h * sigma`. `k` is the +allowable slack below which no accumulation happens; together `(k, h)` trade off detection speed against false-positive rate. """ diff --git a/src/augur_signals/augur_signals/detectors/base.py b/src/augur_signals/augur_signals/detectors/base.py index 22bee86..b0296fb 100644 --- a/src/augur_signals/augur_signals/detectors/base.py +++ b/src/augur_signals/augur_signals/detectors/base.py @@ -1,14 +1,14 @@ """SignalDetector protocol. -Every detector implements this surface. ``now`` is a parameter rather -than sourced from ``datetime.now()`` so backtests reproduce live +Every detector implements this surface. `now` is a parameter rather +than sourced from `datetime.now()` so backtests reproduce live behavior bit-for-bit; the CI AST lint in scripts/ rejects any detector -module that calls ``datetime.now()`` directly. +module that calls `datetime.now()` directly. 
-Each detector is stateful per market (``state_dict`` / ``load_state`` +Each detector is stateful per market (`state_dict` / `load_state` so detector progress survives process restarts) and serializable for -the engine's periodic checkpoint. Detectors return ``None`` when no -signal fires; a ``MarketSignal`` instance carries the full calibrated +the engine's periodic checkpoint. Detectors return `None` when no +signal fires; a `MarketSignal` instance carries the full calibrated event per docs/contracts/schema-and-versioning.md §MarketSignal. """ diff --git a/src/augur_signals/augur_signals/detectors/book_imbalance.py b/src/augur_signals/augur_signals/detectors/book_imbalance.py index 5e110ec..2200360 100644 --- a/src/augur_signals/augur_signals/detectors/book_imbalance.py +++ b/src/augur_signals/augur_signals/detectors/book_imbalance.py @@ -3,7 +3,7 @@ Signals fire only when (1) the market has sufficient total resting depth (the depth gate keeps the detector silent on thin books where the imbalance is likely a manipulation artifact), and (2) the -imbalance persists for ``persistence_snapshots`` consecutive ticks. +imbalance persists for `persistence_snapshots` consecutive ticks. """ from __future__ import annotations diff --git a/src/augur_signals/augur_signals/detectors/cross_market.py b/src/augur_signals/augur_signals/detectors/cross_market.py index ce48b1e..6bc42f1 100644 --- a/src/augur_signals/augur_signals/detectors/cross_market.py +++ b/src/augur_signals/augur_signals/detectors/cross_market.py @@ -6,7 +6,7 @@ above the threshold, the detector computes the current Spearman rank correlation, applies the Fisher-z transform, and compares the z to the prior z. Pairs whose divergence p-value survives BH-FDR at the target -``q`` produce signals per docs/methodology/calibration-methodology.md +`q` produce signals per docs/methodology/calibration-methodology.md §Cross-Market Divergence. 
""" diff --git a/src/augur_signals/augur_signals/detectors/price_velocity.py b/src/augur_signals/augur_signals/detectors/price_velocity.py index f49a2d5..3cb4c1b 100644 --- a/src/augur_signals/augur_signals/detectors/price_velocity.py +++ b/src/augur_signals/augur_signals/detectors/price_velocity.py @@ -7,7 +7,7 @@ does not fire repeatedly. The pre-resolution exclusion (6 h before market close) is enforced -inside ``ingest`` so a signal in the window is never returned, +inside `ingest` so a signal in the window is never returned, regardless of the posterior probability. """ diff --git a/src/augur_signals/augur_signals/engine.py b/src/augur_signals/augur_signals/engine.py index 2334de6..89d54ad 100644 --- a/src/augur_signals/augur_signals/engine.py +++ b/src/augur_signals/augur_signals/engine.py @@ -5,7 +5,7 @@ orchestrator is single-process; the multi-process runtime swaps the bus and storage adapters without touching this module. -``now`` threads through every downstream call as a parameter so the +`now` threads through every downstream call as a parameter so the backtest harness and the live engine traverse the same code with deterministic timing. """ diff --git a/src/augur_signals/augur_signals/ingestion/retry.py b/src/augur_signals/augur_signals/ingestion/retry.py index e466dcf..95e7581 100644 --- a/src/augur_signals/augur_signals/ingestion/retry.py +++ b/src/augur_signals/augur_signals/ingestion/retry.py @@ -53,7 +53,7 @@ async def with_backoff[T]( The factory's eventual return value. Raises: - RetryExhaustedError: Every attempt up to ``policy.max_retries`` + RetryExhaustedError: Every attempt up to `policy.max_retries` has failed. The last exception is attached. 
""" delay = policy.initial_seconds diff --git a/src/augur_signals/augur_signals/manipulation/detector.py b/src/augur_signals/augur_signals/manipulation/detector.py index bf560a0..fdb19a9 100644 --- a/src/augur_signals/augur_signals/manipulation/detector.py +++ b/src/augur_signals/augur_signals/manipulation/detector.py @@ -72,7 +72,7 @@ def evaluate( def attach_flags(signal: MarketSignal, flags: list[ManipulationFlag]) -> MarketSignal: """Return a new MarketSignal with *flags* attached. - MarketSignal is frozen; the update must go through ``model_copy`` + MarketSignal is frozen; the update must go through `model_copy` so Pydantic re-runs the calibration_provenance validator on the result. """ diff --git a/src/augur_signals/augur_signals/manipulation/signatures.py b/src/augur_signals/augur_signals/manipulation/signatures.py index 334a4de..af24250 100644 --- a/src/augur_signals/augur_signals/manipulation/signatures.py +++ b/src/augur_signals/augur_signals/manipulation/signatures.py @@ -51,7 +51,7 @@ def size_vs_depth_outlier( prior_book_depth: float, threshold_ratio: float, ) -> bool: - """True when a single trade consumed more than ``threshold_ratio`` of depth.""" + """True when a single trade consumed more than `threshold_ratio` of depth.""" if prior_book_depth <= 0.0: return False return (trade.size / prior_book_depth) > threshold_ratio diff --git a/src/augur_signals/augur_signals/models/_identifiers.py b/src/augur_signals/augur_signals/models/_identifiers.py index 626ba72..00f2ea1 100644 --- a/src/augur_signals/augur_signals/models/_identifiers.py +++ b/src/augur_signals/augur_signals/models/_identifiers.py @@ -1,6 +1,6 @@ """Identifier helpers for signals and related entities. -``uuid7`` is time-ordered, which lets the bus, storage, and archive +`uuid7` is time-ordered, which lets the bus, storage, and archive sort by identifier and still recover temporal order. 
This is load- bearing for backtest replay determinism: the (detected_at, signal_id) pair is stable and reproducible. diff --git a/src/augur_signals/augur_signals/storage/_config.py b/src/augur_signals/augur_signals/storage/_config.py index a1d9bc3..278849f 100644 --- a/src/augur_signals/augur_signals/storage/_config.py +++ b/src/augur_signals/augur_signals/storage/_config.py @@ -1,8 +1,8 @@ """Configuration model for storage backend selection. Schema mirrors `config/storage.toml`. The Phase 1-4 monolith reads -``backend.kind == "duckdb"``; Phase 5 cutover flips it to -``"timescaledb"``. See `.docs/phase-5-scaling.md §5` for the cutover +`backend.kind == "duckdb"`; Phase 5 cutover flips it to +`"timescaledb"`. See `.docs/phase-5-scaling.md §5` for the cutover procedure and rollback constraints. """ diff --git a/src/augur_signals/augur_signals/storage/duckdb_store.py b/src/augur_signals/augur_signals/storage/duckdb_store.py index 1f6e20e..913807c 100644 --- a/src/augur_signals/augur_signals/storage/duckdb_store.py +++ b/src/augur_signals/augur_signals/storage/duckdb_store.py @@ -1,8 +1,8 @@ """DuckDB-backed persistence for snapshots, features, signals, and calibration state. Schema mirrors docs/architecture/system-design.md §Storage Schema. -Migrations are version-numbered and idempotent; the ``initialize`` -method advances the ``schema_version`` table and applies pending +Migrations are version-numbered and idempotent; the `initialize` +method advances the `schema_version` table and applies pending migrations in order. """ diff --git a/tests/conftest.py b/tests/conftest.py index 4a4c669..3cef4bb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,7 @@ def _configure_logging_for_tests() -> None: """Initialize structlog once per session at WARNING. Individual tests that exercise logging output reconfigure to the - level they need via ``augur_signals._logging.configure_logging``. + level they need via `augur_signals._logging.configure_logging`. 
""" from augur_signals._logging import configure_logging diff --git a/tests/signals/test_bus_factory.py b/tests/signals/test_bus_factory.py new file mode 100644 index 0000000..a9d4476 --- /dev/null +++ b/tests/signals/test_bus_factory.py @@ -0,0 +1,38 @@ +"""Tests for ``make_event_bus`` factory routing.""" + +from __future__ import annotations + +import pytest + +from augur_signals.bus._config import BackendBody, BusConfig, NATSBody, RedisBody +from augur_signals.bus.base import BusError +from augur_signals.bus.factory import make_event_bus +from augur_signals.bus.nats import NATSBus +from augur_signals.bus.redis_streams import RedisStreamsBus + + +@pytest.mark.unit +def test_factory_returns_nats_bus_for_nats_backend() -> None: + cfg = BusConfig( + backend=BackendBody(kind="nats"), + nats=NATSBody(servers=["nats://example:4222"], stream_name="augur"), + ) + bus = make_event_bus(cfg) + assert isinstance(bus, NATSBus) + + +@pytest.mark.unit +def test_factory_returns_redis_bus_for_redis_backend() -> None: + cfg = BusConfig( + backend=BackendBody(kind="redis"), + redis=RedisBody(url_env="REDIS_URL"), + ) + bus = make_event_bus(cfg) + assert isinstance(bus, RedisStreamsBus) + + +@pytest.mark.unit +def test_factory_rejects_memory_kind_with_clear_redirect() -> None: + cfg = BusConfig(backend=BackendBody(kind="memory", capacity=64)) + with pytest.raises(BusError, match="InProcessAsyncBus"): + make_event_bus(cfg) diff --git a/tests/signals/test_bus_lock.py b/tests/signals/test_bus_lock.py new file mode 100644 index 0000000..5cb406f --- /dev/null +++ b/tests/signals/test_bus_lock.py @@ -0,0 +1,89 @@ +"""Tests for the distributed lock protocol and in-memory backend.""" + +from __future__ import annotations + +from dataclasses import dataclass + +import pytest + +from augur_signals.bus._lock import InMemoryLock + + +@dataclass +class _ManualClock: + t: float = 0.0 + + def now(self) -> float: + return self.t + + def advance(self, seconds: float) -> None: + self.t += seconds + + 
+@pytest.mark.asyncio +async def test_acquire_succeeds_when_lock_free() -> None: + lock = InMemoryLock() + assert await lock.acquire("dedup", "replica-a", ttl_seconds=30) is True + assert await lock.holder("dedup") == "replica-a" + + +@pytest.mark.asyncio +async def test_acquire_fails_when_another_holder_active() -> None: + lock = InMemoryLock() + await lock.acquire("dedup", "replica-a", ttl_seconds=30) + assert await lock.acquire("dedup", "replica-b", ttl_seconds=30) is False + + +@pytest.mark.asyncio +async def test_acquire_succeeds_after_ttl_expires() -> None: + clock = _ManualClock() + lock = InMemoryLock(_clock=clock) + await lock.acquire("dedup", "replica-a", ttl_seconds=30) + clock.advance(31) + assert await lock.acquire("dedup", "replica-b", ttl_seconds=30) is True + assert await lock.holder("dedup") == "replica-b" + + +@pytest.mark.asyncio +async def test_renew_extends_ttl_when_still_owner() -> None: + clock = _ManualClock() + lock = InMemoryLock(_clock=clock) + await lock.acquire("dedup", "replica-a", ttl_seconds=30) + clock.advance(20) + assert await lock.renew("dedup", "replica-a", ttl_seconds=30) is True + clock.advance(25) + # Would have expired without renew, still held. 
+ assert await lock.holder("dedup") == "replica-a" + + +@pytest.mark.asyncio +async def test_renew_rejects_stale_holder() -> None: + clock = _ManualClock() + lock = InMemoryLock(_clock=clock) + await lock.acquire("dedup", "replica-a", ttl_seconds=30) + clock.advance(31) + await lock.acquire("dedup", "replica-b", ttl_seconds=30) + assert await lock.renew("dedup", "replica-a", ttl_seconds=30) is False + + +@pytest.mark.asyncio +async def test_release_is_noop_for_non_owner() -> None: + lock = InMemoryLock() + await lock.acquire("dedup", "replica-a", ttl_seconds=30) + await lock.release("dedup", "replica-b") + assert await lock.holder("dedup") == "replica-a" + + +@pytest.mark.asyncio +async def test_release_drops_key_for_owner() -> None: + lock = InMemoryLock() + await lock.acquire("dedup", "replica-a", ttl_seconds=30) + await lock.release("dedup", "replica-a") + assert await lock.holder("dedup") is None + + +@pytest.mark.asyncio +async def test_same_holder_reacquire_is_idempotent() -> None: + lock = InMemoryLock() + assert await lock.acquire("dedup", "replica-a", ttl_seconds=30) is True + assert await lock.acquire("dedup", "replica-a", ttl_seconds=30) is True diff --git a/tests/signals/test_bus_nats.py b/tests/signals/test_bus_nats.py new file mode 100644 index 0000000..181eba5 --- /dev/null +++ b/tests/signals/test_bus_nats.py @@ -0,0 +1,177 @@ +"""Tests for the NATS JetStream EventBus adapter with a fake client.""" + +from __future__ import annotations + +import asyncio +from dataclasses import dataclass, field + +import pytest + +from augur_signals.bus._config import NATSBody +from augur_signals.bus.base import BusError, BusMessage +from augur_signals.bus.nats import NATSBus + + +@dataclass +class _FakeMsg: + subject: str + data: bytes + headers: dict[str, str] | None = None + _acked: bool = False + + async def ack(self) -> None: + self._acked = True + + +@dataclass +class _FakeSub: + stream_name: str + subject_pattern: str + durable: str + backlog: list[_FakeMsg] 
= field(default_factory=list) + cursor: int = 0 + unsubscribed: bool = False + + async def fetch(self, batch: int = 1, timeout: int = 1) -> list[_FakeMsg]: # noqa: ASYNC109 + _ = timeout + msgs: list[_FakeMsg] = [] + while self.cursor < len(self.backlog) and len(msgs) < batch: + msgs.append(self.backlog[self.cursor]) + self.cursor += 1 + return msgs + + async def unsubscribe(self) -> None: + self.unsubscribed = True + + +@dataclass +class _FakeJetStream: + stream_name_added: str | None = None + subjects: list[str] = field(default_factory=list) + published: list[_FakeMsg] = field(default_factory=list) + subs: list[_FakeSub] = field(default_factory=list) + + async def add_stream(self, *, name: str, subjects: list[str], num_replicas: int) -> None: + _ = num_replicas + self.stream_name_added = name + self.subjects = subjects + + async def publish( + self, subject: str, payload: bytes, headers: dict[str, str] | None = None + ) -> None: + msg = _FakeMsg(subject=subject, data=payload, headers=headers) + self.published.append(msg) + for sub in self.subs: + if self._matches(sub.subject_pattern, subject): + sub.backlog.append(msg) + + async def pull_subscribe(self, subject_pattern: str, durable: str) -> _FakeSub: + sub = _FakeSub( + stream_name=self.stream_name_added or "", + subject_pattern=subject_pattern, + durable=durable, + ) + # Seed the new subscription with any prior publishes that match; + # real JetStream pull consumers deliver the stream from ID 1. 
+ for msg in self.published: + if self._matches(subject_pattern, msg.subject): + sub.backlog.append(msg) + self.subs.append(sub) + return sub + + @staticmethod + def _matches(pattern: str, subject: str) -> bool: + if pattern == subject: + return True + if pattern.endswith(".>"): + return subject.startswith(pattern[:-1]) + return False + + +@dataclass +class _FakeClient: + _js: _FakeJetStream = field(default_factory=_FakeJetStream) + drained: bool = False + + def jetstream(self) -> _FakeJetStream: + return self._js + + async def drain(self) -> None: + self.drained = True + + +@pytest.fixture +def client() -> _FakeClient: + return _FakeClient() + + +@pytest.mark.asyncio +async def test_nats_connect_declares_stream_with_subject_prefix( + client: _FakeClient, +) -> None: + config = NATSBody(servers=["nats://localhost:4222"], stream_name="augur") + bus = NATSBus(config, client=client) # type: ignore[arg-type] + await bus.connect() + assert client._js.stream_name_added == "augur" + assert client._js.subjects == ["augur.>"] + + +@pytest.mark.asyncio +async def test_nats_publish_and_subscribe_roundtrip(client: _FakeClient) -> None: + config = NATSBody() + bus = NATSBus(config, client=client) # type: ignore[arg-type] + await bus.connect() + + subject = "augur.signals" + await bus.publish(BusMessage(subject=subject, payload=b"hi")) + await bus.publish(BusMessage(subject=subject, payload=b"there", headers={"k": "v"})) + + received: list[BusMessage] = [] + + async def consume() -> None: + async for msg in bus.subscribe("augur.signals", "dedup"): + received.append(msg) + if len(received) >= 2: + break + + await asyncio.wait_for(consume(), timeout=1.0) + assert [m.payload for m in received] == [b"hi", b"there"] + assert received[1].headers == {"k": "v"} + + +@pytest.mark.asyncio +async def test_nats_publish_requires_connect_first(client: _FakeClient) -> None: + config = NATSBody() + bus = NATSBus(config, client=client) # type: ignore[arg-type] + with pytest.raises(BusError, 
match="connect"): + await bus.publish(BusMessage(subject="augur.signals", payload=b"x")) + + +@pytest.mark.asyncio +async def test_nats_close_drains_client(client: _FakeClient) -> None: + config = NATSBody() + bus = NATSBus(config, client=client) # type: ignore[arg-type] + await bus.connect() + await bus.close() + assert client.drained is True + + +@pytest.mark.asyncio +async def test_nats_subscribe_acks_yielded_messages(client: _FakeClient) -> None: + config = NATSBody() + bus = NATSBus(config, client=client) # type: ignore[arg-type] + await bus.connect() + await bus.publish(BusMessage(subject="augur.signals", payload=b"a")) + await bus.publish(BusMessage(subject="augur.signals", payload=b"b")) + await bus.publish(BusMessage(subject="augur.signals", payload=b"c")) + + count = 0 + async for _msg in bus.subscribe("augur.signals", "dedup"): + count += 1 + if count >= 3: + break + + # The first two messages were acked in the iterations past them; + # the third is the yielded message at the break point (no ack). 
+ acks = [m._acked for m in client._js.published] + assert acks == [True, True, False] diff --git a/tests/signals/test_bus_redis.py b/tests/signals/test_bus_redis.py new file mode 100644 index 0000000..0a13767 --- /dev/null +++ b/tests/signals/test_bus_redis.py @@ -0,0 +1,122 @@ +"""Tests for the Redis Streams EventBus adapter using fakeredis.""" + +from __future__ import annotations + +import asyncio + +import fakeredis.aioredis +import pytest + +from augur_signals.bus._config import RedisBody +from augur_signals.bus.base import BusMessage +from augur_signals.bus.redis_streams import RedisLock, RedisStreamsBus + + +@pytest.fixture +def redis_client() -> fakeredis.aioredis.FakeRedis: + return fakeredis.aioredis.FakeRedis() + + +@pytest.mark.asyncio +async def test_redis_streams_publish_and_subscribe_roundtrip( + redis_client: fakeredis.aioredis.FakeRedis, +) -> None: + config = RedisBody(url_env="IGNORED", stream_max_length=100, block_ms=50) + bus = RedisStreamsBus(config, client=redis_client) + await bus.connect() + + subject = "augur.signals" + await bus.publish(BusMessage(subject=subject, payload=b"hello")) + await bus.publish(BusMessage(subject=subject, payload=b"world", headers={"trace_id": "abc"})) + + received: list[BusMessage] = [] + + async def consume() -> None: + async for msg in bus.subscribe(subject, "test-group"): + received.append(msg) + if len(received) >= 2: + break + + await asyncio.wait_for(consume(), timeout=2.0) + + assert [m.payload for m in received] == [b"hello", b"world"] + assert received[1].headers == {"trace_id": "abc"} + + await bus.close() + + +@pytest.mark.asyncio +async def test_redis_streams_xack_marks_processed_entries( + redis_client: fakeredis.aioredis.FakeRedis, +) -> None: + """XACK fires after the consumer iterates past a yielded message. + + Consumers that break out of the subscribe iterator without advancing + past a yielded message leave it pending so Redis redelivers on + restart (at-least-once semantics). 
+ """ + config = RedisBody(url_env="IGNORED", stream_max_length=100, block_ms=50) + bus = RedisStreamsBus(config, client=redis_client) + await bus.connect() + + subject = "augur.flagged_signals" + await bus.publish(BusMessage(subject=subject, payload=b"one")) + await bus.publish(BusMessage(subject=subject, payload=b"two")) + + received: list[bytes] = [] + + async def consume() -> None: + async for msg in bus.subscribe(subject, "test-group"): + received.append(msg.payload) + if len(received) >= 2: + # Breaking after iterating past msg #1 means #1 is + # acked; #2 is the currently-yielded message whose ack + # follows only if the consumer iterates once more. + break + + await asyncio.wait_for(consume(), timeout=2.0) + + summary = await redis_client.xpending(subject, "augur.test-group") + pending = summary.get("pending") if isinstance(summary, dict) else summary[0] + # The first message is acked; the second remains pending because + # the consumer broke out before iterating past it. + assert pending == 1 + + await bus.close() + + +@pytest.mark.asyncio +async def test_redis_streams_repeated_connect_is_idempotent( + redis_client: fakeredis.aioredis.FakeRedis, +) -> None: + config = RedisBody(url_env="IGNORED") + bus = RedisStreamsBus(config, client=redis_client) + await bus.connect() + await bus.connect() + await bus.publish(BusMessage(subject="augur.ops.events", payload=b"ping")) + await bus.close() + + +@pytest.mark.asyncio +async def test_redis_lock_acquire_and_renew_and_release( + redis_client: fakeredis.aioredis.FakeRedis, +) -> None: + lock = RedisLock(client=redis_client, key_prefix="augur.lock.") + + assert await lock.acquire("dedup", "replica-a", ttl_seconds=30) is True + assert await lock.acquire("dedup", "replica-b", ttl_seconds=30) is False + assert await lock.holder("dedup") == "replica-a" + assert await lock.renew("dedup", "replica-a", ttl_seconds=30) is True + assert await lock.renew("dedup", "replica-b", ttl_seconds=30) is False + await 
lock.release("dedup", "replica-a") + assert await lock.holder("dedup") is None + + +@pytest.mark.asyncio +async def test_redis_lock_release_by_non_owner_is_noop( + redis_client: fakeredis.aioredis.FakeRedis, +) -> None: + lock = RedisLock(client=redis_client, key_prefix="augur.lock.") + await lock.acquire("llm", "replica-a", ttl_seconds=30) + await lock.release("llm", "replica-b") + assert await lock.holder("llm") == "replica-a" diff --git a/tests/signals/test_llm_isolation.py b/tests/signals/test_llm_isolation.py index 375097e..88c3069 100644 --- a/tests/signals/test_llm_isolation.py +++ b/tests/signals/test_llm_isolation.py @@ -1,14 +1,14 @@ """Defense-in-depth: assert LLM packages are not importable by default. The grep-based LLM-import guard catches source-level imports in -``src/augur_signals/``. This test catches the runtime case where a -dependency change or a stray ``uv sync --extra`` pulls an LLM SDK into -the default environment, which would make an accidental ``import -anthropic`` in extraction code silently succeed. +`src/augur_signals/`. This test catches the runtime case where a +dependency change or a stray `uv sync --extra` pulls an LLM SDK into +the default environment, which would make an accidental `import +anthropic` in extraction code silently succeed. The tested invariant: in the default workspace sync (no optional extras), none of the LLM SDK packages used anywhere in the project -tree should be importable by the ``augur_signals`` interpreter. +tree should be importable by the `augur_signals` interpreter. 
""" from __future__ import annotations From 6fdaee123c6e20cba0e2a23adb7202b6ccf888e6 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 15:44:58 +0530 Subject: [PATCH 04/11] feat(storage): timescaledb adapter with hypertables and policies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TimescaleDBStore mirrors the DuckDBStore public surface so the engine flips storage backends via config.backend.kind without call-site edits. Writes use ON CONFLICT DO UPDATE for idempotent upserts; reads use the same query shapes as DuckDB with psycopg parameter binding. initialize() issues the schema DDL, calls create_hypertable on snapshots, features, and signals with chunk intervals and segment-by columns from StorageConfig, and attaches compression + retention policies per HypertableSpec — zero-day values skip the policy. The adapter takes its AsyncConnection via constructor injection so unit tests exercise every branch against a recording stub; CI runs the integration suite against a real TimescaleDB container in a follow-up. make_storage factory routes by backend.kind: duckdb returns the existing DuckDBStore, timescaledb opens an AsyncConnection from the configured env var (unless tests inject one) and returns the new adapter. 
--- .../augur_signals/storage/factory.py | 65 +++ .../storage/timescaledb_store.py | 533 ++++++++++++++++++ tests/signals/test_timescaledb_store.py | 222 ++++++++ 3 files changed, 820 insertions(+) create mode 100644 src/augur_signals/augur_signals/storage/factory.py create mode 100644 src/augur_signals/augur_signals/storage/timescaledb_store.py create mode 100644 tests/signals/test_timescaledb_store.py diff --git a/src/augur_signals/augur_signals/storage/factory.py b/src/augur_signals/augur_signals/storage/factory.py new file mode 100644 index 0000000..92d3787 --- /dev/null +++ b/src/augur_signals/augur_signals/storage/factory.py @@ -0,0 +1,65 @@ +"""Storage backend factory keyed by ``StorageConfig.backend.kind``. + +The Phase 1-4 monolith calls ``make_duckdb_store(config)`` directly +when instantiating the engine. Phase 5 workers use this factory at +startup so flipping ``config/storage.toml`` ``backend.kind`` from +``"duckdb"`` to ``"timescaledb"`` restarts the process against the +new backend without code edits. + +The module exposes one helper per backend: ``make_duckdb_store`` is +synchronous, while ``make_timescaledb_store`` is ``async`` because +opening the ``AsyncConnection`` is awaited. Each helper raises +``StorageConfigurationError`` when ``backend.kind`` does not match. 
+""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import TYPE_CHECKING + +from augur_signals.storage._config import StorageConfig +from augur_signals.storage.duckdb_store import DuckDBStore +from augur_signals.storage.timescaledb_store import TimescaleDBStore + +if TYPE_CHECKING: + from psycopg import AsyncConnection + + +class StorageConfigurationError(RuntimeError): + """Raised for malformed or inconsistent storage configuration.""" + + +def make_duckdb_store(config: StorageConfig) -> DuckDBStore: + """Open the Phase 1-4 DuckDB store from *config*.""" + if config.backend.kind != "duckdb": + raise StorageConfigurationError( + f"make_duckdb_store called with backend.kind = {config.backend.kind!r}" + ) + return DuckDBStore(Path(config.backend.duckdb_path)) + + +async def make_timescaledb_store( + config: StorageConfig, *, connection: AsyncConnection[object] | None = None +) -> TimescaleDBStore: + """Open a TimescaleDB store from *config*. + + If *connection* is None the factory reads the DSN from the env var + named in ``config.backend.timescale_url_env`` and opens a new + ``AsyncConnection``. Tests pass a stub connection explicitly. 
+ """ + if config.backend.kind != "timescaledb": + raise StorageConfigurationError( + f"make_timescaledb_store called with backend.kind = {config.backend.kind!r}" + ) + if connection is None: + import psycopg + + dsn = os.environ[config.backend.timescale_url_env] + connection = await psycopg.AsyncConnection.connect(dsn) + return TimescaleDBStore( + connection, + hypertable=config.hypertable, + retention=config.retention, + compression=config.compression, + ) diff --git a/src/augur_signals/augur_signals/storage/timescaledb_store.py b/src/augur_signals/augur_signals/storage/timescaledb_store.py new file mode 100644 index 0000000..b6e56e1 --- /dev/null +++ b/src/augur_signals/augur_signals/storage/timescaledb_store.py @@ -0,0 +1,533 @@ +"""TimescaleDB-backed persistence mirroring the DuckDBStore surface. + +The adapter is a thin facade over ``psycopg`` that issues the same +schema statements the DuckDB store does, then converts the time-series +tables into TimescaleDB hypertables and attaches compression and +retention policies. Every public method has a matching method on +``DuckDBStore`` so engine code flips backends via configuration without +call-site edits. + +The connection is injected so unit tests can swap in fakes or +sqlite-backed shims. Production startup reads the DSN from the env var +named in ``storage.toml``; the adapter itself does not know about the +filesystem. +""" + +from __future__ import annotations + +import json +from collections.abc import Iterable, Sequence +from dataclasses import dataclass +from datetime import datetime +from typing import TYPE_CHECKING, Any + +from augur_signals.models import ( + FeatureVector, + ManipulationFlag, + MarketSignal, + MarketSnapshot, +) +from augur_signals.storage._config import ( + CompressionBody, + HypertableBody, + RetentionBody, +) + +if TYPE_CHECKING: + from psycopg import AsyncConnection + + +_SCHEMA_STATEMENTS: tuple[str, ...] 
= ( + """ + CREATE TABLE IF NOT EXISTS schema_version ( + version INTEGER PRIMARY KEY, + applied_at TIMESTAMPTZ NOT NULL + ); + """, + """ + CREATE TABLE IF NOT EXISTS snapshots ( + market_id TEXT NOT NULL, + platform TEXT NOT NULL, + timestamp TIMESTAMPTZ NOT NULL, + last_price DOUBLE PRECISION, + bid DOUBLE PRECISION, + ask DOUBLE PRECISION, + spread DOUBLE PRECISION, + volume_24h DOUBLE PRECISION, + liquidity DOUBLE PRECISION, + question TEXT, + resolution_source TEXT, + resolution_criteria TEXT, + closes_at TIMESTAMPTZ, + raw_json JSONB, + schema_version TEXT NOT NULL, + PRIMARY KEY (market_id, platform, timestamp) + ); + """, + """ + CREATE TABLE IF NOT EXISTS features ( + market_id TEXT NOT NULL, + computed_at TIMESTAMPTZ NOT NULL, + payload JSONB NOT NULL, + schema_version TEXT NOT NULL, + PRIMARY KEY (market_id, computed_at) + ); + """, + """ + CREATE TABLE IF NOT EXISTS signals ( + signal_id TEXT PRIMARY KEY, + market_id TEXT NOT NULL, + platform TEXT NOT NULL, + signal_type TEXT NOT NULL, + magnitude DOUBLE PRECISION NOT NULL, + direction INTEGER NOT NULL, + confidence DOUBLE PRECISION NOT NULL, + fdr_adjusted BOOLEAN NOT NULL, + detected_at TIMESTAMPTZ NOT NULL, + window_seconds INTEGER NOT NULL, + liquidity_tier TEXT NOT NULL, + related_market_ids TEXT[], + raw_features JSONB NOT NULL, + schema_version TEXT NOT NULL + ); + """, + """ + CREATE TABLE IF NOT EXISTS manipulation_flags ( + signal_id TEXT NOT NULL, + flag TEXT NOT NULL, + detected_at TIMESTAMPTZ NOT NULL, + PRIMARY KEY (signal_id, flag) + ); + """, + """ + CREATE TABLE IF NOT EXISTS calibration_fpr ( + detector_id TEXT NOT NULL, + market_id TEXT NOT NULL, + fpr DOUBLE PRECISION NOT NULL, + sample_size INTEGER NOT NULL, + computed_at TIMESTAMPTZ NOT NULL, + label_protocol_version TEXT NOT NULL, + PRIMARY KEY (detector_id, market_id, computed_at) + ); + """, + """ + CREATE TABLE IF NOT EXISTS reliability_curves ( + detector_id TEXT NOT NULL, + liquidity_tier TEXT NOT NULL, + curve_version TEXT 
NOT NULL, + deciles JSONB NOT NULL, + built_at TIMESTAMPTZ NOT NULL, + PRIMARY KEY (detector_id, liquidity_tier, curve_version) + ); + """, +) + + +@dataclass(frozen=True, slots=True) +class HypertableSpec: + """One hypertable's partition/compression/retention policy.""" + + table: str + time_column: str + chunk_interval_days: int + segment_by: str | None = None + compress_after_days: int = 0 + retention_days: int = 0 + + +class TimescaleDBStore: + """Async TimescaleDB adapter mirroring DuckDBStore's method surface. + + Attributes: + CURRENT_SCHEMA_VERSION: Integer version stamped into the + ``schema_version`` table after ``initialize`` applies all + pending migrations. + """ + + CURRENT_SCHEMA_VERSION: int = 1 + + def __init__( + self, + connection: AsyncConnection[Any], + *, + hypertable: HypertableBody, + retention: RetentionBody, + compression: CompressionBody, + ) -> None: + self._conn = connection + self._hypertable = hypertable + self._retention = retention + self._compression = compression + + def hypertable_specs(self) -> list[HypertableSpec]: + """Return the hypertable policies derived from configuration.""" + return [ + HypertableSpec( + table="snapshots", + time_column="timestamp", + chunk_interval_days=self._hypertable.snapshot_chunk_interval_days, + segment_by="market_id, platform", + compress_after_days=self._compression.snapshot_compress_after_days, + retention_days=self._retention.snapshot_retention_days, + ), + HypertableSpec( + table="features", + time_column="computed_at", + chunk_interval_days=self._hypertable.feature_chunk_interval_days, + compress_after_days=self._compression.feature_compress_after_days, + retention_days=self._retention.feature_retention_days, + ), + HypertableSpec( + table="signals", + time_column="detected_at", + chunk_interval_days=self._hypertable.signal_chunk_interval_days, + compress_after_days=self._compression.signal_compress_after_days, + retention_days=self._retention.signal_retention_days, + ), + ] + + async def 
initialize(self) -> None: + """Apply migrations, create hypertables, attach policies.""" + async with self._conn.cursor() as cur: + for stmt in _SCHEMA_STATEMENTS: + await cur.execute(stmt) + for spec in self.hypertable_specs(): + await cur.execute( + """ + SELECT create_hypertable( + %s, %s, + chunk_time_interval => make_interval(days => %s), + if_not_exists => TRUE + ) + """, + [spec.table, spec.time_column, spec.chunk_interval_days], + ) + if spec.compress_after_days > 0: + if spec.segment_by: + await cur.execute( + "ALTER TABLE " + + self._quote_ident(spec.table) + + " SET (timescaledb.compress, " + "timescaledb.compress_segmentby = %s)", + [spec.segment_by], + ) + await cur.execute( + """ + SELECT add_compression_policy( + %s, make_interval(days => %s), + if_not_exists => TRUE + ) + """, + [spec.table, spec.compress_after_days], + ) + if spec.retention_days > 0: + await cur.execute( + """ + SELECT add_retention_policy( + %s, make_interval(days => %s), + if_not_exists => TRUE + ) + """, + [spec.table, spec.retention_days], + ) + await cur.execute( + """ + INSERT INTO schema_version (version, applied_at) + VALUES (%s, now()) + ON CONFLICT (version) DO NOTHING + """, + [self.CURRENT_SCHEMA_VERSION], + ) + await self._conn.commit() + + # --- writes --------------------------------------------------------- + + async def insert_snapshot(self, snapshot: MarketSnapshot) -> None: + async with self._conn.cursor() as cur: + await cur.execute( + """ + INSERT INTO snapshots ( + market_id, platform, timestamp, last_price, bid, ask, + spread, volume_24h, liquidity, question, + resolution_source, resolution_criteria, closes_at, + raw_json, schema_version + ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s) + ON CONFLICT (market_id, platform, timestamp) DO UPDATE SET + last_price = EXCLUDED.last_price, + bid = EXCLUDED.bid, + ask = EXCLUDED.ask, + spread = EXCLUDED.spread, + volume_24h = EXCLUDED.volume_24h, + liquidity = EXCLUDED.liquidity, + 
question = EXCLUDED.question, + resolution_source = EXCLUDED.resolution_source, + resolution_criteria = EXCLUDED.resolution_criteria, + closes_at = EXCLUDED.closes_at, + raw_json = EXCLUDED.raw_json, + schema_version = EXCLUDED.schema_version + """, + [ + snapshot.market_id, + snapshot.platform, + snapshot.timestamp, + snapshot.last_price, + snapshot.bid, + snapshot.ask, + snapshot.spread, + snapshot.volume_24h, + snapshot.liquidity, + snapshot.question, + snapshot.resolution_source, + snapshot.resolution_criteria, + snapshot.closes_at, + json.dumps(snapshot.raw_json), + snapshot.schema_version, + ], + ) + await self._conn.commit() + + async def insert_feature(self, feature: FeatureVector) -> None: + payload = feature.model_dump(mode="json", exclude={"market_id", "computed_at"}) + async with self._conn.cursor() as cur: + await cur.execute( + """ + INSERT INTO features (market_id, computed_at, payload, schema_version) + VALUES (%s, %s, %s::jsonb, %s) + ON CONFLICT (market_id, computed_at) DO UPDATE SET + payload = EXCLUDED.payload, + schema_version = EXCLUDED.schema_version + """, + [ + feature.market_id, + feature.computed_at, + json.dumps(payload), + feature.schema_version, + ], + ) + await self._conn.commit() + + async def insert_signal(self, signal: MarketSignal) -> None: + async with self._conn.cursor() as cur: + await cur.execute( + """ + INSERT INTO signals ( + signal_id, market_id, platform, signal_type, magnitude, + direction, confidence, fdr_adjusted, detected_at, + window_seconds, liquidity_tier, related_market_ids, + raw_features, schema_version + ) VALUES ( + %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s + ) + ON CONFLICT (signal_id) DO UPDATE SET + magnitude = EXCLUDED.magnitude, + direction = EXCLUDED.direction, + confidence = EXCLUDED.confidence, + fdr_adjusted = EXCLUDED.fdr_adjusted, + detected_at = EXCLUDED.detected_at, + window_seconds = EXCLUDED.window_seconds, + liquidity_tier = EXCLUDED.liquidity_tier, + related_market_ids = 
EXCLUDED.related_market_ids, + raw_features = EXCLUDED.raw_features, + schema_version = EXCLUDED.schema_version + """, + [ + signal.signal_id, + signal.market_id, + signal.platform, + signal.signal_type.value, + signal.magnitude, + signal.direction, + signal.confidence, + signal.fdr_adjusted, + signal.detected_at, + signal.window_seconds, + signal.liquidity_tier, + list(signal.related_market_ids), + json.dumps(signal.raw_features), + signal.schema_version, + ], + ) + await self._conn.commit() + if signal.manipulation_flags: + await self.insert_manipulation_flags( + signal.signal_id, + signal.detected_at, + signal.manipulation_flags, + ) + + async def insert_manipulation_flags( + self, + signal_id: str, + detected_at: datetime, + flags: Iterable[ManipulationFlag], + ) -> None: + async with self._conn.cursor() as cur: + for flag in flags: + await cur.execute( + """ + INSERT INTO manipulation_flags (signal_id, flag, detected_at) + VALUES (%s, %s, %s) + ON CONFLICT (signal_id, flag) DO UPDATE SET + detected_at = EXCLUDED.detected_at + """, + [signal_id, flag.value, detected_at], + ) + await self._conn.commit() + + # --- reads ---------------------------------------------------------- + + async def latest_snapshot(self, market_id: str) -> MarketSnapshot | None: + async with self._conn.cursor() as cur: + await cur.execute( + "SELECT * FROM snapshots WHERE market_id = %s ORDER BY timestamp DESC LIMIT 1", + [market_id], + ) + row = await cur.fetchone() + if row is None: + return None + return _row_to_snapshot(row) + + async def snapshots_in_window( + self, + market_id: str, + window_start: datetime, + window_end: datetime, + ) -> list[MarketSnapshot]: + async with self._conn.cursor() as cur: + await cur.execute( + """ + SELECT * FROM snapshots + WHERE market_id = %s AND timestamp BETWEEN %s AND %s + ORDER BY timestamp ASC + """, + [market_id, window_start, window_end], + ) + rows = await cur.fetchall() + return [_row_to_snapshot(row) for row in rows] + + async def 
signals_in_window( + self, + market_ids: Sequence[str], + window_start: datetime, + window_end: datetime, + ) -> list[MarketSignal]: + if not market_ids: + return [] + async with self._conn.cursor() as cur: + await cur.execute( + """ + SELECT * FROM signals + WHERE market_id = ANY(%s) AND detected_at BETWEEN %s AND %s + ORDER BY detected_at ASC + """, + [list(market_ids), window_start, window_end], + ) + rows = await cur.fetchall() + signals = [_row_to_signal(row) for row in rows] + if not signals: + return signals + signal_ids = [s.signal_id for s in signals] + await cur.execute( + "SELECT signal_id, flag FROM manipulation_flags WHERE signal_id = ANY(%s)", + [signal_ids], + ) + flag_rows = await cur.fetchall() + flags_by_signal: dict[str, list[ManipulationFlag]] = {} + for signal_id, flag_value in flag_rows: + flags_by_signal.setdefault(signal_id, []).append(ManipulationFlag(flag_value)) + return [ + signal.model_copy( + update={"manipulation_flags": flags_by_signal.get(signal.signal_id, [])} + ) + for signal in signals + ] + + # --- lifecycle ------------------------------------------------------ + + async def close(self) -> None: + await self._conn.close() + + @staticmethod + def _quote_ident(identifier: str) -> str: + """Quote a SQL identifier rejecting anything outside [a-z0-9_].""" + if not identifier or not all(c.isalnum() or c == "_" for c in identifier): + raise ValueError(f"Refusing to quote identifier: {identifier!r}") + return f'"{identifier}"' + + +def _row_to_snapshot(row: tuple[Any, ...]) -> MarketSnapshot: + ( + market_id, + platform, + timestamp, + last_price, + bid, + ask, + spread, + volume_24h, + liquidity, + question, + resolution_source, + resolution_criteria, + closes_at, + raw_json, + schema_version, + ) = row + return MarketSnapshot.model_validate( + { + "market_id": market_id, + "platform": platform, + "timestamp": timestamp, + "last_price": last_price, + "bid": bid, + "ask": ask, + "spread": spread, + "volume_24h": volume_24h, + 
"liquidity": liquidity, + "question": question, + "resolution_source": resolution_source, + "resolution_criteria": resolution_criteria, + "closes_at": closes_at, + "raw_json": json.loads(raw_json) if isinstance(raw_json, str) else raw_json, + "schema_version": schema_version, + } + ) + + +def _row_to_signal(row: tuple[Any, ...]) -> MarketSignal: + ( + signal_id, + market_id, + platform, + signal_type, + magnitude, + direction, + confidence, + fdr_adjusted, + detected_at, + window_seconds, + liquidity_tier, + related_market_ids, + raw_features, + schema_version, + ) = row + return MarketSignal.model_validate( + { + "signal_id": signal_id, + "market_id": market_id, + "platform": platform, + "signal_type": signal_type, + "magnitude": magnitude, + "direction": direction, + "confidence": confidence, + "fdr_adjusted": fdr_adjusted, + "detected_at": detected_at, + "window_seconds": window_seconds, + "liquidity_tier": liquidity_tier, + "related_market_ids": list(related_market_ids or []), + "raw_features": ( + json.loads(raw_features) if isinstance(raw_features, str) else raw_features + ), + "schema_version": schema_version, + } + ) diff --git a/tests/signals/test_timescaledb_store.py b/tests/signals/test_timescaledb_store.py new file mode 100644 index 0000000..53422dc --- /dev/null +++ b/tests/signals/test_timescaledb_store.py @@ -0,0 +1,222 @@ +"""Tests for the TimescaleDB storage adapter. + +The tests use a recording stub in place of ``psycopg.AsyncConnection`` +so the SQL the adapter issues can be inspected without running a real +TimescaleDB instance. CI opts into full integration tests against a +live TimescaleDB container under ``@pytest.mark.integration`` (added in +a follow-up commit alongside docker-compose fixtures). 
@dataclass
class _RecordingCursor:
    """Cursor double that logs each statement and replays canned rows."""

    executed: list[tuple[str, list[Any] | None]] = field(default_factory=list)
    pending_rows: list[tuple[Any, ...]] = field(default_factory=list)

    async def execute(self, sql: str, params: list[Any] | None = None) -> None:
        self.executed.append((sql, params))

    async def fetchone(self) -> tuple[Any, ...] | None:
        return self.pending_rows.pop(0) if self.pending_rows else None

    async def fetchall(self) -> list[tuple[Any, ...]]:
        # Hand back everything queued and leave the queue empty.
        drained, self.pending_rows = self.pending_rows, []
        return drained

    async def __aenter__(self) -> _RecordingCursor:
        return self

    async def __aexit__(self, *exc: object) -> None:
        return None


@dataclass
class _RecordingConnection:
    """Connection double sharing one recording cursor across calls."""

    cursor_: _RecordingCursor = field(default_factory=_RecordingCursor)
    committed: int = 0
    closed: bool = False

    def cursor(self) -> _RecordingCursor:
        return self.cursor_

    async def commit(self) -> None:
        self.committed += 1

    async def close(self) -> None:
        self.closed = True


def _store(conn: _RecordingConnection) -> TimescaleDBStore:
    """Build a store over *conn* with default-configured policies."""
    return TimescaleDBStore(
        conn,  # type: ignore[arg-type]
        hypertable=HypertableBody(),
        retention=RetentionBody(),
        compression=CompressionBody(),
    )


def _statements(conn: _RecordingConnection) -> list[str]:
    """Return the first line of every statement recorded on *conn*."""
    first_lines: list[str] = []
    for sql, _params in conn.cursor_.executed:
        first_lines.append(sql.strip().split("\n", maxsplit=1)[0])
    return first_lines


def _all_sql(conn: _RecordingConnection) -> str:
    """Join every recorded statement into one searchable string."""
    return "\n".join(sql for sql, _ in conn.cursor_.executed)


@pytest.mark.asyncio
async def test_initialize_creates_schema_and_hypertables() -> None:
    connection = _RecordingConnection()
    await _store(connection).initialize()
    issued = _all_sql(connection)
    # Schema DDL, hypertable creation, compression, retention and the
    # schema_version upsert must all be present, under a single commit.
    for fragment in (
        "CREATE TABLE IF NOT EXISTS snapshots",
        "create_hypertable",
        "add_compression_policy",
        "add_retention_policy",
        "INSERT INTO schema_version",
    ):
        assert fragment in issued
    assert connection.committed == 1


@pytest.mark.asyncio
async def test_hypertable_specs_match_configuration() -> None:
    adapter = _store(_RecordingConnection())
    by_table = {spec.table: spec for spec in adapter.hypertable_specs()}
    snapshots_spec = by_table["snapshots"]
    signals_spec = by_table["signals"]
    assert snapshots_spec.time_column == "timestamp"
    assert snapshots_spec.chunk_interval_days == 1
    assert snapshots_spec.segment_by == "market_id, platform"
    assert signals_spec.time_column == "detected_at"
    assert signals_spec.chunk_interval_days == 7
    assert by_table["features"].retention_days == 30


@pytest.mark.asyncio
async def test_retention_zero_skips_retention_policy() -> None:
    connection = _RecordingConnection()
    no_retention = RetentionBody(
        snapshot_retention_days=0,
        feature_retention_days=0,
        signal_retention_days=0,
    )
    adapter = TimescaleDBStore(
        connection,  # type: ignore[arg-type]
        hypertable=HypertableBody(),
        retention=no_retention,
        compression=CompressionBody(),
    )
    await adapter.initialize()
    issued = _all_sql(connection)
    assert "add_retention_policy" not in issued
    assert "add_compression_policy" in issued  # compression still applies


@pytest.mark.asyncio
async def test_insert_snapshot_upserts_with_conflict_clause() -> None:
    connection = _RecordingConnection()
    adapter = _store(connection)
    snapshot = MarketSnapshot(
        market_id="m-1",
        platform="kalshi",
        timestamp=datetime(2026, 4, 1, 12, 0, tzinfo=UTC),
        last_price=0.5,
        bid=0.49,
        ask=0.51,
        spread=0.02,
        volume_24h=1000.0,
        liquidity=5000.0,
        question="Will the Fed raise rates?",
        resolution_source="Federal Reserve",
        resolution_criteria="YES if rate rises.",
        raw_json={"source": "kalshi"},
        closes_at=datetime(2026, 6, 1, tzinfo=UTC),
    )
    await adapter.insert_snapshot(snapshot)
    statement, bound = connection.cursor_.executed[0]
    assert "INSERT INTO snapshots" in statement
    assert "ON CONFLICT (market_id, platform, timestamp)" in statement
    assert bound is not None
    assert bound[0] == "m-1"
    # raw_json is the second-to-last bound value, serialized as JSON text.
    assert json.loads(bound[-2]) == {"source": "kalshi"}
    assert connection.committed == 1


@pytest.mark.asyncio
async def test_insert_signal_writes_signal_and_manipulation_flags() -> None:
    connection = _RecordingConnection()
    adapter = _store(connection)
    flagged = MarketSignal(
        signal_id=new_signal_id(),
        market_id="m-1",
        platform="kalshi",
        signal_type=SignalType.PRICE_VELOCITY,
        magnitude=0.9,
        direction=1,
        confidence=0.8,
        fdr_adjusted=True,
        detected_at=datetime(2026, 4, 1, 12, 0, tzinfo=UTC),
        window_seconds=300,
        liquidity_tier="high",
        manipulation_flags=[ManipulationFlag.SINGLE_COUNTERPARTY_CONCENTRATION],
        raw_features={"calibration_provenance": "d@identity_v0"},
    )
    await adapter.insert_signal(flagged)
    first_lines = _statements(connection)
    # Two commits: one for the signal row, one for the flag row.
    assert any("INSERT INTO signals" in line for line in first_lines)
    assert any("INSERT INTO manipulation_flags" in line for line in first_lines)
    assert connection.committed == 2


@pytest.mark.asyncio
async def test_latest_snapshot_returns_none_for_empty_result() -> None:
    adapter = _store(_RecordingConnection())
    assert await adapter.latest_snapshot("m-1") is None


@pytest.mark.asyncio
async def test_signals_in_window_no_markets_short_circuits() -> None:
    connection = _RecordingConnection()
    adapter = _store(connection)
    window_start = datetime(2026, 1, 1, tzinfo=UTC)
    window_end = datetime(2026, 2, 1, tzinfo=UTC)
    found = await adapter.signals_in_window([], window_start, window_end)
    assert found == []
    assert connection.cursor_.executed == []


@pytest.mark.asyncio
async def test_close_propagates_to_connection() -> None:
    connection = _RecordingConnection()
    await _store(connection).close()
    assert connection.closed is True


def test_quote_ident_rejects_non_alphanumeric_identifier() -> None:
    with pytest.raises(ValueError, match="Refusing to quote"):
        TimescaleDBStore._quote_ident("snapshots; DROP TABLE")
A separate poller entrypoint fans snapshots from AdaptivePoller into the snapshot subjects. The shard filter uses FNV-1a modulo replica_count so per-market state stays on the same replica even after redeploy, and the subject helpers centralize the subject taxonomy. --- .../augur_signals/workers/__init__.py | 3 + .../augur_signals/workers/harness.py | 137 ++++++++++++ .../augur_signals/workers/poller.py | 115 +++++++++++ .../augur_signals/workers/sharding.py | 41 ++++ .../augur_signals/workers/stateless.py | 129 ++++++++++++ .../augur_signals/workers/subjects.py | 64 ++++++ tests/signals/test_worker_harness.py | 195 ++++++++++++++++++ 7 files changed, 684 insertions(+) create mode 100644 src/augur_signals/augur_signals/workers/__init__.py create mode 100644 src/augur_signals/augur_signals/workers/harness.py create mode 100644 src/augur_signals/augur_signals/workers/poller.py create mode 100644 src/augur_signals/augur_signals/workers/sharding.py create mode 100644 src/augur_signals/augur_signals/workers/stateless.py create mode 100644 src/augur_signals/augur_signals/workers/subjects.py create mode 100644 tests/signals/test_worker_harness.py diff --git a/src/augur_signals/augur_signals/workers/__init__.py b/src/augur_signals/augur_signals/workers/__init__.py new file mode 100644 index 0000000..4239c68 --- /dev/null +++ b/src/augur_signals/augur_signals/workers/__init__.py @@ -0,0 +1,3 @@ +"""Worker entrypoints for the multi-process runtime.""" + +from __future__ import annotations diff --git a/src/augur_signals/augur_signals/workers/harness.py b/src/augur_signals/augur_signals/workers/harness.py new file mode 100644 index 0000000..ab8e4ee --- /dev/null +++ b/src/augur_signals/augur_signals/workers/harness.py @@ -0,0 +1,137 @@ +"""Worker harness orchestrating connect → run → shutdown with heartbeat. + +Every worker process builds a ``WorkerHarness`` from its main module +and calls ``run`` to enter the supervisory loop. 
The harness connects +to the event bus, optionally starts a heartbeat task, and drives the +worker's ``process_once`` coroutine until a shutdown signal (SIGINT / +SIGTERM) flips the stop flag. On shutdown it awaits the pending batch +then closes the bus. + +The harness stays backend-agnostic: it consumes the ``EventBus`` +protocol from ``bus/base.py`` and a ``HeartbeatEmitter`` protocol that +callers plug in with concrete implementations. Stateless workers pass +a no-op emitter; singletons pass a lock-holding emitter that renews +the distributed lock each beat. +""" + +from __future__ import annotations + +import asyncio +import signal +from collections.abc import Callable, Coroutine +from dataclasses import dataclass, field +from typing import Any, Protocol, runtime_checkable + +from augur_signals._observability import MetricCounter, MetricGauge +from augur_signals.bus.base import EventBus + + +@runtime_checkable +class HeartbeatEmitter(Protocol): + """Periodic side-effect fired by the harness' background task.""" + + async def beat(self) -> bool: + """Emit one heartbeat; return True to keep running, False to stop.""" + ... + + +class _NoHeartbeat: + """Heartbeat emitter that never stops the loop; used by stateless workers.""" + + async def beat(self) -> bool: + return True + + +@dataclass(slots=True) +class WorkerHarness: + """Supervisor for a single worker replica. + + Attributes: + worker_kind: Short identifier used as a metric label and log + field (``"feature"``, ``"detector"``, ``"dedup"``, ...). + replica_id: Stable identifier for this specific replica. In + Kubernetes this is the pod name; on bare-metal deployments + operators supply it through an env var. + bus: EventBus connection to open at startup and close on exit. + main: Coroutine the harness drives to completion; the coroutine + is expected to honour ``stop_event`` via ``should_stop``. + heartbeat: Optional emitter whose ``beat`` fires every + ``heartbeat_interval_seconds``. Defaults to a no-op. 
+ heartbeat_interval_seconds: Seconds between beats. + """ + + worker_kind: str + replica_id: str + bus: EventBus + main: Callable[[WorkerHarness], Coroutine[Any, Any, None]] + heartbeat: HeartbeatEmitter = field(default_factory=_NoHeartbeat) + heartbeat_interval_seconds: float = 10.0 + _stop: asyncio.Event = field(default_factory=asyncio.Event) + _alive: MetricGauge | None = None + _processed: MetricCounter | None = None + + def __post_init__(self) -> None: + self._alive = MetricGauge("augur_worker_alive", ["worker_kind", "replica_id"]) + self._processed = MetricCounter( + "augur_worker_processed_total", ["worker_kind", "replica_id"] + ) + + def should_stop(self) -> bool: + return self._stop.is_set() + + def request_stop(self) -> None: + self._stop.set() + + def record_processed(self, delta: float = 1.0) -> None: + if self._processed is not None: + self._processed.inc(delta, worker_kind=self.worker_kind, replica_id=self.replica_id) + + async def run(self) -> None: + """Drive the worker main task with signals and a heartbeat loop.""" + self._install_signal_handlers() + if self._alive is not None: + self._alive.set(1.0, worker_kind=self.worker_kind, replica_id=self.replica_id) + await self.bus.connect() + heartbeat_task = asyncio.create_task(self._heartbeat_loop()) + main_task = asyncio.create_task(self.main(self)) + stop_task = asyncio.create_task(self._stop.wait()) + try: + done, pending = await asyncio.wait( + {main_task, heartbeat_task, stop_task}, + return_when=asyncio.FIRST_COMPLETED, + ) + _ = done + for task in pending: + task.cancel() + self._stop.set() + for task in pending: + try: + await task + except asyncio.CancelledError: + pass + finally: + if self._alive is not None: + self._alive.set(0.0, worker_kind=self.worker_kind, replica_id=self.replica_id) + await self.bus.close() + + async def _heartbeat_loop(self) -> None: + while not self._stop.is_set(): + keep_running = await self.heartbeat.beat() + if not keep_running: + self._stop.set() + return + try: 
+ await asyncio.wait_for(self._stop.wait(), timeout=self.heartbeat_interval_seconds) + except TimeoutError: + continue + + def _install_signal_handlers(self) -> None: + loop = asyncio.get_running_loop() + for sig in (signal.SIGINT, signal.SIGTERM): + try: + loop.add_signal_handler(sig, self._stop.set) + except NotImplementedError: + # Windows does not support signal handlers on the event + # loop; the harness still runs and stops on Ctrl+C via + # KeyboardInterrupt propagation. + continue diff --git a/src/augur_signals/augur_signals/workers/poller.py b/src/augur_signals/augur_signals/workers/poller.py new file mode 100644 index 0000000..31cfd80 --- /dev/null +++ b/src/augur_signals/augur_signals/workers/poller.py @@ -0,0 +1,115 @@ +"""Poller worker entrypoint — one per platform. + +The poller subscribes to the platform's public market API via the +Phase 1 ``AdaptivePoller`` and forwards every normalized snapshot to +``augur.snapshots..`` on the event bus. + +Run as: + python -m augur_signals.workers.poller --platform polymarket + +The main coroutine stays thin — it wires the harness to the existing +Phase 1 polling stack. The heavy lifting (adaptive backoff, rate +limiting, DLQ, manipulation hints) already lives in +``augur_signals.ingestion``; this module only glues it to the bus. +""" + +from __future__ import annotations + +import argparse +from collections.abc import AsyncIterator, Callable +from typing import Protocol + +from augur_signals._observability import trace_span +from augur_signals.bus.base import BusMessage, EventBus +from augur_signals.models import MarketSnapshot +from augur_signals.workers.harness import WorkerHarness +from augur_signals.workers.subjects import snapshots + + +class SnapshotSource(Protocol): + """Abstract snapshot producer for the poller worker. + + Phase 1's ``AdaptivePoller`` implements this; tests pass a simple + stub. The poller does not own market discovery — that lives in + ``augur_signals.ingestion``. 
class SnapshotSource(Protocol):
    """Abstract snapshot producer for the poller worker.

    Phase 1's ``AdaptivePoller`` implements this; tests pass a simple
    stub. The poller does not own market discovery — that lives in
    ``augur_signals.ingestion``.
    """

    def stream(self) -> AsyncIterator[MarketSnapshot]: ...


async def run_poller(harness: WorkerHarness, source: SnapshotSource, subject_prefix: str) -> None:
    """Forward every snapshot from *source* onto the bus until stopped.

    Each snapshot is published as UTF-8 JSON on the subject derived from
    its platform and market id. The stop flag is checked once per
    snapshot, before that snapshot is published.
    """
    async for snap in source.stream():
        if harness.should_stop():
            break
        message = BusMessage(
            subject=snapshots(subject_prefix, snap.platform, snap.market_id),
            payload=snap.model_dump_json().encode("utf-8"),
        )
        with trace_span(
            "poller.publish",
            market_id=snap.market_id,
            platform=snap.platform,
        ):
            await harness.bus.publish(message)
        harness.record_processed()


def build_harness(
    *,
    platform: str,
    replica_id: str,
    bus: EventBus,
    source: SnapshotSource,
    subject_prefix: str,
) -> WorkerHarness:
    """Assemble a ``WorkerHarness`` around ``run_poller`` for *platform*.

    The harness is labelled ``poller.<platform>`` and uses the default
    heartbeat.
    """

    async def _poll(harness: WorkerHarness) -> None:
        await run_poller(harness, source, subject_prefix)

    kind = f"poller.{platform}"
    return WorkerHarness(worker_kind=kind, replica_id=replica_id, bus=bus, main=_poll)


def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the poller CLI: platform, replica id, and subject prefix."""
    cli = argparse.ArgumentParser(prog="augur-poller")
    cli.add_argument("--platform", required=True, choices=["polymarket", "kalshi"])
    cli.add_argument("--replica-id", required=True)
    cli.add_argument("--subject-prefix", default="augur")
    return cli.parse_args(argv)


def main_factory_for(
    platform: str,
) -> Callable[[EventBus, SnapshotSource, str, str], WorkerHarness]:
    """Curry ``build_harness`` for a given platform.

    The returned callable takes ``(bus, source, replica_id,
    subject_prefix)`` and forwards them, together with *platform*, to
    ``build_harness``.
    """

    def _build(
        bus: EventBus, source: SnapshotSource, replica_id: str, subject_prefix: str
    ) -> WorkerHarness:
        harness = build_harness(
            platform=platform,
            replica_id=replica_id,
            bus=bus,
            source=source,
            subject_prefix=subject_prefix,
        )
        return harness

    return _build


if __name__ == "__main__":  # pragma: no cover — thin entrypoint wiring
    # Arguments are validated, but no SnapshotSource can be built here,
    # so the script always exits with guidance.
    _parse_args()
    raise SystemExit(
        "augur-poller requires a SnapshotSource wired from "
        "augur_signals.ingestion at deployment time. Import build_harness "
        "from your deployment's bootstrap module."
    )
_FNV_OFFSET = 0xCBF29CE484222325
_FNV_PRIME = 0x100000001B3
_U64_MASK = 0xFFFFFFFFFFFFFFFF


def shard_index(key: str, replica_count: int) -> int:
    """Map *key* onto one of *replica_count* shards with 64-bit FNV-1a.

    Args:
        key: The shard key (usually ``market_id``); hashed over its
            UTF-8 bytes.
        replica_count: Total number of replicas in the pool; must be
            positive.

    Returns:
        An integer in ``[0, replica_count)``.

    Raises:
        ValueError: ``replica_count`` is zero or negative.
    """
    if replica_count <= 0:
        raise ValueError("replica_count must be positive")
    state = _FNV_OFFSET
    for octet in key.encode("utf-8"):
        # FNV-1a: xor the byte in first, then multiply, wrapped to 64 bits.
        state = ((state ^ octet) * _FNV_PRIME) & _U64_MASK
    return state % replica_count


def owned_by(key: str, replica_id: int, replica_count: int) -> bool:
    """Tell whether *key* hashes to shard *replica_id* of *replica_count*."""
    assigned = shard_index(key, replica_count)
    return assigned == replica_id
+""" + +from __future__ import annotations + +from collections.abc import Awaitable, Callable +from dataclasses import dataclass + +from augur_signals._observability import trace_span +from augur_signals.bus.base import BusMessage, EventBus +from augur_signals.workers.harness import WorkerHarness +from augur_signals.workers.sharding import owned_by + + +@dataclass(frozen=True, slots=True) +class ShardConfig: + """Replica identity and pool size for per-market sharding.""" + + replica_id: int + replica_count: int + + +async def run_bridge[InT, OutT]( + harness: WorkerHarness, + *, + input_pattern: str, + output_subject_builder: Callable[[OutT], str | None], + consumer_group: str, + deserialize: Callable[[bytes], InT], + transform: Callable[[InT], Awaitable[list[OutT]]], + serialize: Callable[[OutT], bytes], + shard_key: Callable[[InT], str] | None = None, + shard_config: ShardConfig | None = None, + trace_name: str, +) -> None: + """Consume *input_pattern*, transform each payload, publish outputs. + + Args: + harness: The owning ``WorkerHarness``. Supplies the bus and + stop signal. + input_pattern: Subject pattern to subscribe to. + output_subject_builder: Function returning the subject to + publish each output to, or None to skip publishing (e.g., + for terminal workers that write to storage only). + consumer_group: Consumer group name for the input subscription. + deserialize: Parse the input payload from raw bytes. + transform: Produce zero or more outputs from one input. + serialize: Encode each output to bytes for publishing. + shard_key: Optional extractor used with *shard_config* to skip + messages this replica does not own. + shard_config: Optional replica identity for shard filtering. + trace_name: Name of the OpenTelemetry span wrapping each + transform. 
+ """ + async for message in harness.bus.subscribe(input_pattern, consumer_group): + if harness.should_stop(): + break + deserialized = deserialize(message.payload) + if shard_key is not None and shard_config is not None: + key = shard_key(deserialized) + if not owned_by(key, shard_config.replica_id, shard_config.replica_count): + continue + with trace_span(trace_name, replica_id=harness.replica_id): + outputs = await transform(deserialized) + for out in outputs: + subject = output_subject_builder(out) + if subject is None: + continue + await harness.bus.publish(BusMessage(subject=subject, payload=serialize(out))) + harness.record_processed(float(max(len(outputs), 1))) + + +@dataclass(slots=True) +class StatelessWorkerSpec: + """Declarative shape for a stateless worker. + + Construction is deferred until a bootstrap module has resolved the + concrete transform function (which often depends on config loaded + from disk). + """ + + worker_kind: str + input_pattern: str + consumer_group: str + trace_name: str + + def build_harness( + self, + *, + replica_id: str, + bus: EventBus, + deserialize: Callable[[bytes], object], + transform: Callable[[object], Awaitable[list[object]]], + serialize: Callable[[object], bytes], + output_subject_builder: Callable[[object], str | None], + shard_key: Callable[[object], str] | None = None, + shard_config: ShardConfig | None = None, + ) -> WorkerHarness: + async def _main(harness: WorkerHarness) -> None: + await run_bridge( + harness, + input_pattern=self.input_pattern, + output_subject_builder=output_subject_builder, + consumer_group=self.consumer_group, + deserialize=deserialize, + transform=transform, + serialize=serialize, + shard_key=shard_key, + shard_config=shard_config, + trace_name=self.trace_name, + ) + + return WorkerHarness( + worker_kind=self.worker_kind, + replica_id=replica_id, + bus=bus, + main=_main, + ) diff --git a/src/augur_signals/augur_signals/workers/subjects.py 
b/src/augur_signals/augur_signals/workers/subjects.py new file mode 100644 index 0000000..80ac047 --- /dev/null +++ b/src/augur_signals/augur_signals/workers/subjects.py @@ -0,0 +1,64 @@ +"""Subject naming helpers matching `.docs/phase-5-scaling.md §4.3`. + +A single module owns the subject strings so producers and consumers +stay aligned. Every helper returns a full subject with the configured +prefix so callers pass the result straight into ``bus.publish`` / +``bus.subscribe``. +""" + +from __future__ import annotations + + +def snapshots(prefix: str, platform: str, market_id: str) -> str: + return f"{prefix}.snapshots.{platform}.{market_id}" + + +def snapshots_pattern(prefix: str, platform: str | None = None) -> str: + """Wildcard pattern for a feature worker to consume. + + If *platform* is None the pattern matches every platform; otherwise + it narrows to the named platform. + """ + if platform is None: + return f"{prefix}.snapshots.>" + return f"{prefix}.snapshots.{platform}.>" + + +def features(prefix: str, market_id: str) -> str: + return f"{prefix}.features.{market_id}" + + +def features_pattern(prefix: str) -> str: + return f"{prefix}.features.>" + + +def candidates(prefix: str, detector_id: str) -> str: + return f"{prefix}.candidates.{detector_id}" + + +def candidates_pattern(prefix: str) -> str: + return f"{prefix}.candidates.>" + + +def flagged_signals(prefix: str) -> str: + return f"{prefix}.flagged_signals" + + +def calibrated_signals(prefix: str) -> str: + return f"{prefix}.calibrated_signals" + + +def signals(prefix: str) -> str: + return f"{prefix}.signals" + + +def contexts(prefix: str) -> str: + return f"{prefix}.contexts" + + +def briefs(prefix: str, fmt: str) -> str: + return f"{prefix}.briefs.{fmt}" + + +def ops_events(prefix: str) -> str: + return f"{prefix}.ops.events" diff --git a/tests/signals/test_worker_harness.py b/tests/signals/test_worker_harness.py new file mode 100644 index 0000000..92c5389 --- /dev/null +++ 
b/tests/signals/test_worker_harness.py @@ -0,0 +1,195 @@ +"""Tests for WorkerHarness, stateless bridge, shard routing, subject helpers.""" + +from __future__ import annotations + +import asyncio +from collections.abc import AsyncIterator +from dataclasses import dataclass, field + +import pytest + +from augur_signals.bus.base import BusMessage, EventBus +from augur_signals.workers import subjects +from augur_signals.workers.harness import HeartbeatEmitter, WorkerHarness +from augur_signals.workers.sharding import owned_by, shard_index +from augur_signals.workers.stateless import ShardConfig, run_bridge + + +@dataclass +class _MemoryBus(EventBus): + published: list[BusMessage] = field(default_factory=list) + backlog: list[BusMessage] = field(default_factory=list) + connected: bool = False + closed: bool = False + + async def connect(self) -> None: + self.connected = True + + async def close(self) -> None: + self.closed = True + + async def publish(self, message: BusMessage) -> None: + self.published.append(message) + + async def subscribe( + self, subject_pattern: str, consumer_group: str + ) -> AsyncIterator[BusMessage]: + _ = subject_pattern, consumer_group + for msg in list(self.backlog): + yield msg + + +@pytest.mark.asyncio +async def test_harness_connects_runs_and_closes_bus() -> None: + bus = _MemoryBus() + ran = asyncio.Event() + + async def main(harness: WorkerHarness) -> None: + ran.set() + harness.request_stop() + + harness = WorkerHarness( + worker_kind="unit", + replica_id="r-0", + bus=bus, + main=main, + heartbeat_interval_seconds=0.01, + ) + await harness.run() + assert bus.connected is True + assert bus.closed is True + assert ran.is_set() + + +@dataclass +class _Heart: + ticks: int = 0 + stop_after: int = 3 + + async def beat(self) -> bool: + self.ticks += 1 + return self.ticks < self.stop_after + + +@pytest.mark.asyncio +async def test_heartbeat_returning_false_stops_the_loop() -> None: + bus = _MemoryBus() + heart: HeartbeatEmitter = 
_Heart(stop_after=2) + + async def main(harness: WorkerHarness) -> None: + while not harness.should_stop(): # noqa: ASYNC110 + await asyncio.sleep(0.01) + + harness = WorkerHarness( + worker_kind="singleton", + replica_id="r-0", + bus=bus, + main=main, + heartbeat=heart, + heartbeat_interval_seconds=0.01, + ) + await harness.run() + assert harness.should_stop() + + +@pytest.mark.asyncio +async def test_run_bridge_consumes_and_publishes() -> None: + bus = _MemoryBus() + bus.backlog = [ + BusMessage(subject="augur.features.m-1", payload=b"1"), + BusMessage(subject="augur.features.m-2", payload=b"2"), + ] + + async def main(harness: WorkerHarness) -> None: + async def _tx(value: bytes) -> list[bytes]: + return [value + b"x"] + + await run_bridge( + harness, + input_pattern="augur.features.>", + output_subject_builder=lambda _out: "augur.candidates.cusum", + consumer_group="detector.cusum", + deserialize=lambda b: b, + transform=_tx, + serialize=lambda v: v, + trace_name="detector", + ) + + harness = WorkerHarness( + worker_kind="detector", + replica_id="r-0", + bus=bus, + main=main, + heartbeat_interval_seconds=0.01, + ) + await harness.run() + payloads = [m.payload for m in bus.published] + assert payloads == [b"1x", b"2x"] + + +@pytest.mark.asyncio +async def test_run_bridge_shard_filter_drops_foreign_keys() -> None: + bus = _MemoryBus() + bus.backlog = [ + BusMessage(subject="augur.features.m-1", payload=b"m-1"), + BusMessage(subject="augur.features.m-2", payload=b"m-2"), + BusMessage(subject="augur.features.m-3", payload=b"m-3"), + ] + + # The replica pool size is 2; whichever replica owns the key sees + # only messages whose shard_index(key, 2) == replica_id. 
+ replica_id = 0 + owned = [key for key in [b"m-1", b"m-2", b"m-3"] if owned_by(key.decode(), replica_id, 2)] + + async def main(harness: WorkerHarness) -> None: + async def _tx(value: bytes) -> list[bytes]: + return [value] + + await run_bridge( + harness, + input_pattern="augur.features.>", + output_subject_builder=lambda _out: "augur.candidates.out", + consumer_group="shard-test", + deserialize=lambda b: b, + transform=_tx, + serialize=lambda v: v, + shard_key=lambda v: v.decode(), + shard_config=ShardConfig(replica_id=replica_id, replica_count=2), + trace_name="shard", + ) + + harness = WorkerHarness( + worker_kind="feature", + replica_id="r-0", + bus=bus, + main=main, + heartbeat_interval_seconds=0.01, + ) + await harness.run() + assert [m.payload for m in bus.published] == owned + + +@pytest.mark.unit +def test_shard_index_stable_and_in_range() -> None: + assert shard_index("m-1", 1) == 0 + for key in ["a", "kalshi_fed_q2", "polymarket_yes"]: + idx = shard_index(key, 8) + assert 0 <= idx < 8 + + +@pytest.mark.unit +def test_shard_index_rejects_zero_replica_count() -> None: + with pytest.raises(ValueError, match="replica_count must be positive"): + shard_index("key", 0) + + +@pytest.mark.unit +def test_subject_helpers_include_prefix() -> None: + assert subjects.snapshots("augur", "kalshi", "m-1") == "augur.snapshots.kalshi.m-1" + assert subjects.features("augur", "m-1") == "augur.features.m-1" + assert subjects.candidates("augur", "cusum") == "augur.candidates.cusum" + assert subjects.flagged_signals("augur") == "augur.flagged_signals" + assert subjects.signals("augur") == "augur.signals" + assert subjects.briefs("augur", "json") == "augur.briefs.json" + assert subjects.snapshots_pattern("augur") == "augur.snapshots.>" + assert subjects.snapshots_pattern("augur", "kalshi") == "augur.snapshots.kalshi.>" From 837b73c8e80953ff5b3d04bd5ab1208fb45b2261 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 15:51:54 +0530 Subject: [PATCH 06/11] 
feat(workers): active-passive singleton pair with distributed lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SingletonRunner wraps one dedup-or-llm replica around a distributed lock. Boot: try acquire; if we win we become active and the harness runs our main coroutine with a SingletonHeartbeat renewing the lock every config.lock.renew_interval_seconds. Losing the renew flips the harness stop flag — the orchestrator restarts the process and the race re-runs, giving the surviving replica another chance. Passive peers wait in acquire_active_role polling the lock at wait_tick_seconds; max_wait_ticks is a test-only escape hatch so the passive loop is not unbounded under pytest. Failover telemetry: augur_failover_total counter increments whenever a heartbeat observes the lock was stolen, and augur_singleton_lock_holder gauge exposes the current holder per singleton_kind label. Tested end-to-end against InMemoryLock: happy-path activation, passive timeout, heartbeat detecting lost lock, and a two-runner sequence where the passive picks up after the active exits and releases. --- .../augur_signals/workers/singleton.py | 155 +++++++++++++++++ tests/signals/test_worker_singleton.py | 159 ++++++++++++++++++ 2 files changed, 314 insertions(+) create mode 100644 src/augur_signals/augur_signals/workers/singleton.py create mode 100644 tests/signals/test_worker_singleton.py diff --git a/src/augur_signals/augur_signals/workers/singleton.py b/src/augur_signals/augur_signals/workers/singleton.py new file mode 100644 index 0000000..4b67606 --- /dev/null +++ b/src/augur_signals/augur_signals/workers/singleton.py @@ -0,0 +1,155 @@ +"""Active-passive singleton worker pair with distributed-lock failover. + +Dedup and the LLM formatter run as one active instance with one passive +peer. The pair coordinates through a ``DistributedLock``: + +* Both replicas boot and try to ``acquire`` the shared lock. 
+* Whoever wins is **active** and starts processing. It renews the lock + every ``renew_interval_seconds``. +* The loser is **passive**; it sits in a retry loop checking whether + the lock is available. It processes nothing until it takes over. +* If the active replica crashes or is partitioned, its lock TTL lapses + and the passive's retry loop acquires, then begins processing. + +``SingletonHeartbeat`` is the ``HeartbeatEmitter`` the WorkerHarness +binds to: each beat renews the lock; losing the lock flips the worker +into passive mode, which the harness observes through a ``False`` +return and shuts down so the orchestrator restarts the process +(which then goes through the acquire loop again, this time winning). +""" + +from __future__ import annotations + +import asyncio +from collections.abc import Callable, Coroutine +from dataclasses import dataclass, field +from typing import Any +from uuid import uuid4 + +from augur_signals._observability import MetricCounter, MetricGauge +from augur_signals.bus._config import LockBody +from augur_signals.bus._lock import DistributedLock +from augur_signals.bus.base import EventBus +from augur_signals.workers.harness import HeartbeatEmitter, WorkerHarness + + +@dataclass(slots=True) +class SingletonHeartbeat: + """HeartbeatEmitter that renews a distributed lock each beat. + + Returning False from ``beat`` signals the harness to stop. This + happens when the lock was lost (another replica acquired) or the + lock backend raises a terminal error. 
+ """ + + lock: DistributedLock + lock_name: str + holder_id: str + ttl_seconds: int + failover_counter: MetricCounter | None = None + holder_gauge: MetricGauge | None = None + + async def beat(self) -> bool: + still_holding = await self.lock.renew(self.lock_name, self.holder_id, self.ttl_seconds) + if still_holding: + if self.holder_gauge is not None: + self.holder_gauge.set( + 1.0, + singleton_kind=self.lock_name, + replica_id=self.holder_id, + ) + return True + if self.failover_counter is not None: + self.failover_counter.inc(singleton_kind=self.lock_name) + return False + + +async def acquire_active_role( + lock: DistributedLock, + lock_name: str, + holder_id: str, + config: LockBody, + *, + wait_tick_seconds: float = 1.0, + max_wait_ticks: int | None = None, +) -> bool: + """Block until this replica wins the lock; return True on acquire. + + Args: + lock: The distributed lock to acquire. + lock_name: Singleton role name (``"dedup"`` / ``"llm_formatter"``). + holder_id: This replica's stable identifier. + config: ``LockBody`` carrying TTL / renew interval. + wait_tick_seconds: Poll cadence while passive. + max_wait_ticks: Optional cap on ticks before giving up; None + means wait forever (the production default). Tests pass a + small cap so an unresolved passive role terminates the + test. + + Returns: + True if the replica acquired the lock. False only when + *max_wait_ticks* is finite and exhausted. + """ + ticks = 0 + while True: + if await lock.acquire(lock_name, holder_id, config.ttl_seconds): + return True + if max_wait_ticks is not None and ticks >= max_wait_ticks: + return False + await asyncio.sleep(wait_tick_seconds) + ticks += 1 + + +@dataclass(slots=True) +class SingletonRunner: + """Glue that turns a singleton workload into a ``WorkerHarness`` run. + + Attributes: + lock_name: Singleton role name; matches the keys in the + distributed lock backend. + bus: EventBus connection used by the main coroutine. 
+ lock: DistributedLock coordinating active/passive. + config: ``LockBody`` holding TTL and renew interval. + main: Coroutine run while holding the active role. + """ + + lock_name: str + bus: EventBus + lock: DistributedLock + config: LockBody + main: Callable[[WorkerHarness], Coroutine[Any, Any, None]] + replica_id: str = field(default_factory=lambda: str(uuid4())) + + async def run(self, *, wait_tick_seconds: float = 1.0) -> None: + """Acquire, run main with heartbeat-driven renewal, release.""" + acquired = await acquire_active_role( + self.lock, + self.lock_name, + self.replica_id, + self.config, + wait_tick_seconds=wait_tick_seconds, + ) + if not acquired: + return + heartbeat: HeartbeatEmitter = SingletonHeartbeat( + lock=self.lock, + lock_name=self.lock_name, + holder_id=self.replica_id, + ttl_seconds=self.config.ttl_seconds, + failover_counter=MetricCounter("augur_failover_total", ["singleton_kind"]), + holder_gauge=MetricGauge( + "augur_singleton_lock_holder", ["singleton_kind", "replica_id"] + ), + ) + harness = WorkerHarness( + worker_kind=f"singleton.{self.lock_name}", + replica_id=self.replica_id, + bus=self.bus, + main=self.main, + heartbeat=heartbeat, + heartbeat_interval_seconds=float(self.config.renew_interval_seconds), + ) + try: + await harness.run() + finally: + await self.lock.release(self.lock_name, self.replica_id) diff --git a/tests/signals/test_worker_singleton.py b/tests/signals/test_worker_singleton.py new file mode 100644 index 0000000..ec91a37 --- /dev/null +++ b/tests/signals/test_worker_singleton.py @@ -0,0 +1,159 @@ +"""Tests for active-passive singleton worker failover.""" + +from __future__ import annotations + +import asyncio +from collections.abc import AsyncIterator +from dataclasses import dataclass + +import pytest + +from augur_signals.bus._config import LockBody +from augur_signals.bus._lock import InMemoryLock +from augur_signals.bus.base import BusMessage, EventBus +from augur_signals.workers.harness import WorkerHarness 
+from augur_signals.workers.singleton import ( + SingletonHeartbeat, + SingletonRunner, + acquire_active_role, +) + + +@dataclass +class _ManualClock: + t: float = 0.0 + + def now(self) -> float: + return self.t + + def advance(self, seconds: float) -> None: + self.t += seconds + + +@dataclass +class _MemoryBus(EventBus): + connected: bool = False + closed: bool = False + + async def connect(self) -> None: + self.connected = True + + async def close(self) -> None: + self.closed = True + + async def publish(self, message: BusMessage) -> None: # pragma: no cover + raise NotImplementedError + + async def subscribe( # pragma: no cover + self, subject_pattern: str, consumer_group: str + ) -> AsyncIterator[BusMessage]: + _ = subject_pattern, consumer_group + if False: # type: ignore[unreachable] + yield + + +@pytest.mark.asyncio +async def test_acquire_active_role_returns_true_when_lock_free() -> None: + lock = InMemoryLock() + ok = await acquire_active_role( + lock, + "dedup", + "replica-a", + LockBody(ttl_seconds=30, renew_interval_seconds=10), + wait_tick_seconds=0.0, + max_wait_ticks=0, + ) + assert ok is True + + +@pytest.mark.asyncio +async def test_acquire_active_role_gives_up_after_max_ticks() -> None: + lock = InMemoryLock() + await lock.acquire("dedup", "replica-a", ttl_seconds=30) + ok = await acquire_active_role( + lock, + "dedup", + "replica-b", + LockBody(ttl_seconds=30, renew_interval_seconds=10), + wait_tick_seconds=0.0, + max_wait_ticks=2, + ) + assert ok is False + assert await lock.holder("dedup") == "replica-a" + + +@pytest.mark.asyncio +async def test_singleton_heartbeat_stops_when_lock_lost() -> None: + clock = _ManualClock() + lock = InMemoryLock(_clock=clock) + await lock.acquire("dedup", "replica-a", ttl_seconds=30) + heart = SingletonHeartbeat( + lock=lock, + lock_name="dedup", + holder_id="replica-a", + ttl_seconds=30, + ) + assert await heart.beat() is True + # Simulate failover: replica-b takes the lock after TTL expiry. 
+ clock.advance(40) + await lock.acquire("dedup", "replica-b", ttl_seconds=30) + assert await heart.beat() is False + + +@pytest.mark.asyncio +async def test_singleton_runner_releases_lock_on_shutdown() -> None: + lock = InMemoryLock() + bus = _MemoryBus() + ran = asyncio.Event() + + async def main(harness: WorkerHarness) -> None: + ran.set() + harness.request_stop() + + runner = SingletonRunner( + lock_name="dedup", + bus=bus, + lock=lock, + config=LockBody(ttl_seconds=30, renew_interval_seconds=10), + main=main, + replica_id="replica-a", + ) + await runner.run(wait_tick_seconds=0.0) + assert ran.is_set() + assert await lock.holder("dedup") is None + + +@pytest.mark.asyncio +async def test_singleton_runner_passive_peer_takes_over_on_failover() -> None: + lock = InMemoryLock() + bus_a = _MemoryBus() + bus_b = _MemoryBus() + b_ran = asyncio.Event() + + async def main_a(harness: WorkerHarness) -> None: + # Active: exit immediately so the lock is released. + harness.request_stop() + + async def main_b(harness: WorkerHarness) -> None: + b_ran.set() + harness.request_stop() + + runner_a = SingletonRunner( + lock_name="dedup", + bus=bus_a, + lock=lock, + config=LockBody(ttl_seconds=30, renew_interval_seconds=10), + main=main_a, + replica_id="replica-a", + ) + runner_b = SingletonRunner( + lock_name="dedup", + bus=bus_b, + lock=lock, + config=LockBody(ttl_seconds=30, renew_interval_seconds=10), + main=main_b, + replica_id="replica-b", + ) + await runner_a.run(wait_tick_seconds=0.0) + await runner_b.run(wait_tick_seconds=0.0) + assert b_ran.is_set() From cc347cf746cbb90165a01061d12f92fd4d3ee0ae Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 15:57:53 +0530 Subject: [PATCH 07/11] feat(scripts): timescaledb backfill and dual-write sidecar --- scripts/dual_write_sidecar.py | 168 ++++++++++++++ scripts/migrate_to_timescale.py | 289 ++++++++++++++++++++++++ tests/signals/test_migration_scripts.py | 249 ++++++++++++++++++++ 3 files changed, 706 insertions(+) 
create mode 100644 scripts/dual_write_sidecar.py create mode 100644 scripts/migrate_to_timescale.py create mode 100644 tests/signals/test_migration_scripts.py diff --git a/scripts/dual_write_sidecar.py b/scripts/dual_write_sidecar.py new file mode 100644 index 0000000..0777eb9 --- /dev/null +++ b/scripts/dual_write_sidecar.py @@ -0,0 +1,168 @@ +"""Dual-write sidecar replaying engine writes into TimescaleDB. + +The sidecar subscribes to the engine's write-tee bus subject (a +dedicated ``augur.writes.*`` channel the engine fans off during the +dual-write window) and replays every snapshot, feature, and signal +into TimescaleDB alongside the primary DuckDB write. It maintains a +per-table ``augur_dual_write_lag_seconds`` gauge and increments the +``augur_dual_write_lag_alerts_total`` counter past the configured threshold. + +Usage: + + uv run python scripts/dual_write_sidecar.py \\ + --lag-alert-seconds 10 --bus-backend redis + +Rollback-friendly: if operators flip ``storage.toml`` back to DuckDB, +the sidecar observes no writes on the tee subject and sits idle until +the flag flips again. It never modifies DuckDB; it only reads the tee +and writes the mirror copy. +""" + +from __future__ import annotations + +import argparse +import asyncio +import json +import sys +from collections.abc import AsyncIterator +from dataclasses import dataclass, field +from datetime import UTC, datetime +from typing import TYPE_CHECKING, Protocol + +from augur_signals._observability import MetricCounter, MetricGauge + +if TYPE_CHECKING: + from augur_signals.bus.base import BusMessage, EventBus + from augur_signals.storage.timescaledb_store import TimescaleDBStore + + +class ClockReader(Protocol): + """Inject a clock so tests drive lag computation deterministically.""" + + def now(self) -> datetime: ...
+ + +@dataclass(slots=True) +class _WallClock: + def now(self) -> datetime: + return datetime.now(tz=UTC) + + +@dataclass(slots=True) +class LagTracker: + """Maintains a per-table lag gauge and alerts above the threshold.""" + + threshold_seconds: int + gauge: MetricGauge + alerts: MetricCounter + clock: ClockReader = field(default_factory=_WallClock) + + def record(self, table: str, message_ts: datetime) -> float: + delta = (self.clock.now() - message_ts).total_seconds() + self.gauge.set(delta, table=table) + if delta > self.threshold_seconds: + self.alerts.inc(table=table) + return delta + + +async def run_sidecar( + *, + bus: EventBus, + tee_subject: str, + consumer_group: str, + store: TimescaleDBStore, + tracker: LagTracker, + stop_after: int | None = None, +) -> int: + """Consume write-tee messages and replay into *store*. + + Args: + bus: EventBus carrying the tee subject. + tee_subject: Subject the engine fans write events to. + consumer_group: Consumer-group name; stable across restarts so + the sidecar resumes from the last acked entry. + store: TimescaleDBStore mirror target. + tracker: LagTracker recording observed lag per table. + stop_after: Optional cap on processed events (test only). None + keeps running until cancelled. + + Returns: + Number of events replayed. 
+ """ + await bus.connect() + processed = 0 + try: + async for message in _subscribe(bus, tee_subject, consumer_group): + payload = json.loads(message.payload) + table = str(payload["table"]) + event_time = datetime.fromisoformat(payload["ts"]) + tracker.record(table, event_time) + await _apply(store, table, payload["row"]) + processed += 1 + if stop_after is not None and processed >= stop_after: + break + finally: + await bus.close() + return processed + + +def _subscribe(bus: EventBus, subject: str, group: str) -> AsyncIterator[BusMessage]: + """Thin indirection so tests can swap the subscription source.""" + return bus.subscribe(subject, group) + + +async def _apply(store: TimescaleDBStore, table: str, row: dict[str, object]) -> None: + """Dispatch the tee event to the matching TimescaleDBStore write.""" + from augur_signals.models import FeatureVector, MarketSignal, MarketSnapshot + + if table == "snapshots": + await store.insert_snapshot(MarketSnapshot.model_validate(row)) + elif table == "features": + await store.insert_feature(FeatureVector.model_validate(row)) + elif table == "signals": + await store.insert_signal(MarketSignal.model_validate(row)) + else: + raise ValueError(f"Unknown tee table: {table!r}") + + +def _parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(prog="dual_write_sidecar") + parser.add_argument("--lag-alert-seconds", type=int, default=10) + parser.add_argument("--bus-backend", choices=["nats", "redis"], default="nats") + parser.add_argument("--tee-subject", default="augur.writes") + parser.add_argument("--consumer-group", default="dual_write") + return parser.parse_args(argv) + + +async def _cli(argv: list[str]) -> int: # pragma: no cover — entrypoint only + args = _parse_args(argv) + from pathlib import Path + + from augur_signals._config import load_config + from augur_signals.bus._config import BusConfig + from augur_signals.bus.factory import make_event_bus + from augur_signals.storage._config 
import StorageConfig + from augur_signals.storage.factory import make_timescaledb_store + + bus_cfg = load_config(Path("config/bus.toml"), BusConfig) + store_cfg = load_config(Path("config/storage.toml"), StorageConfig) + bus = make_event_bus(bus_cfg) + store = await make_timescaledb_store(store_cfg) + tracker = LagTracker( + threshold_seconds=args.lag_alert_seconds, + gauge=MetricGauge("augur_dual_write_lag_seconds", ["table"]), + alerts=MetricCounter("augur_dual_write_lag_alerts_total", ["table"]), + ) + processed = await run_sidecar( + bus=bus, + tee_subject=args.tee_subject, + consumer_group=args.consumer_group, + store=store, + tracker=tracker, + ) + print(f"replayed {processed} events") + return 0 + + +if __name__ == "__main__": # pragma: no cover + raise SystemExit(asyncio.run(_cli(sys.argv[1:]))) diff --git a/scripts/migrate_to_timescale.py b/scripts/migrate_to_timescale.py new file mode 100644 index 0000000..40012ba --- /dev/null +++ b/scripts/migrate_to_timescale.py @@ -0,0 +1,289 @@ +"""Backfill the TimescaleDB hot tables from the DuckDB Parquet archive. + +Usage: + + uv run python scripts/migrate_to_timescale.py backfill \\ + --from labels/snapshots_archive --batch-size 10000 + + uv run python scripts/migrate_to_timescale.py verify \\ + --start 2026-01-01 --end 2026-04-01 + +The script reads partitioned Parquet files in chronological order and +bulk-inserts them into TimescaleDB with batched ``INSERT`` statements. Per +partition it verifies row-count parity: the number of rows in the +Parquet file must match the number of rows the adapter reports landing +in the hypertable. On mismatch the script aborts before moving on so +the operator can investigate before the partition is replayed. + +``verify`` re-runs a (market, day) group-count parity query between +DuckDB and TimescaleDB for the requested window without inserting any +data. Operators run verify after backfill to confirm row-count +parity before the dual-write cutover.
+""" + +from __future__ import annotations + +import argparse +import asyncio +import sys +from pathlib import Path +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from psycopg import AsyncConnection + + +class MigrationError(RuntimeError): + """Raised when a row-count parity check fails or a partition is corrupt.""" + + +async def backfill( + *, + source_root: Path, + batch_size: int, + connection_factory: ConnectionFactory, +) -> BackfillSummary: + """Load every Parquet partition under *source_root* into TimescaleDB. + + Args: + source_root: Root of the Parquet archive + (`labels/snapshots_archive/`). + batch_size: Rows per COPY batch. Tuned at operations time; 10k + is a reasonable starting point. + connection_factory: Async callable opening a new + ``AsyncConnection`` per partition so long-running backfill + runs recycle connections. + + Returns: + ``BackfillSummary`` with the partition count and total rows. + + Raises: + MigrationError: A partition's reported insert count did not + match its Parquet row count. 
+ """ + partitions = _discover_partitions(source_root) + total_rows = 0 + for partition in partitions: + rows_in_parquet = _count_parquet_rows(partition) + async with connection_factory() as conn: + rows_inserted = await _copy_partition_into_timescale(conn, partition, batch_size) + if rows_inserted != rows_in_parquet: + raise MigrationError( + f"Row-count mismatch on {partition}: " + f"parquet={rows_in_parquet}, timescale={rows_inserted}" + ) + total_rows += rows_inserted + return BackfillSummary(partition_count=len(partitions), total_rows=total_rows)
+
+
+async def verify(
+    *,
+    start: str,
+    end: str,
+    duckdb_path: Path,
+    connection_factory: ConnectionFactory,
+) -> VerifySummary:
+    """Compare per-(market, day) group counts between DuckDB and TimescaleDB."""
+    duck_counts = _duckdb_group_counts(duckdb_path, start, end)
+    async with connection_factory() as conn:
+        timescale_counts = await _timescale_group_counts(conn, start, end)
+    mismatches = {
+        key: (duck_counts.get(key, 0), timescale_counts.get(key, 0))
+        for key in duck_counts.keys() | timescale_counts.keys()
+        if duck_counts.get(key, 0) != timescale_counts.get(key, 0)
+    }
+    return VerifySummary(
+        duckdb_groups=len(duck_counts),
+        timescale_groups=len(timescale_counts),
+        mismatches=mismatches,
+    )
+
+
+# --- helpers ---------------------------------------------------------
+
+
+from collections.abc import Callable  # noqa: E402
+from contextlib import AbstractAsyncContextManager  # noqa: E402
+from dataclasses import dataclass  # noqa: E402
+
+ConnectionFactory = Callable[[], AbstractAsyncContextManager["AsyncConnection[object]"]]
+
+
+@dataclass(frozen=True, slots=True)
+class BackfillSummary:
+    """Result of ``backfill``."""
+
+    partition_count: int
+    total_rows: int
+
+
+@dataclass(frozen=True, slots=True)
+class VerifySummary:
+    """Result of ``verify``."""
+
+    duckdb_groups: int
+    timescale_groups: int
+    mismatches: dict[tuple[str, str], tuple[int, int]]
+
+
+def _discover_partitions(source_root: Path) -> list[Path]:
+    """Return partitions in chronological order (``date=YYYY-MM-DD`` layout)."""
+    if not source_root.exists():
+        raise MigrationError(f"Source root does not exist: {source_root}")
+    partitions = sorted(
+        (p for p in source_root.glob("date=*") if p.is_dir()),
+        key=lambda p: p.name,
+    )
+    if not partitions:
+        raise MigrationError(f"No partitions found under {source_root}")
+    return partitions
+
+
+def _count_parquet_rows(partition: Path) -> int:
+    """Sum row counts across every Parquet file in *partition*."""
+    import pyarrow.parquet as pq
+
+    total = 0
+    for file in partition.glob("*.parquet"):
+        total += pq.ParquetFile(file).metadata.num_rows
+    return total
+
+
+async def _copy_partition_into_timescale(
+    conn: AsyncConnection[object], partition: Path, batch_size: int
+) -> int:
+    """Insert *partition* into the snapshots hypertable; return rows submitted.
+
+    Rows are written with batched INSERT ... ON CONFLICT DO NOTHING
+    statements, at most *batch_size* rows per statement batch. The
+    returned count is the number of rows handed to the server: rows
+    skipped by the conflict clause are still counted, so the value
+    matches the Parquet row count whenever every batch executes.
+
+    The implementation relies on the operator-supplied DSN pointing at
+    a TimescaleDB hypertable that already exists (via
+    ``TimescaleDBStore.initialize``). The script does not create
+    schemas — cutover sequencing is operator-driven.
+    """
+    import pyarrow.parquet as pq
+
+    # Enumerate parquet files before opening the cursor (ASYNC240 —
+    # Path.glob is blocking).
+    # NOTE(review): pq.read_table below is also a synchronous disk
+    # read; move it to a thread if this coroutine ever shares an event
+    # loop with latency-sensitive work.
+    files = sorted(partition.glob("*.parquet"))  # noqa: ASYNC240
+    rows = 0
+    async with conn.cursor() as cur:
+        for file in files:
+            table = pq.read_table(file)
+            for batch in table.to_batches(max_chunksize=batch_size):
+                columns = batch.schema.names
+                placeholders = ", ".join(["%s"] * len(columns))
+                # Column names come from the arrow schema, not user
+                # input, so the dynamic SQL is safe despite S608.
+                # Embedded double quotes are doubled so each name
+                # stays a single quoted identifier.
+                column_list = ", ".join('"' + c.replace('"', '""') + '"' for c in columns)
+                sql = (
+                    f"INSERT INTO snapshots ({column_list}) "  # noqa: S608
+                    f"VALUES ({placeholders}) ON CONFLICT DO NOTHING"
+                )
+                # RecordBatch.to_pylist() yields one dict per row, so
+                # tuple(row) would produce the column *names*. Zip the
+                # per-column value lists instead: one tuple per row,
+                # in the same order as the placeholders.
+                records = list(zip(*(col.to_pylist() for col in batch.columns), strict=True))
+                if not records:
+                    continue
+                await cur.executemany(sql, records)
+                rows += len(records)
+    await conn.commit()
+    return rows
+
+
+def _duckdb_group_counts(duckdb_path: Path, start: str, end: str) -> dict[tuple[str, str], int]:
+    """Per-(market_id, date) row counts from DuckDB snapshots."""
+    import duckdb
+
+    with duckdb.connect(str(duckdb_path)) as conn:
+        rows = conn.execute(
+            "SELECT market_id, DATE_TRUNC('day', timestamp)::DATE::VARCHAR AS day, "
+            "COUNT(*) FROM snapshots WHERE timestamp BETWEEN ? AND ? "
+            "GROUP BY market_id, day",
+            [start, end],
+        ).fetchall()
+    return {(m, d): int(c) for m, d, c in rows}
+
+
+async def _timescale_group_counts(
+    conn: AsyncConnection[object], start: str, end: str
+) -> dict[tuple[str, str], int]:
+    async with conn.cursor() as cur:
+        await cur.execute(
+            "SELECT market_id, DATE_TRUNC('day', timestamp)::DATE::TEXT AS day, "
+            "COUNT(*) FROM snapshots WHERE timestamp BETWEEN %s AND %s "
+            "GROUP BY market_id, day",
+            [start, end],
+        )
+        rows: list[Any] = list(await cur.fetchall())
+    result: dict[tuple[str, str], int] = {}
+    for row in rows:
+        market_id, day, count = row[0], row[1], row[2]
+        result[(str(market_id), str(day))] = int(count)
+    return result
+
+
+# --- CLI -------------------------------------------------------------
+
+
+def _parse_args(argv: list[str]) -> argparse.Namespace:
+    parser = argparse.ArgumentParser(prog="migrate_to_timescale")
+    sub = parser.add_subparsers(dest="command", required=True)
+
+    bf = sub.add_parser("backfill", help="Load every Parquet partition into TimescaleDB")
+    bf.add_argument("--from", dest="source_root", required=True, type=Path)
+    bf.add_argument("--batch-size", type=int, default=10_000)
+
+    ver = sub.add_parser("verify", help="Compare per-(market, day) group counts")
+    ver.add_argument("--start", required=True)
+    ver.add_argument("--end", required=True)
+    ver.add_argument("--duckdb", required=True, type=Path)
+
+    return parser.parse_args(argv)
+
+
+async def _cli(argv: list[str]) -> int:  # pragma: no cover — thin wrapper
+    """Dispatch the parsed subcommand and return the process exit code.
+
+    Returns 0 on success, 2 when verify finds mismatching groups, and
+    1 for an unrecognised command. The TimescaleDB DSN is read from the
+    AUGUR_TIMESCALE_URL environment variable; a missing variable
+    raises KeyError.
+    """
+    args = _parse_args(argv)
+    import os
+    from collections.abc import AsyncIterator
+    from contextlib import asynccontextmanager
+    from typing import cast
+
+    import psycopg
+
+    dsn = os.environ["AUGUR_TIMESCALE_URL"]
+
+    @asynccontextmanager
+    async def _factory() -> AsyncIterator[AsyncConnection[object]]:
+        # AsyncConnection.connect is a coroutine function. The
+        # coroutine itself has no __aenter__, so it must be awaited
+        # first; the resulting connection is the async context manager
+        # that closes itself on exit.
+        async with await psycopg.AsyncConnection.connect(dsn) as conn:
+            yield cast("AsyncConnection[object]", conn)
+
+    if args.command == "backfill":
+        summary = await backfill(
+            source_root=args.source_root,
+            batch_size=args.batch_size,
+            connection_factory=_factory,
+        )
+        print(f"backfilled {summary.partition_count} partitions, {summary.total_rows} rows")
+        return 0
+    if args.command == "verify":
+        vsummary = await verify(
+            start=args.start,
+            end=args.end,
+            duckdb_path=args.duckdb,
+            connection_factory=_factory,
+        )
+        if vsummary.mismatches:
+            print(
+                f"FAIL: {len(vsummary.mismatches)} mismatches across "
+                f"{vsummary.duckdb_groups} duckdb groups / "
+                f"{vsummary.timescale_groups} timescale groups",
+                file=sys.stderr,
+            )
+            return 2
+        print(f"OK: {vsummary.duckdb_groups} groups match (duckdb == timescale)")
+        return 0
+    return 1
+
+
+if __name__ == "__main__":  # pragma: no cover
+    raise SystemExit(asyncio.run(_cli(sys.argv[1:])))
diff --git a/tests/signals/test_migration_scripts.py b/tests/signals/test_migration_scripts.py
new file mode 100644
index 0000000..5354fa0
--- /dev/null
+++ b/tests/signals/test_migration_scripts.py
@@ -0,0 +1,249 @@
+"""Tests for the TimescaleDB migration and dual-write sidecar scripts."""
+
+from __future__ import annotations
+
+import json
+import sys
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+from dataclasses import dataclass, field
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "scripts"))
+
+import migrate_to_timescale as migrate
+from dual_write_sidecar import LagTracker, run_sidecar
+
+from augur_signals._observability import MetricCounter, MetricGauge
+from augur_signals.bus.base import BusMessage, EventBus
+
+
+@dataclass
+class _Cursor:
+    """Async cursor fake that records every statement it is given."""
+
+    executed: list[tuple[str, list[Any] | None]] = field(default_factory=list)
+    pending_rows: list[tuple[Any, ...]] = field(default_factory=list)
+
+    async def execute(self, sql: str, params: list[Any] | None = None) -> None:
+        self.executed.append((sql, params))
+
+    async def executemany(self, sql: str, params: list[tuple[Any, ...]]) -> None:
+        for p in params:
+            self.executed.append((sql, list(p)))
+
+    async def fetchall(self) -> list[tuple[Any, ...]]:
+        # Hand the queued rows out once, then reset the queue.
+        rows = self.pending_rows
+        self.pending_rows = []
+        return rows
+
+    async def __aenter__(self) -> _Cursor:
+        return self
+
+    async def __aexit__(self, *exc: object) -> None:
+        return None
+
+
+@dataclass
+class _Conn:
+    """Async connection fake exposing one shared cursor and a commit counter."""
+
+    cur: _Cursor = field(default_factory=_Cursor)
+    committed: int = 0
+
+    def cursor(self) -> _Cursor:
+        return self.cur
+
+    async def commit(self) -> None:
+        self.committed += 1
+
+    async def __aenter__(self) -> _Conn:
+        return self
+
+    async def __aexit__(self, *exc: object) -> None:
+        return None
+
+
+@pytest.mark.asyncio
+async def test_backfill_aborts_on_row_count_mismatch(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    partition = tmp_path / "date=2026-04-01"
+    partition.mkdir(parents=True)
+    # Write a tiny parquet file so _count_parquet_rows returns a
+    # non-zero count.
+    import pyarrow as pa
+    import pyarrow.parquet as pq
+
+    table = pa.table({"market_id": ["m-1", "m-2"], "timestamp": [1, 2]})
+    pq.write_table(table, partition / "part-0.parquet")
+
+    async def fake_copy(conn: Any, part: Path, batch: int) -> int:
+        _ = conn, part, batch
+        return 1  # Lie about rows landed.
+
+    # monkeypatch restores the real function at teardown; a bare
+    # attribute assignment would leak the fake into later tests.
+    monkeypatch.setattr(migrate, "_copy_partition_into_timescale", fake_copy)
+
+    @asynccontextmanager
+    async def factory() -> AsyncIterator[_Conn]:
+        yield _Conn()
+
+    with pytest.raises(migrate.MigrationError, match="Row-count mismatch"):
+        await migrate.backfill(source_root=tmp_path, batch_size=100, connection_factory=factory)
+
+
+@pytest.mark.asyncio
+async def test_backfill_happy_path_returns_summary(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    partition = tmp_path / "date=2026-04-01"
+    partition.mkdir(parents=True)
+    import pyarrow as pa
+    import pyarrow.parquet as pq
+
+    table = pa.table({"market_id": ["m-1", "m-2"]})
+    pq.write_table(table, partition / "part-0.parquet")
+
+    async def fake_copy(conn: Any, part: Path, batch: int) -> int:
+        _ = conn, part, batch
+        return 2
+
+    # Scoped to this test so the real copy helper is back afterwards.
+    monkeypatch.setattr(migrate, "_copy_partition_into_timescale", fake_copy)
+
+    @asynccontextmanager
+    async def factory() -> AsyncIterator[_Conn]:
+        yield _Conn()
+
+    summary = await migrate.backfill(
+        source_root=tmp_path, batch_size=100, connection_factory=factory
+    )
+    assert summary.partition_count == 1
+    assert summary.total_rows == 2
+
+
+def test_discover_partitions_sorts_chronologically(tmp_path: Path) -> None:
+    for name in ("date=2026-04-01", "date=2026-03-01", "date=2026-05-01"):
+        (tmp_path / name).mkdir()
+    partitions = migrate._discover_partitions(tmp_path)
+    assert [p.name for p in partitions] == [
+        "date=2026-03-01",
+        "date=2026-04-01",
+        "date=2026-05-01",
+    ]
+
+
+def test_discover_partitions_rejects_empty_root(tmp_path: Path) -> None:
+    with pytest.raises(migrate.MigrationError, match="No partitions"):
+        migrate._discover_partitions(tmp_path)
+
+
+# --- dual-write sidecar
---------------------------------------------- + + +@dataclass +class _FixedClock: + value: datetime = datetime(2026, 4, 1, 12, 0, 10, tzinfo=UTC) + + def now(self) -> datetime: + return self.value + + +@dataclass +class _MemoryBus(EventBus): + messages: list[BusMessage] = field(default_factory=list) + connected: bool = False + closed: bool = False + + async def connect(self) -> None: + self.connected = True + + async def close(self) -> None: + self.closed = True + + async def publish(self, message: BusMessage) -> None: # pragma: no cover + raise NotImplementedError + + async def subscribe( + self, subject_pattern: str, consumer_group: str + ) -> AsyncIterator[BusMessage]: + _ = subject_pattern, consumer_group + for msg in list(self.messages): + yield msg + + +@dataclass +class _RecordingStore: + snapshots_inserted: int = 0 + + async def insert_snapshot(self, snapshot: Any) -> None: + _ = snapshot + self.snapshots_inserted += 1 + + async def insert_feature(self, feature: Any) -> None: # pragma: no cover + _ = feature + + async def insert_signal(self, signal: Any) -> None: # pragma: no cover + _ = signal + + +@pytest.mark.asyncio +async def test_lag_tracker_fires_alert_past_threshold() -> None: + registry_gauge = MetricGauge("augur_dual_write_lag_seconds_test", ["table"]) + registry_counter = MetricCounter("augur_dual_write_lag_alerts_total_test", ["table"]) + clock = _FixedClock() + tracker = LagTracker( + threshold_seconds=5, + gauge=registry_gauge, + alerts=registry_counter, + clock=clock, + ) + # Record a 12-second-old event; should trip the threshold. 
+ delta = tracker.record("snapshots", datetime(2026, 4, 1, 11, 59, 58, tzinfo=UTC)) + assert delta == pytest.approx(12.0) + + +@pytest.mark.asyncio +async def test_run_sidecar_replays_snapshots_and_tracks_lag() -> None: + bus = _MemoryBus( + messages=[ + BusMessage( + subject="augur.writes", + payload=json.dumps( + { + "table": "snapshots", + "ts": "2026-04-01T12:00:00+00:00", + "row": { + "market_id": "m-1", + "platform": "kalshi", + "timestamp": "2026-04-01T12:00:00+00:00", + "last_price": 0.5, + "bid": 0.49, + "ask": 0.51, + "spread": 0.02, + "volume_24h": 1000.0, + "liquidity": 5000.0, + "question": "Q", + "resolution_source": "R", + "resolution_criteria": "C", + "closes_at": "2026-06-01T00:00:00+00:00", + "raw_json": {}, + }, + } + ).encode("utf-8"), + ) + ] + ) + store = _RecordingStore() + tracker = LagTracker( + threshold_seconds=30, + gauge=MetricGauge("augur_dual_write_lag_seconds_itest", ["table"]), + alerts=MetricCounter("augur_dual_write_lag_alerts_total_itest", ["table"]), + clock=_FixedClock(), + ) + processed = await run_sidecar( + bus=bus, + tee_subject="augur.writes", + consumer_group="dual_write", + store=store, # type: ignore[arg-type] + tracker=tracker, + stop_after=1, + ) + assert processed == 1 + assert store.snapshots_inserted == 1 + assert bus.closed is True From df15c81a5306ea9e62f54f1d91028d537dde9c75 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 15:58:47 +0530 Subject: [PATCH 08/11] style(docs): normalize inline-code to single backticks in phase-5 comments The distributed-runtime modules landed with reStructuredText-style double backticks. The rest of the codebase uses single backticks in docstrings and comments, so fold them to match. 
--- scripts/dual_write_sidecar.py | 6 +++--- scripts/migrate_to_timescale.py | 16 +++++++-------- .../augur_signals/bus/factory.py | 14 ++++++------- .../augur_signals/storage/factory.py | 18 ++++++++--------- .../storage/timescaledb_store.py | 8 ++++---- .../augur_signals/workers/harness.py | 18 ++++++++--------- .../augur_signals/workers/poller.py | 18 ++++++++--------- .../augur_signals/workers/sharding.py | 10 +++++----- .../augur_signals/workers/singleton.py | 20 +++++++++---------- .../augur_signals/workers/stateless.py | 4 ++-- .../augur_signals/workers/subjects.py | 4 ++-- 11 files changed, 68 insertions(+), 68 deletions(-) diff --git a/scripts/dual_write_sidecar.py b/scripts/dual_write_sidecar.py index 0777eb9..c0d5575 100644 --- a/scripts/dual_write_sidecar.py +++ b/scripts/dual_write_sidecar.py @@ -1,18 +1,18 @@ """Dual-write sidecar replaying engine writes into TimescaleDB. The sidecar subscribes to the engine's write-tee bus subject (a -dedicated ``augur.writes.*`` channel the engine fans off during the +dedicated `augur.writes.*` channel the engine fans off during the dual-write window) and replays every snapshot, feature, and signal into TimescaleDB alongside the primary DuckDB write. It maintains a per-table lag counter and fails the Prometheus -``augur_dual_write_lag_seconds`` gauge past the configured threshold. +`augur_dual_write_lag_seconds` gauge past the configured threshold. Usage: uv run python scripts/dual_write_sidecar.py \\ --lag-alert-seconds 10 --bus-backend redis -Rollback-friendly: if operators flip ``storage.toml`` back to DuckDB, +Rollback-friendly: if operators flip `storage.toml` back to DuckDB, the sidecar observes no writes on the tee subject and sits idle until the flag flips again. It never modifies DuckDB; it only reads the tee and writes the mirror copy. 
diff --git a/scripts/migrate_to_timescale.py b/scripts/migrate_to_timescale.py index 40012ba..6d29cee 100644 --- a/scripts/migrate_to_timescale.py +++ b/scripts/migrate_to_timescale.py @@ -9,13 +9,13 @@ --start 2026-01-01 --end 2026-04-01 The script reads partitioned Parquet files in chronological order and -bulk-inserts them into TimescaleDB using ``COPY`` for throughput. Per +bulk-inserts them into TimescaleDB using `COPY` for throughput. Per partition it verifies row-count parity: the number of rows in the Parquet file must match the number of rows the adapter reports landing in the hypertable. On mismatch the script aborts before moving on so the operator can investigate before the partition is replayed. -``verify`` re-runs a (market, day) group-count parity query between +`verify` re-runs a (market, day) group-count parity query between DuckDB and TimescaleDB for the requested window without inserting any data. Operators run verify after backfill to confirm byte-for-byte parity before the dual-write cutover. @@ -51,11 +51,11 @@ async def backfill( batch_size: Rows per COPY batch. Tuned at operations time; 10k is a reasonable starting point. connection_factory: Async callable opening a new - ``AsyncConnection`` per partition so long-running backfill + `AsyncConnection` per partition so long-running backfill runs recycle connections. Returns: - ``BackfillSummary`` with the partition count and total rows. + `BackfillSummary` with the partition count and total rows. 
Raises: MigrationError: A partition's reported insert count did not @@ -111,7 +111,7 @@ async def verify( @dataclass(frozen=True, slots=True) class BackfillSummary: - """Result of ``backfill``.""" + """Result of `backfill`.""" partition_count: int total_rows: int @@ -119,7 +119,7 @@ class BackfillSummary: @dataclass(frozen=True, slots=True) class VerifySummary: - """Result of ``verify``.""" + """Result of `verify`.""" duckdb_groups: int timescale_groups: int @@ -127,7 +127,7 @@ class VerifySummary: def _discover_partitions(source_root: Path) -> list[Path]: - """Return partitions in chronological order (``date=YYYY-MM-DD`` layout).""" + """Return partitions in chronological order (`date=YYYY-MM-DD` layout).""" if not source_root.exists(): raise MigrationError(f"Source root does not exist: {source_root}") partitions = sorted( @@ -156,7 +156,7 @@ async def _copy_partition_into_timescale( The implementation relies on the operator-supplied DSN pointing at a TimescaleDB hypertable that already exists (via - ``TimescaleDBStore.initialize``). The script does not create + `TimescaleDBStore.initialize`). The script does not create schemas — cutover sequencing is operator-driven. """ import pyarrow.parquet as pq diff --git a/src/augur_signals/augur_signals/bus/factory.py b/src/augur_signals/augur_signals/bus/factory.py index 27f42e2..1adc598 100644 --- a/src/augur_signals/augur_signals/bus/factory.py +++ b/src/augur_signals/augur_signals/bus/factory.py @@ -1,4 +1,4 @@ -"""Factory that selects an EventBus implementation from ``BusConfig``. +"""Factory that selects an EventBus implementation from `BusConfig`. Call from the worker startup path: @@ -11,8 +11,8 @@ await bus.connect() The monolith engine does not use this factory; it instantiates -``InProcessAsyncBus`` directly with its native ``MarketSignal`` -interface. Phase 5 workers use the byte-level ``EventBus`` protocol +`InProcessAsyncBus` directly with its native `MarketSignal` +interface. 
Phase 5 workers use the byte-level `EventBus` protocol and select a backend via this factory at startup. """ @@ -25,12 +25,12 @@ def make_event_bus(config: BusConfig) -> EventBus: - """Return an ``EventBus`` implementation selected by *config*. + """Return an `EventBus` implementation selected by *config*. - The ``"memory"`` variant of ``BusConfig`` is reserved for the + The `"memory"` variant of `BusConfig` is reserved for the monolith engine's in-process bus and is not served by this - factory; callers that pass it receive ``BusError`` because they - should reach for ``InProcessAsyncBus`` in ``bus/memory.py`` + factory; callers that pass it receive `BusError` because they + should reach for `InProcessAsyncBus` in `bus/memory.py` directly. """ if config.backend.kind == "nats": diff --git a/src/augur_signals/augur_signals/storage/factory.py b/src/augur_signals/augur_signals/storage/factory.py index 92d3787..25811b6 100644 --- a/src/augur_signals/augur_signals/storage/factory.py +++ b/src/augur_signals/augur_signals/storage/factory.py @@ -1,14 +1,14 @@ -"""Storage backend factory keyed by ``StorageConfig.backend.kind``. +"""Storage backend factory keyed by `StorageConfig.backend.kind`. -The Phase 1-4 monolith calls ``make_duckdb_store(config)`` directly +The Phase 1-4 monolith calls `make_duckdb_store(config)` directly when instantiating the engine. Phase 5 workers use this factory at -startup so flipping ``config/storage.toml`` ``backend.kind`` from -``"duckdb"`` to ``"timescaledb"`` restarts the process against the +startup so flipping `config/storage.toml` `backend.kind` from +`"duckdb"` to `"timescaledb"` restarts the process against the new backend without code edits. 
-``make_storage`` returns the DuckDB adapter synchronously or the -TimescaleDB adapter paired with an open ``AsyncConnection``; the -TimescaleDB branch is ``async`` because opening the connection is +`make_storage` returns the DuckDB adapter synchronously or the +TimescaleDB adapter paired with an open `AsyncConnection`; the +TimescaleDB branch is `async` because opening the connection is awaited. Callers select the right helper for their deployment mode. """ @@ -45,8 +45,8 @@ async def make_timescaledb_store( """Open a TimescaleDB store from *config*. If *connection* is None the factory reads the DSN from the env var - named in ``config.backend.timescale_url_env`` and opens a new - ``AsyncConnection``. Tests pass a stub connection explicitly. + named in `config.backend.timescale_url_env` and opens a new + `AsyncConnection`. Tests pass a stub connection explicitly. """ if config.backend.kind != "timescaledb": raise StorageConfigurationError( diff --git a/src/augur_signals/augur_signals/storage/timescaledb_store.py b/src/augur_signals/augur_signals/storage/timescaledb_store.py index b6e56e1..d92d8a9 100644 --- a/src/augur_signals/augur_signals/storage/timescaledb_store.py +++ b/src/augur_signals/augur_signals/storage/timescaledb_store.py @@ -1,15 +1,15 @@ """TimescaleDB-backed persistence mirroring the DuckDBStore surface. -The adapter is a thin facade over ``psycopg`` that issues the same +The adapter is a thin facade over `psycopg` that issues the same schema statements the DuckDB store does, then converts the time-series tables into TimescaleDB hypertables and attaches compression and retention policies. Every public method has a matching method on -``DuckDBStore`` so engine code flips backends via configuration without +`DuckDBStore` so engine code flips backends via configuration without call-site edits. The connection is injected so unit tests can swap in fakes or sqlite-backed shims. 
Production startup reads the DSN from the env var -named in ``storage.toml``; the adapter itself does not know about the +named in `storage.toml`; the adapter itself does not know about the filesystem. """ @@ -140,7 +140,7 @@ class TimescaleDBStore: Attributes: CURRENT_SCHEMA_VERSION: Integer version stamped into the - ``schema_version`` table after ``initialize`` applies all + `schema_version` table after `initialize` applies all pending migrations. """ diff --git a/src/augur_signals/augur_signals/workers/harness.py b/src/augur_signals/augur_signals/workers/harness.py index ab8e4ee..f4197a3 100644 --- a/src/augur_signals/augur_signals/workers/harness.py +++ b/src/augur_signals/augur_signals/workers/harness.py @@ -1,14 +1,14 @@ """Worker harness orchestrating connect → run → shutdown with heartbeat. -Every worker process builds a ``WorkerHarness`` from its main module -and calls ``run`` to enter the supervisory loop. The harness connects +Every worker process builds a `WorkerHarness` from its main module +and calls `run` to enter the supervisory loop. The harness connects to the event bus, optionally starts a heartbeat task, and drives the -worker's ``process_once`` coroutine until a shutdown signal (SIGINT / +worker's `process_once` coroutine until a shutdown signal (SIGINT / SIGTERM) flips the stop flag. On shutdown it awaits the pending batch then closes the bus. -The harness stays backend-agnostic: it consumes the ``EventBus`` -protocol from ``bus/base.py`` and a ``HeartbeatEmitter`` protocol that +The harness stays backend-agnostic: it consumes the `EventBus` +protocol from `bus/base.py` and a `HeartbeatEmitter` protocol that callers plug in with concrete implementations. Stateless workers pass a no-op emitter; singletons pass a lock-holding emitter that renews the distributed lock each beat. 
@@ -48,15 +48,15 @@ class WorkerHarness: Attributes: worker_kind: Short identifier used as a metric label and log - field (``"feature"``, ``"detector"``, ``"dedup"``, ...). + field (`"feature"`, `"detector"`, `"dedup"`, ...). replica_id: Stable identifier for this specific replica. In Kubernetes this is the pod name; on bare-metal deployments operators supply it through an env var. bus: EventBus connection to open at startup and close on exit. main: Coroutine the harness drives to completion; the coroutine - is expected to honour ``stop_event`` via ``should_stop``. - heartbeat: Optional emitter whose ``beat`` fires every - ``heartbeat_interval_seconds``. Defaults to a no-op. + is expected to honour `stop_event` via `should_stop`. + heartbeat: Optional emitter whose `beat` fires every + `heartbeat_interval_seconds`. Defaults to a no-op. heartbeat_interval_seconds: Seconds between beats. """ diff --git a/src/augur_signals/augur_signals/workers/poller.py b/src/augur_signals/augur_signals/workers/poller.py index 31cfd80..393026f 100644 --- a/src/augur_signals/augur_signals/workers/poller.py +++ b/src/augur_signals/augur_signals/workers/poller.py @@ -1,8 +1,8 @@ """Poller worker entrypoint — one per platform. The poller subscribes to the platform's public market API via the -Phase 1 ``AdaptivePoller`` and forwards every normalized snapshot to -``augur.snapshots..`` on the event bus. +Phase 1 `AdaptivePoller` and forwards every normalized snapshot to +`augur.snapshots..` on the event bus. Run as: python -m augur_signals.workers.poller --platform polymarket @@ -10,7 +10,7 @@ The main coroutine stays thin — it wires the harness to the existing Phase 1 polling stack. The heavy lifting (adaptive backoff, rate limiting, DLQ, manipulation hints) already lives in -``augur_signals.ingestion``; this module only glues it to the bus. +`augur_signals.ingestion`; this module only glues it to the bus. 
""" from __future__ import annotations @@ -29,9 +29,9 @@ class SnapshotSource(Protocol): """Abstract snapshot producer for the poller worker. - Phase 1's ``AdaptivePoller`` implements this; tests pass a simple + Phase 1's `AdaptivePoller` implements this; tests pass a simple stub. The poller does not own market discovery — that lives in - ``augur_signals.ingestion``. + `augur_signals.ingestion`. """ def stream(self) -> AsyncIterator[MarketSnapshot]: ... @@ -61,7 +61,7 @@ def build_harness( source: SnapshotSource, subject_prefix: str, ) -> WorkerHarness: - """Assemble a ``WorkerHarness`` around ``run_poller`` for *platform*.""" + """Assemble a `WorkerHarness` around `run_poller` for *platform*.""" async def _main(harness: WorkerHarness) -> None: await run_poller(harness, source, subject_prefix) @@ -85,11 +85,11 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: def main_factory_for( platform: str, ) -> Callable[[EventBus, SnapshotSource, str, str], WorkerHarness]: - """Curry ``build_harness`` for a given platform. + """Curry `build_harness` for a given platform. - Entrypoint scripts under ``python -m augur_signals.workers.poller`` + Entrypoint scripts under `python -m augur_signals.workers.poller` call this after parsing args; full container startup wires in the - concrete ``SnapshotSource`` from ``augur_signals.ingestion``. + concrete `SnapshotSource` from `augur_signals.ingestion`. """ def _build( diff --git a/src/augur_signals/augur_signals/workers/sharding.py b/src/augur_signals/augur_signals/workers/sharding.py index 15cb842..f7d5948 100644 --- a/src/augur_signals/augur_signals/workers/sharding.py +++ b/src/augur_signals/augur_signals/workers/sharding.py @@ -1,9 +1,9 @@ """Deterministic shard-key filter for stateful per-market workers. -Feature workers and detector workers shard by ``market_id`` so the +Feature workers and detector workers shard by `market_id` so the same market's observations always land on the same replica. 
The sharding function is FNV-1a over the UTF-8 bytes of the key modulo -``replica_count``, which is stable across processes and languages. +`replica_count`, which is stable across processes and languages. """ from __future__ import annotations @@ -17,15 +17,15 @@ def shard_index(key: str, replica_count: int) -> int: """Return the 0-based shard index for *key* in a pool of *replica_count*. Args: - key: The shard key (usually ``market_id``). + key: The shard key (usually `market_id`). replica_count: Total number of replicas in the pool; must be positive. Returns: - An integer in ``[0, replica_count)``. + An integer in `[0, replica_count)`. Raises: - ValueError: ``replica_count`` is zero or negative. + ValueError: `replica_count` is zero or negative. """ if replica_count <= 0: raise ValueError("replica_count must be positive") diff --git a/src/augur_signals/augur_signals/workers/singleton.py b/src/augur_signals/augur_signals/workers/singleton.py index 4b67606..73039df 100644 --- a/src/augur_signals/augur_signals/workers/singleton.py +++ b/src/augur_signals/augur_signals/workers/singleton.py @@ -1,19 +1,19 @@ """Active-passive singleton worker pair with distributed-lock failover. Dedup and the LLM formatter run as one active instance with one passive -peer. The pair coordinates through a ``DistributedLock``: +peer. The pair coordinates through a `DistributedLock`: -* Both replicas boot and try to ``acquire`` the shared lock. +* Both replicas boot and try to `acquire` the shared lock. * Whoever wins is **active** and starts processing. It renews the lock - every ``renew_interval_seconds``. + every `renew_interval_seconds`. * The loser is **passive**; it sits in a retry loop checking whether the lock is available. It processes nothing until it takes over. * If the active replica crashes or is partitioned, its lock TTL lapses and the passive's retry loop acquires, then begins processing. 
-``SingletonHeartbeat`` is the ``HeartbeatEmitter`` the WorkerHarness +`SingletonHeartbeat` is the `HeartbeatEmitter` the WorkerHarness binds to: each beat renews the lock; losing the lock flips the worker -into passive mode, which the harness observes through a ``False`` +into passive mode, which the harness observes through a `False` return and shuts down so the orchestrator restarts the process (which then goes through the acquire loop again, this time winning). """ @@ -37,7 +37,7 @@ class SingletonHeartbeat: """HeartbeatEmitter that renews a distributed lock each beat. - Returning False from ``beat`` signals the harness to stop. This + Returning False from `beat` signals the harness to stop. This happens when the lock was lost (another replica acquired) or the lock backend raises a terminal error. """ @@ -77,9 +77,9 @@ async def acquire_active_role( Args: lock: The distributed lock to acquire. - lock_name: Singleton role name (``"dedup"`` / ``"llm_formatter"``). + lock_name: Singleton role name (`"dedup"` / `"llm_formatter"`). holder_id: This replica's stable identifier. - config: ``LockBody`` carrying TTL / renew interval. + config: `LockBody` carrying TTL / renew interval. wait_tick_seconds: Poll cadence while passive. max_wait_ticks: Optional cap on ticks before giving up; None means wait forever (the production default). Tests pass a @@ -102,14 +102,14 @@ async def acquire_active_role( @dataclass(slots=True) class SingletonRunner: - """Glue that turns a singleton workload into a ``WorkerHarness`` run. + """Glue that turns a singleton workload into a `WorkerHarness` run. Attributes: lock_name: Singleton role name; matches the keys in the distributed lock backend. bus: EventBus connection used by the main coroutine. lock: DistributedLock coordinating active/passive. - config: ``LockBody`` holding TTL and renew interval. + config: `LockBody` holding TTL and renew interval. main: Coroutine run while holding the active role. 
""" diff --git a/src/augur_signals/augur_signals/workers/stateless.py b/src/augur_signals/augur_signals/workers/stateless.py index c50785a..e842aea 100644 --- a/src/augur_signals/augur_signals/workers/stateless.py +++ b/src/augur_signals/augur_signals/workers/stateless.py @@ -2,7 +2,7 @@ Each of these workers consumes from one subject, runs a pure function against the message payload, and publishes the output to another -subject. They share the ``run_bridge`` supervisor so the per-kind +subject. They share the `run_bridge` supervisor so the per-kind entrypoints stay tiny: they supply a deserializer, a transform, a serializer, and the input/output subjects. @@ -47,7 +47,7 @@ async def run_bridge[InT, OutT]( """Consume *input_pattern*, transform each payload, publish outputs. Args: - harness: The owning ``WorkerHarness``. Supplies the bus and + harness: The owning `WorkerHarness`. Supplies the bus and stop signal. input_pattern: Subject pattern to subscribe to. output_subject_builder: Function returning the subject to diff --git a/src/augur_signals/augur_signals/workers/subjects.py b/src/augur_signals/augur_signals/workers/subjects.py index 80ac047..4cb63d7 100644 --- a/src/augur_signals/augur_signals/workers/subjects.py +++ b/src/augur_signals/augur_signals/workers/subjects.py @@ -2,8 +2,8 @@ A single module owns the subject strings so producers and consumers stay aligned. Every helper returns a full subject with the configured -prefix so callers pass the result straight into ``bus.publish`` / -``bus.subscribe``. +prefix so callers pass the result straight into `bus.publish` / +`bus.subscribe`. """ from __future__ import annotations From feb46653b9ff812b56cbda1ace7a88a4ba9c0d10 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 16:01:17 +0530 Subject: [PATCH 09/11] feat(ops): dockerfile and kubernetes manifests for distributed runtime Multi-stage Dockerfile builds a single image used by every worker kind. 
The runtime stage copies the uv venv, source, and config into a non-root user; CMD defaults to the monolith engine and is overridden per worker in the Kubernetes manifests. K8s manifests under ops/deploy/: * namespace, ConfigMap (config/*.toml bundle), and Secret scaffolding for AUGUR_TIMESCALE_URL / REDIS_URL / NATS_CREDENTIALS_FILE / ANTHROPIC_API_KEY. * Deployments for each stateless worker (poller per platform, feature, detector, manipulation, calibration, context-format), each exposing /metrics on port 9090. * StatefulSets for the active-passive singletons (dedup, llm) so each replica gets a stable pod name forwarded to the distributed lock as holder_id. * Services: augur-websocket public endpoint; headless services for the singleton StatefulSets. * HorizontalPodAutoscalers on CPU + augur_bus_consume_lag_seconds. * ServiceMonitor for the prometheus-operator scrape setup. * kustomization.yaml wires everything and pins the image tag. --- ops/deploy/configmap.yaml | 25 +++ ops/deploy/hpa.yaml | 80 ++++++++++ ops/deploy/kustomization.yaml | 22 +++ ops/deploy/namespace.yaml | 6 + ops/deploy/pollers.yaml | 121 +++++++++++++++ ops/deploy/secrets.yaml | 21 +++ ops/deploy/servicemonitor.yaml | 21 +++ ops/deploy/services.yaml | 44 ++++++ ops/deploy/singletons.yaml | 115 ++++++++++++++ ops/deploy/stateless-workers.yaml | 245 ++++++++++++++++++++++++++++++ ops/docker/.dockerignore | 19 +++ ops/docker/Dockerfile | 75 +++++++++ 12 files changed, 794 insertions(+) create mode 100644 ops/deploy/configmap.yaml create mode 100644 ops/deploy/hpa.yaml create mode 100644 ops/deploy/kustomization.yaml create mode 100644 ops/deploy/namespace.yaml create mode 100644 ops/deploy/pollers.yaml create mode 100644 ops/deploy/secrets.yaml create mode 100644 ops/deploy/servicemonitor.yaml create mode 100644 ops/deploy/services.yaml create mode 100644 ops/deploy/singletons.yaml create mode 100644 ops/deploy/stateless-workers.yaml create mode 100644 ops/docker/.dockerignore create mode 100644 
ops/docker/Dockerfile diff --git a/ops/deploy/configmap.yaml b/ops/deploy/configmap.yaml new file mode 100644 index 0000000..bac38b7 --- /dev/null +++ b/ops/deploy/configmap.yaml @@ -0,0 +1,25 @@ +# Phase 5 operational config. +# +# Populate `config.*.toml` entries from the repository `config/` files +# at deploy time: `kubectl create configmap augur-config --from-file=config/` +# or sync via Kustomize/Helm. The worker image mounts this ConfigMap at +# /app/config, matching the Dockerfile's `AUGUR_CONFIG_DIR`. +apiVersion: v1 +kind: ConfigMap +metadata: + name: augur-config + namespace: augur +data: + # Populated from the repository `config/` directory at deploy time. + # Each key matches the TOML filename the worker loads at startup. + storage.toml: "" + bus.toml: "" + observability.toml: "" + polling.toml: "" + detectors.toml: "" + dedup.toml: "" + formatters.toml: "" + consumers.toml: "" + llm.toml: "" + markets.toml: "" + forbidden_tokens.toml: "" diff --git a/ops/deploy/hpa.yaml b/ops/deploy/hpa.yaml new file mode 100644 index 0000000..508376c --- /dev/null +++ b/ops/deploy/hpa.yaml @@ -0,0 +1,80 @@ +# HorizontalPodAutoscalers for the stateless worker deployments. +# +# Scale on CPU utilization for simple workloads and on a custom +# bus-consume-lag metric (see observability stack) for the +# shard-sensitive feature/detector workers. Singletons do NOT +# autoscale — their replica count is fixed at 2 (active + passive). 
+--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: augur-feature + namespace: augur +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: augur-feature + minReplicas: 2 + maxReplicas: 4 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Pods + pods: + metric: + name: augur_bus_consume_lag_seconds + target: + type: AverageValue + averageValue: "3" +--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: augur-detector + namespace: augur +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: augur-detector + minReplicas: 2 + maxReplicas: 4 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Pods + pods: + metric: + name: augur_bus_consume_lag_seconds + target: + type: AverageValue + averageValue: "3" +--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: augur-context-format + namespace: augur +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: augur-context-format + minReplicas: 4 + maxReplicas: 8 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 65 diff --git a/ops/deploy/kustomization.yaml b/ops/deploy/kustomization.yaml new file mode 100644 index 0000000..6d7f9a8 --- /dev/null +++ b/ops/deploy/kustomization.yaml @@ -0,0 +1,22 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: augur + +resources: + - namespace.yaml + - configmap.yaml + - secrets.yaml + - pollers.yaml + - stateless-workers.yaml + - singletons.yaml + - services.yaml + - hpa.yaml + - servicemonitor.yaml + +images: + - name: ghcr.io/aetherforge/augur + newTag: latest + +commonLabels: + app.kubernetes.io/part-of: augur diff --git a/ops/deploy/namespace.yaml b/ops/deploy/namespace.yaml new file mode 100644 index 0000000..9b841b2 --- 
/dev/null +++ b/ops/deploy/namespace.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: augur + labels: + app.kubernetes.io/part-of: augur diff --git a/ops/deploy/pollers.yaml b/ops/deploy/pollers.yaml new file mode 100644 index 0000000..a06f905 --- /dev/null +++ b/ops/deploy/pollers.yaml @@ -0,0 +1,121 @@ +# Per-platform pollers. +# +# One Deployment per platform, replicas=1 because each poller owns the +# adaptive-polling budget for its platform (see .docs/phase-5-scaling.md §3). +# The pollers are stateless — failure triggers a re-schedule, not failover. +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: augur-poller-polymarket + namespace: augur + labels: + app.kubernetes.io/name: augur-poller + app.kubernetes.io/component: poller + app.kubernetes.io/platform: polymarket +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: augur-poller + app.kubernetes.io/platform: polymarket + template: + metadata: + labels: + app.kubernetes.io/name: augur-poller + app.kubernetes.io/component: poller + app.kubernetes.io/platform: polymarket + spec: + containers: + - name: poller + image: ghcr.io/aetherforge/augur:latest + command: ["python", "-m"] + args: + - augur_signals.workers.poller + - --platform + - polymarket + - --replica-id + - $(POD_NAME) + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + envFrom: + - secretRef: + name: augur-secrets + volumeMounts: + - name: config + mountPath: /app/config + ports: + - name: metrics + containerPort: 9090 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + volumes: + - name: config + configMap: + name: augur-config +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: augur-poller-kalshi + namespace: augur + labels: + app.kubernetes.io/name: augur-poller + app.kubernetes.io/component: poller + app.kubernetes.io/platform: kalshi +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: 
augur-poller + app.kubernetes.io/platform: kalshi + template: + metadata: + labels: + app.kubernetes.io/name: augur-poller + app.kubernetes.io/component: poller + app.kubernetes.io/platform: kalshi + spec: + containers: + - name: poller + image: ghcr.io/aetherforge/augur:latest + command: ["python", "-m"] + args: + - augur_signals.workers.poller + - --platform + - kalshi + - --replica-id + - $(POD_NAME) + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + envFrom: + - secretRef: + name: augur-secrets + volumeMounts: + - name: config + mountPath: /app/config + ports: + - name: metrics + containerPort: 9090 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + volumes: + - name: config + configMap: + name: augur-config diff --git a/ops/deploy/secrets.yaml b/ops/deploy/secrets.yaml new file mode 100644 index 0000000..85e6dfc --- /dev/null +++ b/ops/deploy/secrets.yaml @@ -0,0 +1,21 @@ +# Secret scaffolding. +# +# Do NOT commit real credentials. This manifest is an allowlist of +# the env vars workers expect; populate the values via +# `kubectl create secret generic augur-secrets --from-literal=...` or +# Kustomize secretGenerator. +apiVersion: v1 +kind: Secret +metadata: + name: augur-secrets + namespace: augur +type: Opaque +stringData: + # DSN for TimescaleDB primary. + AUGUR_TIMESCALE_URL: "" + # URL for the Redis bus backend, if used. + REDIS_URL: "" + # NATS credentials file path inside the mounted volume. + NATS_CREDENTIALS_FILE: "" + # Anthropic API key for the cloud LLM backend (only if enabled). + ANTHROPIC_API_KEY: "" diff --git a/ops/deploy/servicemonitor.yaml b/ops/deploy/servicemonitor.yaml new file mode 100644 index 0000000..9668c8a --- /dev/null +++ b/ops/deploy/servicemonitor.yaml @@ -0,0 +1,21 @@ +# Prometheus ServiceMonitor so every worker's /metrics endpoint is +# scraped automatically when the prometheus-operator is installed in +# the cluster. 
Bare Prometheus setups replace this with a scrape +# config in the Prometheus ConfigMap. +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: augur-workers + namespace: augur + labels: + release: prometheus +spec: + selector: + matchLabels: + app.kubernetes.io/part-of: augur + namespaceSelector: + matchNames: + - augur + endpoints: + - port: metrics + interval: 15s diff --git a/ops/deploy/services.yaml b/ops/deploy/services.yaml new file mode 100644 index 0000000..d49d2f8 --- /dev/null +++ b/ops/deploy/services.yaml @@ -0,0 +1,44 @@ +# Service endpoints. +# +# augur-websocket: public endpoint for downstream consumers. +# augur-dedup / augur-llm: headless services backing the singleton +# StatefulSets (required for stable DNS per replica). +--- +apiVersion: v1 +kind: Service +metadata: + name: augur-websocket + namespace: augur +spec: + selector: + app.kubernetes.io/component: context-format + ports: + - name: ws + port: 8080 + targetPort: 8080 +--- +apiVersion: v1 +kind: Service +metadata: + name: augur-dedup + namespace: augur +spec: + clusterIP: None + selector: + app.kubernetes.io/component: dedup + ports: + - name: metrics + port: 9090 +--- +apiVersion: v1 +kind: Service +metadata: + name: augur-llm + namespace: augur +spec: + clusterIP: None + selector: + app.kubernetes.io/component: llm + ports: + - name: metrics + port: 9090 diff --git a/ops/deploy/singletons.yaml b/ops/deploy/singletons.yaml new file mode 100644 index 0000000..f7a8f3f --- /dev/null +++ b/ops/deploy/singletons.yaml @@ -0,0 +1,115 @@ +# Active-passive singleton StatefulSets. +# +# Dedup and the LLM formatter each run one active + one passive +# replica coordinated by the distributed lock. StatefulSet gives each +# pod a stable identity ($POD_NAME) which the singleton_runner +# forwards to the lock backend as holder_id. 
+--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: augur-dedup + namespace: augur + labels: + app.kubernetes.io/component: dedup +spec: + serviceName: augur-dedup + replicas: 2 + selector: + matchLabels: + app.kubernetes.io/component: dedup + template: + metadata: + labels: + app.kubernetes.io/component: dedup + spec: + containers: + - name: dedup + image: ghcr.io/aetherforge/augur:latest + command: ["python", "-m"] + args: [augur_signals.workers.dedup] + env: + - name: AUGUR_REPLICA_ID + valueFrom: + fieldRef: + fieldPath: metadata.name + envFrom: + - secretRef: + name: augur-secrets + volumeMounts: + - name: config + mountPath: /app/config + ports: + - name: metrics + containerPort: 9090 + resources: + requests: + cpu: 300m + memory: 512Mi + limits: + cpu: 1000m + memory: 1Gi + startupProbe: + httpGet: + path: /metrics + port: metrics + failureThreshold: 30 + periodSeconds: 5 + livenessProbe: + httpGet: + path: /metrics + port: metrics + periodSeconds: 10 + volumes: + - name: config + configMap: + name: augur-config +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: augur-llm + namespace: augur + labels: + app.kubernetes.io/component: llm +spec: + serviceName: augur-llm + replicas: 2 + selector: + matchLabels: + app.kubernetes.io/component: llm + template: + metadata: + labels: + app.kubernetes.io/component: llm + spec: + containers: + - name: llm + image: ghcr.io/aetherforge/augur:latest + command: ["python", "-m"] + args: [augur_format.workers.llm] + env: + - name: AUGUR_REPLICA_ID + valueFrom: + fieldRef: + fieldPath: metadata.name + envFrom: + - secretRef: + name: augur-secrets + volumeMounts: + - name: config + mountPath: /app/config + ports: + - name: metrics + containerPort: 9090 + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 4Gi + volumes: + - name: config + configMap: + name: augur-config diff --git a/ops/deploy/stateless-workers.yaml b/ops/deploy/stateless-workers.yaml new file mode 100644 
index 0000000..c4e9e11 --- /dev/null +++ b/ops/deploy/stateless-workers.yaml @@ -0,0 +1,245 @@ +# Stateless worker deployments. +# +# Feature, detector, manipulation, calibration, and context_format +# workers scale horizontally. HPA blocks attach to each Deployment +# below with CPU + bus-consume-lag targets (see hpa.yaml). +# +# Per-market sharding for feature/detector is handled by the +# augur_signals.workers.sharding module; the worker reads +# REPLICA_COUNT from the environment and its pod ordinal from +# $POD_NAME. +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: augur-feature + namespace: augur + labels: + app.kubernetes.io/component: feature +spec: + replicas: 2 + selector: + matchLabels: + app.kubernetes.io/component: feature + template: + metadata: + labels: + app.kubernetes.io/component: feature + spec: + containers: + - name: feature + image: ghcr.io/aetherforge/augur:latest + command: ["python", "-m"] + args: + - augur_signals.workers.feature + - --shard + - $(REPLICA_INDEX)/$(REPLICA_COUNT) + env: + - name: REPLICA_INDEX + valueFrom: + fieldRef: + fieldPath: metadata.labels['apps.kubernetes.io/pod-index'] + - name: REPLICA_COUNT + value: "2" + envFrom: + - secretRef: + name: augur-secrets + volumeMounts: + - name: config + mountPath: /app/config + ports: + - name: metrics + containerPort: 9090 + resources: + requests: + cpu: 250m + memory: 512Mi + limits: + cpu: 1000m + memory: 1Gi + volumes: + - name: config + configMap: + name: augur-config +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: augur-detector + namespace: augur + labels: + app.kubernetes.io/component: detector +spec: + replicas: 2 + selector: + matchLabels: + app.kubernetes.io/component: detector + template: + metadata: + labels: + app.kubernetes.io/component: detector + spec: + containers: + - name: detector + image: ghcr.io/aetherforge/augur:latest + command: ["python", "-m"] + args: + - augur_signals.workers.detector + - --shard + - 
$(REPLICA_INDEX)/$(REPLICA_COUNT) + env: + - name: REPLICA_INDEX + valueFrom: + fieldRef: + fieldPath: metadata.labels['apps.kubernetes.io/pod-index'] + - name: REPLICA_COUNT + value: "2" + envFrom: + - secretRef: + name: augur-secrets + volumeMounts: + - name: config + mountPath: /app/config + ports: + - name: metrics + containerPort: 9090 + resources: + requests: + cpu: 250m + memory: 512Mi + limits: + cpu: 1000m + memory: 1Gi + volumes: + - name: config + configMap: + name: augur-config +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: augur-manipulation + namespace: augur + labels: + app.kubernetes.io/component: manipulation +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: manipulation + template: + metadata: + labels: + app.kubernetes.io/component: manipulation + spec: + containers: + - name: manipulation + image: ghcr.io/aetherforge/augur:latest + command: ["python", "-m"] + args: [augur_signals.workers.manipulation] + envFrom: + - secretRef: + name: augur-secrets + volumeMounts: + - name: config + mountPath: /app/config + ports: + - name: metrics + containerPort: 9090 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + volumes: + - name: config + configMap: + name: augur-config +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: augur-calibration + namespace: augur + labels: + app.kubernetes.io/component: calibration +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: calibration + template: + metadata: + labels: + app.kubernetes.io/component: calibration + spec: + containers: + - name: calibration + image: ghcr.io/aetherforge/augur:latest + command: ["python", "-m"] + args: [augur_signals.workers.calibration] + envFrom: + - secretRef: + name: augur-secrets + volumeMounts: + - name: config + mountPath: /app/config + ports: + - name: metrics + containerPort: 9090 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + 
memory: 512Mi + volumes: + - name: config + configMap: + name: augur-config +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: augur-context-format + namespace: augur + labels: + app.kubernetes.io/component: context-format +spec: + replicas: 4 + selector: + matchLabels: + app.kubernetes.io/component: context-format + template: + metadata: + labels: + app.kubernetes.io/component: context-format + spec: + containers: + - name: context-format + image: ghcr.io/aetherforge/augur:latest + command: ["python", "-m"] + args: [augur_signals.workers.context_format] + envFrom: + - secretRef: + name: augur-secrets + volumeMounts: + - name: config + mountPath: /app/config + ports: + - name: metrics + containerPort: 9090 + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 1000m + memory: 512Mi + volumes: + - name: config + configMap: + name: augur-config diff --git a/ops/docker/.dockerignore b/ops/docker/.dockerignore new file mode 100644 index 0000000..616f728 --- /dev/null +++ b/ops/docker/.dockerignore @@ -0,0 +1,19 @@ +# Only source needed at build time; tests and dev artifacts excluded. +**/__pycache__/ +**/*.pyc +**/*.pyo +.git/ +.github/ +.venv/ +.pytest_cache/ +.ruff_cache/ +.mypy_cache/ +dist/ +build/ +data/ +labels/ +tests/ +.docs/ +docs/ +schemas/ +*.egg-info/ diff --git a/ops/docker/Dockerfile b/ops/docker/Dockerfile new file mode 100644 index 0000000..630f12b --- /dev/null +++ b/ops/docker/Dockerfile @@ -0,0 +1,75 @@ +# Multi-stage build for every Augur worker kind. +# +# The same image runs every worker — selection happens via `CMD` +# overrides in the Kubernetes manifests under `ops/deploy/`. See +# `.docs/phase-5-scaling.md §8` for the full table of CMD strings. + +# syntax=docker/dockerfile:1.7 + +# ----------------------------------------------------------------------------- +# Stage 1: Build. Install dependencies into /build/.venv via uv. 
+# ----------------------------------------------------------------------------- +FROM python:3.12-slim AS builder + +ARG AUGUR_EXTRAS="distributed" + +WORKDIR /build + +# System dependencies: build tools for wheels and libpq for psycopg. +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + build-essential \ + ca-certificates \ + curl \ + libpq5 \ + && rm -rf /var/lib/apt/lists/* + +# Install uv and pin it so reproducible builds. +COPY --from=ghcr.io/astral-sh/uv:0.9.6 /uv /usr/local/bin/uv + +# Copy metadata files first so layer caching works on dep-only changes. +COPY pyproject.toml uv.lock ./ +COPY src/augur_signals/pyproject.toml src/augur_signals/pyproject.toml +COPY src/augur_labels/pyproject.toml src/augur_labels/pyproject.toml +COPY src/augur_format/pyproject.toml src/augur_format/pyproject.toml + +# Resolve and lock workspace dependencies; installs the distributed +# extra for multi-process deployments. +RUN uv sync --frozen --no-dev --extra ${AUGUR_EXTRAS} + +# Copy source last. +COPY src/ src/ +COPY config/ config/ + +# Build the wheels into the venv. +RUN uv sync --frozen --no-dev --extra ${AUGUR_EXTRAS} + +# ----------------------------------------------------------------------------- +# Stage 2: Runtime. Slim image with only the venv + config. 
+# ----------------------------------------------------------------------------- +FROM python:3.12-slim AS runtime + +RUN apt-get update \ + && apt-get install --no-install-recommends -y libpq5 \ + && rm -rf /var/lib/apt/lists/* \ + && groupadd --system augur \ + && useradd --system --gid augur --home-dir /app augur + +WORKDIR /app +ENV PATH="/app/.venv/bin:$PATH" \ + PYTHONPATH="/app/src" \ + AUGUR_CONFIG_DIR="/app/config" \ + PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 + +COPY --from=builder --chown=augur:augur /build/.venv /app/.venv +COPY --from=builder --chown=augur:augur /build/src /app/src +COPY --from=builder --chown=augur:augur /build/config /app/config + +USER augur + +# Default to the monolith engine; Kubernetes manifests override CMD +# per worker kind (poller, feature, detector, manipulation, calibration, +# dedup, context_format, llm). +ENTRYPOINT ["python", "-m"] +CMD ["augur_signals.engine"] From 35291e56a0957b0464ef447247a2350324c3add5 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 16:03:31 +0530 Subject: [PATCH 10/11] docs: phase-5 distributed runtime changelog, runbook, system-design update --- CHANGELOG.md | 32 +++++++++ docs/architecture/system-design.md | 20 ++++++ docs/operations/distributed-runbook.md | 99 ++++++++++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 docs/operations/distributed-runbook.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 743f4dc..937f4eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,38 @@ All notable changes to Augur are recorded in this file. Format follows [Keep a C ## [Unreleased] +### Added — Distributed Runtime Scaffolding + +- `src/augur_signals/bus/base.py` — byte-level `EventBus` protocol plus `BusMessage` envelope. The Phase 1 `InProcessAsyncBus` remains the monolith transport; the new protocol is consumed by multi-process workers via the factory in `bus/factory.py`. +- `src/augur_signals/bus/nats.py` — NATS JetStream adapter. 
One stream per subject prefix, pull consumers keyed by `(pattern, consumer_group)`, publish with optional headers. `NATSKVLock` uses JetStream KV as the distributed-lock backend. +- `src/augur_signals/bus/redis_streams.py` — Redis Streams adapter with XADD/XREADGROUP/XACK for at-least-once delivery. `RedisLock` uses `SET NX EX` for acquire and `WATCH`/`MULTI`/`EXEC` for CAS renew/release so the adapter works against fakeredis and Redis Cluster alike. +- `src/augur_signals/bus/_lock.py` — `DistributedLock` protocol plus `InMemoryLock` reference implementation with an injectable monotonic clock for deterministic test failover. +- `src/augur_signals/storage/timescaledb_store.py` — TimescaleDB adapter mirroring `DuckDBStore`'s public surface. `initialize()` creates hypertables with configurable chunk intervals, compression segment-by clauses, and retention policies; zero-day values skip the policy. +- `src/augur_signals/storage/factory.py` — picks between DuckDB and TimescaleDB backends via `config/storage.toml` `backend.kind`. +- `src/augur_signals/_observability.py` — Prometheus-backed counters/gauges and an OpenTelemetry OTLP tracer behind the same shim call sites Phase 1 already instruments. Tests pass a fresh `CollectorRegistry` per case for isolation. +- `src/augur_signals/workers/harness.py` — `WorkerHarness` supervisor. Connects the bus, fires the heartbeat task, drives the worker main coroutine, handles SIGINT / SIGTERM, and records `augur_worker_alive` / `augur_worker_processed_total` metrics. +- `src/augur_signals/workers/stateless.py` — `run_bridge` consumer/transform/publisher spine shared by feature, detector, manipulation, calibration, and context-format workers. Shard filter uses FNV-1a modulo replica count for per-market pinning. +- `src/augur_signals/workers/singleton.py` — `SingletonRunner` with `SingletonHeartbeat` renewing a `DistributedLock` on every beat. 
Lost renewals stop the harness and trigger orchestrator-driven restart, which re-enters the acquire loop so the surviving replica takes over. +- `src/augur_signals/workers/poller.py` / `subjects.py` / `sharding.py` — platform-poller entrypoint, subject naming helpers aligned with `.docs/phase-5-scaling.md §4.3`, and the shared shard-index function. +- `config/storage.toml`, `config/bus.toml`, `config/observability.toml` with `StorageConfig`, `BusConfig`, `ObservabilityConfig` Pydantic loaders (`frozen=True`, `extra="forbid"`). +- `scripts/migrate_to_timescale.py` — `backfill` and `verify` subcommands for Parquet-to-TimescaleDB migration with row-count parity enforcement. +- `scripts/dual_write_sidecar.py` — tee consumer that replays engine writes into TimescaleDB during the dual-write window with `augur_dual_write_lag_seconds` gauge + alert counter. +- `ops/docker/Dockerfile` — multi-stage image shared across worker kinds; Kubernetes manifests under `ops/deploy/` (namespace, ConfigMap, Secret, pollers, stateless worker Deployments, singleton StatefulSets, Services, HPAs, ServiceMonitor, Kustomize overlay). +- `augur-signals` gains optional-dependency groups `bus-nats`, `bus-redis`, `storage-timescale`, `observability`, and `distributed` so the monolith wheel stays lean and the multi-process deployment pulls the full driver set. + +### Operational Handoff — Distributed Runtime + +After merge, the Phase 1-4 monolith remains the production deployment. Cutover to the multi-process runtime is operator-driven once the growth triggers in `.docs/phase-5-scaling.md §2` fire twice across separate measurement windows: + +1. Stand up TimescaleDB; run `scripts/migrate_to_timescale.py backfill --from labels/snapshots_archive`, then `verify` for row-count parity. +2. Start the dual-write sidecar; observe `augur_dual_write_lag_seconds` for ≥7 days below the 10-second threshold. +3. Deploy the message bus (NATS or Redis) and bring up shadow workers (consume only, no publish).
+4. Flip workers to active mode one kind at a time, starting with manipulation (smallest blast radius), then feature/detector (per-market shard validation), then the dedup and LLM singletons. +5. Flip `config/storage.toml` `backend.kind` to `timescaledb` and restart the engine; retain the DuckDB archive for 30 days for rollback. +6. After 30 days of stable operation, remove the DuckDB startup path and move the Parquet archive to cold storage. + +Live failover integration tests (NATS cluster, Redis Cluster, TimescaleDB with WAL streaming) remain operator-owned — the CI suite exercises the adapters against fakes and stubs. `ops/deploy/` manifests are a starting point; a production rollout layers operator-specific ingress, RBAC, and network policy on top. + ### Added — Gated LLM Secondary Formatter - `src/augur_format/llm/` package — the only location in the codebase where LLM SDK imports live, complementing the CI grep guard over `src/augur_signals/`. diff --git a/docs/architecture/system-design.md b/docs/architecture/system-design.md index dbd9e9c..ecb386a 100644 --- a/docs/architecture/system-design.md +++ b/docs/architecture/system-design.md @@ -428,3 +428,23 @@ The closed list of phrase strings the LLM formatter rejects. Maintained as a con 6. **Pre-resolution exclusion.** No detector fires within six hours of `closes_at`. Enforced inside each detector's `ingest()`. 7. **Manipulation flags are descriptive, not prescriptive.** Augur attaches flags; consumers apply suppression policy. 8. **Deterministic context primary, LLM secondary.** The canonical machine-consumed output is `SignalContext` JSON. The LLM formatter is gated, opt-in, and routed to human channels only by default. + +## Deployment Modes + +Augur supports two deployment modes from the same codebase: + +### Monolith (Phase 1-4 default) + +One `augur_signals.engine` process owns the full pipeline from ingestion to formatter emission.
`InProcessAsyncBus` routes between layers; `DuckDBStore` persists; the deterministic formatters run inline. This mode is the supported deployment until the growth triggers in `.docs/phase-5-scaling.md §2` fire twice across separate measurement windows. + +### Distributed Runtime (Phase 5) + +The engine decomposes into worker processes when scale demands it: + +- Pollers (`augur_signals.workers.poller`), one per platform, publish snapshots to `augur.snapshots.<platform>.<market_id>`. +- Stateless workers (feature, detector, manipulation, calibration, context_format) scale horizontally behind an `EventBus` (NATS JetStream or Redis Streams). Per-market sharding uses FNV-1a modulo replica count; each replica sees only its shard. +- Singletons (dedup, llm_formatter) run as active-passive pairs coordinated by a `DistributedLock`. The active instance renews the lock on each heartbeat; a missed renewal stops the harness, orchestrator-driven restart re-enters the acquire loop, and the surviving replica takes over within `ttl_seconds + renew_interval_seconds`. +- TimescaleDB replaces DuckDB for persistence. Hypertables partition `snapshots`, `features`, and `signals` by time with compression and retention policies attached per `storage.toml`. +- Prometheus + OpenTelemetry replace the Phase 1 no-op shims without any call-site edits; the backend swap happens in `configure_observability`. + +The distributed runtime is operator-driven — see `docs/operations/distributed-runbook.md` for cutover, rollback, and failover procedures. The monolith path remains fully supported during and after rollout so operators can revert to DuckDB for 30 days post-cutover.
diff --git a/docs/operations/distributed-runbook.md b/docs/operations/distributed-runbook.md new file mode 100644 index 0000000..c582c02 --- /dev/null +++ b/docs/operations/distributed-runbook.md @@ -0,0 +1,99 @@ +# Distributed Runtime Operational Runbook + +The Phase 5 multi-process deployment is triggered by growth thresholds in `.docs/phase-5-scaling.md §2`: >80M snapshot rows, P95 backtest latency >30s, or P99 live-write latency >500ms, each observed twice across separate measurement windows. Until the triggers fire, the single-process engine is the supported deployment. + +This runbook covers cutover, rollback, failover response, and on-call escalation for the distributed runtime. + +## 1. Pre-Cutover Checklist + +| Item | How to verify | +| --- | --- | +| TimescaleDB primary provisioned with hypertables | `SELECT * FROM timescaledb_information.hypertables` returns rows for `snapshots`, `features`, `signals` | +| Backfill complete with row-count parity | `scripts/migrate_to_timescale.py verify --start ... --end ...` exits 0 | +| Dual-write sidecar lag <10s for ≥7 days | `augur_dual_write_lag_seconds{table=*}` max-over-time stays under threshold | +| NATS or Redis bus operational; consumer groups created | `augur_bus_consume_lag_seconds` reports a value per `(topic, consumer_group)` | +| Shadow workers running for ≥48h without errors | `augur_worker_alive{worker_kind=*} == 1` continuously | +| Distributed lock backend seeded | `SET augur.lock.dedup ...` responds OK; NATS KV bucket exists | + +## 2. Cutover Procedure + +1. Announce the freeze window on the ops channel; set the monolith engine to drain mode (`AUGUR_DRAIN=true` env var) and let it finish in-flight cycles. +2. Flip `config/storage.toml`: + + ```toml + [backend] + kind = "timescaledb" # was "duckdb" + ``` + +3. Apply the updated ConfigMap: `kubectl apply -k ops/deploy/`. +4. 
Restart the monolith engine (or run `kubectl rollout restart deployment/augur-engine`) — the new process picks up TimescaleDB at startup and opens a connection pool from `AUGUR_TIMESCALE_URL`. +5. Watch `augur_db_query_seconds{table, operation}` for 15 minutes; roll back if P95 exceeds the pre-cutover DuckDB baseline by 2×. +6. Bring workers online in the order recommended in `.docs/phase-5-scaling.md §12.1`: manipulation → feature → detector → calibration → context_format → dedup → LLM. + +## 3. Rollback Procedure + +Rollback is always available for 30 days post-cutover because the Parquet archive is preserved. + +1. Flip `config/storage.toml` `backend.kind` back to `"duckdb"` and reapply the ConfigMap. +2. Scale the workers to zero: `kubectl scale --replicas=0 -n augur deployment,statefulset --all`. +3. Start the monolith engine against the DuckDB archive. +4. Announce rollback on the ops channel and file a post-incident ticket with the TimescaleDB query traces that motivated rollback. + +After the DuckDB path is removed (day 30+), rollback requires restoring from a TimescaleDB backup — see §5. + +## 4. Failover Response + +### 4.1 Dedup Singleton + +Symptom: `augur_singleton_lock_holder{singleton_kind="dedup"}` drops to 0, or `augur_failover_total{singleton_kind="dedup"}` increments. + +Procedure: + +1. Confirm the active pod was terminated: `kubectl get pod -n augur -l app.kubernetes.io/component=dedup`. +2. Observe the passive replica acquire the lock within `lock.ttl_seconds + renew_interval_seconds` (default 40s). The metric flips back to 1 with the new pod name. +3. If the lock stays unheld for >2× `ttl_seconds`, manually delete the stale lock: + - Redis: `redis-cli DEL augur.lock.dedup` + - NATS: `nats kv delete augur-locks dedup` +4. Force a restart of both replicas so the acquire race runs clean: `kubectl rollout restart -n augur statefulset/augur-dedup`. + +### 4.2 LLM Formatter + +Identical to dedup with `singleton_kind="llm_formatter"`.
In-flight briefs at failover time are dropped — by design per Phase 4 guidance. No retry. + +### 4.3 Stateful Worker (feature / detector) + +Stateful workers persist their per-market cursor to TimescaleDB every 60 seconds. On crash: + +1. Kubernetes reschedules the pod; the replacement replica reads the last persisted cursor and resumes. +2. If the shard count changed (HPA scaled the deployment), the new owner replays from the last cursor of the displaced replica. Expect a short backlog as messages re-ack. +3. Monitor `augur_bus_consume_lag_seconds{topic="augur.features.*", consumer_group="feature-*"}` — it should return below 5s within one poll cycle. + +## 5. Backup and Restore + +| Artefact | Cadence | Retention | +| --- | --- | --- | +| TimescaleDB base backup | Daily (pg_basebackup) | 30 days | +| TimescaleDB WAL | Continuous archive to S3-equivalent | 14 days | +| Parquet archive | Written by engine during dual-write | 30 days post-cutover, then 1 year cold | +| Reliability curves | Checkpointed per calibration run | Indefinite | + +Restore: `pg_basebackup` into a replacement host, replay WAL to the desired point-in-time, rerun `TimescaleDBStore.initialize` to validate hypertable definitions, then swap `AUGUR_TIMESCALE_URL`. + +## 6. SLO Response Thresholds + +| SLO | Target | Page if | +| --- | --- | --- | +| End-to-end signal latency P95 | <60s | >60s for 5+ min | +| End-to-end signal latency P99 | <120s | >120s for 5+ min | +| Live ingest write P99 | <200ms | >200ms for 2+ min | +| Bus consume lag P95 | <5s | >5s for 5+ min | +| Dedup failover time | <60s | >60s observed | +| LLM brief rejection rate | <5% / hr | >5% for 1 hr | + +Pager rotation and escalation are operations-team-owned; the engineering runbook only defines the thresholds. + +## 7. Common Investigations + +- **High bus lag on one shard**: check `augur_worker_processed_total{worker_kind="feature", replica_id="..."}` per replica. 
A replica that plateaus while peers advance is likely stuck on a long-running transform. +- **Increasing LLM rejections**: inspect `augur_llm_briefs_rejected_total{reason}` — the label identifies the gate that dropped the brief (forbidden token, schema violation, consumer gate, backend error). +- **TimescaleDB lock contention**: correlate `augur_db_query_seconds{operation="write"}` tail with pg_stat_activity; a handful of long-held WAL-sender sessions usually point to a slow read replica. From 7640a098014ebda9d046ffa4c6ea36fa4b59c165 Mon Sep 17 00:00:00 2001 From: Mathews-Tom Date: Fri, 17 Apr 2026 16:15:18 +0530 Subject: [PATCH 11/11] fix(phase-5): address pr-review CRITICAL and HIGH findings CRITICAL: - bus/redis_streams.py + bus/nats.py: defer XACK/msg.ack() to the start of the next iteration so consumers that break mid-async-for leave the in-flight message pending for redelivery. Previously the ack fired immediately after yield return, which under clean shutdown ran the ack for the last yielded message only if the consumer iterated forward, and fired the ack *before* downstream publish completed in run_bridge. - scripts/migrate_to_timescale.py: reject any Parquet column not in the snapshots allowlist before building dynamic SQL, and report cur.rowcount instead of len(records) so ON CONFLICT DO NOTHING skips do not silently pass the row-count parity check. - storage/timescaledb_store.py: compress_segmentby takes a column- list identifier, not a quoted string. Split, validate each column against the SQL-ident allowlist, and inline the unquoted list. HIGH: - bus/nats.py NATSKVLock: renew/release use kv.update/kv.delete with the observed revision so a stale replica recovering from a network blip cannot overwrite the current holder's value or delete a key the new holder already claimed. - storage/timescaledb_store.py: schema_version INSERT switches to INSERT ... 
SELECT WHERE NOT EXISTS so applied_at stays a migration audit record rather than a last-boot marker, matching DuckDB. - _observability.py: OTel TracerProvider is reused across configure_observability calls because the SDK refuses to replace it, eliminating silent 'Overriding current TracerProvider' warnings in test runs. - ops/deploy/hpa.yaml: remove HPA autoscale on feature/detector deployments; shard-sensitive workers cannot tolerate dynamic replica counts without re-sharding, so operators resize them by editing 'replicas' directly. - ops/deploy/{pollers,stateless-workers}.yaml: every Deployment now declares readinessProbe and livenessProbe on the /metrics port so rolling updates and service-mesh routing respect worker readiness. --- ops/deploy/hpa.yaml | 68 +++---------------- ops/deploy/pollers.yaml | 20 ++++++ ops/deploy/stateless-workers.yaml | 66 ++++++++++++++++-- scripts/migrate_to_timescale.py | 54 +++++++++++---- .../augur_signals/_observability.py | 8 +++ src/augur_signals/augur_signals/bus/nats.py | 36 ++++++++-- .../augur_signals/bus/redis_streams.py | 22 ++++-- .../storage/timescaledb_store.py | 30 ++++++-- tests/signals/test_bus_nats.py | 7 +- tests/signals/test_bus_redis.py | 19 +++--- 10 files changed, 220 insertions(+), 110 deletions(-) diff --git a/ops/deploy/hpa.yaml b/ops/deploy/hpa.yaml index 508376c..4d4492c 100644 --- a/ops/deploy/hpa.yaml +++ b/ops/deploy/hpa.yaml @@ -1,63 +1,11 @@ -# HorizontalPodAutoscalers for the stateless worker deployments. -# -# Scale on CPU utilization for simple workloads and on a custom -# bus-consume-lag metric (see observability stack) for the -# shard-sensitive feature/detector workers. Singletons do NOT -# autoscale — their replica count is fixed at 2 (active + passive). 
---- -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: augur-feature - namespace: augur -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: augur-feature - minReplicas: 2 - maxReplicas: 4 - metrics: - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: 70 - - type: Pods - pods: - metric: - name: augur_bus_consume_lag_seconds - target: - type: AverageValue - averageValue: "3" ---- -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: augur-detector - namespace: augur -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: augur-detector - minReplicas: 2 - maxReplicas: 4 - metrics: - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: 70 - - type: Pods - pods: - metric: - name: augur_bus_consume_lag_seconds - target: - type: AverageValue - averageValue: "3" +# HorizontalPodAutoscaler only for stateless workers whose shard key +# is irrelevant. feature/detector workers shard by market_id modulo +# replica_count; dynamic scaling would silently drop or duplicate +# shards because the shard mod changes per replica-count change. To +# resize those pools the operator edits the Deployment `replicas` +# directly and restarts pods so the new REPLICA_COUNT propagates. +# Singletons do NOT autoscale — their replica count is pinned at 2 +# (active + passive). 
--- apiVersion: autoscaling/v2 kind: HorizontalPodAutoscaler diff --git a/ops/deploy/pollers.yaml b/ops/deploy/pollers.yaml index a06f905..e162ee7 100644 --- a/ops/deploy/pollers.yaml +++ b/ops/deploy/pollers.yaml @@ -50,6 +50,16 @@ spec: ports: - name: metrics containerPort: 9090 + readinessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 30 + periodSeconds: 30 resources: requests: cpu: 100m @@ -108,6 +118,16 @@ spec: ports: - name: metrics containerPort: 9090 + readinessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 30 + periodSeconds: 30 resources: requests: cpu: 100m diff --git a/ops/deploy/stateless-workers.yaml b/ops/deploy/stateless-workers.yaml index c4e9e11..ab40d66 100644 --- a/ops/deploy/stateless-workers.yaml +++ b/ops/deploy/stateless-workers.yaml @@ -1,13 +1,15 @@ # Stateless worker deployments. # -# Feature, detector, manipulation, calibration, and context_format -# workers scale horizontally. HPA blocks attach to each Deployment -# below with CPU + bus-consume-lag targets (see hpa.yaml). +# manipulation / calibration / context_format are stateless and scale +# freely. feature / detector workers shard by market_id modulo +# REPLICA_COUNT — resizing their pools requires an operator edit of +# `replicas` and a rolling restart so the new count propagates to +# every REPLICA_COUNT env var. HPA does NOT manage these pools; +# dynamic replica count would silently drop shards. # -# Per-market sharding for feature/detector is handled by the -# augur_signals.workers.sharding module; the worker reads -# REPLICA_COUNT from the environment and its pod ordinal from -# $POD_NAME. +# Every deployment exposes /metrics on port 9090 and is covered by +# a TCP readiness/liveness probe so rolling updates admit traffic +# only after the listener is live. 
--- apiVersion: apps/v1 kind: Deployment @@ -50,6 +52,16 @@ spec: ports: - name: metrics containerPort: 9090 + readinessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 30 + periodSeconds: 30 resources: requests: cpu: 250m @@ -103,6 +115,16 @@ spec: ports: - name: metrics containerPort: 9090 + readinessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 30 + periodSeconds: 30 resources: requests: cpu: 250m @@ -146,6 +168,16 @@ spec: ports: - name: metrics containerPort: 9090 + readinessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 30 + periodSeconds: 30 resources: requests: cpu: 100m @@ -189,6 +221,16 @@ spec: ports: - name: metrics containerPort: 9090 + readinessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 30 + periodSeconds: 30 resources: requests: cpu: 100m @@ -232,6 +274,16 @@ spec: ports: - name: metrics containerPort: 9090 + readinessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + tcpSocket: + port: metrics + initialDelaySeconds: 30 + periodSeconds: 30 resources: requests: cpu: 200m diff --git a/scripts/migrate_to_timescale.py b/scripts/migrate_to_timescale.py index 6d29cee..d0f6f9c 100644 --- a/scripts/migrate_to_timescale.py +++ b/scripts/migrate_to_timescale.py @@ -149,43 +149,69 @@ def _count_parquet_rows(partition: Path) -> int: return total +# Allowlist of snapshot columns the backfill is permitted to write. +# Parquet files with any other column are rejected so a corrupt or +# adversarial partition cannot inject identifiers into the dynamic SQL. 
+_ALLOWED_SNAPSHOT_COLUMNS: frozenset[str] = frozenset( + { + "market_id", + "platform", + "timestamp", + "last_price", + "bid", + "ask", + "spread", + "volume_24h", + "liquidity", + "question", + "resolution_source", + "resolution_criteria", + "closes_at", + "raw_json", + "schema_version", + } +) + + async def _copy_partition_into_timescale( conn: AsyncConnection[object], partition: Path, batch_size: int ) -> int: - """COPY *partition* into the snapshots hypertable; return rows inserted. + """COPY *partition* into the snapshots hypertable; return rows actually inserted. - The implementation relies on the operator-supplied DSN pointing at - a TimescaleDB hypertable that already exists (via - `TimescaleDBStore.initialize`). The script does not create - schemas — cutover sequencing is operator-driven. + Returns the inserted count from `cur.rowcount` rather than the + number of rows attempted, so ON CONFLICT drops (duplicates on + re-run) are distinguishable from successful new inserts and the + parity check in `backfill` catches silent data skipping. """ import pyarrow.parquet as pq - # Enumerate parquet files up front so the async block does not touch - # the filesystem (ASYNC240 — Path.glob is blocking). Column names - # come from the arrow schema, not user input, so the dynamic SQL - # is safe despite S608's warning. files = sorted(partition.glob("*.parquet")) # noqa: ASYNC240 - rows = 0 + inserted = 0 async with conn.cursor() as cur: for file in files: table = pq.read_table(file) batches = table.to_batches(max_chunksize=batch_size) for batch in batches: columns = batch.schema.names + for column in columns: + if column not in _ALLOWED_SNAPSHOT_COLUMNS: + raise MigrationError( + f"Unexpected column {column!r} in {file}; " + "aborting to avoid SQL injection surface." 
+ ) placeholders = ", ".join(["%s"] * len(columns)) column_list = ", ".join(f'"{c}"' for c in columns) - # Column names come from the arrow schema, not user - # input, so the dynamic SQL is safe despite S608. sql = ( f"INSERT INTO snapshots ({column_list}) " # noqa: S608 f"VALUES ({placeholders}) ON CONFLICT DO NOTHING" ) records = [tuple(row) for row in batch.to_pylist()] await cur.executemany(sql, records) - rows += len(records) + # rowcount reflects actually inserted rows only; + # ON CONFLICT skips do not increment it. + inserted += max(cur.rowcount, 0) await conn.commit() - return rows + return inserted def _duckdb_group_counts(duckdb_path: Path, start: str, end: str) -> dict[tuple[str, str], int]: diff --git a/src/augur_signals/augur_signals/_observability.py b/src/augur_signals/augur_signals/_observability.py index ca22d38..41c1af6 100644 --- a/src/augur_signals/augur_signals/_observability.py +++ b/src/augur_signals/augur_signals/_observability.py @@ -101,6 +101,14 @@ def __init__(self, service_name: str, endpoint: str, sampling_ratio: float) -> N from opentelemetry.sdk.trace.export import BatchSpanProcessor from opentelemetry.sdk.trace.sampling import TraceIdRatioBased + # OTel refuses to replace the global TracerProvider once set + # and logs a silent "Overriding" warning. Reuse an existing + # provider on re-configuration (common across test cases) + # rather than fighting the SDK's global state. 
+ current = trace.get_tracer_provider() + if isinstance(current, TracerProvider): + self._tracer = trace.get_tracer("augur") + return resource = Resource.create({SERVICE_NAME: service_name}) provider = TracerProvider(resource=resource, sampler=TraceIdRatioBased(sampling_ratio)) provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(endpoint=endpoint))) diff --git a/src/augur_signals/augur_signals/bus/nats.py b/src/augur_signals/augur_signals/bus/nats.py index 500031e..ff0d4e1 100644 --- a/src/augur_signals/augur_signals/bus/nats.py +++ b/src/augur_signals/augur_signals/bus/nats.py @@ -67,17 +67,27 @@ async def publish(self, message: BusMessage) -> None: async def subscribe( self, subject_pattern: str, consumer_group: str ) -> AsyncIterator[BusMessage]: + """At-least-once delivery via pull-subscribe + deferred ack. + + The previous message is acked at the *start* of the next + iteration so a consumer that breaks out of the async-for + leaves the in-flight message un-acked. JetStream's pending + queue will redeliver it on restart, matching the + at-least-once contract in `base.py`. + """ if self._js is None: raise BusError("NATSBus.connect() must be called before subscribe()") sub = await self._js.pull_subscribe(subject_pattern, durable=consumer_group) import asyncio as _asyncio + pending_msg: Any | None = None try: while True: + if pending_msg is not None: + await pending_msg.ack() + pending_msg = None msgs = await sub.fetch(batch=1, timeout=1) if not msgs: - # Yield control so an outer cancellation or break can - # observe the generator between empty-fetch polls. await _asyncio.sleep(0) continue for msg in msgs: @@ -86,7 +96,10 @@ async def subscribe( payload=msg.data, headers=dict(msg.headers) if msg.headers else None, ) - await msg.ack() + # Defer ack to the next loop iteration so breaks + # and crashes leave messages pending for + # redelivery. 
+ pending_msg = msg finally: await sub.unsubscribe() @@ -118,22 +131,35 @@ async def acquire(self, name: str, holder_id: str, ttl_seconds: int) -> bool: return True async def renew(self, name: str, holder_id: str, ttl_seconds: int) -> bool: + # TTL is configured on the bucket at create_key_value time; + # renew uses update with the observed revision so a stale + # replica recovering from a network blip cannot overwrite the + # new holder's value with its own holder_id. _ = ttl_seconds if self._kv is None: raise LockError("NATSKVLock.connect() must be called before renew()") entry = await self._kv.get(name) if entry is None or entry.value.decode("utf-8") != holder_id: return False - await self._kv.put(name, holder_id.encode("utf-8")) + try: + await self._kv.update(name, holder_id.encode("utf-8"), last=entry.revision) + except Exception: + return False return True async def release(self, name: str, holder_id: str) -> None: + # Compare-and-delete so a concurrent owner swap between the + # get() and delete() RPCs is detected and the release becomes + # a no-op rather than deleting the new holder's key. if self._kv is None: raise LockError("NATSKVLock.connect() must be called before release()") entry = await self._kv.get(name) if entry is None or entry.value.decode("utf-8") != holder_id: return - await self._kv.delete(name) + try: + await self._kv.delete(name, last=entry.revision) + except Exception: + return async def holder(self, name: str) -> str | None: if self._kv is None: diff --git a/src/augur_signals/augur_signals/bus/redis_streams.py b/src/augur_signals/augur_signals/bus/redis_streams.py index 98a2c8e..a610323 100644 --- a/src/augur_signals/augur_signals/bus/redis_streams.py +++ b/src/augur_signals/augur_signals/bus/redis_streams.py @@ -100,6 +100,15 @@ async def publish(self, message: BusMessage) -> None: async def subscribe( self, subject_pattern: str, consumer_group: str ) -> AsyncIterator[BusMessage]: + """At-least-once delivery via XREADGROUP / XACK. 
+ + The previous message is acked at the *start* of the next loop + iteration rather than immediately after yield, so a consumer + that breaks out of the async-for leaves the in-flight message + pending for redelivery on restart. Clean termination acks + every message the consumer iterated past; crashes and breaks + only ack messages whose processing already completed. + """ if self._client is None: raise BusError("RedisStreamsBus.connect() must be called before subscribe()") group = f"{self.config.consumer_group_prefix}.{consumer_group}" @@ -109,10 +118,11 @@ async def subscribe( except Exception as exc: if "BUSYGROUP" not in str(exc): raise BusError(f"Failed to create consumer group {group}") from exc - # Redis consumer groups persist across restarts; nothing to - # tear down in a finally block. The loop exits on cancellation - # propagated from the caller's async-for. + pending_ack_id: bytes | str | None = None while True: + if pending_ack_id is not None: + await self._client.xack(subject_pattern, group, pending_ack_id) + pending_ack_id = None entries = await self._client.xreadgroup( groupname=group, consumername=consumer, @@ -121,14 +131,16 @@ async def subscribe( block=self.config.block_ms, ) if not entries: - # Yield control so an outer cancellation can fire. await asyncio.sleep(0) continue for _stream, messages in entries: for msg_id, fields in messages: message = _decode_message(subject_pattern, fields) yield message - await self._client.xack(subject_pattern, group, msg_id) + # Defer the ack to the next iteration so a consumer + # that breaks mid-iteration leaves this message + # pending for redelivery. 
+ pending_ack_id = msg_id class RedisLock(DistributedLock): diff --git a/src/augur_signals/augur_signals/storage/timescaledb_store.py b/src/augur_signals/augur_signals/storage/timescaledb_store.py index d92d8a9..b68a5dc 100644 --- a/src/augur_signals/augur_signals/storage/timescaledb_store.py +++ b/src/augur_signals/augur_signals/storage/timescaledb_store.py @@ -204,13 +204,25 @@ async def initialize(self) -> None: ) if spec.compress_after_days > 0: if spec.segment_by: - await cur.execute( + # timescaledb.compress_segmentby takes a + # column-list identifier, not a quoted string; + # validate every column against the SQL-ident + # allowlist then inline the list literally. + segment_columns = [ + self._quote_ident(col.strip()) + for col in spec.segment_by.split(",") + if col.strip() + ] + segment_list = ", ".join(segment_columns) + # segment_list is built from validated idents + # so the dynamic SQL is safe despite S608. + alter_sql = ( "ALTER TABLE " + self._quote_ident(spec.table) + " SET (timescaledb.compress, " - "timescaledb.compress_segmentby = %s)", - [spec.segment_by], + + f"timescaledb.compress_segmentby = '{segment_list}')" ) + await cur.execute(alter_sql) await cur.execute( """ SELECT add_compression_policy( @@ -230,13 +242,19 @@ async def initialize(self) -> None: """, [spec.table, spec.retention_days], ) + # Record the migration timestamp once. The WHERE NOT + # EXISTS guard preserves the original applied_at so the row + # remains a migration audit record rather than a last- + # boot marker — matching DuckDB's INSERT OR IGNORE.
await cur.execute( """ INSERT INTO schema_version (version, applied_at) - VALUES (%s, now()) - ON CONFLICT (version) DO NOTHING + SELECT %s, now() + WHERE NOT EXISTS ( + SELECT 1 FROM schema_version WHERE version = %s + ) """, - [self.CURRENT_SCHEMA_VERSION], + [self.CURRENT_SCHEMA_VERSION, self.CURRENT_SCHEMA_VERSION], ) await self._conn.commit() diff --git a/tests/signals/test_bus_nats.py b/tests/signals/test_bus_nats.py index 181eba5..07a782b 100644 --- a/tests/signals/test_bus_nats.py +++ b/tests/signals/test_bus_nats.py @@ -158,6 +158,8 @@ async def test_nats_close_drains_client(client: _FakeClient) -> None: @pytest.mark.asyncio async def test_nats_subscribe_acks_yielded_messages(client: _FakeClient) -> None: + """Ack is deferred to the next iteration; break leaves the last + yielded message un-acked for JetStream redelivery.""" config = NATSBody() bus = NATSBus(config, client=client) # type: ignore[arg-type] await bus.connect() @@ -171,7 +173,8 @@ async def test_nats_subscribe_acks_yielded_messages(client: _FakeClient) -> None if count >= 3: break - # The first two messages were acked in the iterations past them; - # the third is the yielded message at the break point (no ack). acks = [m._acked for m in client._js.published] + # msg-a acked at the iteration that yielded msg-b; msg-b acked at + # the iteration that yielded msg-c; msg-c pending because the + # consumer broke before the next iteration. assert acks == [True, True, False] diff --git a/tests/signals/test_bus_redis.py b/tests/signals/test_bus_redis.py index 0a13767..884b805 100644 --- a/tests/signals/test_bus_redis.py +++ b/tests/signals/test_bus_redis.py @@ -49,11 +49,8 @@ async def consume() -> None: async def test_redis_streams_xack_marks_processed_entries( redis_client: fakeredis.aioredis.FakeRedis, ) -> None: - """XACK fires after the consumer iterates past a yielded message. 
- - Consumers that break out of the subscribe iterator without advancing - past a yielded message leave it pending so Redis redelivers on - restart (at-least-once semantics). + """Ack is deferred to the next iteration; break leaves the current + yielded message pending for redelivery (at-least-once semantics). """ config = RedisBody(url_env="IGNORED", stream_max_length=100, block_ms=50) bus = RedisStreamsBus(config, client=redis_client) @@ -62,24 +59,24 @@ async def test_redis_streams_xack_marks_processed_entries( subject = "augur.flagged_signals" await bus.publish(BusMessage(subject=subject, payload=b"one")) await bus.publish(BusMessage(subject=subject, payload=b"two")) + await bus.publish(BusMessage(subject=subject, payload=b"three")) received: list[bytes] = [] async def consume() -> None: async for msg in bus.subscribe(subject, "test-group"): received.append(msg.payload) - if len(received) >= 2: - # Breaking after iterating past msg #1 means #1 is - # acked; #2 is the currently-yielded message whose ack - # follows only if the consumer iterates once more. + if len(received) >= 3: + # Break at msg #3. Expected: #1 and #2 were acked at + # the top of the iterations that yielded #2 and #3 + # respectively. #3 stays pending because we broke + # before re-entering the loop. break await asyncio.wait_for(consume(), timeout=2.0) summary = await redis_client.xpending(subject, "augur.test-group") pending = summary.get("pending") if isinstance(summary, dict) else summary[0] - # The first message is acked; the second remains pending because - # the consumer broke out before iterating past it. assert pending == 1 await bus.close()