Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ jobs:
with:
python-version: "3.12"

# Makes the private alpha-engine-lib repository fetchable over HTTPS for the
# rest of the job by registering a global git URL rewrite that embeds the
# token taken from the ALPHA_ENGINE_LIB_TOKEN repository secret.
- name: Configure git auth for private alpha-engine-lib
env:
ALPHA_ENGINE_LIB_TOKEN: ${{ secrets.ALPHA_ENGINE_LIB_TOKEN }}
# The script exits 1 with a `::error::` message when the token is empty;
# otherwise it sets url.<token-url>.insteadOf for the alpha-engine-lib URL.
run: |
if [ -z "$ALPHA_ENGINE_LIB_TOKEN" ]; then
echo "::error::ALPHA_ENGINE_LIB_TOKEN secret not set — required to install private alpha-engine-lib"
exit 1
fi
git config --global url."https://x-access-token:${ALPHA_ENGINE_LIB_TOKEN}@github.com/cipher813/alpha-engine-lib".insteadOf "https://github.com/cipher813/alpha-engine-lib"

- name: Install dependencies
run: |
pip install --upgrade pip
Expand Down
95 changes: 0 additions & 95 deletions log_config.py

This file was deleted.

48 changes: 48 additions & 0 deletions preflight.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
Data-module preflight: connectivity + freshness checks run at the top of
``weekly_collector.main()`` before any real collection work starts.

Primitives live in ``alpha_engine_lib.preflight.BasePreflight``; this
module only composes them into a mode-specific sequence. See the
alpha-engine-lib README for the rationale and the 2026-04-14 failure
mode that motivated the library.
"""

from __future__ import annotations

from alpha_engine_lib.preflight import BasePreflight


class DataPreflight(BasePreflight):
    """Fail-fast preflight sequence for the alpha-engine-data entrypoint.

    Every mode checks the ``AWS_REGION`` environment variable and the S3
    bucket. ``mode`` selects the additional checks:

    - ``"daily"`` — weekday DailyData step. No extra API keys; ArcticDB
      must be readable and ``universe``/``SPY`` must be ≤4 days stale
      (covers Fri→Tue long weekends + 1 day of buffer).
    - ``"phase1"`` — Saturday DataPhase1. Requires ``FRED_API_KEY`` and
      ``POLYGON_API_KEY``. No ArcticDB freshness check, because phase1 is
      what *populates* ArcticDB.
    - ``"phase2"`` — Saturday DataPhase2. Requires ``FMP_API_KEY`` and
      ``EDGAR_IDENTITY`` (FMP + SEC EDGAR).

    The individual check primitives are inherited from ``BasePreflight``;
    how each one reports a failure is defined there, not in this class.
    """

    def __init__(self, bucket: str, mode: str):
        """Bind the preflight to ``bucket`` and validate ``mode``.

        Args:
            bucket: Passed straight through to ``BasePreflight.__init__``.
            mode: One of ``"daily"``, ``"phase1"`` or ``"phase2"``.

        Raises:
            ValueError: If ``mode`` is not one of the three known modes.
        """
        super().__init__(bucket)
        is_known_mode = mode in ("daily", "phase1", "phase2")
        if not is_known_mode:
            raise ValueError(f"DataPreflight: unknown mode {mode!r}")
        self.mode = mode

    def run(self) -> None:
        """Run the checks for ``self.mode`` in a fixed order.

        Order: common env var, mode-specific env vars (phase1/phase2
        only), S3 bucket, then the ArcticDB freshness check (daily only).
        """
        mode = self.mode

        # Needed by every mode.
        self.check_env_vars("AWS_REGION")

        # API credentials that only the Saturday phases depend on.
        if mode == "phase1":
            phase_env_vars = ("FRED_API_KEY", "POLYGON_API_KEY")
        elif mode == "phase2":
            phase_env_vars = ("FMP_API_KEY", "EDGAR_IDENTITY")
        else:
            phase_env_vars = ()
        if phase_env_vars:
            self.check_env_vars(*phase_env_vars)

        self.check_s3_bucket()

        if mode != "daily":
            return

        # 4-day threshold would have caught the 2026-04-14 bug
        # (ArcticDB silently not writing) by 2026-04-17.
        self.check_arcticdb_fresh("universe", "SPY", max_stale_days=4)
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ psycopg2-binary>=2.9
voyageai>=0.3
jsonschema>=4.20
arcticdb>=6.11
flow-doctor[diagnosis]>=0.3.0,<0.4.0
# flow-doctor is pulled in transitively via alpha-engine-lib[flow_doctor].
alpha-engine-lib[arcticdb,flow_doctor] @ git+https://github.com/cipher813/alpha-engine-lib@v0.1.0
14 changes: 12 additions & 2 deletions weekly_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,11 +647,21 @@ def main() -> None:
args = _parse_args()
_load_dotenv()

from log_config import setup_logging
setup_logging("data-collector")
from alpha_engine_lib.logging import setup_logging
setup_logging(
"data-collector",
flow_doctor_yaml=str(Path(__file__).parent / "flow-doctor.yaml"),
)
logging.getLogger().setLevel(getattr(logging, args.log_level))

config = load_config(args.config)

# Pre-flight: fail fast on env / connectivity drift before starting
# the real collection work. See alpha-engine-lib/README.md.
from preflight import DataPreflight
mode = "daily" if args.daily else f"phase{args.phase or 1}"
DataPreflight(config["bucket"], mode).run()

results = run_weekly(config, args)

# Hard-fail on any non-ok status — strict form of the no-silent-fails
Expand Down
Loading