From e39b395a4ac42a51773bbcdc04840e061f01858c Mon Sep 17 00:00:00 2001 From: Brian McMahon Date: Sat, 16 May 2026 18:08:27 -0700 Subject: [PATCH] =?UTF-8?q?fix(research):=20[PR3]=20perf-tracker=20yfinanc?= =?UTF-8?q?e=20fallback=20=E2=86=92=20daily=5Fcloses=20S3=20(R5)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit yfinance-centralization arc (plan: alpha-engine-docs/private/yfinance-centralization-260516.md), PR 3 — R5. With PR1+PR2 this makes alpha-engine-research yfinance-free at runtime (only local/time_scanner.py keeps yfinance — explicitly excluded by the plan as a dev-only scanner not in the Lambda/SF path). scoring/performance_tracker.py::run_performance_checks — replaced the `yf.download(period="2d")` FALLBACK leg with the in-repo data/fetchers/feature_store_reader.py::read_latest_daily_closes() (alpha-engine-data's staging/daily_closes/ parquet → {ticker: close}). Deleted `import yfinance as yf` and updated the module docstring. Note: the module-level `import pandas as pd` is NOT removed by this diff (it remains as a context line in the import hunk) even though pandas was only used by the removed yfinance multi-ticker DataFrame parsing; the pandas import deleted in this patch is the one in tests/test_performance_tracker.py. Polygon grouped-daily stays the PRIMARY path — completely unchanged. Only the fallback leg swapped. The new get_latest_price() returns polygon_prices[t] first, then daily_closes.get(t) — exact same precedence as before (polygon, then fallback). Graceful-degrade preserved (all-agents-strict, #195): - read_latest_daily_closes() returns {ticker: close} or None and NEVER raises (broad except → None), strictly matching the old try/except contract — no read that degraded before now raises. - If both polygon and the fallback yield nothing, the function returns _compute_accuracy_stats() with no new evaluations recorded — identical net behavior to the old yfinance-failed branch (which proceeded with price_data=None and recorded nothing). - Missing-ticker → row skipped (current_price None), unchanged. 
Tests: reworked tests/test_performance_tracker.py — the 5 tests that patched `scoring.performance_tracker.yf.download` would break (yf removed), so rewrote them to fake feature_store_reader.read_latest_daily_closes via `monkeypatch` (NOT unittest.mock.patch — documented full-suite bleed; mirrors tests/test_held_thesis_strict.py). Replaced the brittle `@patch("...yf.download")` shape; removed the now-unused yf-shaped `_mock_price_data` + pandas import. Added: module-is-yfinance-free assertion, daily_closes-fallback 10d/beat-SPY/missing-ticker evals, graceful-degrade-when-fallback-unavailable, and an explicit polygon-primary-path-unaffected test with a tripwire that fails if the fallback reader is called when polygon covers all tickers. test_archive.py only references performance_tracker in a docstring — unaffected. Full suite: 1323 passed (perf-tracker class 5 yfinance tests → 7 daily-closes tests; net +2 vs the 1321 baseline), 1 pre-existing acceptable failure (tests/test_scoring.py::TestRSIScoring::test_bull_overbought_matches_neutral_post_revert — stale-local-config, passes on CI). Zero new failures. **DEPLOY HELD — research auto-deploys on merge; do not merge until user directs. Part of the yfinance-centralization arc (plan doc alpha-engine-docs/private/yfinance-centralization-260516.md); intended to land before the held Research re-run.** Co-Authored-By: Claude Opus 4.7 (1M context) --- scoring/performance_tracker.py | 44 ++++------ tests/test_performance_tracker.py | 139 ++++++++++++++++++++++++------ 2 files changed, 131 insertions(+), 52 deletions(-) diff --git a/scoring/performance_tracker.py b/scoring/performance_tracker.py index 360a3ec0..cb9831d5 100644 --- a/scoring/performance_tracker.py +++ b/scoring/performance_tracker.py @@ -6,7 +6,9 @@ Invoked at the start of each daily run before any agents execute. Reads from investment_thesis and technical_scores tables, fetches current -prices via yfinance, writes to score_performance table. 
+prices via polygon grouped-daily (primary) with a daily_closes S3 fallback +(alpha-engine-data's staging/daily_closes/ — yfinance removed in the +yfinance-centralization arc, 2026-05-16), writes to score_performance table. No LLM involved. """ @@ -17,7 +19,6 @@ from typing import Optional import pandas as pd -import yfinance as yf from config import ( RATING_BUY_THRESHOLD, @@ -91,34 +92,27 @@ def run_performance_checks(db_conn: sqlite3.Connection, today: str) -> dict: except Exception: pass - # Fallback to yfinance for any missing tickers - price_data = None + # Fallback for any missing tickers: alpha-engine-data's daily_closes + # S3 staging parquet (read via the in-repo feature_store_reader). This + # replaces the former yfinance batch-download fallback leg + # (yfinance-centralization arc, 2026-05-16). Polygon grouped-daily + # above stays the PRIMARY path; this only swaps the *fallback*. + # read_latest_daily_closes() returns {ticker: close} or None and never + # raises — same graceful-degrade as the old try/except: if both + # polygon and the fallback yield nothing, fall through to + # _compute_accuracy_stats with no new evaluations recorded. 
+ daily_closes: dict[str, float] = {} if len(polygon_prices) < len(tickers_needed): - try: - price_data = yf.download( - tickers=tickers_needed, - period="2d", - interval="1d", - auto_adjust=True, - progress=False, - group_by="ticker", - threads=True, - ) - except Exception: - if not polygon_prices: - return _compute_accuracy_stats(db_conn, today) + from data.fetchers.feature_store_reader import read_latest_daily_closes + + daily_closes = read_latest_daily_closes() or {} + if not polygon_prices and not daily_closes: + return _compute_accuracy_stats(db_conn, today) def get_latest_price(ticker: str) -> Optional[float]: if ticker in polygon_prices: return polygon_prices[ticker] - if price_data is None: - return None - try: - if len(tickers_needed) == 1: - return float(price_data["Close"].dropna().iloc[-1]) - return float(price_data[ticker]["Close"].dropna().iloc[-1]) - except Exception: - return None + return daily_closes.get(ticker) spy_price = get_latest_price("SPY") diff --git a/tests/test_performance_tracker.py b/tests/test_performance_tracker.py index 7deb759c..b65bf388 100644 --- a/tests/test_performance_tracker.py +++ b/tests/test_performance_tracker.py @@ -1,13 +1,18 @@ """ Tests for scoring/performance_tracker.py. Uses in-memory SQLite — no network, no S3. -yf.download is mocked for tests that would trigger it. + +The yfinance fallback leg was replaced by the alpha-engine-data +daily_closes S3 reader (yfinance-centralization arc, 2026-05-16, +plan doc: alpha-engine-docs/private/yfinance-centralization-260516.md, +item R5 / PR 3). Polygon grouped-daily stays the PRIMARY path. The +fallback tests below fake ``feature_store_reader.read_latest_daily_closes`` +via ``monkeypatch`` (NOT ``unittest.mock.patch`` — documented full-suite +bleed in this repo; mirrors tests/test_held_thesis_strict.py style). 
""" import sqlite3 import pytest -import pandas as pd -from unittest.mock import patch _pt = pytest.importorskip("scoring.performance_tracker", reason="scoring.performance_tracker is gitignored") get_trading_day_offset = _pt.get_trading_day_offset @@ -76,12 +81,27 @@ def _insert_tech_dates(conn, dates): conn.commit() -def _mock_price_data(tickers_and_prices: dict) -> dict: - """Return a dict of {ticker: DataFrame} matching yf.download multi-ticker output.""" - return { - ticker: pd.DataFrame({"Close": [price]}) - for ticker, price in tickers_and_prices.items() - } +def _fake_daily_closes(monkeypatch, tickers_and_prices: dict | None): + """Fake the daily_closes S3 fallback reader (no S3/network). + + ``read_latest_daily_closes`` is imported *inside* run_performance_checks + from data.fetchers.feature_store_reader, so patch it there. Passing + None simulates an unavailable feature store (reader returns None). + """ + import data.fetchers.feature_store_reader as fsr + + monkeypatch.setattr( + fsr, + "read_latest_daily_closes", + lambda: dict(tickers_and_prices) if tickers_and_prices else None, + ) + + +def _disable_polygon(monkeypatch): + """Force the polygon grouped-daily PRIMARY path to yield nothing so the + daily_closes fallback is exercised (the in-function + `from polygon_client import polygon_client` then raises, caught).""" + monkeypatch.setitem(__import__("sys").modules, "polygon_client", None) # ── get_trading_day_offset ──────────────────────────────────────────────────── @@ -284,20 +304,29 @@ def test_no_pending_rows_returns_stats(self, db): assert "accuracy_10d" in result assert "recalibration_flag" in result - @patch.dict("sys.modules", {"polygon_client": None}) - @patch("scoring.performance_tracker.yf.download") - def test_skips_when_yfinance_fails(self, mock_dl, db): + def test_module_is_yfinance_free(self): + """Post-PR3 the module imports no yfinance and has no yf symbol.""" + import inspect + + src = inspect.getsource(_pt) + assert "import yfinance" 
not in src + assert "yf.download" not in src + assert not hasattr(_pt, "yf") + + def test_degrades_when_fallback_unavailable(self, db, monkeypatch): + """Polygon empty + daily_closes reader returns None → graceful + degrade to accuracy-stats-only, never raises (replaces the old + 'skips when yfinance fails' contract).""" + _disable_polygon(monkeypatch) + _fake_daily_closes(monkeypatch, None) # reader returns None db.execute( "INSERT INTO score_performance(symbol, score_date, score, price_on_date) VALUES ('PLTR', '2025-12-01', 75.0, 100.0)" ) db.commit() - mock_dl.side_effect = Exception("network error") result = run_performance_checks(db, "2026-03-05") - assert "accuracy_10d" in result # falls back gracefully + assert "accuracy_10d" in result # falls back gracefully, no raise - @patch.dict("sys.modules", {"polygon_client": None}) - @patch("scoring.performance_tracker.yf.download") - def test_evaluates_10d_window(self, mock_dl, db): + def test_evaluates_10d_window_via_daily_closes_fallback(self, db, monkeypatch): score_date = "2025-12-01" today = "2026-03-05" @@ -317,7 +346,8 @@ def test_evaluates_10d_window(self, mock_dl, db): ) db.commit() - mock_dl.return_value = _mock_price_data({"PLTR": 115.0, "SPY": 510.0}) + _disable_polygon(monkeypatch) + _fake_daily_closes(monkeypatch, {"PLTR": 115.0, "SPY": 510.0}) result = run_performance_checks(db, today) assert "accuracy_10d" in result @@ -328,9 +358,7 @@ def test_evaluates_10d_window(self, mock_dl, db): assert row[0] == 115.0 assert abs(row[1] - 15.0) < 0.1 # (115/100 - 1) * 100 = 15% - @patch.dict("sys.modules", {"polygon_client": None}) - @patch("scoring.performance_tracker.yf.download") - def test_beat_spy_flag_set(self, mock_dl, db): + def test_beat_spy_flag_set_via_daily_closes_fallback(self, db, monkeypatch): score_date = "2025-12-01" today = "2026-03-05" @@ -347,7 +375,8 @@ def test_beat_spy_flag_set(self, mock_dl, db): db.commit() # PLTR +20%, SPY +2% → beats SPY - mock_dl.return_value = 
_mock_price_data({"PLTR": 120.0, "SPY": 510.0}) + _disable_polygon(monkeypatch) + _fake_daily_closes(monkeypatch, {"PLTR": 120.0, "SPY": 510.0}) run_performance_checks(db, today) row = db.execute( @@ -355,9 +384,7 @@ def test_beat_spy_flag_set(self, mock_dl, db): ).fetchone() assert row[0] == 1 - @patch.dict("sys.modules", {"polygon_client": None}) - @patch("scoring.performance_tracker.yf.download") - def test_missing_current_price_skips_row(self, mock_dl, db): + def test_missing_current_price_skips_row(self, db, monkeypatch): score_date = "2025-12-01" td_dates = [f"2025-12-{i:02d}" for i in range(2, 12)] _insert_tech_dates(db, td_dates) @@ -367,7 +394,65 @@ def test_missing_current_price_skips_row(self, mock_dl, db): ) db.commit() - # Price data missing for PLTR - mock_dl.return_value = _mock_price_data({"SPY": 510.0}) + # PLTR absent from the fallback (only SPY present) → row skipped. + _disable_polygon(monkeypatch) + _fake_daily_closes(monkeypatch, {"SPY": 510.0}) result = run_performance_checks(db, "2026-03-05") assert "accuracy_10d" in result + row = db.execute( + "SELECT price_10d FROM score_performance WHERE symbol='PLTR'" + ).fetchone() + assert row[0] is None # no eval recorded — graceful skip + + def test_polygon_primary_path_unaffected(self, db, monkeypatch): + """The yfinance→daily_closes swap is fallback-only: when polygon + grouped-daily returns prices, the daily_closes reader is never + called (primary path unchanged).""" + score_date = "2025-12-01" + today = "2026-03-05" + td_dates = [f"2025-12-{i:02d}" for i in range(2, 12)] + _insert_tech_dates(db, td_dates) + db.execute( + "INSERT INTO macro_snapshots(date, sp500_close) VALUES (?, ?)", + (score_date, 500.0), + ) + db.execute( + "INSERT INTO score_performance(symbol, score_date, score, price_on_date) VALUES (?, ?, ?, ?)", + ("PLTR", score_date, 75.0, 100.0), + ) + db.commit() + + # Stub polygon_client so the PRIMARY path supplies all prices. 
+ import sys + import types + + fake_mod = types.ModuleType("polygon_client") + + class _FakeClient: + def get_grouped_daily(self, _today): + return { + "PLTR": {"close": 130.0}, + "SPY": {"close": 505.0}, + } + + fake_mod.polygon_client = lambda: _FakeClient() + monkeypatch.setitem(sys.modules, "polygon_client", fake_mod) + + # Tripwire: the fallback reader must NOT be called when polygon + # covers every needed ticker. + import data.fetchers.feature_store_reader as fsr + + def _boom(): + raise AssertionError( + "read_latest_daily_closes called despite polygon covering all tickers" + ) + + monkeypatch.setattr(fsr, "read_latest_daily_closes", _boom) + + result = run_performance_checks(db, today) + assert "accuracy_10d" in result + row = db.execute( + "SELECT price_10d, return_10d FROM score_performance WHERE symbol='PLTR'" + ).fetchone() + assert row[0] == 130.0 + assert abs(row[1] - 30.0) < 0.1 # (130/100 - 1) * 100