diff --git a/features/SCHEMA.md b/features/SCHEMA.md index aaeb7b4..b70730c 100644 --- a/features/SCHEMA.md +++ b/features/SCHEMA.md @@ -112,6 +112,9 @@ parity. | `idio_vol_60d` | annualized vol (decimal) | `std(residual_returns).rolling(60) * sqrt(252)` after beta removal | predictor | | `vol_of_vol_30d` | stdev of vol | `realized_vol_20d.rolling(30).std()` | predictor | | `max_drawdown_60d` | non-positive decimal pct (bare-named convention) | min of `(close / rolling_max_60 - 1)` over 60d | predictor | +| `residual_momentum_ratio` | information ratio (dimensionless) | `sum(residual_returns)[t-252,t-21] / (std(residual_returns).rolling(20) * sqrt(231))` — reuses the beta-residualized log-return (same series as idio_vol_60d) | predictor (W2 residual-momentum L1, observe-gated) | +| `mom_12_1_pct` | decimal return | `close.shift(21) / close.shift(252) - 1` (12-1 skip-month momentum) | predictor (W2) | +| `sector_mom_pct` | decimal return | sector-ETF `close.shift(21) / close.shift(252) - 1` (absolute industry momentum) | predictor (W2) | ### Macro (one row per date — `per_ticker=False`) diff --git a/features/feature_engineer.py b/features/feature_engineer.py index 7bd6bef..34d0d74 100644 --- a/features/feature_engineer.py +++ b/features/feature_engineer.py @@ -57,6 +57,11 @@ "beta_window": 60, "vol_of_vol_window": 30, "max_drawdown_window": 60, + # W2 residual-momentum windows (L4469). 12-1 skip-month convention: + # cumulative residual return over [t-window, t-skip], vol-scaled. + "resid_mom_window": 252, + "resid_mom_skip": 21, + "resid_mom_vol_window": 20, } _FC = FEATURE_CFG @@ -159,6 +164,12 @@ "vol_of_vol_30d", "max_drawdown_60d", "realized_vol_63d", + # W2 (L4469) — residual/idiosyncratic momentum + 12-1 skip-month + sector + # momentum. Predictor-consumed; observe-gated in the predictor's L2 until + # the standalone leak-free read validates the signal. 
+ "residual_momentum_ratio", + "mom_12_1_pct", + "sector_mom_pct", # C.1 (optimizer-sota-upgrades-260526.md §C.1) — factor-loading z-scores # for the executor's Σ = B·F·Bᵀ + D risk decomposition (C.3). Computed # POST-assembly in features/compute.py via features.cross_sectional. @@ -574,6 +585,51 @@ def compute_features( else: df["idio_vol_60d"] = float("nan") + # ── W2 (L4469): residual / idiosyncratic momentum ───────────────────────── + # Revives the dead raw-momentum L1 with the strongest single finding in the + # canon (Blitz/Hanauer residual momentum — ~½ vol, ~2× risk-adjusted, wins + # horse races). REUSES the ``residual_returns`` series computed above for + # idio_vol_60d — the SAME beta-residualized log-return, NO recompute. All + # windows are backward-only / point-in-time; front-of-history rows stay NaN + # until warm-up (consumers neutralize, as with momentum_5d/return_120d). + _rm_win = _FC["resid_mom_window"] + _rm_skip = _FC["resid_mom_skip"] + _rm_vol_w = _FC["resid_mom_vol_window"] + _rm_cum = max(_rm_win - _rm_skip, 1) + if spy_series is not None: + # residual_momentum_ratio: vol-scaled cumulative residual log-return over + # [t-window, t-skip] (12-1 skip-month) → an information-ratio (∑resid / + # (σ_resid·√(window−skip))). Dimensionless → ``_ratio``. + _cum_resid = residual_returns.rolling( + window=_rm_cum, min_periods=_rm_cum, + ).sum().shift(_rm_skip) + _resid_dvol = residual_returns.rolling( + window=_rm_vol_w, min_periods=_rm_vol_w, + ).std() + df["residual_momentum_ratio"] = ( + _cum_resid / (_resid_dvol * np.sqrt(_rm_cum)).replace(0, float("nan")) + ).replace([np.inf, -np.inf], float("nan")).astype(float) + else: + df["residual_momentum_ratio"] = float("nan") + + # mom_12_1_pct: raw 12-1 skip-month price momentum (the classic momentum + # factor — the store only has 5/20/60/120d, none skip the recent month). + # Decimal return → ``_pct``.
+ df["mom_12_1_pct"] = ( + (close.shift(_rm_skip) / close.shift(_rm_win)) - 1.0 + ).replace([np.inf, -np.inf], float("nan")).astype(float) + + # sector_mom_pct: the ticker's sector-ETF own 12-1 skip-month momentum + # (GKX industry momentum — absolute, distinct from the existing + # sector_vs_spy_* RELATIVE features). Decimal return → ``_pct``. + if sector_etf_series is not None: + _sec_for_mom = sector_etf_series.reindex(df.index, method="ffill").astype(float) + df["sector_mom_pct"] = ( + (_sec_for_mom.shift(_rm_skip) / _sec_for_mom.shift(_rm_win)) - 1.0 + ).replace([np.inf, -np.inf], float("nan")).astype(float) + else: + df["sector_mom_pct"] = float("nan") + # vol_of_vol_30d: 30d rolling stdev of realized_vol_20d. Captures the # stability of the vol regime — a stock whose realized vol oscillates # carries different risk than one whose vol is stable. diff --git a/features/registry.py b/features/registry.py index cda55ff..87d700f 100644 --- a/features/registry.py +++ b/features/registry.py @@ -100,6 +100,12 @@ class FeatureEntry: FeatureEntry("idio_vol_60d", "technical", "60d residual vol after removing beta exposure; std × sqrt(252) (idiosyncratic risk)", source="yfinance", refresh="daily"), FeatureEntry("vol_of_vol_30d", "technical", "30d rolling stdev of realized_vol_20d (stability of vol regime)", source="yfinance", refresh="daily"), FeatureEntry("max_drawdown_60d", "technical", "Worst peak-to-trough drawdown within trailing 60d window (non-positive decimal pct)", source="yfinance", refresh="daily"), + # W2 (L4469) — residual/idiosyncratic momentum family. residual_momentum_ratio + # reuses the beta-residualized log-return series (same as idio_vol_60d), NO + # beta recompute. Predictor-consumed; observe-gated in the L2 until validated. 
+ FeatureEntry("residual_momentum_ratio", "technical", "Vol-scaled cumulative residual (idiosyncratic) log-return over the 12-1 skip-month window: ∑resid_ret[t-252,t-21] / (σ_resid·√231) — an information ratio (Blitz/Hanauer residual momentum)", source="yfinance", refresh="daily"), + FeatureEntry("mom_12_1_pct", "technical", "12-1 skip-month raw price momentum: close.shift(21)/close.shift(252) - 1 (classic momentum factor, skips the recent-month reversal)", source="yfinance", refresh="daily"), + FeatureEntry("sector_mom_pct", "technical", "The ticker's sector-ETF own 12-1 skip-month momentum (GKX industry momentum — absolute, distinct from sector_vs_spy_* relative features)", source="yfinance", refresh="daily"), # ── Fundamental (13) — quarterly financials ─────────────────────────────── FeatureEntry("pe_ratio", "fundamental", "Trailing P/E ratio, normalized (PE / 30)", source="fmp", refresh="quarterly"), diff --git a/tests/test_feature_engineer_residual_momentum.py b/tests/test_feature_engineer_residual_momentum.py new file mode 100644 index 0000000..f8ad729 --- /dev/null +++ b/tests/test_feature_engineer_residual_momentum.py @@ -0,0 +1,122 @@ +"""Tests for the W2 (L4469) residual-momentum features. + +Validates the 3 new feature-store columns added to feature_engineer.py: +- residual_momentum_ratio : vol-scaled cumulative residual (idiosyncratic) + log-return over the 12-1 skip-month window — REUSES the same beta-residualized + return series as idio_vol_60d (no beta recompute). +- mom_12_1_pct : 12-1 skip-month raw price momentum. +- sector_mom_pct : sector-ETF own 12-1 skip-month momentum. 
+ +Plan doc: ~/Development/alpha-engine-docs/private/predictor-improvement-260530.md +""" +from __future__ import annotations + +import os +import sys + +import numpy as np +import pandas as pd + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from features.feature_engineer import FEATURES, compute_features + +_W2_COLS = ("residual_momentum_ratio", "mom_12_1_pct", "sector_mom_pct") + + +def _ohlcv(n=400, seed=0, drift=0.0, vol=0.012, start="2018-01-01"): + rng = np.random.default_rng(seed) + r = drift + rng.normal(0, vol, n) + close = 100.0 * np.exp(np.cumsum(r)) + idx = pd.date_range(start, periods=n, freq="B") + return pd.DataFrame({ + "Open": close * (1 + rng.normal(0, 0.003, n)), + "High": close * (1 + np.abs(rng.normal(0, 0.005, n))), + "Low": close * (1 - np.abs(rng.normal(0, 0.005, n))), + "Close": close, + "Volume": rng.integers(1_000_000, 10_000_000, n).astype(float), + }, index=idx) + + +def _series(n=400, seed=99, drift=0.0, vol=0.008, start="2018-01-01", base=300.0): + rng = np.random.default_rng(seed) + close = base * np.exp(np.cumsum(drift + rng.normal(0, vol, n))) + return pd.Series(close, index=pd.date_range(start, periods=n, freq="B")) + + +def _closes_from_returns_local(r, n, start="2018-01-01", base=100.0): + return pd.Series(base * np.exp(np.cumsum(r)), index=pd.date_range(start, periods=n, freq="B")) + + +class TestSchema: + def test_w2_columns_in_features_list(self): + for name in _W2_COLS: + assert name in FEATURES, f"{name} missing from FEATURES" + + def test_compute_features_emits_w2_columns(self): + out = compute_features(_ohlcv(), spy_series=_series(), sector_etf_series=_series(seed=7)) + for name in _W2_COLS: + assert name in out.columns + + +class TestResidualMomentumRatio: + def test_finite_after_warmup_nan_before(self): + out = compute_features(_ohlcv(n=400), spy_series=_series(n=400)) + assert np.isfinite(out["residual_momentum_ratio"].iloc[-1]) + # Pre-warmup is NaN: the 21-row skip sits inside the 252-row window (231-row sum, shifted 21).
+ assert pd.isna(out["residual_momentum_ratio"].iloc[100]) + + def test_residual_momentum_negative_when_idio_drifts_down(self): + # Mirror of the idio-up case: market trends UP, the idiosyncratic + # component trends DOWN (beta=1) → residual momentum is NEGATIVE even + # though raw price momentum is positive. Strong drifts dominate noise, + # so this is robust across pandas/numpy versions (unlike a pure-beta + # 0/0 information ratio, which is jitter-dominated and ill-posed). + rng = np.random.default_rng(21) + n = 500 + r_bench = 0.001 + rng.normal(0, 0.001, n) # market drifts up + idio = -0.0008 + rng.normal(0, 0.001, n) # stock-specific drift down + close = _closes_from_returns_local(r_bench + idio, n) # beta_true = 1 + bench = _closes_from_returns_local(r_bench, n) + out = compute_features( + pd.DataFrame({ + "Open": close, "High": close * 1.001, "Low": close * 0.999, + "Close": close, "Volume": np.full(n, 5e6, dtype=float), + }, index=close.index), + spy_series=bench, + ) + assert out["residual_momentum_ratio"].iloc[-1] < 0 # residual momentum down + assert out["mom_12_1_pct"].iloc[-1] > 0 # raw price momentum up + + def test_nan_when_spy_missing(self): + out = compute_features(_ohlcv(), spy_series=None) + assert out["residual_momentum_ratio"].isna().all() + + +class TestMom121: + def test_skip_month_excludes_recent_window(self): + # mom_12_1_pct at the last date must NOT depend on the most-recent 21d. 
+ df = _ohlcv(n=400) + out1 = compute_features(df.copy(), spy_series=_series(n=400)) + df2 = df.copy() + df2.iloc[-21:, df2.columns.get_loc("Close")] *= 1.3 # perturb last month + out2 = compute_features(df2, spy_series=_series(n=400)) + assert out1["mom_12_1_pct"].iloc[-1] == out2["mom_12_1_pct"].iloc[-1] + + def test_finite_after_warmup(self): + out = compute_features(_ohlcv(n=400), spy_series=_series(n=400)) + assert np.isfinite(out["mom_12_1_pct"].iloc[-1]) + assert pd.isna(out["mom_12_1_pct"].iloc[100]) + + +class TestSectorMom: + def test_finite_with_sector_etf(self): + out = compute_features( + _ohlcv(n=400), spy_series=_series(n=400), + sector_etf_series=_series(n=400, seed=7, drift=0.0005), + ) + assert np.isfinite(out["sector_mom_pct"].iloc[-1]) + + def test_nan_when_sector_etf_missing(self): + out = compute_features(_ohlcv(n=400), spy_series=_series(n=400), sector_etf_series=None) + assert out["sector_mom_pct"].isna().all()