Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions features/SCHEMA.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ parity.
| `idio_vol_60d` | annualized vol (decimal) | `std(residual_returns).rolling(60) * sqrt(252)` after beta removal | predictor |
| `vol_of_vol_30d` | stdev of vol | `realized_vol_20d.rolling(30).std()` | predictor |
| `max_drawdown_60d` | non-positive decimal pct (bare-named convention) | min of `(close / rolling_max_60 - 1)` over 60d | predictor |
| `residual_momentum_ratio` | information ratio (dimensionless) | `sum(residual_returns)[t-252,t-21] / (std(residual_returns).rolling(20) * sqrt(231))` — reuses the beta-residualized log-return (same series as idio_vol_60d) | predictor (W2 residual-momentum L1, observe-gated) |
| `mom_12_1_pct` | decimal return | `close.shift(21) / close.shift(252) - 1` (12-1 skip-month momentum) | predictor (W2) |
| `sector_mom_pct` | decimal return | sector-ETF `close.shift(21) / close.shift(252) - 1` (absolute industry momentum) | predictor (W2) |

### Macro (one row per date — `per_ticker=False`)

Expand Down
56 changes: 56 additions & 0 deletions features/feature_engineer.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@
"beta_window": 60,
"vol_of_vol_window": 30,
"max_drawdown_window": 60,
# W2 residual-momentum windows (L4469). 12-1 skip-month convention:
# cumulative residual return over [t-window, t-skip], vol-scaled.
"resid_mom_window": 252,
"resid_mom_skip": 21,
"resid_mom_vol_window": 20,
}

_FC = FEATURE_CFG
Expand Down Expand Up @@ -159,6 +164,12 @@
"vol_of_vol_30d",
"max_drawdown_60d",
"realized_vol_63d",
# W2 (L4469) — residual/idiosyncratic momentum + 12-1 skip-month + sector
# momentum. Predictor-consumed; observe-gated in the predictor's L2 until
# the standalone leak-free read validates the signal.
"residual_momentum_ratio",
"mom_12_1_pct",
"sector_mom_pct",
# C.1 (optimizer-sota-upgrades-260526.md §C.1) — factor-loading z-scores
# for the executor's Σ = B·F·Bᵀ + D risk decomposition (C.3). Computed
# POST-assembly in features/compute.py via features.cross_sectional.
Expand Down Expand Up @@ -574,6 +585,51 @@ def compute_features(
else:
df["idio_vol_60d"] = float("nan")

# ── W2 (L4469): residual / idiosyncratic momentum ─────────────────────────
# Revives the dead raw-momentum L1 with the strongest single finding in the
# canon (Blitz/Hanauer residual momentum — ~½ vol, ~2× risk-adjusted, wins
# horse races). REUSES the ``residual_returns`` series computed above for
# idio_vol_60d — the SAME beta-residualized log-return, NO recompute. All
# windows are backward-only / point-in-time; front-of-history rows stay NaN
# until warm-up (consumers neutralize, as with momentum_5d/return_120d).
_rm_win = _FC["resid_mom_window"]
_rm_skip = _FC["resid_mom_skip"]
_rm_vol_w = _FC["resid_mom_vol_window"]
_rm_cum = max(_rm_win - _rm_skip, 1)
if spy_series is not None:
# residual_momentum_ratio: vol-scaled cumulative residual log-return over
# [t-window, t-skip] (12-1 skip-month) → an information-ratio (∑resid /
# (σ_resid·√(window−skip))), where σ_resid is the rolling
# ``resid_mom_vol_window``-row std of the residuals. Dimensionless → ``_ratio``.
_cum_resid = residual_returns.rolling(
window=_rm_cum, min_periods=_rm_cum,
).sum().shift(_rm_skip)
_resid_dvol = residual_returns.rolling(
window=_rm_vol_w, min_periods=_rm_vol_w,
).std()
df["residual_momentum_ratio"] = (
_cum_resid / (_resid_dvol * np.sqrt(_rm_cum)).replace(0, float("nan"))
).replace([np.inf, -np.inf], float("nan")).astype(float)
else:
df["residual_momentum_ratio"] = float("nan")

# mom_12_1_pct: raw 12-1 skip-month price momentum (the classic momentum
# factor — the store only has 5/20/60/120d, none skip the recent month).
# Decimal return → ``_pct``.
df["mom_12_1_pct"] = (
(close.shift(_rm_skip) / close.shift(_rm_win)) - 1.0
).replace([np.inf, -np.inf], float("nan")).astype(float)

# sector_mom_pct: the ticker's sector-ETF own 12-1 skip-month momentum
# (GKX industry momentum — absolute, distinct from the existing
# sector_vs_spy_* RELATIVE features). Decimal return → ``_pct``.
if sector_etf_series is not None:
_sec_for_mom = sector_etf_series.reindex(df.index, method="ffill").astype(float)
df["sector_mom_pct"] = (
(_sec_for_mom.shift(_rm_skip) / _sec_for_mom.shift(_rm_win)) - 1.0
).replace([np.inf, -np.inf], float("nan")).astype(float)
else:
df["sector_mom_pct"] = float("nan")

# vol_of_vol_30d: 30d rolling stdev of realized_vol_20d. Captures the
# stability of the vol regime — a stock whose realized vol oscillates
# carries different risk than one whose vol is stable.
Expand Down
6 changes: 6 additions & 0 deletions features/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@ class FeatureEntry:
FeatureEntry("idio_vol_60d", "technical", "60d residual vol after removing beta exposure; std × sqrt(252) (idiosyncratic risk)", source="yfinance", refresh="daily"),
FeatureEntry("vol_of_vol_30d", "technical", "30d rolling stdev of realized_vol_20d (stability of vol regime)", source="yfinance", refresh="daily"),
FeatureEntry("max_drawdown_60d", "technical", "Worst peak-to-trough drawdown within trailing 60d window (non-positive decimal pct)", source="yfinance", refresh="daily"),
# W2 (L4469) — residual/idiosyncratic momentum family. residual_momentum_ratio
# reuses the beta-residualized log-return series (same as idio_vol_60d), NO
# beta recompute. Predictor-consumed; observe-gated in the L2 until validated.
FeatureEntry("residual_momentum_ratio", "technical", "Vol-scaled cumulative residual (idiosyncratic) log-return over the 12-1 skip-month window: ∑resid_ret[t-252,t-21] / (σ_resid·√231) — an information ratio (Blitz/Hanauer residual momentum)", source="yfinance", refresh="daily"),
FeatureEntry("mom_12_1_pct", "technical", "12-1 skip-month raw price momentum: close.shift(21)/close.shift(252) - 1 (classic momentum factor, skips the recent-month reversal)", source="yfinance", refresh="daily"),
FeatureEntry("sector_mom_pct", "technical", "The ticker's sector-ETF own 12-1 skip-month momentum (GKX industry momentum — absolute, distinct from sector_vs_spy_* relative features)", source="yfinance", refresh="daily"),

# ── Fundamental (13) — quarterly financials ───────────────────────────────
FeatureEntry("pe_ratio", "fundamental", "Trailing P/E ratio, normalized (PE / 30)", source="fmp", refresh="quarterly"),
Expand Down
122 changes: 122 additions & 0 deletions tests/test_feature_engineer_residual_momentum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
"""Tests for the W2 (L4469) residual-momentum features.

Validates the 3 new feature-store columns added to feature_engineer.py:
- residual_momentum_ratio : vol-scaled cumulative residual (idiosyncratic)
log-return over the 12-1 skip-month window — REUSES the same beta-residualized
return series as idio_vol_60d (no beta recompute).
- mom_12_1_pct : 12-1 skip-month raw price momentum.
- sector_mom_pct : sector-ETF own 12-1 skip-month momentum.

Plan doc: ~/Development/alpha-engine-docs/private/predictor-improvement-260530.md
"""
from __future__ import annotations

import os
import sys

import numpy as np
import pandas as pd

# Prepend the parent of this file's directory to sys.path so the ``features``
# package below can be imported when the tests are run from a checkout.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from features.feature_engineer import FEATURES, compute_features

# Names of the three W2 feature columns exercised by the tests in this module.
_W2_COLS = ("residual_momentum_ratio", "mom_12_1_pct", "sector_mom_pct")


def _ohlcv(n=400, seed=0, drift=0.0, vol=0.012, start="2018-01-01"):
rng = np.random.default_rng(seed)
r = drift + rng.normal(0, vol, n)
close = 100.0 * np.exp(np.cumsum(r))
idx = pd.date_range(start, periods=n, freq="B")
return pd.DataFrame({
"Open": close * (1 + rng.normal(0, 0.003, n)),
"High": close * (1 + np.abs(rng.normal(0, 0.005, n))),
"Low": close * (1 - np.abs(rng.normal(0, 0.005, n))),
"Close": close,
"Volume": rng.integers(1_000_000, 10_000_000, n).astype(float),
}, index=idx)


def _series(n=400, seed=99, drift=0.0, vol=0.008, start="2018-01-01", base=300.0):
rng = np.random.default_rng(seed)
close = base * np.exp(np.cumsum(drift + rng.normal(0, vol, n)))
return pd.Series(close, index=pd.date_range(start, periods=n, freq="B"))


def _closes_from_returns_local(r, n, start="2018-01-01", base=100.0):
return pd.Series(base * np.exp(np.cumsum(r)), index=pd.date_range(start, periods=n, freq="B"))


class TestSchema:
    """The W2 columns are both registered and actually produced."""

    def test_w2_columns_in_features_list(self):
        """Every W2 column name is present in ``FEATURES``."""
        for column in _W2_COLS:
            assert column in FEATURES, f"{column} missing from FEATURES"

    def test_compute_features_emits_w2_columns(self):
        """``compute_features`` returns a frame carrying every W2 column."""
        frame = compute_features(
            _ohlcv(),
            spy_series=_series(),
            sector_etf_series=_series(seed=7),
        )
        for column in _W2_COLS:
            assert column in frame.columns


class TestResidualMomentumRatio:
    """Behavioural checks for the ``residual_momentum_ratio`` column."""

    def test_finite_after_warmup_nan_before(self):
        """The last of 400 rows is finite while row 100 is still NaN."""
        ratio = compute_features(
            _ohlcv(n=400), spy_series=_series(n=400),
        )["residual_momentum_ratio"]
        assert np.isfinite(ratio.iloc[-1])
        # Row 100 sits inside the warm-up span (presumably the 231-row
        # residual sum shifted by the 21-row skip under the default config —
        # confirm against feature_engineer), so it must still be NaN.
        assert pd.isna(ratio.iloc[100])

    def test_residual_momentum_negative_when_idio_drifts_down(self):
        """Residual momentum is negative although raw momentum is positive.

        The market trends UP while the stock-specific component trends DOWN
        (true beta of 1), so the two momentum measures must disagree in sign.
        The drifts are strong relative to the noise, which keeps the result
        stable across pandas/numpy versions — unlike a pure-beta 0/0
        information ratio, which is jitter-dominated and ill-posed.
        """
        n_rows = 500
        rng = np.random.default_rng(21)
        # Draw order matters for reproducibility: market first, then idio.
        market_ret = 0.001 + rng.normal(0, 0.001, n_rows)   # market drifts up
        idio_ret = -0.0008 + rng.normal(0, 0.001, n_rows)   # stock-specific drift down
        stock_close = _closes_from_returns_local(market_ret + idio_ret, n_rows)  # beta_true = 1
        market_close = _closes_from_returns_local(market_ret, n_rows)
        bars = pd.DataFrame(
            {
                "Open": stock_close,
                "High": stock_close * 1.001,
                "Low": stock_close * 0.999,
                "Close": stock_close,
                "Volume": np.full(n_rows, 5e6, dtype=float),
            },
            index=stock_close.index,
        )
        out = compute_features(bars, spy_series=market_close)
        assert out["residual_momentum_ratio"].iloc[-1] < 0  # residual momentum down
        assert out["mom_12_1_pct"].iloc[-1] > 0             # raw price momentum up

    def test_nan_when_spy_missing(self):
        """Without a benchmark series the whole column is NaN."""
        ratio = compute_features(_ohlcv(), spy_series=None)["residual_momentum_ratio"]
        assert ratio.isna().all()


class TestMom121:
    """Behavioural checks for the ``mom_12_1_pct`` column."""

    def test_skip_month_excludes_recent_window(self):
        """The last-row value ignores the most recent 21 closes.

        Scaling the final 21 Close values by 1.3 must leave the last-row
        ``mom_12_1_pct`` exactly unchanged.
        """
        baseline_bars = _ohlcv(n=400)
        bumped_bars = baseline_bars.copy()
        close_pos = bumped_bars.columns.get_loc("Close")
        bumped_bars.iloc[-21:, close_pos] *= 1.3  # perturb last month

        baseline = compute_features(baseline_bars.copy(), spy_series=_series(n=400))
        bumped = compute_features(bumped_bars, spy_series=_series(n=400))
        assert baseline["mom_12_1_pct"].iloc[-1] == bumped["mom_12_1_pct"].iloc[-1]

    def test_finite_after_warmup(self):
        """The last of 400 rows is finite while row 100 is still NaN."""
        momentum = compute_features(
            _ohlcv(n=400), spy_series=_series(n=400),
        )["mom_12_1_pct"]
        assert np.isfinite(momentum.iloc[-1])
        assert pd.isna(momentum.iloc[100])


class TestSectorMom:
    """Behavioural checks for the ``sector_mom_pct`` column."""

    def test_finite_with_sector_etf(self):
        """With a sector-ETF series supplied, the last row is finite."""
        sector_close = _series(n=400, seed=7, drift=0.0005)
        frame = compute_features(
            _ohlcv(n=400),
            spy_series=_series(n=400),
            sector_etf_series=sector_close,
        )
        assert np.isfinite(frame["sector_mom_pct"].iloc[-1])

    def test_nan_when_sector_etf_missing(self):
        """With no sector-ETF series, the whole column is NaN."""
        frame = compute_features(
            _ohlcv(n=400),
            spy_series=_series(n=400),
            sector_etf_series=None,
        )
        assert frame["sector_mom_pct"].isna().all()
Loading