Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions infrastructure/spot_backtest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,24 @@
# Usage:
# ./infrastructure/spot_backtest.sh # full run (--mode all)
# ./infrastructure/spot_backtest.sh --smoke-only # quick validation, then terminate
# ./infrastructure/spot_backtest.sh --preflight-only # boot + deps + the
# # bootstrap-class smoke
# # harness only
# # (backtest.py --mode=smoke:
# # BacktesterPreflight +
# # _runtime_smoke — lib-pin /
# # imports / predictor-weights /
# # universe-freshness, ~30-60s,
# # from PRs #43-#48), then
# # exit 0 — NO param sweep,
# # NO portfolio sim, NO parity,
# # NO evaluator, NO config/*.json
# # auto-apply, ZERO external API
# # calls, ZERO S3/config writes.
# # Friday shell_run dry path
# # (ROADMAP "Friday shell-run —
# # per-module dry-path
# # activation" owed-item #3).
# ./infrastructure/spot_backtest.sh --mode simulate # override backtest mode
# ./infrastructure/spot_backtest.sh --instance-type c5.xlarge # override instance type
# ./infrastructure/spot_backtest.sh --dry-run # full-universe exercise without
Expand Down Expand Up @@ -81,6 +99,26 @@ BACKTEST_MODE="all"

# ── Parse flags ──────────────────────────────────────────────────────────────
RUN_MODE="full" # full | smoke-only
# PREFLIGHT_ONLY is a MODIFIER, orthogonal to RUN_MODE — matching the
# data (spot_data_weekly.sh #259) and predictor (spot_train.sh #175)
# siblings' verbatim --preflight-only flag for cross-script consistency
# (the Friday shell_run SF keystone follow-on dispatches the same flag
# name to every module). When set, the script boots + installs deps for
# real, runs ONLY the bootstrap-class smoke harness (backtest.py
# --mode=smoke = BacktesterPreflight + _runtime_smoke; ~30-60s,
# read-only), then `exit 0` BEFORE the per-phase smoke modes, the
# evaluate.py S3-probe diagnostics, AND the entire full-backtest heredoc
# (param sweep / portfolio sim / parity / pit_parity / evaluator /
# config/*.json optimizer auto-apply / CloudWatch heartbeats). Catches
# bootstrap-class breakage (lib-pin drift, sys.path collision, stale
# ArcticDB universe, missing predictor weights, SSM timeout, image gap)
# ~12h before the real Saturday Backtester. backtest.py --mode=smoke
# itself `return`s before _init_pipeline / the optimizer, so it writes
# no S3 config; gating in front of the full heredoc + the
# evaluate.py/per-phase smoke block makes every sweep/sim/parity/
# evaluator and every config/{executor,scoring,predictor,research,
# scanner}_params*.json writer statically unreachable under this flag.
PREFLIGHT_ONLY=0
# All PhaseRegistry-adjacent flags are also routable from the
# Saturday SF input via env vars. When set they pass through as
# CLI args to backtest.py.
Expand Down Expand Up @@ -132,6 +170,7 @@ USE_VECTORIZED_SWEEP="${USE_VECTORIZED_SWEEP:-false}"
while [[ $# -gt 0 ]]; do
case "$1" in
--smoke-only) RUN_MODE="smoke-only"; shift ;;
--preflight-only) PREFLIGHT_ONLY=1; shift ;;
--instance-type) INSTANCE_TYPE="$2"; shift 2 ;;
--instance-type=*) INSTANCE_TYPE="${1#*=}"; shift ;;
--mode) BACKTEST_MODE="$2"; shift 2 ;;
Expand Down Expand Up @@ -231,6 +270,7 @@ echo " Region : $AWS_REGION"
echo " Branch : $BRANCH"
echo " Backtest mode : $BACKTEST_MODE"
echo " Run mode : $RUN_MODE"
echo " Preflight-only: $PREFLIGHT_ONLY (1 = boot + deps + smoke harness + exit 0, NO sweep/sim/parity/evaluator/auto-apply, ZERO writes)"
echo " Skip phase 4 : $SKIP_PHASE4"
echo " Skip phases : ${SKIP_PHASES:-(none)}"
echo " Only phases : ${ONLY_PHASES:-(none)}"
Expand Down Expand Up @@ -522,6 +562,67 @@ ENV_SOURCE='set -a; [ -f /home/ec2-user/alpha-engine-backtester/.env ] && source
# Determine python binary on remote
REMOTE_PYTHON=$(run_remote "command -v python3.12 || command -v python3")

# ── Preflight-only (Friday shell_run dry path) ──────────────────────────────
# ROADMAP "Friday shell-run — per-module dry-path activation" owed-item #3.
# Placed AFTER the real boot/clone/deps/config-upload (so the bootstrap
# path — lib-pin resolution, sys.path, predictor cache sync, image deps —
# is genuinely exercised) and STRICTLY BEFORE both the --smoke-only block
# (per-phase smoke modes + the evaluate.py S3-probe diagnostics) and the
# full-backtest heredoc.
#
# Runs ONLY `backtest.py --mode=smoke` — the EXISTING bootstrap-class
# smoke harness from PRs #43-#48 (BacktesterPreflight: lib-version /
# imports / predictor-weights presence / executor-config validation, then
# _runtime_smoke: universe-symbols + per-ticker ArcticDB read + recent
# signals.json load + Layer-1A GBM load/predict — all S3 *reads*, ~30-60s).
# We REUSE backtest.py's existing --mode=smoke (no new harness): per
# backtest.py:4180-4184 it runs preflight + _runtime_smoke then `return`s
# BEFORE _init_pipeline / the simulation / the optimizer, so it itself
# performs zero config writes and makes no external API (yfinance/
# Anthropic) data fetch.
#
# Hard invariant proof (what is statically unreachable under this flag):
# * The per-phase smoke loop (smoke-simulate / smoke-param-sweep /
# smoke-predictor-backtest / smoke-phase4 / smoke-predictor-param-sweep)
# and the `evaluate.py --mode diagnostics` S3-probe block live INSIDE
# the `if [ "$RUN_MODE" = "smoke-only" ]` body below — the `exit 0`
# here ends the script before control can reach it.
# * The full-backtest heredoc (backtest stage / pit_parity / parity /
# evaluator) and its config/{executor,scoring,predictor,research,
# scanner}_params*.json optimizer auto-apply (evaluate.py --upload,
# non-frozen) live further below — also unreachable.
# * No CloudWatch heartbeat, no parity_report.json / parity_metrics.csv
# upload, no reporter S3 upload — all of those are past this exit.
# Net: smoke harness (read-only) runs, then exit 0. Zero external API
# calls, zero S3/config writes. The `trap cleanup EXIT` still fires and
# terminates the spot instance.
if [ "$PREFLIGHT_ONLY" = "1" ]; then
  # Preflight-only branch: print a banner, run ONLY `backtest.py --mode=smoke`
  # on the remote host via run_remote, then leave the script. On success the
  # exit status is 0; on failure it is the remote command's own status.
  echo ""
  echo "═══════════════════════════════════════════════════════════════"
  echo " PREFLIGHT-ONLY (Friday shell_run dry path)"
  echo " boot + deps done; running bootstrap-class smoke harness only,"
  echo " then exit 0 — NO sweep / sim / parity / evaluator / auto-apply,"
  echo " ZERO external API calls, ZERO S3/config writes."
  echo "═══════════════════════════════════════════════════════════════"

  # Capture the remote status explicitly rather than depending on errexit
  # being active in the enclosing script (not visible from this block): a
  # failed harness must never fall through to the PASSED banner / exit 0.
  preflight_rc=0
  # The heredoc delimiter is unquoted on purpose: ${ENV_SOURCE} and
  # $REMOTE_PYTHON are expanded locally before the payload is sent.
  run_remote bash -s <<PREFLIGHT || preflight_rc=$?
set -euo pipefail
cd /home/ec2-user/alpha-engine-backtester
${ENV_SOURCE}

# backtest.py --mode=smoke = BacktesterPreflight + _runtime_smoke, then
# returns 0 BEFORE _init_pipeline / simulation / optimizer (see
# backtest.py:4180). No --upload, no full mode, no config write.
echo "==> Preflight: backtest.py --mode=smoke"
$REMOTE_PYTHON -u backtest.py --mode=smoke --log-level INFO 2>&1
PREFLIGHT

  if [ "$preflight_rc" -ne 0 ]; then
    # Diagnostics go to stderr; propagate the harness status unchanged so
    # the caller sees the same code it would under errexit.
    echo "" >&2
    echo "==> Preflight-only FAILED — bootstrap-class smoke exited with status $preflight_rc." >&2
    exit "$preflight_rc"
  fi

  echo ""
  echo "==> Preflight-only PASSED — bootstrap-class smoke clean."
  echo "==> Instance will be terminated (no sweep/sim/parity/evaluator,"
  echo " no config/*.json auto-apply, no S3/config writes performed)."
  exit 0
fi

# ── Smoke test ────────────────────────────────────────────────────────────────
if [ "$RUN_MODE" = "smoke-only" ]; then
echo ""
Expand Down
146 changes: 146 additions & 0 deletions tests/test_spot_backtest_preflight_only.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
"""Pins spot_backtest.sh `--preflight-only` to the Friday shell_run dry-path
hard invariant: boot + deps + the EXISTING bootstrap-class smoke harness
(backtest.py --mode=smoke = BacktesterPreflight + _runtime_smoke), then
`exit 0` BEFORE the per-phase smoke modes, the evaluate.py S3-probe
diagnostics, AND the full-backtest heredoc — with NO param sweep, NO
portfolio sim, NO parity, NO evaluator, NO config/*.json optimizer
auto-apply, ZERO external API calls, and ZERO S3/config writes.

Owed-item #3 of ROADMAP "Friday shell-run — per-module dry-path
activation" (P1). Static-analysis test (mirrors
test_spot_backtest_aws_region.py) — the spot_backtest.sh SSM/EC2 path
cannot be exercised in CI; these assertions guard the structural
invariant against a future edit that would let preflight-only fall
through into the sweep / parity / evaluator / config-auto-apply.

Cross-script consistency: the flag name is `--preflight-only`, verbatim
identical to the data (spot_data_weekly.sh #259) and predictor
(spot_train.sh #175) siblings, because the Friday shell_run SF keystone
follow-on dispatches the same flag name to every module.
"""

from __future__ import annotations

from pathlib import Path

# Absolute path of the shell script under test, resolved relative to this
# test file: <repo>/infrastructure/spot_backtest.sh.
_SCRIPT = (
    Path(__file__).resolve().parent.parent / "infrastructure" / "spot_backtest.sh"
)


def _text() -> str:
    """Return the full contents of the script under test.

    The script contains non-ASCII characters (box-drawing rules, em dashes)
    and the assertions in this module search for some of them, so the file
    is decoded explicitly as UTF-8 instead of with the platform's locale
    default encoding.
    """
    return _SCRIPT.read_text(encoding="utf-8")


def test_spot_backtest_exists():
    """The script every other test in this module reads must be a regular file."""
    script_is_present = _SCRIPT.is_file()
    assert script_is_present


def test_preflight_only_flag_parses():
    """The flag parser must contain the `--preflight-only` case arm that sets
    PREFLIGHT_ONLY=1 and shifts past the flag."""
    script_source = _text()
    parser_arm = "--preflight-only) PREFLIGHT_ONLY=1; shift ;;"
    assert parser_arm in script_source, (
        "--preflight-only flag not wired into the flag parser"
    )


def test_preflight_only_is_an_orthogonal_modifier_default_off():
    """The script must assign PREFLIGHT_ONLY=0 somewhere, i.e. the modifier is
    off unless `--preflight-only` is passed, so a run without the flag keeps
    its normal behavior."""
    script_source = _text()
    default_assignment = "PREFLIGHT_ONLY=0"
    has_default = default_assignment in script_source
    assert has_default, (
        "PREFLIGHT_ONLY must default to 0 so the unflagged Saturday run "
        "is unaffected"
    )


def test_preflight_only_branch_exists_and_exits_zero():
    """The dedicated preflight-only branch must exist and contain a real
    `exit 0` statement.

    The branch is the text between the `if [ "$PREFLIGHT_ONLY" = "1" ]` line
    and the `# ── Smoke test` section header. The banner echoed inside the
    branch also contains the words "exit 0", so a plain substring search
    would pass even if the actual statement were removed. The check
    therefore requires a line that consists of exactly `exit 0` (ignoring
    surrounding whitespace).
    """
    text = _text()
    branch_header = 'if [ "$PREFLIGHT_ONLY" = "1" ]; then'
    assert branch_header in text, (
        "no dedicated preflight-only branch found"
    )
    # The branch must terminate with `exit 0` (clean dispatcher exit;
    # trap cleanup still terminates the spot instance).
    branch = text.split(branch_header, 1)[1]
    branch = branch.split("# ── Smoke test", 1)[0]
    statements = [line.strip() for line in branch.splitlines()]
    assert "exit 0" in statements, "preflight-only branch must exit 0"


def test_preflight_only_runs_before_smoke_only_block_and_full_backtest():
    """Ordering guard: the preflight-only `if` must appear in the script
    before the `--smoke-only` `if`, which in turn must appear before the
    `# ── Full backtest` section, so the preflight branch's exit happens
    ahead of both."""
    script_source = _text()
    # Anchors in the order they are required to occur in the file.
    ordered_anchors = (
        'if [ "$PREFLIGHT_ONLY" = "1" ]; then',
        'if [ "$RUN_MODE" = "smoke-only" ]; then',
        "# ── Full backtest",
    )
    preflight_at, smoke_only_at, full_backtest_at = (
        script_source.index(anchor) for anchor in ordered_anchors
    )
    assert preflight_at < smoke_only_at < full_backtest_at, (
        "preflight-only branch must precede the smoke-only block and the "
        "full-backtest heredoc so its exit 0 short-circuits before any "
        "sweep / parity / evaluator / config auto-apply"
    )


def test_preflight_only_body_only_runs_mode_smoke_and_no_writers():
    """Scan the preflight-only branch, from its `if` line up to the PREFLIGHT
    heredoc terminator, and require that its executable lines invoke
    `backtest.py --mode=smoke` and mention none of the tokens associated
    with the full backtest, per-phase smoke modes, evaluator, uploads,
    parity, CloudWatch metrics or S3 copies."""
    script_source = _text()
    branch_at = script_source.index('if [ "$PREFLIGHT_ONLY" = "1" ]; then')
    # The region of interest stops at the heredoc terminator line.
    terminator_at = script_source.index("\nPREFLIGHT\n", branch_at)
    region = script_source[branch_at:terminator_at]

    # Comment and echo lines carry the human-readable "NO sweep / sim /
    # parity" wording; drop them so they cannot trip the forbidden-token
    # scan below.
    ignored_prefixes = ("#", "echo ")
    executable_lines = []
    for raw_line in region.splitlines():
        if raw_line.lstrip().startswith(ignored_prefixes):
            continue
        executable_lines.append(raw_line)
    executable = "\n".join(executable_lines)

    assert "backtest.py --mode=smoke" in executable, (
        "preflight-only must reuse the existing backtest.py --mode=smoke "
        "bootstrap harness (do not rebuild a parallel preflight)"
    )

    banned_tokens = (
        "--mode $BACKTEST_MODE",  # full backtest invocation
        "--mode smoke-",  # heavy per-phase smoke modes
        "--mode=smoke-",
        "evaluate.py",  # evaluator and S3-probe diagnostics
        "--upload",  # optimizer config/*.json auto-apply
        "--pit-parity",  # pit_parity extra predictor sim
        "param_sweep",
        "test_parity_replay",  # parity stage
        "put-metric-data",  # CloudWatch heartbeat
        "aws s3 cp",  # any S3 upload
        "aws s3 sync",
    )
    for banned in banned_tokens:
        assert banned not in executable, (
            f"preflight-only body must NOT reference {banned!r} — it would "
            f"break the no-sweep/no-parity/no-evaluator/no-auto-apply/"
            f"no-write invariant"
        )


def test_preflight_only_step_keeps_aws_region_export():
    """Regression guard (#247 class, same intent as
    test_spot_backtest_aws_region.py): the preflight-only branch, up to its
    PREFLIGHT heredoc terminator, must contain the literal `${ENV_SOURCE}`
    expansion, which per the failure message is what exports
    AWS_REGION / AWS_DEFAULT_REGION for BacktesterPreflight and boto3."""
    script_source = _text()
    branch_at = script_source.index('if [ "$PREFLIGHT_ONLY" = "1" ]; then')
    terminator_at = script_source.index("\nPREFLIGHT\n", branch_at)
    region = script_source[branch_at:terminator_at]
    sources_env = "${ENV_SOURCE}" in region
    assert sources_env, (
        "preflight-only heredoc must source ${ENV_SOURCE} so AWS_REGION / "
        "AWS_DEFAULT_REGION (and the .env runtime config) are exported — "
        "without it BacktesterPreflight / boto3 fail (#247 class)."
    )
Loading