From b6f93dfc60cbfc491244451a4f18323f8888259a Mon Sep 17 00:00:00 2001 From: Tibor Date: Tue, 19 May 2026 14:44:25 +0200 Subject: [PATCH] Precompile replay event stream once --- docs/benchmarks.md | 4 ++++ docs/execution-engines.md | 3 +++ src/ordersim/replay/simulator.py | 16 +++++++++++++++- tests/test_replay.py | 27 +++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index d4319f7..716b00c 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -71,6 +71,10 @@ Full replay throughput answers, "how quickly can the normal audited research workflow produce a `ReplayResult`?" Both numbers matter. They should not be collapsed into one claim. +`Replay(...)` compiles the immutable event stream into primitive columns once +per replay object. Repeated strategy runs can share that read-only view, which +keeps the future boundary-batched path from rebuilding columns inside +`run_many(...)`. ## What This Exposes diff --git a/docs/execution-engines.md b/docs/execution-engines.md index aaccf74..b367dac 100644 --- a/docs/execution-engines.md +++ b/docs/execution-engines.md @@ -90,6 +90,9 @@ exposes a compiled batch-ingest path. It accepts primitive columns derived from the same `MBOEvent` schema and returns passive fills without changing the public matching semantics. Ordinary `Replay(...)` still applies one event at a time so it can record the per-event valuation marks that build the default equity curve. +`Replay(...)` precompiles its immutable event stream once and shares that column +view with each strategy run, so future compiled replay paths do not need to +rebuild primitive columns inside `run_many(...)`. ```python from ordersim import CompiledEventColumns, CppMatchingEngine diff --git a/src/ordersim/replay/simulator.py b/src/ordersim/replay/simulator.py index 7072636..c79b4eb 100644 --- a/src/ordersim/replay/simulator.py +++ b/src/ordersim/replay/simulator.py @@ -19,6 +19,7 @@ default_latency_model_factory, ) from ordersim.recording import RecordingGateway +from ordersim.replay.compiled_events import CompiledEventColumns from ordersim.sim import ( ExecutionEngine, ExecutionEngineFactory, @@ -76,11 +77,17 @@ def _from_canonical_events( *, engine: ExecutionEngine, latency_model: LatencyModel, + compiled_events: CompiledEventColumns | None = None, ) -> "ReplayGateway": """Build a gateway from the immutable event tuple already held by Replay.""" gateway = cls.__new__(cls) - gateway._init(events, engine=engine, latency_model=latency_model) + gateway._init( + events, + engine=engine, + latency_model=latency_model, + compiled_events=compiled_events, + ) return gateway def _init( @@ -89,8 +96,10 @@ def _init( *, engine: ExecutionEngine | None, latency_model: LatencyModel | None, + compiled_events: CompiledEventColumns | None = None, ) -> None: self._events = events + self._compiled_events = compiled_events self._engine = engine or python_execution_engine_factory() self._latency_model = latency_model or default_latency_model_factory() self._cursor = 0 @@ -227,6 +236,10 @@ def __init__( ) for event in self.data: instrument.assert_price_aligned(event.price) + self._compiled_events = CompiledEventColumns.from_events( + self.data, + tick_size=instrument.tick_size, + ) def run( self, @@ -240,6 +253,7 @@ def run( self.data, engine=self._execution_engine_factory(), latency_model=self._latency_model_factory(), + compiled_events=self._compiled_events, ) order_events: list[OrderEvent] = [] recording_gateway = RecordingGateway( diff --git a/tests/test_replay.py b/tests/test_replay.py index a33d03c..d35bf56 100644 --- a/tests/test_replay.py +++ b/tests/test_replay.py @@ -14,6 +14,7 @@ RestingOrder, ) from ordersim.fixtures.synthetic import SyntheticSource +from ordersim.replay import simulator as replay_simulator from ordersim.types import OrderEvent @@ -94,6 +95,32 @@ def test_replay_run_many_preserves_solo_equivalence() -> None: assert many["copy"].fills == solo.fills +def test_replay_compiles_immutable_stream_once_for_run_many(monkeypatch) -> None: + calls = 0 + original = replay_simulator.CompiledEventColumns.from_events + + def spy_from_events(cls, events, *, tick_size): + nonlocal calls + calls += 1 + return original(events, tick_size=tick_size) + + monkeypatch.setattr( + replay_simulator.CompiledEventColumns, + "from_events", + classmethod(spy_from_events), + ) + replay = Replay(data=SyntheticSource.small_mbo(), instrument=gc_spec()) + + replay.run_many( + { + "baseline": read_book_then_cross_spread, + "copy": read_book_then_cross_spread, + } + ) + + assert calls == 1 + + def test_replay_gateway_exposes_book_depth() -> None: replay = Replay(data=SyntheticSource.small_mbo(), instrument=gc_spec())