From 90a876b8b924921d06dba5766d5c147266c954e7 Mon Sep 17 00:00:00 2001 From: Tibor Date: Wed, 20 May 2026 12:33:27 +0200 Subject: [PATCH] Refactor valuation marks out of economics --- AGENTS.md | 1 + docs/architecture.md | 4 ++ docs/economics.md | 4 ++ docs/schema.md | 9 +-- src/ordersim/__init__.py | 3 +- src/ordersim/economics.py | 88 ++++++------------------- src/ordersim/replay/simulator.py | 8 ++- src/ordersim/sim/cpp_matching_engine.py | 2 +- src/ordersim/valuation.py | 80 ++++++++++++++++++++++ tests/test_economics.py | 11 ---- tests/test_valuation.py | 53 +++++++++++++++ 11 files changed, 174 insertions(+), 89 deletions(-) create mode 100644 src/ordersim/valuation.py create mode 100644 tests/test_valuation.py diff --git a/AGENTS.md b/AGENTS.md index c4d5cfc..bfaaf7d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -32,6 +32,7 @@ extraction targets and should not be imported until they exist. | `ordersim/recording.py` | Recording wrapper for order-intent logs | Yes | | `ordersim/specs.py` | Instrument specifications | Public extension surface | | `ordersim/types.py` | Public dataclasses and type aliases | Yes | +| `ordersim/valuation.py` | Valuation marks and compact mark transport | Yes | | `ordersim/fixtures/` | Tiny public fixtures for examples and tests | Public | | `ordersim/connectors/` | Data source contracts | Yes | | `ordersim/connectors/csv.py` | Normalized CSV `MBOEvent` source | Yes | diff --git a/docs/architecture.md b/docs/architecture.md index cdff761..c8d49c6 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -18,6 +18,7 @@ flowchart LR engine["ExecutionEngine"] python["MatchingEngine
Python reference"] cpp["CppMatchingEngine
preferred when available"] + valuation["Valuation marks"] result["ReplayResult
fills, order log, economics"] raw --> connector @@ -29,6 +30,8 @@ flowchart LR gateway --> engine engine --> python engine --> cpp + gateway --> valuation + valuation --> result gateway --> result recording --> result ``` @@ -39,6 +42,7 @@ The main boundaries are: - strategies depend on `OrderGateway`, not on storage or engine internals; - replay normalizes inputs once, chooses an execution engine, and gathers results; +- valuation marks are collected during replay and consumed by economics; - the Python engine defines behavior; the C++ engine must match it. ## Recommended Data Flow diff --git a/docs/economics.md b/docs/economics.md index 5a635d5..7d276a1 100644 --- a/docs/economics.md +++ b/docs/economics.md @@ -81,6 +81,10 @@ midpoints as `bid_ticks + ask_ticks` until equity construction. Public only avoids creating an intermediate Python `ValuationMark` object for every market-data event. +Valuation mark inputs live in `ordersim.valuation`; economics consumes them to +build realized and marked output. The top-level package re-exports +`ValuationMark` and `CompiledValuationMarks` for ordinary user code. + Replay only marks times it actually advances through. Full-session intraday drawdown therefore requires the strategy or harness to advance through the session window being studied, or to call `build_equity_curve(...)` directly with diff --git a/docs/schema.md b/docs/schema.md index 960c91a..3b11692 100644 --- a/docs/schema.md +++ b/docs/schema.md @@ -102,16 +102,17 @@ See `docs/economics.md` for the assumptions and explicit non-goals. ## `ValuationMark` And `EquityPoint` -`ValuationMark` is an input mark used to value open lots. +`ValuationMark` lives in `ordersim.valuation` and is re-exported from +`ordersim`. It is an input mark used to value open lots. | Field | Type | Meaning | |---|---|---| | `ts_ns` | `int` | Mark timestamp as UTC Unix-epoch nanoseconds. | | `price` | `Decimal` | Price used for open-lot valuation. | -`CompiledValuationMarks` is the compact internal form used by the C++ replay -path. It stores mark timestamps and midpoint prices as primitive integer -columns: +`CompiledValuationMarks` also lives in `ordersim.valuation`. It is the compact +internal form used by the C++ replay path. It stores mark timestamps and +midpoint prices as primitive integer columns: | Field | Type | Meaning | |---|---|---| diff --git a/src/ordersim/__init__.py b/src/ordersim/__init__.py index a397de6..f1dfb2a 100644 --- a/src/ordersim/__init__.py +++ b/src/ordersim/__init__.py @@ -11,11 +11,9 @@ write_parquet, ) from ordersim.economics import ( - CompiledValuationMarks, EquityPoint, ExecutionSummary, PositionLot, - ValuationMark, build_equity_curve, summarize_fills, ) @@ -64,6 +62,7 @@ Side, TimeInForce, ) +from ordersim.valuation import CompiledValuationMarks, ValuationMark __all__ = [ "BookSide", diff --git a/src/ordersim/economics.py b/src/ordersim/economics.py index 7b34569..b81811a 100644 --- a/src/ordersim/economics.py +++ b/src/ordersim/economics.py @@ -1,12 +1,27 @@ """Execution economics computed directly from fills.""" -from collections.abc import Iterable from dataclasses import dataclass from decimal import Decimal -from typing import TypeAlias from ordersim.specs import InstrumentSpec from ordersim.types import Fill, Price, Side +from ordersim.valuation import ( + CompiledValuationMarks, + ValuationMark, + ValuationMarkInput, + iter_valuation_mark_pairs, +) + +__all__ = [ + "CompiledValuationMarks", + "EquityPoint", + "ExecutionSummary", + "PositionLot", + "ValuationMark", + "ValuationMarkInput", + "build_equity_curve", + "summarize_fills", +] @dataclass(frozen=True, slots=True) @@ -37,46 +52,6 @@ class ExecutionSummary: open_lots: tuple[PositionLot, ...] -@dataclass(frozen=True, slots=True) -class ValuationMark: - """One mark price used to value open lots.""" - - ts_ns: int - price: Price - - -@dataclass(frozen=True, slots=True) -class CompiledValuationMarks: - """Compact valuation marks stored as timestamp and midpoint tick columns. - - `mid_ticks_x2` stores `bid_ticks + ask_ticks`, so half-tick midpoints stay - exact until the public `Decimal` equity curve is built. - """ - - ts_ns: memoryview - mid_ticks_x2: memoryview - tick_size: Decimal - - @classmethod - def from_bytes( - cls, - *, - ts_ns: bytes, - mid_ticks_x2: bytes, - tick_size: Decimal, - ) -> "CompiledValuationMarks": - """Build compact marks from native int64 byte columns.""" - - timestamps = memoryview(ts_ns).cast("q") - mids = memoryview(mid_ticks_x2).cast("q") - if len(timestamps) != len(mids): - raise ValueError("valuation mark columns must have equal length") - return cls(ts_ns=timestamps, mid_ticks_x2=mids, tick_size=tick_size) - - def __len__(self) -> int: - return len(self.ts_ns) - - @dataclass(frozen=True, slots=True) class EquityPoint: """One point on a mark-to-market equity curve.""" @@ -89,14 +64,6 @@ class EquityPoint: equity: Decimal drawdown: Decimal - -ValuationMarkInput: TypeAlias = ( - tuple[ValuationMark | CompiledValuationMarks, ...] - | list[ValuationMark | CompiledValuationMarks] - | CompiledValuationMarks -) - - def summarize_fills( fills: tuple[Fill, ...] | list[Fill], instrument: InstrumentSpec, @@ -160,7 +127,9 @@ def build_equity_curve( instrument, ) - sorted_marks = tuple(sorted(_iter_mark_pairs(marks), key=lambda mark: mark[0])) + sorted_marks = tuple( + sorted(iter_valuation_mark_pairs(marks), key=lambda mark: mark[0]) + ) open_lots: list[PositionLot] = [] realized_pnl = Decimal("0") commission = Decimal("0") @@ -196,23 +165,6 @@ def build_equity_curve( return tuple(points) -def _iter_mark_pairs( - marks: ValuationMarkInput, -) -> Iterable[tuple[int, Price]]: - for mark in marks: - if isinstance(mark, CompiledValuationMarks): - yield from _iter_compiled_mark_pairs(mark) - else: - yield mark.ts_ns, mark.price - - -def _iter_compiled_mark_pairs( - marks: CompiledValuationMarks, -) -> Iterable[tuple[int, Price]]: - for ts_ns, mid_ticks_x2 in zip(marks.ts_ns, marks.mid_ticks_x2, strict=True): - yield ts_ns, marks.tick_size * Decimal(mid_ticks_x2) / 2 - - def _build_equity_curve_from_compiled_marks( sorted_fills: tuple[Fill, ...], marks: CompiledValuationMarks, diff --git a/src/ordersim/replay/simulator.py b/src/ordersim/replay/simulator.py index 0351d3f..1028099 100644 --- a/src/ordersim/replay/simulator.py +++ b/src/ordersim/replay/simulator.py @@ -7,11 +7,8 @@ from ordersim.connectors import EventInput, normalize_events from ordersim.economics import ( - CompiledValuationMarks, EquityPoint, ExecutionSummary, - ValuationMark, - ValuationMarkInput, build_equity_curve, summarize_fills, ) @@ -42,6 +39,11 @@ Side, TimeInForce, ) +from ordersim.valuation import ( + CompiledValuationMarks, + ValuationMark, + ValuationMarkInput, +) Strategy = Callable[[OrderGateway], Any] diff --git a/src/ordersim/sim/cpp_matching_engine.py b/src/ordersim/sim/cpp_matching_engine.py index c69b098..e6a4671 100644 --- a/src/ordersim/sim/cpp_matching_engine.py +++ b/src/ordersim/sim/cpp_matching_engine.py @@ -3,7 +3,6 @@ from decimal import Decimal from typing import Any -from ordersim.economics import CompiledValuationMarks from ordersim.replay.compiled_events import CompiledEventSlice from ordersim.sim.matching_engine import PriceLevel from ordersim.types import ( @@ -16,6 +15,7 @@ Side, TimeInForce, ) +from ordersim.valuation import CompiledValuationMarks class CppMatchingEngine: diff --git a/src/ordersim/valuation.py b/src/ordersim/valuation.py new file mode 100644 index 0000000..458ff5a --- /dev/null +++ b/src/ordersim/valuation.py @@ -0,0 +1,80 @@ +"""Valuation marks used to build mark-to-market equity curves.""" + +from collections.abc import Iterable +from dataclasses import dataclass +from decimal import Decimal +from typing import TypeAlias + +from ordersim.types import Price + + +@dataclass(frozen=True, slots=True) +class ValuationMark: + """One public mark price used to value open lots.""" + + ts_ns: int + price: Price + + +@dataclass(frozen=True, slots=True) +class CompiledValuationMarks: + """Compact valuation marks stored as timestamp and midpoint tick columns. + + `mid_ticks_x2` stores `bid_ticks + ask_ticks`, so half-tick midpoints stay + exact until the public `Decimal` equity curve is built. + """ + + ts_ns: memoryview + mid_ticks_x2: memoryview + tick_size: Decimal + + @classmethod + def from_bytes( + cls, + *, + ts_ns: bytes, + mid_ticks_x2: bytes, + tick_size: Decimal, + ) -> "CompiledValuationMarks": + """Build compact marks from native int64 byte columns.""" + + timestamps = memoryview(ts_ns).cast("q") + mids = memoryview(mid_ticks_x2).cast("q") + if len(timestamps) != len(mids): + raise ValueError("valuation mark columns must have equal length") + return cls(ts_ns=timestamps, mid_ticks_x2=mids, tick_size=tick_size) + + def __len__(self) -> int: + return len(self.ts_ns) + + +ValuationMarkInput: TypeAlias = ( + tuple[ValuationMark | CompiledValuationMarks, ...] + | list[ValuationMark | CompiledValuationMarks] + | CompiledValuationMarks +) + + +def iter_valuation_mark_pairs( + marks: ValuationMarkInput, +) -> Iterable[tuple[int, Price]]: + """Yield `(timestamp, price)` pairs from public or compact mark inputs.""" + + if isinstance(marks, CompiledValuationMarks): + yield from iter_compiled_valuation_mark_pairs(marks) + return + + for mark in marks: + if isinstance(mark, CompiledValuationMarks): + yield from iter_compiled_valuation_mark_pairs(mark) + else: + yield mark.ts_ns, mark.price + + +def iter_compiled_valuation_mark_pairs( + marks: CompiledValuationMarks, +) -> Iterable[tuple[int, Price]]: + """Yield Decimal midpoint prices from compact integer mark columns.""" + + for ts_ns, mid_ticks_x2 in zip(marks.ts_ns, marks.mid_ticks_x2, strict=True): + yield ts_ns, marks.tick_size * Decimal(mid_ticks_x2) / 2 diff --git a/tests/test_economics.py b/tests/test_economics.py index 13590b4..d258f4a 100644 --- a/tests/test_economics.py +++ b/tests/test_economics.py @@ -1,7 +1,5 @@ from decimal import Decimal -import pytest - from ordersim import ( CompiledValuationMarks, EquityPoint, @@ -168,12 +166,3 @@ def test_build_equity_curve_accepts_mixed_public_and_compact_marks() -> None: (1, Decimal("100.0")), (2, Decimal("100.5")), ] - - -def test_compact_valuation_marks_reject_mismatched_columns() -> None: - with pytest.raises(ValueError, match="equal length"): - CompiledValuationMarks.from_bytes( - ts_ns=int64_bytes((1, 2)), - mid_ticks_x2=int64_bytes((2000,)), - tick_size=Decimal("0.10"), - ) diff --git a/tests/test_valuation.py b/tests/test_valuation.py new file mode 100644 index 0000000..e696bd1 --- /dev/null +++ b/tests/test_valuation.py @@ -0,0 +1,53 @@ +from decimal import Decimal + +import pytest + +from ordersim.valuation import ( + CompiledValuationMarks, + ValuationMark, + iter_valuation_mark_pairs, +) + + +def int64_bytes(values: tuple[int, ...]) -> bytes: + from array import array + + return array("q", values).tobytes() + + +def test_compact_valuation_marks_reject_mismatched_columns() -> None: + with pytest.raises(ValueError, match="equal length"): + CompiledValuationMarks.from_bytes( + ts_ns=int64_bytes((1, 2)), + mid_ticks_x2=int64_bytes((2000,)), + tick_size=Decimal("0.10"), + ) + + +def test_iter_valuation_mark_pairs_accepts_public_and_compact_marks() -> None: + marks = [ + ValuationMark(ts_ns=1, price=Decimal("100.0")), + CompiledValuationMarks.from_bytes( + ts_ns=int64_bytes((2,)), + mid_ticks_x2=int64_bytes((2010,)), + tick_size=Decimal("0.10"), + ), + ] + + assert list(iter_valuation_mark_pairs(marks)) == [ + (1, Decimal("100.0")), + (2, Decimal("100.5")), + ] + + +def test_iter_valuation_mark_pairs_accepts_compact_marks_directly() -> None: + marks = CompiledValuationMarks.from_bytes( + ts_ns=int64_bytes((1, 2)), + mid_ticks_x2=int64_bytes((2000, 2010)), + tick_size=Decimal("0.10"), + ) + + assert list(iter_valuation_mark_pairs(marks)) == [ + (1, Decimal("100.0")), + (2, Decimal("100.5")), + ]