Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,6 @@ Core tests should prove behavior, not implementation details:

If a change touches matching, venue, OMS, or replay ordering, run the
solo-equivalence tests before merging.

For performance work, keep direct execution-engine throughput separate from
full audited replay throughput. See `docs/benchmarks.md`.
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,12 @@ The projects serve different workflows.
The pure Python engine is still the reference implementation, because it is the
clearest place to inspect queue behavior and prove equivalence. Packaged wheels
include the compiled `CppMatchingEngine`; ordinary `Replay(...)` runs prefer it
because it preserves the same public contract while avoiding the Python hot
loop. Source checkouts build the extension during normal installation:
because it is the compiled implementation the project intends to keep
equivalent and scale over time. The direct C++ batch-ingest path is already
substantially faster for callers that own the event loop; ordinary audited
`Replay(...)` currently remains event-by-event so it can record per-event
valuation marks. Source checkouts build the extension during normal
installation:

```bash
python -m pip install -e ".[dev]"
Expand Down Expand Up @@ -275,6 +279,7 @@ Planned next milestones:
- Connectors: `docs/connectors.md`
- Releasing: `docs/releasing.md`
- Engineering standards: `docs/engineering-standards.md`
- Benchmarks: `docs/benchmarks.md`
- Example: `examples/canonical.py`
- Schema reference: `docs/schema.md`
- AI agent guide: `AGENTS.md`
Expand Down
1 change: 1 addition & 0 deletions benchmarks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Small public benchmark scripts for ordersim."""
166 changes: 166 additions & 0 deletions benchmarks/engine_throughput.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
"""Measure direct execution-engine event throughput."""

import argparse
from collections.abc import Callable, Sequence
from dataclasses import dataclass
from decimal import Decimal
from statistics import median
from time import perf_counter

from benchmarks.workloads import build_mixed_mbo_workload
from ordersim import (
CompiledEventColumns,
CppMatchingEngine,
MatchingEngine,
MBOEvent,
cpp_execution_engine_available,
)
from ordersim.sim import ExecutionEngine

TICK_SIZE = Decimal("0.10")


@dataclass(frozen=True, slots=True)
class BenchmarkResult:
"""One measured direct-engine path."""

path_name: str
event_count: int
median_seconds: float

@property
def events_per_second(self) -> float:
"""Return median event throughput."""

return self.event_count / self.median_seconds


def run_scalar(engine: ExecutionEngine, events: Sequence[MBOEvent]) -> None:
    """Feed every event to ``engine.apply_event`` in order, one call per event.

    Args:
        engine: Engine instance that receives the events.
        events: Events to apply, in iteration order.
    """

    # The method is looked up on every iteration on purpose: this is the
    # work the scalar benchmark path is meant to time.
    for mbo_event in events:
        engine.apply_event(mbo_event)


def run_batch(engine: CppMatchingEngine, columns: CompiledEventColumns) -> None:
    """Hand the full column set to ``engine.apply_events_batch`` in one call.

    Args:
        engine: Compiled engine that receives the batch.
        columns: Compiled event columns; the slice spans index 0 up to
            ``len(columns.ts_ns)``.
    """

    row_count = len(columns.ts_ns)
    whole_batch = columns.slice(0, row_count)
    engine.apply_events_batch(whole_batch)


def measure(
    path_name: str,
    runner: Callable[[], None],
    *,
    event_count: int,
    repeats: int,
    warmups: int,
) -> BenchmarkResult:
    """Time ``runner`` repeatedly and report the median duration.

    Args:
        path_name: Label stored on the returned result.
        runner: Zero-argument callable executed once per run.
        event_count: Number of events each run processes; stored on the
            result unchanged.
        repeats: Number of timed runs; must be positive.
        warmups: Number of untimed runs executed first; must not be negative.

    Returns:
        A ``BenchmarkResult`` holding the median of the timed runs.

    Raises:
        ValueError: If ``repeats`` is not positive or ``warmups`` is negative.
    """

    if repeats <= 0:
        raise ValueError("repeats must be positive")
    if warmups < 0:
        raise ValueError("warmups must be non-negative")

    def timed_run() -> float:
        # Only the runner call sits between the two clock reads.
        begin = perf_counter()
        runner()
        return perf_counter() - begin

    # Warmup runs execute the same callable; their durations are discarded.
    for _ in range(warmups):
        runner()

    samples = [timed_run() for _ in range(repeats)]
    return BenchmarkResult(
        path_name=path_name,
        event_count=event_count,
        median_seconds=median(samples),
    )


def format_result(result: BenchmarkResult) -> str:
    """Format one measurement as a single fixed-width report row.

    The row holds, left to right: the path name left-aligned in 28
    characters, the event count with thousands separators, the median
    duration with four decimals, and the throughput rounded to whole
    events per second.
    """

    label = format(result.path_name, "<28")
    count = format(result.event_count, ">10,")
    elapsed = format(result.median_seconds, ">8.4f")
    rate = format(result.events_per_second, ">12,.0f")
    return f"{label}{count} events {elapsed} s {rate} events/s"


def main() -> None:
    """Run direct-engine throughput benchmarks from the command line.

    Parses ``--cycles``, ``--repeats`` and ``--warmups``, measures the Python
    ``MatchingEngine`` scalar path, and, when the compiled engine is
    available, also the ``CppMatchingEngine`` scalar and batch paths. One
    report row is printed per path, followed by the compiled paths' speedup
    relative to the Python path.

    Exits with an argparse usage error when ``--cycles`` is not positive.
    """

    parser = argparse.ArgumentParser(
        description="Measure direct execution-engine throughput."
    )
    parser.add_argument(
        "--cycles",
        type=int,
        default=20_000,
        help="number of six-event mixed MBO cycles to generate",
    )
    parser.add_argument(
        "--repeats",
        type=int,
        default=5,
        help="number of measured runs per path",
    )
    parser.add_argument(
        "--warmups",
        type=int,
        default=1,
        help="number of discarded warmup runs per path",
    )
    args = parser.parse_args()

    # With no events every path reports 0 events/s, and the speedup ratios
    # at the end of this function would then divide zero by zero. Reject the
    # request up front instead of failing after partial output.
    if args.cycles <= 0:
        parser.error("--cycles must be positive")

    events = build_mixed_mbo_workload(args.cycles)
    columns = CompiledEventColumns.from_events(events, tick_size=TICK_SIZE)
    event_count = len(events)

    # Each measured run builds a fresh engine inside the timed callable, so
    # engine construction is part of every sample.
    python_scalar = measure(
        "MatchingEngine scalar",
        lambda: run_scalar(MatchingEngine(), events),
        event_count=event_count,
        repeats=args.repeats,
        warmups=args.warmups,
    )

    print("Direct execution-engine throughput")
    print("----------------------------------")
    print(format_result(python_scalar))

    if not cpp_execution_engine_available():
        print("CppMatchingEngine unavailable; compiled paths were skipped.")
        return

    cpp_scalar = measure(
        "CppMatchingEngine scalar",
        lambda: run_scalar(CppMatchingEngine(tick_size=TICK_SIZE), events),
        event_count=event_count,
        repeats=args.repeats,
        warmups=args.warmups,
    )
    cpp_batch = measure(
        "CppMatchingEngine batch",
        lambda: run_batch(CppMatchingEngine(tick_size=TICK_SIZE), columns),
        event_count=event_count,
        repeats=args.repeats,
        warmups=args.warmups,
    )

    for compiled_result in (cpp_scalar, cpp_batch):
        print(format_result(compiled_result))

    python_eps = python_scalar.events_per_second
    scalar_speedup = cpp_scalar.events_per_second / python_eps
    batch_speedup = cpp_batch.events_per_second / python_eps
    print(f"scalar C++ speedup vs Python {scalar_speedup:>7.2f}x")
    print(f"batch C++ speedup vs Python {batch_speedup:>7.2f}x")


# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
166 changes: 166 additions & 0 deletions benchmarks/replay_throughput.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
"""Measure full audited replay throughput."""

import argparse
from collections.abc import Callable
from dataclasses import dataclass
from decimal import Decimal
from statistics import median
from time import perf_counter

from benchmarks.workloads import build_mixed_mbo_workload
from ordersim import (
InstrumentSpec,
MatchingEngine,
MBOEvent,
Replay,
cpp_execution_engine_available,
)


@dataclass(frozen=True, slots=True)
class BenchmarkResult:
"""One measured replay path."""

path_name: str
event_count: int
median_seconds: float

@property
def events_per_second(self) -> float:
"""Return median replay throughput."""

return self.event_count / self.median_seconds


def gc_spec() -> InstrumentSpec:
    """Build the ``InstrumentSpec`` used by every replay benchmark run.

    The spec uses symbol ``"GC"``, a tick size of 0.10 and a point value of
    100; both numbers are built from strings so they are exact decimals.
    """

    tick = Decimal("0.10")
    value_per_point = Decimal("100")
    return InstrumentSpec(symbol="GC", tick_size=tick, point_value=value_per_point)


def advance_to_end(last_ts_ns: int) -> Callable:
    """Return a strategy callable that advances its gateway to one timestamp.

    Args:
        last_ts_ns: Timestamp passed to ``gateway.advance_to`` when the
            returned strategy is invoked.

    Returns:
        A one-argument callable; it calls ``gateway.advance_to`` exactly once
        and returns ``None``.
    """

    target_ts_ns = last_ts_ns

    def strategy(gateway) -> None:
        # The single call is the whole strategy; its result is discarded.
        gateway.advance_to(target_ts_ns)

    return strategy


def run_replay(
    events: tuple[MBOEvent, ...],
    *,
    execution_engine_factory=None,
) -> None:
    """Build a fresh ``Replay`` over ``events`` and run it to the last event.

    Args:
        events: Events handed to ``Replay`` as its data; the timestamp of
            the final element is the point the strategy advances to.
        execution_engine_factory: Forwarded unchanged to ``Replay``; the
            default ``None`` is passed through as-is.
    """

    instrument = gc_spec()
    replay = Replay(
        data=events,
        instrument=instrument,
        execution_engine_factory=execution_engine_factory,
    )
    # The replay is constructed before the final timestamp is read, matching
    # the order in which any failure would surface.
    final_ts_ns = events[-1].ts_ns
    strategy = advance_to_end(final_ts_ns)
    replay.run(strategy)


def measure(
    path_name: str,
    runner: Callable[[], None],
    *,
    event_count: int,
    repeats: int,
    warmups: int,
) -> BenchmarkResult:
    """Time ``runner`` repeatedly and report the median replay duration.

    Args:
        path_name: Label stored on the returned result.
        runner: Zero-argument callable executed once per run.
        event_count: Number of events each run replays; stored on the
            result unchanged.
        repeats: Number of timed runs; must be positive.
        warmups: Number of untimed runs executed first; must not be negative.

    Returns:
        A ``BenchmarkResult`` holding the median of the timed runs.

    Raises:
        ValueError: If ``repeats`` is not positive or ``warmups`` is negative.
    """

    if repeats <= 0:
        raise ValueError("repeats must be positive")
    if warmups < 0:
        raise ValueError("warmups must be non-negative")

    def elapsed_for_one_run() -> float:
        # Only the runner call sits between the two clock reads.
        t0 = perf_counter()
        runner()
        return perf_counter() - t0

    # Warmup runs execute the same callable; their durations are discarded.
    for _ in range(warmups):
        runner()

    durations = [elapsed_for_one_run() for _ in range(repeats)]
    return BenchmarkResult(
        path_name=path_name,
        event_count=event_count,
        median_seconds=median(durations),
    )


def format_result(result: BenchmarkResult) -> str:
    """Format one replay measurement as a single fixed-width report row.

    The row holds, left to right: the path name left-aligned in 28
    characters, the event count with thousands separators, the median
    duration with four decimals, and the throughput rounded to whole
    events per second.
    """

    pieces = (
        format(result.path_name, "<28"),
        format(result.event_count, ">10,"),
        " events ",
        format(result.median_seconds, ">8.4f"),
        " s ",
        format(result.events_per_second, ">12,.0f"),
        " events/s",
    )
    return "".join(pieces)


def main() -> None:
    """Run replay throughput benchmarks from the command line.

    Parses ``--cycles``, ``--repeats`` and ``--warmups``, then measures a
    replay forced onto the Python ``MatchingEngine`` and a replay using the
    default engine selection. Both report rows and their speedup ratio are
    printed, plus a note when the compiled engine is unavailable.

    Exits with an argparse usage error when ``--cycles`` is not positive.
    """

    parser = argparse.ArgumentParser(
        description="Measure full audited replay throughput."
    )
    parser.add_argument(
        "--cycles",
        type=int,
        default=20_000,
        help="number of six-event mixed MBO cycles to generate",
    )
    parser.add_argument(
        "--repeats",
        type=int,
        default=5,
        help="number of measured runs per path",
    )
    parser.add_argument(
        "--warmups",
        type=int,
        default=1,
        help="number of discarded warmup runs per path",
    )
    args = parser.parse_args()

    # run_replay reads events[-1], and the speedup ratio divides by the
    # Python throughput; both break on a workload without events. Reject
    # the request up front with a usage error.
    if args.cycles <= 0:
        parser.error("--cycles must be positive")

    events = build_mixed_mbo_workload(args.cycles)
    event_count = len(events)

    python_result = measure(
        "Replay + Python engine",
        lambda: run_replay(events, execution_engine_factory=MatchingEngine),
        event_count=event_count,
        repeats=args.repeats,
        warmups=args.warmups,
    )
    default_result = measure(
        "Replay + default engine",
        lambda: run_replay(events),
        event_count=event_count,
        repeats=args.repeats,
        warmups=args.warmups,
    )

    print("Full audited replay throughput")
    print("------------------------------")
    print(format_result(python_result))
    print(format_result(default_result))

    speedup = default_result.events_per_second / python_result.events_per_second
    print(
        "default engine speedup vs Python"
        f" {speedup:>7.2f}x"
    )
    if not cpp_execution_engine_available():
        print("CppMatchingEngine unavailable; the default path used Python.")


# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Loading