Skip to content
Open
12 changes: 5 additions & 7 deletions .github/configs/amd-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ minimaxm2.5-fp8-mi355x-vllm:
- { tp: 8, ep: 8, conc-start: 2, conc-end: 2 }

minimaxm2.5-fp8-mi355x-atom:
image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2
image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
model: MiniMaxAI/MiniMax-M2.5
model-prefix: minimaxm2.5
runner: mi355x
Expand All @@ -545,15 +545,13 @@ minimaxm2.5-fp8-mi355x-atom:
- isl: 1024
osl: 1024
search-space:
- { tp: 2, conc-start: 4, conc-end: 128 }
- { tp: 4, conc-start: 4, conc-end: 128 }
- { tp: 8, ep: 8, conc-start: 32, conc-end: 256 }
- { tp: 2, conc-start: 4, conc-end: 256 }
- { tp: 4, conc-start: 4, conc-end: 256 }
- isl: 8192
osl: 1024
search-space:
- { tp: 2, conc-start: 4, conc-end: 128 }
- { tp: 4, conc-start: 4, conc-end: 128 }
- { tp: 8, ep: 8, conc-start: 32, conc-end: 256 }
- { tp: 2, conc-start: 4, conc-end: 256 }
- { tp: 4, conc-start: 4, conc-end: 256 }

minimaxm2.5-fp4-mi355x-vllm:
image: vllm/vllm-openai-rocm:v0.19.1
Expand Down
2 changes: 2 additions & 0 deletions benchmarks/single_node/minimaxm2.5_fp8_mi355x_atom.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ fi

# Start GPU monitoring (power, temperature, clocks every second)
start_gpu_monitor
MEM_FRAC_STATIC=0.9

set -x

Expand All @@ -47,6 +48,7 @@ python3 -m atom.entrypoints.openai_server \
--server-port $PORT \
-tp $TP \
--kv_cache_dtype fp8 $CALCULATED_MAX_MODEL_LEN $EP \
--gpu-memory-utilization $MEM_FRAC_STATIC \
--trust-remote-code \
> $SERVER_LOG 2>&1 &

Expand Down
7 changes: 7 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1812,3 +1812,10 @@
- "Topologies: low-conc 1p1d-dep8-tep8 (4 nodes, mirrored from NVIDIA srt-slurm PR #71 with offload kept and numa-bind dropped); mid 1p1d-dep8-dep16 (6 nodes) and high 3p1d-dep8-dep16 (10 nodes) hand-rolled, structurally derived from the kimi-k2.5 1k/1k pattern"
- "Recipes stored under benchmarks/multi_node/srt-slurm-recipes/ and overlaid onto the upstream srt-slurm checkout at runtime"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1129

- config-keys:
- minimaxm2.5-fp8-mi355x-atom
description:
- "Update MiniMax-M2.5 FP8 MI355X Atom benchmark (rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post)"
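- "Raise max concurrency from 128 to 256 on TP2/TP4 for both 1024/1024 and 8192/1024"
- "Remove the TP8/EP8 search-space entries"
- "Launch the Atom server with --gpu-memory-utilization 0.9"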
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1194
Loading