SemiAnalysisAI · seungrokj · Apr 16, 2026 · Apr 17, 2026 · Apr 25, 2026 · Apr 25, 2026
@@ -534,7 +534,7 @@ minimaxm2.5-fp8-mi355x-vllm:
     - { tp: 8, ep: 8, conc-start: 2, conc-end: 2 }
 
 minimaxm2.5-fp8-mi355x-atom:
-  image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2
+  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
   model: MiniMaxAI/MiniMax-M2.5
   model-prefix: minimaxm2.5
   runner: mi355x
@@ -545,15 +545,13 @@ minimaxm2.5-fp8-mi355x-atom:
   - isl: 1024
     osl: 1024
     search-space:
-    - { tp: 2, conc-start: 4, conc-end: 128 }
-    - { tp: 4, conc-start: 4, conc-end: 128 }
-    - { tp: 8, ep: 8, conc-start: 32, conc-end: 256 }
+    - { tp: 2, conc-start: 4, conc-end: 256 }
+    - { tp: 4, conc-start: 4, conc-end: 256 }
   - isl: 8192
     osl: 1024
     search-space:
-    - { tp: 2, conc-start: 4, conc-end: 128 }
-    - { tp: 4, conc-start: 4, conc-end: 128 }
-    - { tp: 8, ep: 8, conc-start: 32, conc-end: 256 }
+    - { tp: 2, conc-start: 4, conc-end: 256 }
+    - { tp: 4, conc-start: 4, conc-end: 256 }
 
 minimaxm2.5-fp4-mi355x-vllm:
   image: vllm/vllm-openai-rocm:v0.19.1

diff --git a/benchmarks/single_node/minimaxm2.5_fp8_mi355x_atom.sh b/benchmarks/single_node/minimaxm2.5_fp8_mi355x_atom.sh
@@ -39,6 +39,7 @@ fi
 
 # Start GPU monitoring (power, temperature, clocks every second)
 start_gpu_monitor
+MEM_FRAC_STATIC=0.9  # value passed to the server's --gpu-memory-utilization flag below
 
 set -x
 
@@ -47,6 +48,7 @@ python3 -m atom.entrypoints.openai_server \
     --server-port $PORT \
     -tp $TP \
     --kv_cache_dtype fp8 $CALCULATED_MAX_MODEL_LEN $EP \
+    --gpu-memory-utilization $MEM_FRAC_STATIC \
     --trust-remote-code \
     > $SERVER_LOG 2>&1 &
 

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -1812,3 +1812,10 @@
     - "Topologies: low-conc 1p1d-dep8-tep8 (4 nodes, mirrored from NVIDIA srt-slurm PR #71 with offload kept and numa-bind dropped); mid 1p1d-dep8-dep16 (6 nodes) and high 3p1d-dep8-dep16 (10 nodes) hand-rolled, structurally derived from the kimi-k2.5 1k/1k pattern"
     - "Recipes stored under benchmarks/multi_node/srt-slurm-recipes/ and overlaid onto the upstream srt-slurm checkout at runtime"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1129
+
+- config-keys:
+    - minimaxm2.5-fp8-mi355x-atom
+  description:
+    - "Update MiniMax-M2.5 FP8 MI355X Atom benchmark (rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post)"
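+    - "Raise conc-end from 128 to 256 on TP2/TP4, drop the TP8/EP8 search-space entries, and launch the server with --gpu-memory-utilization 0.9"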
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1194