From d136db5171a3fee8a53195470b48ca9ef5e6d109 Mon Sep 17 00:00:00 2001
From: seungrokj <seungrok.jung@amd.com>
Date: Fri, 24 Apr 2026 12:15:05 +0900
Subject: [PATCH 1/3] Update GLM5 FP8 atom config: new image, add TP=4, set
 gpu-memory-utilization to 0.9

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/configs/amd-master.yaml                | 4 +++-
 benchmarks/single_node/glm5_fp8_mi355x_atom.sh | 2 ++
 perf-changelog.yaml                            | 6 ++++++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index 0cd82774e..9cdd8797d 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -362,7 +362,7 @@ glm5-fp8-mi355x-sglang-mtp:
     - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
 
 glm5-fp8-mi355x-atom:
-  image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2.post
+  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
   model: zai-org/GLM-5-FP8
   model-prefix: glm5
   runner: mi355x
@@ -373,10 +373,12 @@ glm5-fp8-mi355x-atom:
   - isl: 1024
     osl: 1024
     search-space:
+    - { tp: 4, conc-start: 4, conc-end: 256 }
     - { tp: 8, conc-start: 4, conc-end: 256 }
   - isl: 8192
     osl: 1024
     search-space:
+    - { tp: 4, conc-start: 4, conc-end: 256 }
     - { tp: 8, conc-start: 4, conc-end: 256 }
 
 glm5.1-fp4-mi355x-sglang:
diff --git a/benchmarks/single_node/glm5_fp8_mi355x_atom.sh b/benchmarks/single_node/glm5_fp8_mi355x_atom.sh
index 31bc8b25f..036346af3 100644
--- a/benchmarks/single_node/glm5_fp8_mi355x_atom.sh
+++ b/benchmarks/single_node/glm5_fp8_mi355x_atom.sh
@@ -39,6 +39,7 @@ fi
 
 # Start GPU monitoring (power, temperature, clocks every second)
 start_gpu_monitor
+MEM_FRAC_STATIC=0.9
 
 set -x
 pip install -U transformers
@@ -47,6 +48,7 @@ python3 -m atom.entrypoints.openai_server \
     --server-port $PORT \
     -tp $TP \
     --kv_cache_dtype fp8 $CALCULATED_MAX_MODEL_LEN $EP \
+    --gpu-memory-utilization $MEM_FRAC_STATIC \
     --default-chat-template-kwargs '{"enable_thinking": false}' \
     --trust-remote-code \
     > $SERVER_LOG 2>&1 &
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index ddc6409c2..da81d6df9 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1733,3 +1733,9 @@
     - "TP=2 and TP=4, concurrency 4-256 for 1k1k and 8k1k sequence lengths"
     - "Add --max-num-seqs and --gpu-memory-utilization 0.9 to server launch"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1043
+
+- config-keys:  
+    - glm5-fp4-mi355x-atom
+  description:
+    - ""
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/

From 8a1620cf1a379294efa5007b4cb6ada3de5da71f Mon Sep 17 00:00:00 2001
From: seungrokj <seungrok.jung@amd.com>
Date: Fri, 24 Apr 2026 12:19:21 +0900
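Subject: [PATCH 2/3] Fix perf-changelog entry for GLM5 FP8 atom config

Correct the config key from glm5-fp4-mi355x-atom to glm5-fp8-mi355x-atom,
drop the trailing whitespace after "config-keys:", and replace the empty
description with a summary of the image, TP=4 and gpu-memory-utilization
changes.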

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 perf-changelog.yaml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index da81d6df9..4195d8564 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1734,8 +1734,11 @@
     - "Add --max-num-seqs and --gpu-memory-utilization 0.9 to server launch"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1043
 
-- config-keys:  
-    - glm5-fp4-mi355x-atom
+- config-keys:
+    - glm5-fp8-mi355x-atom
   description:
-    - ""
+    - "Update GLM-5 FP8 MI355X ATOM benchmark: new image, add TP=4, set gpu-memory-utilization"
+    - "Image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post"
+    - "Add TP=4, concurrency 4-256 for 1k1k and 8k1k sequence lengths"
+    - "Add --gpu-memory-utilization 0.9 to server launch"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/

From 66c42b42da74cca5e452f47e34ae2b514b90e6bc Mon Sep 17 00:00:00 2001
From: seungrokj <seungrok.jung@amd.com>
Date: Fri, 24 Apr 2026 12:20:16 +0900
Subject: [PATCH 3/3] Add PR link to perf-changelog for GLM5 FP8 atom config

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 perf-changelog.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 4195d8564..35a98bcf9 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1741,4 +1741,4 @@
     - "Image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post"
     - "Add TP=4, concurrency 4-256 for 1k1k and 8k1k sequence lengths"
     - "Add --gpu-memory-utilization 0.9 to server launch"
-  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1126