diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 1c431427e..c275f42f3 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -688,7 +688,7 @@ gptoss-fp4-mi355x-vllm: - { tp: 8, conc-start: 4, conc-end: 8 } gptoss-fp4-mi355x-atom: - image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x + image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post model: openai/gpt-oss-120b model-prefix: gptoss runner: mi355x @@ -699,12 +699,12 @@ gptoss-fp4-mi355x-atom: - isl: 1024 osl: 1024 search-space: - - { tp: 1, conc-start: 16, conc-end: 128 } + - { tp: 1, conc-start: 16, conc-end: 256 } - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 } - isl: 8192 osl: 1024 search-space: - - { tp: 1, conc-start: 4, conc-end: 128 } + - { tp: 1, conc-start: 4, conc-end: 256 } - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 } dsr1-fp8-mi355x-atom: diff --git a/benchmarks/single_node/gptoss_fp4_mi355x_atom.sh b/benchmarks/single_node/gptoss_fp4_mi355x_atom.sh index 76bc87c0c..ee0810e8f 100644 --- a/benchmarks/single_node/gptoss_fp4_mi355x_atom.sh +++ b/benchmarks/single_node/gptoss_fp4_mi355x_atom.sh @@ -44,6 +44,7 @@ fi # Start GPU monitoring (power, temperature, clocks every second) start_gpu_monitor +MEM_FRAC_STATIC=0.9 set -x @@ -54,6 +55,7 @@ python3 -m atom.entrypoints.openai_server \ --server-port $PORT \ -tp $TP \ --kv_cache_dtype fp8 $CALCULATED_MAX_MODEL_LEN $EP \ + --gpu-memory-utilization $MEM_FRAC_STATIC \ --block-size $BLOCK_SIZE > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 2bd2f025c..8cff8bda2 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1892,3 +1892,9 @@ description: - "Pass --dsv4 to run_benchmark_serving so MTP benchmarks use the DSv4 chat template (PR #1153)" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1182 + +- config-keys: + - gptoss-fp4-mi355x-atom + description: + - "Update GPTOSS-120B FP4 MI355X Atom benchmark (rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post)" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1195