From e7a81b79cd7ac617e2021b72819c91696c5b7207 Mon Sep 17 00:00:00 2001 From: jiacao-amd Date: Mon, 27 Apr 2026 12:31:57 -0500 Subject: [PATCH] Enable shuffled KV cache layout for MiniMax vLLM --- benchmarks/single_node/minimaxm2.5_fp8_mi355x.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/single_node/minimaxm2.5_fp8_mi355x.sh b/benchmarks/single_node/minimaxm2.5_fp8_mi355x.sh index 569172cee..53cffceee 100755 --- a/benchmarks/single_node/minimaxm2.5_fp8_mi355x.sh +++ b/benchmarks/single_node/minimaxm2.5_fp8_mi355x.sh @@ -26,6 +26,7 @@ fi export VLLM_ROCM_USE_AITER=1 export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4 +export VLLM_ROCM_SHUFFLE_KV_CACHE_LAYOUT=1 SERVER_LOG=/workspace/server.log PORT=${PORT:-8888}