diff --git a/.github/configs/CONFIGS.md b/.github/configs/CONFIGS.md index 9d3c24309..b62470cf9 100644 --- a/.github/configs/CONFIGS.md +++ b/.github/configs/CONFIGS.md @@ -12,15 +12,21 @@ entry-name: runner: string precision: string framework: string - seq-len-configs: - - isl: int - osl: int - search-space: - - { tp: int, conc-start: int, conc-end: int } - # Optionally, specify 'ep' (expert-parallelism) and 'dp-attn' (data parallel attention) - - { tp: int, ep: int, dp-attn: bool, conc-start: int, conc-end: int } + scenarios: + fixed-seq-len: + - isl: int + osl: int + search-space: + - { tp: int, conc-start: int, conc-end: int } + # Optionally, specify 'ep' (expert-parallelism) and 'dp-attn' (data parallel attention) + - { tp: int, ep: int, dp-attn: bool, conc-start: int, conc-end: int } + - ... - ... - - ... + agentic-coding: # optional + - trace-source: string + search-space: + - { tp: int, conc-start: int, conc-end: int } + - ... ``` Note: while not required, `entry-name` typically takes the format `<model>-<precision>-<runner>-<framework>`. @@ -32,16 +38,20 @@ The below list describes what each field is: - `runner`: This is the runner on which to run the benchmark. This must be a valid runner (key or value) from `runners.yaml`. - `precision`: The precision to run the benchmark. Again, this is used to find which script to run in `benchmarks/`. - `framework`: The framework (serving runtime) to serve the benchmark, e.g., `vllm`, `sglang`, `trt`. -- `seq-len-configs`: A list of possible sequence lengths to benchmark. Each entry must have the following fields: - - `isl`: An integer representing the input sequence length, e.g., `1024` - - `osl`: An integer representing the output sequence length, e.g., `8192` - - `search-space`: A list of configurations to run with respective `isl` and `osl`, each entry must be a dict with the following fields: - - `tp`: An integer representing the tensor parallelism level that the configuration will be served at. 
- - `conc-start`: An integer representing the starting level of concurrency e.g., `4` - - `conc-end`: An integer representing the ending level of concurrency (inclusive) e.g., `128` - - Note: the step factor between `conc-start` and `conc-end` is 2, so if `conc-start` is 4 and `conc-end` is 128, all concurrencies `4, 8, 16, 32, ..., 128` will be run. - - (Optional) `ep`: An integer representing the expert parallelism level that the configuration will be served at. Default is 1 (no expert parallelism) when not specified. - - (Optional) `dp-attn`: A boolean representing whether or not to activate data parallel attention for the configuration. Default is false when not specified. +- `scenarios`: A dictionary of benchmark scenario types. At least one must be specified. Currently supported: + - `fixed-seq-len`: Fixed input/output sequence length benchmarks. Each entry must have: + - `isl`: An integer representing the input sequence length, e.g., `1024` + - `osl`: An integer representing the output sequence length, e.g., `8192` + - `search-space`: A list of configurations to run with respective `isl` and `osl`, each entry must be a dict with the following fields: + - `tp`: An integer representing the tensor parallelism level that the configuration will be served at. + - `conc-start`: An integer representing the starting level of concurrency e.g., `4` + - `conc-end`: An integer representing the ending level of concurrency (inclusive) e.g., `128` + - Note: the step factor between `conc-start` and `conc-end` is 2, so if `conc-start` is 4 and `conc-end` is 128, all concurrencies `4, 8, 16, 32, ..., 128` will be run. + - (Optional) `ep`: An integer representing the expert parallelism level that the configuration will be served at. Default is 1 (no expert parallelism) when not specified. + - (Optional) `dp-attn`: A boolean representing whether or not to activate data parallel attention for the configuration. Default is false when not specified. 
+ - `agentic-coding`: Agentic trace replay benchmarks using real conversation traces. Each entry must have: + - `trace-source`: Identifier for the trace dataset to use. + - `search-space`: Same structure as `fixed-seq-len` search-space entries; entries may also use `conc-list` (an explicit list of concurrencies) in place of `conc-start`/`conc-end`. Notes: - No extra fields besides the ones listed may be specified, or else the benchmarks will fail to run. diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 9fad7d33b..ae5cd3427 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -6,16 +6,21 @@ dsr1-fp4-mi355x-sglang: precision: fp4 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, conc-start: 4, conc-end: 64 } - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + agentic-coding: + - duration: 1800 # NOTE(review): CONFIGS.md documents `trace-source` for agentic-coding entries, not `duration`/`offloading` — confirm the intended schema before merge + search-space: + - { tp: 8, offloading: none, conc-list: [1, 2, 4, 8, 12, 16, 32, 64, 128, 256] } dsr1-fp4-mi355x-atom: image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x @@ -25,17 +30,18 @@ dsr1-fp4-mi355x-atom: precision: fp4 framework: atom multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } - 
{ tp: 8, ep: 1, conc-start: 4, conc-end: 4 } dsr1-fp4-mi355x-atom-mtp: image: rocm/atom:rocm7.2.0-ubuntu24.04-pytorch2.9-atom0.1.1 @@ -46,17 +52,18 @@ dsr1-fp4-mi355x-atom-mtp: # WIP framework (no customers yet) framework: atom multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, conc-start: 4, conc-end: 256, spec-decoding: mtp } - - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - #- { tp: 4, conc-start: 32, conc-end: 256, spec-decoding: mtp } - - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, conc-start: 4, conc-end: 256, spec-decoding: mtp } + - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + #- { tp: 4, conc-start: 32, conc-end: 256, spec-decoding: mtp } + - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp } dsr1-fp8-mi300x-sglang: image: lmsysorg/sglang:v0.5.9-rocm700-mi30x @@ -66,15 +73,16 @@ dsr1-fp8-mi300x-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } dsr1-fp8-mi325x-sglang: image: lmsysorg/sglang:v0.5.9-rocm700-mi30x @@ -84,15 +92,16 @@ dsr1-fp8-mi325x-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - 
isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } dsr1-fp8-mi355x-sglang: image: lmsysorg/sglang:v0.5.9-rocm700-mi35x @@ -102,16 +111,17 @@ dsr1-fp8-mi355x-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 4, conc-start: 32, conc-end: 64 } - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 4, conc-start: 32, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-bf16-mi355x-sglang: image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415 @@ -121,15 +131,16 @@ qwen3.5-bf16-mi355x-sglang: precision: bf16 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } qwen3.5-bf16-mi355x-sglang-mtp: image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415 @@ -139,15 +150,16 @@ qwen3.5-bf16-mi355x-sglang-mtp: precision: bf16 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, 
conc-end: 256, spec-decoding: mtp } qwen3.5-bf16-mi300x-sglang: image: lmsysorg/sglang:v0.5.10-rocm720-mi30x @@ -157,15 +169,16 @@ qwen3.5-bf16-mi300x-sglang: precision: bf16 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-bf16-mi325x-sglang: image: lmsysorg/sglang:v0.5.10-rocm720-mi30x @@ -175,15 +188,16 @@ qwen3.5-bf16-mi325x-sglang: precision: bf16 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-mi325x-sglang: image: lmsysorg/sglang:v0.5.10-rocm720-mi30x @@ -193,15 +207,16 @@ qwen3.5-fp8-mi325x-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-mi355x-sglang: image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260414 @@ -211,18 +226,19 @@ qwen3.5-fp8-mi355x-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, 
conc-start: 4, conc-end: 32 } - - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 } - - { tp: 2, ep: 2, conc-start: 128, conc-end: 256 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 2, ep: 2, conc-start: 4, conc-end: 32 } - - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 } + - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 } + - { tp: 2, ep: 2, conc-start: 128, conc-end: 256 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 2, ep: 2, conc-start: 4, conc-end: 32 } + - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 } qwen3.5-fp8-mi355x-sglang-mtp: image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260414 @@ -232,18 +248,19 @@ qwen3.5-fp8-mi355x-sglang-mtp: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 32, spec-decoding: mtp } - - { tp: 8, ep: 8, conc-start: 64, conc-end: 256, spec-decoding: mtp } - - { tp: 2, ep: 2, conc-start: 128, conc-end: 256, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 2, ep: 2, conc-start: 4, conc-end: 32, spec-decoding: mtp } - - { tp: 4, ep: 1, conc-start: 32, conc-end: 256, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 32, spec-decoding: mtp } + - { tp: 8, ep: 8, conc-start: 64, conc-end: 256, spec-decoding: mtp } + - { tp: 2, ep: 2, conc-start: 128, conc-end: 256, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 2, ep: 2, conc-start: 4, conc-end: 32, spec-decoding: mtp } + - { tp: 4, ep: 1, conc-start: 32, conc-end: 256, spec-decoding: mtp } qwen3.5-fp8-mi355x-atom: image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post @@ -253,19 +270,20 @@ qwen3.5-fp8-mi355x-atom: precision: fp8 framework: atom multinode: false - seq-len-configs: 
- - isl: 1024 - osl: 1024 - search-space: - - { tp: 2, ep: 1, conc-start: 4, conc-end: 256 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 2, ep: 1, conc-start: 4, conc-end: 256 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 2, ep: 1, conc-start: 4, conc-end: 256 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 2, ep: 1, conc-start: 4, conc-end: 256 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } qwen3.5-fp8-mi355x-atom-mtp: image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post @@ -275,17 +293,18 @@ qwen3.5-fp8-mi355x-atom-mtp: precision: fp8 framework: atom multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } qwen3.5-fp4-mi355x-sglang: image: rocm/sgl-dev:v0.5.10rc0-rocm720-mi35x-20260413 @@ -295,17 +314,18 @@ qwen3.5-fp4-mi355x-sglang: precision: fp4 framework: sglang multinode: false - 
seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 2, conc-start: 4, conc-end: 256 } - - { tp: 4, conc-start: 4, conc-end: 16 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 2, conc-start: 4, conc-end: 256 } - - { tp: 4, conc-start: 4, conc-end: 16 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 256 } + - { tp: 4, conc-start: 4, conc-end: 16 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 256 } + - { tp: 4, conc-start: 4, conc-end: 16 } qwen3.5-fp8-mi300x-sglang: image: lmsysorg/sglang:v0.5.10-rocm720-mi30x @@ -315,15 +335,16 @@ qwen3.5-fp8-mi300x-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } glm5-fp8-mi355x-sglang: image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260413 @@ -333,15 +354,16 @@ glm5-fp8-mi355x-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } glm5-fp8-mi355x-sglang-mtp: image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260413 @@ -351,15 +373,16 @@ glm5-fp8-mi355x-sglang-mtp: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64, 
spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp } glm5-fp8-mi355x-atom: image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2.post @@ -369,15 +392,16 @@ glm5-fp8-mi355x-atom: precision: fp8 framework: atom multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 256 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 256 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 256 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 256 } glm5.1-fp4-mi355x-sglang: image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415 @@ -387,17 +411,18 @@ glm5.1-fp4-mi355x-sglang: precision: fp4 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 2, conc-start: 4, conc-end: 256 } - - { tp: 4, conc-start: 4, conc-end: 16 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 2, conc-start: 4, conc-end: 256 } - - { tp: 4, conc-start: 4, conc-end: 16 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 256 } + - { tp: 4, conc-start: 4, conc-end: 16 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 256 } + - { tp: 4, conc-start: 4, conc-end: 16 } glm5.1-fp4-mi355x-atom: image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post @@ -407,15 +432,16 @@ glm5.1-fp4-mi355x-atom: precision: fp4 framework: atom multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, conc-start: 4, conc-end: 256 } - 
- isl: 8192 - osl: 1024 - search-space: - - { tp: 4, conc-start: 4, conc-end: 256 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, conc-start: 4, conc-end: 256 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 4, conc-start: 4, conc-end: 256 } kimik2.5-int4-mi355x-vllm: image: vllm/vllm-openai-rocm:v0.18.0 @@ -425,15 +451,16 @@ kimik2.5-int4-mi355x-vllm: precision: int4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } kimik2.5-int4-mi325x-vllm: image: vllm/vllm-openai-rocm:v0.18.0 @@ -443,15 +470,16 @@ kimik2.5-int4-mi325x-vllm: precision: int4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } kimik2.5-int4-mi300x-vllm: image: vllm/vllm-openai-rocm:v0.18.0 @@ -461,15 +489,16 @@ kimik2.5-int4-mi300x-vllm: precision: int4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } kimik2.5-fp4-mi355x-vllm: image: 
vllm/vllm-openai-rocm:v0.18.0 @@ -479,17 +508,18 @@ kimik2.5-fp4-mi355x-vllm: precision: fp4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 64 } kimik2.5-fp4-mi355x-atom: image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2 @@ -499,17 +529,18 @@ kimik2.5-fp4-mi355x-atom: precision: fp4 framework: atom multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 128 } - - { tp: 4, conc-start: 4, conc-end: 128 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 128 } - - { tp: 4, conc-start: 4, conc-end: 128 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 128 } + - { tp: 4, conc-start: 4, conc-end: 128 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 128 } + - { tp: 4, conc-start: 4, conc-end: 128 } minimaxm2.5-fp8-mi355x-vllm: image: vllm/vllm-openai-rocm:v0.19.0 @@ -519,19 +550,20 @@ minimaxm2.5-fp8-mi355x-vllm: precision: fp8 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 2, ep: 2, conc-start: 2, conc-end: 512 } - - { tp: 4, ep: 4, conc-start: 4, conc-end: 256 } - - { tp: 8, ep: 8, conc-start: 2, conc-end: 2 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 2, ep: 2, conc-start: 2, conc-end: 256 } - - { tp: 4, ep: 4, conc-start: 4, conc-end: 512 } - - { tp: 8, ep: 8, conc-start: 2, 
conc-end: 2 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 2, ep: 2, conc-start: 2, conc-end: 512 } + - { tp: 4, ep: 4, conc-start: 4, conc-end: 256 } + - { tp: 8, ep: 8, conc-start: 2, conc-end: 2 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 2, ep: 2, conc-start: 2, conc-end: 256 } + - { tp: 4, ep: 4, conc-start: 4, conc-end: 512 } + - { tp: 8, ep: 8, conc-start: 2, conc-end: 2 } minimaxm2.5-fp8-mi355x-atom: image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2 @@ -541,19 +573,20 @@ minimaxm2.5-fp8-mi355x-atom: precision: fp8 framework: atom multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 2, conc-start: 4, conc-end: 128 } - - { tp: 4, conc-start: 4, conc-end: 128 } - - { tp: 8, ep: 8, conc-start: 32, conc-end: 256 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 2, conc-start: 4, conc-end: 128 } - - { tp: 4, conc-start: 4, conc-end: 128 } - - { tp: 8, ep: 8, conc-start: 32, conc-end: 256 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 128 } + - { tp: 4, conc-start: 4, conc-end: 128 } + - { tp: 8, ep: 8, conc-start: 32, conc-end: 256 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 128 } + - { tp: 4, conc-start: 4, conc-end: 128 } + - { tp: 8, ep: 8, conc-start: 32, conc-end: 256 } minimaxm2.5-fp4-mi355x-vllm: image: vllm/vllm-openai-rocm:v0.19.1 @@ -563,19 +596,20 @@ minimaxm2.5-fp4-mi355x-vllm: precision: fp4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 32 } - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 32 } - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - 
{ tp: 1, conc-start: 4, conc-end: 32 } + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 32 } + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 64 } minimaxm2.5-fp8-mi300x-vllm: image: vllm/vllm-openai-rocm:v0.16.0 @@ -585,17 +619,18 @@ minimaxm2.5-fp8-mi300x-vllm: precision: fp8 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 64 } minimaxm2.5-fp8-mi325x-vllm: image: vllm/vllm-openai-rocm:v0.18.0 @@ -605,66 +640,67 @@ minimaxm2.5-fp8-mi325x-vllm: precision: fp8 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 8, ep: 8, conc-start: 4, conc-end: 512 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 512 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 } gptoss-fp4-mi300x-vllm: - image: vllm/vllm-openai-rocm:v0.17.0 + image: vllm/vllm-openai-rocm:v0.19.1 model: openai/gpt-oss-120b model-prefix: gptoss runner: mi300x precision: fp4 framework: vllm multinode: false - 
seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 1, conc-start: 64, conc-end: 256 } - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } - - { tp: 8, conc-start: 1, conc-end: 16 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 64 } - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } - - { tp: 8, conc-start: 1, conc-end: 16 } - + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 1, conc-start: 64, conc-end: 256 } + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 1, conc-end: 16 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 64 } + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 1, conc-end: 16 } gptoss-fp4-mi325x-vllm: - image: vllm/vllm-openai-rocm:v0.17.0 + image: vllm/vllm-openai-rocm:v0.19.1 model: openai/gpt-oss-120b model-prefix: gptoss runner: mi325x precision: fp4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 64 } - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 64 } - - { tp: 2, conc-start: 4, conc-end: 8 } - - { tp: 4, conc-start: 4, conc-end: 8 } - - { tp: 8, conc-start: 4, conc-end: 16 } - + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 64 } + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 64 } + - { tp: 2, conc-start: 4, conc-end: 8 } + - { tp: 4, conc-start: 4, conc-end: 8 } + - { tp: 8, 
conc-start: 4, conc-end: 16 } gptoss-fp4-mi355x-vllm: image: vllm/vllm-openai-rocm:v0.17.0 model: amd/gpt-oss-120b-w-mxfp4-a-fp8 @@ -673,19 +709,20 @@ gptoss-fp4-mi355x-vllm: precision: fp4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 128 } - - { tp: 4, conc-start: 4, conc-end: 8 } - - { tp: 8, conc-start: 4, conc-end: 16 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 128 } - - { tp: 4, conc-start: 4, conc-end: 4 } - - { tp: 8, conc-start: 4, conc-end: 8 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 128 } + - { tp: 4, conc-start: 4, conc-end: 8 } + - { tp: 8, conc-start: 4, conc-end: 16 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 128 } + - { tp: 4, conc-start: 4, conc-end: 4 } + - { tp: 8, conc-start: 4, conc-end: 8 } gptoss-fp4-mi355x-atom: image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x @@ -695,17 +732,18 @@ gptoss-fp4-mi355x-atom: precision: fp4 framework: atom multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 1, conc-start: 16, conc-end: 128 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 128 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 1, conc-start: 16, conc-end: 128 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 128 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 } dsr1-fp8-mi355x-atom: image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x @@ -716,15 +754,16 @@ dsr1-fp8-mi355x-atom: # WIP framework (no customers yet) framework: atom multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - 
{ tp: 8, conc-start: 4, conc-end: 128 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 128 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 128 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 128 } dsr1-fp8-mi355x-atom-mtp: image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2 @@ -734,15 +773,16 @@ dsr1-fp8-mi355x-atom-mtp: precision: fp8 framework: atom multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp } dsr1-fp8-mi355x-sglang-disagg: image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2 @@ -753,150 +793,151 @@ dsr1-fp8-mi355x-sglang-disagg: framework: sglang-disagg multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # non-MTP configurations - # "Top of curve" (1 prefill workers each at DEP8 and 1 decode workers at DEP16) - - spec-decoding: "none" - conc-list: [ 1024, 2048 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=0" - - # "Middle of curve" (1 prefill workers each at TP8 and 2 decode workers at DEP8) - - spec-decoding: "none" - conc-list: [ 1536, 1024, 512 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "DECODE_NODES=2" - - 
"DECODE_MTP_SIZE=0" - - - # "Bottom of curve" (1 prefill worker at TEP8 and 2 decode workers at TEP8) - - spec-decoding: "none" - conc-list: [ 256, 128, 64, 32, 16, 8, 4 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=0" - - - spec-decoding: "none" - conc-list: [ 64, 32, 16, 8, 4, 2, 1 ] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=0" - - - isl: 8192 - osl: 1024 - search-space: - # non-MTP configurations - # "Top of curve" (2 prefill worker at DEP8 and 1 decode worker at DEP8) - - spec-decoding: "none" - conc-list: [ 1024, 2048 ] - prefill: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "PREFILL_NODES=2" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=0" - - # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8) - - spec-decoding: "none" - conc-list: [ 256, 128, 64, 32, 16, 8, 4 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=0" - - - spec-decoding: "none" - conc-list: [ 64, 32, 16, 8, 4, 2, 1 ] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=0" + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # non-MTP configurations + # "Top of curve" (1 prefill workers each at DEP8 and 1 decode workers at 
DEP16) + - spec-decoding: "none" + conc-list: [ 1024, 2048 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=0" + + # "Middle of curve" (1 prefill workers each at TP8 and 2 decode workers at DEP8) + - spec-decoding: "none" + conc-list: [ 1536, 1024, 512 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=0" + + + # "Bottom of curve" (1 prefill worker at TEP8 and 2 decode workers at TEP8) + - spec-decoding: "none" + conc-list: [ 256, 128, 64, 32, 16, 8, 4 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=0" + + - spec-decoding: "none" + conc-list: [ 64, 32, 16, 8, 4, 2, 1 ] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=0" + + - isl: 8192 + osl: 1024 + search-space: + # non-MTP configurations + # "Top of curve" (2 prefill worker at DEP8 and 1 decode worker at DEP8) + - spec-decoding: "none" + conc-list: [ 1024, 2048 ] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "PREFILL_NODES=2" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=0" + + # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8) + - spec-decoding: "none" + conc-list: [ 256, 128, 64, 32, 16, 8, 4 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + 
additional-settings: + - "PREFILL_NODES=1" + + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=0" + + - spec-decoding: "none" + conc-list: [ 64, 32, 16, 8, 4, 2, 1 ] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=0" dsr1-fp8-mi355x-sglang-disagg-mtp: @@ -908,150 +949,151 @@ dsr1-fp8-mi355x-sglang-disagg-mtp: framework: sglang-disagg multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # MTP configurations - # "Top of curve" (1 prefill worker at DEP8 and 1 decode worker at DEP16) - - spec-decoding: "mtp" - conc-list: [ 1024, 2048 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=1" - - # "Middle of curve" (1 prefill worker at TP8 and 2 decode workers each at DEP8) - - spec-decoding: "mtp" - conc-list: [ 1536, 1024, 512, 256 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=1" - - - # "Bottom of curve" (1 prefill worker at TEP8 and 2 decode workers at TEP8) - - spec-decoding: "mtp" - conc-list: [ 256, 128, 64, 32, 16, 8, 4 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=2" - - - spec-decoding: "mtp" - conc-list: [ 64, 32, 16, 8, 4, 2, 1 ] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" 
- - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=2" - - - isl: 8192 - osl: 1024 - search-space: - # MTP configurations - # "Top of curve" (2 prefill worker at DEP8 and 1 decode worker at DEP8) - - spec-decoding: "mtp" - conc-list: [ 1024, 2048 ] - prefill: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "PREFILL_NODES=2" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=1" - - # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8) - - spec-decoding: "mtp" - conc-list: [ 256, 128, 64, 32, 16, 8, 4, 2 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=2" - - - spec-decoding: "mtp" - conc-list: [ 64, 32, 16, 8, 4, 2, 1 ] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=2" + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # MTP configurations + # "Top of curve" (1 prefill worker at DEP8 and 1 decode worker at DEP16) + - spec-decoding: "mtp" + conc-list: [ 1024, 2048 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=1" + + # "Middle of curve" (1 prefill worker at TP8 and 2 decode workers each at DEP8) + - spec-decoding: "mtp" + conc-list: [ 1536, 1024, 512, 256 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: 
true + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=1" + + + # "Bottom of curve" (1 prefill worker at TEP8 and 2 decode workers at TEP8) + - spec-decoding: "mtp" + conc-list: [ 256, 128, 64, 32, 16, 8, 4 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=2" + + - spec-decoding: "mtp" + conc-list: [ 64, 32, 16, 8, 4, 2, 1 ] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=2" + + - isl: 8192 + osl: 1024 + search-space: + # MTP configurations + # "Top of curve" (2 prefill worker at DEP8 and 1 decode worker at DEP8) + - spec-decoding: "mtp" + conc-list: [ 1024, 2048 ] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "PREFILL_NODES=2" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=1" + + # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8) + - spec-decoding: "mtp" + conc-list: [ 256, 128, 64, 32, 16, 8, 4, 2 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=2" + + - spec-decoding: "mtp" + conc-list: [ 64, 32, 16, 8, 4, 2, 1 ] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=2" dsr1-fp4-mi355x-sglang-disagg: @@ -1063,204 +1105,205 @@ dsr1-fp4-mi355x-sglang-disagg: framework: sglang-disagg multinode: true 
disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # non-MTP configurations - # 1P1D TP8 - - spec-decoding: "none" - conc-list: [ 1, 2, 4, 8 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=0" - - # 1P2D TP8 - - spec-decoding: "none" - conc-list: [ 2, 4, 8, 16, 32 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=0" - - # 1P2D TP8 - - spec-decoding: "none" - conc-list: [ 64, 128, 256 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=0" - - # 1P2D TP4 - - spec-decoding: "none" - conc-list: [ 64, 128, 256 ] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=0" + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # non-MTP configurations + # 1P1D TP8 + - spec-decoding: "none" + conc-list: [ 1, 2, 4, 8 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=0" + + # 1P2D TP8 + - spec-decoding: "none" + conc-list: [ 2, 4, 8, 16, 32 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - 
"DECODE_MTP_SIZE=0" + + # 1P2D TP8 + - spec-decoding: "none" + conc-list: [ 64, 128, 256 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=0" + + # 1P2D TP4 + - spec-decoding: "none" + conc-list: [ 64, 128, 256 ] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=0" - # 1*DEP4+ 1*DEP8 - - spec-decoding: "none" - conc-list: [ 1024, 2048 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=0" - - - isl: 8192 - osl: 1024 - search-space: - # non-MTP configurations - # 1P1D pure TP8 - - spec-decoding: "none" - conc-list: [ 1, 2, 4, 8 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=0" - - # 1P2D TP8 - - spec-decoding: "none" - conc-list: [ 2, 4, 8, 16, 32 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=0" - - # 1P2D TP8 - - spec-decoding: "none" - conc-list: [ 64, 128, 256 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=0" - - # 1P2D TP4 - - spec-decoding: "none" - conc-list: [ 64, 128, 256 ] - prefill: - 
num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=0" - - # 4*DEP4 + 1*DEP8 - - spec-decoding: "none" - conc-list: [ 1024, 2048, 4096 ] - prefill: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "PREFILL_NODES=4" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=0" + # 1*DEP4+ 1*DEP8 + - spec-decoding: "none" + conc-list: [ 1024, 2048 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=0" + + - isl: 8192 + osl: 1024 + search-space: + # non-MTP configurations + # 1P1D pure TP8 + - spec-decoding: "none" + conc-list: [ 1, 2, 4, 8 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=0" + + # 1P2D TP8 + - spec-decoding: "none" + conc-list: [ 2, 4, 8, 16, 32 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=0" + + # 1P2D TP8 + - spec-decoding: "none" + conc-list: [ 64, 128, 256 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=0" + + # 1P2D TP4 + - spec-decoding: "none" + conc-list: [ 64, 128, 256 ] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + 
decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=0" + + # 4*DEP4 + 1*DEP8 + - spec-decoding: "none" + conc-list: [ 1024, 2048, 4096 ] + prefill: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "PREFILL_NODES=4" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=0" dsr1-fp4-mi355x-sglang-disagg-mtp: image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-3 @@ -1271,206 +1314,207 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: framework: sglang-disagg multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # MTP configurations - # 1P1D TP8 - - spec-decoding: "mtp" - conc-list: [ 1, 2, 4, 8 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=3" - - # 1P2D TP8 - - spec-decoding: "mtp" - conc-list: [ 2, 4, 8, 16, 32 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=3" - - # 1P2D TP8 - - spec-decoding: "mtp" - conc-list: [ 64, 128, 256 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=1" - - # 1P2D TP4 - - spec-decoding: "mtp" - conc-list: [ 64, 128, 256 ] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=1" - - # 1*DEP4+ 1*DEP8 - - spec-decoding: "mtp" - conc-list: [ 
1024, 2048 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=1" - - - - isl: 8192 - osl: 1024 - search-space: - # MTP configurations - # 1P1D pure TP8 - - spec-decoding: "mtp" - conc-list: [ 1, 2, 4, 8 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=3" - - - # 1P2D TP8 - - spec-decoding: "mtp" - conc-list: [ 2, 4, 8, 16, 32 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=3" - - # 1P2D TP8 - - spec-decoding: "mtp" - conc-list: [ 64, 128, 256 ] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=1" - - # 1P2D TP4 - - spec-decoding: "mtp" - conc-list: [ 64, 128, 256 ] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=1" - - # 4*DEP4 + 1*DEP8 - - spec-decoding: "mtp" - conc-list: [ 1024, 2048, 4096 ] - prefill: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "PREFILL_NODES=4" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=1" + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # MTP configurations + # 1P1D TP8 + - spec-decoding: "mtp" + conc-list: [ 1, 2, 4, 8 ] + 
prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=3" + + # 1P2D TP8 + - spec-decoding: "mtp" + conc-list: [ 2, 4, 8, 16, 32 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=3" + + # 1P2D TP8 + - spec-decoding: "mtp" + conc-list: [ 64, 128, 256 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=1" + + # 1P2D TP4 + - spec-decoding: "mtp" + conc-list: [ 64, 128, 256 ] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=1" + + # 1*DEP4+ 1*DEP8 + - spec-decoding: "mtp" + conc-list: [ 1024, 2048 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=1" + + + - isl: 8192 + osl: 1024 + search-space: + # MTP configurations + # 1P1D pure TP8 + - spec-decoding: "mtp" + conc-list: [ 1, 2, 4, 8 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=3" + + + # 1P2D TP8 + - spec-decoding: "mtp" + conc-list: [ 2, 4, 8, 16, 32 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + 
decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=3" + + # 1P2D TP8 + - spec-decoding: "mtp" + conc-list: [ 64, 128, 256 ] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=1" + + # 1P2D TP4 + - spec-decoding: "mtp" + conc-list: [ 64, 128, 256 ] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MTP_SIZE=1" + + # 4*DEP4 + 1*DEP8 + - spec-decoding: "mtp" + conc-list: [ 1024, 2048, 4096 ] + prefill: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "PREFILL_NODES=4" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=1" dsv4-fp8-mi355x-sglang: image: rocm/sgl-dev:deepseek-v4-mi35x @@ -1480,15 +1524,16 @@ dsv4-fp8-mi355x-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } # vLLM with AITER MLA decode for DSv4 on MI355X (vllm-project/vllm#40889, # stacked on #40871). 
Uses the ATOM MI355X image (ROCm 7.2.2, aiter with @@ -1504,23 +1549,24 @@ dsv4-fp8-mi355x-vllm: precision: fp8 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 1, conc-end: 1 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 1, conc-end: 1 } - -# Day-0 single-sequence marker for DeepSeek-V4 on ATOM (ROCm/ATOM#650). -# PR1 of the ATOM DSv4 series — single-sequence only (kv_cache[:1,...] -# hardcode), --enforce-eager required, ATOM_USE_TRITON_MOE=1 required on -# gfx950. Image is the standard atom0.1.2.post MI355X base (matching -# qwen3.5-fp8-mi355x-atom); the DSv4 PR is overlaid at runtime by -# benchmarks/single_node/dsv4_fp4_mi355x_atom.sh at a pinned SHA. Sweep -# will expand once ATOM PR3 (multi-request) and PR4 (CUDAGraph) land. + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 1, conc-end: 1 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 1, conc-end: 1 } + + # Day-0 single-sequence marker for DeepSeek-V4 on ATOM (ROCm/ATOM#650). + # PR1 of the ATOM DSv4 series — single-sequence only (kv_cache[:1,...] + # hardcode), --enforce-eager required, ATOM_USE_TRITON_MOE=1 required on + # gfx950. Image is the standard atom0.1.2.post MI355X base (matching + # qwen3.5-fp8-mi355x-atom); the DSv4 PR is overlaid at runtime by + # benchmarks/single_node/dsv4_fp4_mi355x_atom.sh at a pinned SHA. Sweep + # will expand once ATOM PR3 (multi-request) and PR4 (CUDAGraph) land. 
dsv4-fp4-mi355x-atom: image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post model: deepseek-ai/DeepSeek-V4-Pro @@ -1529,18 +1575,19 @@ dsv4-fp4-mi355x-atom: precision: fp4 framework: atom multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 1, conc-end: 1 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } - - { tp: 8, ep: 1, conc-start: 16, conc-end: 16 } - - { tp: 8, ep: 1, conc-start: 32, conc-end: 32 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 1, conc-end: 1 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } - - { tp: 8, ep: 1, conc-start: 16, conc-end: 16 } - - { tp: 8, ep: 1, conc-start: 32, conc-end: 32 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 1, conc-end: 1 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } + - { tp: 8, ep: 1, conc-start: 16, conc-end: 16 } + - { tp: 8, ep: 1, conc-start: 32, conc-end: 32 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 1, conc-end: 1 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } + - { tp: 8, ep: 1, conc-start: 16, conc-end: 16 } + - { tp: 8, ep: 1, conc-start: 32, conc-end: 32 } diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 9e4177ee8..de58728da 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -7,381 +7,401 @@ dsr1-fp4-b200-dynamo-trt: framework: dynamo-trt multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - spec-decoding: "mtp" - conc-list: [1214] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen2_dep8_batch64_eplb0_mtp2.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen2_dep8_batch64_eplb0_mtp2.yaml" - decode: - num-worker: 2 - tp: 
8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [875] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp3.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [6] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [10, 15, 25, 45, 90, 180] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [ 4968 ] - prefill: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen4_dep8_batch128_eplb0_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen4_dep8_batch128_eplb0_mtp1.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [10860] - prefill: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch512_eplb0_mtp1.yaml - - 
"CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch512_eplb0_mtp1.yaml" - decode: - num-worker: 5 - tp: 4 - ep: 4 - dp-attn: true - - # Non-MTP configurations - - conc-list: [4096] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [2192] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen2_dep8_batch128_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen2_dep8_batch128_eplb0_mtp0.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [1365] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_dep8_batch32_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_dep8_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [6] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [10, 15, 25, 45, 90, 180] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch32_eplb0_mtp0.yaml 
- - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [450] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen6_tep8_batch64_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen6_tep8_batch64_eplb0_mtp0.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 8 - dp-attn: false - - - isl: 8192 - osl: 1024 - search-space: - - spec-decoding: "mtp" - conc-list: [90] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen1_dep8_batch8_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen1_dep8_batch8_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [66] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen3_tep8_batch16_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen3_tep8_batch16_eplb0_mtp3.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [6] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [10, 15, 30, 60] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch8_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch8_eplb0_mtp3.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [548] - prefill: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [1096, 1691] - prefill: - num-worker: 5 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen1_dep8_batch192_eplb0_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen1_dep8_batch192_eplb0_mtp1.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [658] - prefill: - num-worker: 5 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen2_dep8_batch32_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen2_dep8_batch32_eplb0_mtp3.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - # Non-MTP configurations - - conc-list: [6] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [10, 15, 25, 50, 100] - prefill: - 
num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch8_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch8_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [370] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx2_gen5_tep8_batch64_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx2_gen5_tep8_batch64_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [1606] - prefill: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen1_dep8_batch192_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen1_dep8_batch192_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [837] - prefill: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen3_dep8_batch32_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen3_dep8_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [2222] - prefill: - num-worker: 7 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx7_gen2_dep8_batch128_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx7_gen2_dep8_batch128_eplb0_mtp0.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - 
spec-decoding: "mtp" + conc-list: [1214] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen2_dep8_batch64_eplb0_mtp2.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen2_dep8_batch64_eplb0_mtp2.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [875] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp3.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [6] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [10, 15, 25, 45, 90, 180] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [ 4968 ] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen4_dep8_batch128_eplb0_mtp1.yaml + - 
"CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen4_dep8_batch128_eplb0_mtp1.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [10860] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch512_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch512_eplb0_mtp1.yaml" + decode: + num-worker: 5 + tp: 4 + ep: 4 + dp-attn: true + + # Non-MTP configurations + - conc-list: [4096] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [2192] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen2_dep8_batch128_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen2_dep8_batch128_eplb0_mtp0.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [1365] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_dep8_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_dep8_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [6] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [10, 15, 25, 45, 90, 180] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen5_tep8_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [450] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen6_tep8_batch64_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/1k1k/stp/ctx1_gen6_tep8_batch64_eplb0_mtp0.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 8 + dp-attn: false + + - isl: 8192 + osl: 1024 + search-space: + - spec-decoding: "mtp" + conc-list: [90] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen1_dep8_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen1_dep8_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [66] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen3_tep8_batch16_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen3_tep8_batch16_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [6] + 
prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [10, 15, 30, 60] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx1_gen5_tep8_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [548] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1096, 1691] + prefill: + num-worker: 5 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen1_dep8_batch192_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen1_dep8_batch192_eplb0_mtp1.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [658] + prefill: + num-worker: 5 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen2_dep8_batch32_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/mtp/ctx5_gen2_dep8_batch32_eplb0_mtp3.yaml" + decode: + 
num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + + # Non-MTP configurations + - conc-list: [6] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [10, 15, 25, 50, 100] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch8_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx1_gen5_tep8_batch8_eplb0_mtp0.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [370] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx2_gen5_tep8_batch64_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx2_gen5_tep8_batch64_eplb0_mtp0.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [1606] + prefill: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen1_dep8_batch192_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen1_dep8_batch192_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [837] + prefill: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen3_dep8_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx4_gen3_dep8_batch32_eplb0_mtp0.yaml" + decode: + 
num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [2222] + prefill: + num-worker: 7 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp4/8k1k/stp/ctx7_gen2_dep8_batch128_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/8k1k/stp/ctx7_gen2_dep8_batch128_eplb0_mtp0.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + + agentic-coding: + - duration: 300 + search-space: + - spec-decoding: "none" + conc-list: [ 1, 2, 4, 8, 16, 32 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/cquil11/srt-slurm-nv/blob/cam/sa-submission-q2-2026/recipes/trtllm/b200-fp4/agentic/ctx1_gen1_tep8_128k_agentic.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp4/agentic/ctx1_gen1_tep8_128k_agentic.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false dsr1-fp8-b200-dynamo-trt: image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 @@ -392,446 +412,446 @@ dsr1-fp8-b200-dynamo-trt: framework: dynamo-trt multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # MTP configurations - Low latency (TP attention) - - spec-decoding: "mtp" - conc-list: [8] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_8.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_8.yaml" - decode: - num-worker: 8 - tp: 8 - ep: 1 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [32] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch4_eplb0_mtp3_32.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch4_eplb0_mtp3_32.yaml" - 
decode: - num-worker: 8 - tp: 8 - ep: 1 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [64] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch8_eplb0_mtp3_64.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch8_eplb0_mtp3_64.yaml" - decode: - num-worker: 8 - tp: 8 - ep: 1 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [256] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch32_eplb0_mtp3_256.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch32_eplb0_mtp3_256.yaml" - decode: - num-worker: 8 - tp: 8 - ep: 1 - dp-attn: false - # MTP configurations - High throughput (DP attention) - - spec-decoding: "mtp" - conc-list: [896] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen7_dep8_batch128_eplb0_mtp3_896.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen7_dep8_batch128_eplb0_mtp3_896.yaml" - decode: - num-worker: 7 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [1024] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen4_dep8_batch256_eplb0_mtp3_1024.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen4_dep8_batch256_eplb0_mtp3_1024.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [1184] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen3_dep8_batch384_eplb0_mtp3_1184.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen3_dep8_batch384_eplb0_mtp3_1184.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [1600] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen2_dep8_batch768_eplb0_mtp2_1600.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen2_dep8_batch768_eplb0_mtp2_1600.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - # Non-MTP (STP) configurations - Low latency (TP attention) - - conc-list: [4] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_4.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_4.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 1 - dp-attn: false - - conc-list: [32] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_32.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_32.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 1 - dp-attn: false - - conc-list: [128] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_128.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_128.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 1 - dp-attn: false - # Non-MTP (STP) 
configurations - High throughput (DP attention) - - conc-list: [1920] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen5_dep8_batch48_eplb0_mtp0_1920.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen5_dep8_batch48_eplb0_mtp0_1920.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [4096] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4096.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4096.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [5152] - prefill: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx2_gen5_dep8_batch128_eplb0_mtp0_5152.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx2_gen5_dep8_batch128_eplb0_mtp0_5152.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: true - - - isl: 8192 - osl: 1024 - search-space: - # MTP configurations - Low latency (TP attention) - - spec-decoding: "mtp" - conc-list: [8] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_8.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_8.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 1 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [8] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen2_tp8_batch32_eplb0_mtp3_8.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen2_tp8_batch32_eplb0_mtp3_8.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [48] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_48.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_48.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 1 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [64] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen4_tp8_batch16_eplb0_mtp3_64.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen4_tp8_batch16_eplb0_mtp3_64.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 1 - dp-attn: false - # MTP configurations - High throughput (DP attention) - - spec-decoding: "mtp" - conc-list: [224] - prefill: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen3_dep8_batch8_eplb0_mtp3_224.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen3_dep8_batch8_eplb0_mtp3_224.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [288] - prefill: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen1_dep8_batch32_eplb0_mtp3_288.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen1_dep8_batch32_eplb0_mtp3_288.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - 
dp-attn: true - - spec-decoding: "mtp" - conc-list: [1088] - prefill: - num-worker: 4 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx4_gen1_dep8_batch128_eplb0_mtp2_1088.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx4_gen1_dep8_batch128_eplb0_mtp2_1088.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - # Non-MTP (STP) configurations - Low latency (TP attention) - - conc-list: [1] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_tp8_batch1_eplb0_mtp0_1.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_tp8_batch1_eplb0_mtp0_1.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - - conc-list: [32] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_32.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_32.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 1 - dp-attn: false - - conc-list: [128] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_128.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_128.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 1 - dp-attn: false - - conc-list: [96] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen6_tp8_batch16_eplb0_mtp0_96.yaml - - 
"CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen6_tp8_batch16_eplb0_mtp0_96.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 1 - dp-attn: false - # Non-MTP (STP) configurations - High throughput (DP attention) - - conc-list: [128] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch128_eplb0_mtp0_128.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch128_eplb0_mtp0_128.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [128] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0_128.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0_128.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [256] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_256.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_256.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [640] - prefill: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx2_gen1_dep8_batch640_eplb0_mtp0_640.yaml - - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx2_gen1_dep8_batch640_eplb0_mtp0_640.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # MTP configurations - Low latency (TP attention) + - spec-decoding: "mtp" + conc-list: [8] + prefill: + 
num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_8.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_8.yaml" + decode: + num-worker: 8 + tp: 8 + ep: 1 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [32] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch4_eplb0_mtp3_32.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch4_eplb0_mtp3_32.yaml" + decode: + num-worker: 8 + tp: 8 + ep: 1 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [64] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch8_eplb0_mtp3_64.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch8_eplb0_mtp3_64.yaml" + decode: + num-worker: 8 + tp: 8 + ep: 1 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [256] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch32_eplb0_mtp3_256.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen8_tp8_batch32_eplb0_mtp3_256.yaml" + decode: + num-worker: 8 + tp: 8 + ep: 1 + dp-attn: false + # MTP configurations - High throughput (DP attention) + - spec-decoding: "mtp" + conc-list: [896] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen7_dep8_batch128_eplb0_mtp3_896.yaml + - 
"CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen7_dep8_batch128_eplb0_mtp3_896.yaml" + decode: + num-worker: 7 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1024] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen4_dep8_batch256_eplb0_mtp3_1024.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen4_dep8_batch256_eplb0_mtp3_1024.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1184] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen3_dep8_batch384_eplb0_mtp3_1184.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen3_dep8_batch384_eplb0_mtp3_1184.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1600] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen2_dep8_batch768_eplb0_mtp2_1600.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/mtp/ctx1_gen2_dep8_batch768_eplb0_mtp2_1600.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + + # Non-MTP (STP) configurations - Low latency (TP attention) + - conc-list: [4] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_4.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_4.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: [32] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_32.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_32.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: [128] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_128.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen3_tp8_batch1024_eplb0_mtp0_128.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 1 + dp-attn: false + # Non-MTP (STP) configurations - High throughput (DP attention) + - conc-list: [1920] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen5_dep8_batch48_eplb0_mtp0_1920.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen5_dep8_batch48_eplb0_mtp0_1920.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [4096] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4096.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4096.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [5152] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/1k1k/stp/ctx2_gen5_dep8_batch128_eplb0_mtp0_5152.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/1k1k/stp/ctx2_gen5_dep8_batch128_eplb0_mtp0_5152.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + + - isl: 8192 + osl: 1024 + 
search-space: + # MTP configurations - Low latency (TP attention) + - spec-decoding: "mtp" + conc-list: [8] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_8.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_8.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 1 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [8] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen2_tp8_batch32_eplb0_mtp3_8.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen2_tp8_batch32_eplb0_mtp3_8.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [48] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_48.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen6_tp8_batch8_eplb0_mtp3_48.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 1 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [64] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen4_tp8_batch16_eplb0_mtp3_64.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx1_gen4_tp8_batch16_eplb0_mtp3_64.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 1 + dp-attn: false + # MTP configurations - High throughput (DP attention) + - spec-decoding: "mtp" + conc-list: [224] + prefill: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen3_dep8_batch8_eplb0_mtp3_224.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen3_dep8_batch8_eplb0_mtp3_224.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [288] + prefill: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen1_dep8_batch32_eplb0_mtp3_288.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx2_gen1_dep8_batch32_eplb0_mtp3_288.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1088] + prefill: + num-worker: 4 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/mtp/ctx4_gen1_dep8_batch128_eplb0_mtp2_1088.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/mtp/ctx4_gen1_dep8_batch128_eplb0_mtp2_1088.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + + # Non-MTP (STP) configurations - Low latency (TP attention) + - conc-list: [1] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_tp8_batch1_eplb0_mtp0_1.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_tp8_batch1_eplb0_mtp0_1.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: [32] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_32.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_32.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: 
[128] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_128.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen4_tp8_batch32_eplb0_mtp0_128.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: [96] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen6_tp8_batch16_eplb0_mtp0_96.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen6_tp8_batch16_eplb0_mtp0_96.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 1 + dp-attn: false + # Non-MTP (STP) configurations - High throughput (DP attention) + - conc-list: [128] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch128_eplb0_mtp0_128.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch128_eplb0_mtp0_128.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [128] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0_128.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0_128.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [256] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_256.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_256.yaml" + decode: + 
num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [640] + prefill: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b200-fp8/8k1k/stp/ctx2_gen1_dep8_batch640_eplb0_mtp0_640.yaml + - "CONFIG_FILE=recipes/trtllm/b200-fp8/8k1k/stp/ctx2_gen1_dep8_batch640_eplb0_mtp0_640.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true dsr1-fp4-b300-dynamo-trt: image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1 @@ -842,410 +862,410 @@ dsr1-fp4-b300-dynamo-trt: framework: dynamo-trt multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - spec-decoding: "mtp" - conc-list: [654] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen1_dep8_batch64_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen1_dep8_batch64_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [271] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen2_dep8_batch16_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen2_dep8_batch16_eplb0_mtp3.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [11] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [10, 20, 25, 60, 120, 200] 
- prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [2342] - prefill: - num-worker: 2 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx2_gen1_dep8_batch256_eplb0_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx2_gen1_dep8_batch256_eplb0_mtp1.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [8609] - prefill: - num-worker: 5 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch512_eplb0_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch512_eplb0_mtp1.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [12926] - prefill: - num-worker: 5 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch768_eplb0_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch768_eplb0_mtp1.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - # Non-MTP configurations - - conc-list: [1176] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0.yaml" - decode: - 
num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [6] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [5, 10, 15, 25] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 4 - ep: 4 - dp-attn: false - - conc-list: [60, 110, 195, 395] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep8_batch64_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep8_batch64_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [4405] - prefill: - num-worker: 2 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx2_gen1_dep8_batch512_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx2_gen1_dep8_batch512_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [8192] - prefill: - num-worker: 3 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen1_dep8_batch1024_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen1_dep8_batch1024_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 
8 - ep: 8 - dp-attn: true - - conc-list: [4611] - prefill: - num-worker: 3 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen2_dep8_batch256_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen2_dep8_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - - isl: 8192 - osl: 1024 - search-space: - - spec-decoding: "mtp" - conc-list: [2198] - prefill: - num-worker: 10 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [52] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep4_batch8_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep4_batch8_eplb0_mtp3.yaml" - decode: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [8] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [32] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml - - 
"CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [181] - prefill: - num-worker: 3 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx3_gen1_dep8_batch16_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx3_gen1_dep8_batch16_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [1197] - prefill: - num-worker: 9 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx9_gen1_dep8_batch128_eplb0_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx9_gen1_dep8_batch128_eplb0_mtp1.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - # Non-MTP configurations - - conc-list: [105] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep4_batch32_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep4_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: false - - conc-list: [63] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [4] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [12] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen4_tep4_batch2_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen4_tep4_batch2_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: false - - conc-list: [589] - prefill: - num-worker: 5 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx5_gen2_dep8_batch32_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx5_gen2_dep8_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [1093] - prefill: - num-worker: 6 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx6_gen1_dep8_batch128_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx6_gen1_dep8_batch128_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [2048] - prefill: - num-worker: 8 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx8_gen1_dep8_batch256_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx8_gen1_dep8_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - spec-decoding: "mtp" + conc-list: [654] + prefill: + num-worker: 1 + tp: 2 
+ ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen1_dep8_batch64_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen1_dep8_batch64_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [271] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen2_dep8_batch16_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen2_dep8_batch16_eplb0_mtp3.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [11] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [10, 20, 25, 60, 120, 200] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [2342] + prefill: + num-worker: 2 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx2_gen1_dep8_batch256_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx2_gen1_dep8_batch256_eplb0_mtp1.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + 
dp-attn: true + - spec-decoding: "mtp" + conc-list: [8609] + prefill: + num-worker: 5 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch512_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch512_eplb0_mtp1.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [12926] + prefill: + num-worker: 5 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch768_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch768_eplb0_mtp1.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + + # Non-MTP configurations + - conc-list: [1176] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [6] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [5, 10, 15, 25] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml" + 
decode: + num-worker: 5 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [60, 110, 195, 395] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep8_batch64_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep8_batch64_eplb0_mtp0.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [4405] + prefill: + num-worker: 2 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx2_gen1_dep8_batch512_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx2_gen1_dep8_batch512_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [8192] + prefill: + num-worker: 3 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen1_dep8_batch1024_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen1_dep8_batch1024_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [4611] + prefill: + num-worker: 3 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen2_dep8_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/1k1k/stp/ctx3_gen2_dep8_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + + - isl: 8192 + osl: 1024 + search-space: + - spec-decoding: "mtp" + conc-list: [2198] + prefill: + num-worker: 10 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml + - 
"CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [52] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep4_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep4_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [8] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [32] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [181] + prefill: + num-worker: 3 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx3_gen1_dep8_batch16_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx3_gen1_dep8_batch16_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1197] + prefill: + num-worker: 9 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/mtp/ctx9_gen1_dep8_batch128_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/mtp/ctx9_gen1_dep8_batch128_eplb0_mtp1.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + + # Non-MTP configurations + - conc-list: [105] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep4_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep4_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [63] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [4] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [12] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen4_tep4_batch2_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx1_gen4_tep4_batch2_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [589] + prefill: + num-worker: 5 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx5_gen2_dep8_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx5_gen2_dep8_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [1093] + prefill: + num-worker: 6 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx6_gen1_dep8_batch128_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx6_gen1_dep8_batch128_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [2048] + prefill: + num-worker: 8 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp4/8k1k/stp/ctx8_gen1_dep8_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp4/8k1k/stp/ctx8_gen1_dep8_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true dsr1-fp8-b300-dynamo-trt: image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1 model: deepseek-ai/DeepSeek-R1-0528 @@ -1255,400 +1275,400 @@ dsr1-fp8-b300-dynamo-trt: framework: dynamo-trt multinode: true disagg: true - seq-len-configs: - # 1k1k MTP configs - - isl: 1024 - osl: 1024 - search-space: - - spec-decoding: "mtp" - conc-list: [10] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_10.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_10.yaml" - decode: - num-worker: 8 - tp: 8 - ep: 1 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [160] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch16_eplb0_mtp3_160.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch16_eplb0_mtp3_160.yaml" - decode: - num-worker: 8 - tp: 8 - ep: 1 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [3072] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen1_dp8_batch256_eplb0_mtp1_3072.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen1_dp8_batch256_eplb0_mtp1_3072.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [2560] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen2_dep8_batch128_eplb0_mtp1_2560.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen2_dep8_batch128_eplb0_mtp1_2560.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [720] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp2_720.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp2_720.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [11264] - prefill: - num-worker: 3 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx3_gen2_dp8_batch512_eplb0_mtp1_11264.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx3_gen2_dp8_batch512_eplb0_mtp1_11264.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: true - # 1k1k 
STP configs - - isl: 1024 - osl: 1024 - search-space: - - conc-list: [2112] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_2112.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_2112.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [3072] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen2_dp8_batch128_eplb0_mtp0_3072.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen2_dp8_batch128_eplb0_mtp0_3072.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: true - - conc-list: [1280] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen3_dp8_batch48_eplb0_mtp0_1280.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen3_dp8_batch48_eplb0_mtp0_1280.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 1 - dp-attn: true - - conc-list: [12] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_12.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_12.yaml" - decode: - num-worker: 8 - tp: 8 - ep: 1 - dp-attn: false - - conc-list: [128] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_128.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_128.yaml" - decode: 
- num-worker: 8 - tp: 8 - ep: 1 - dp-attn: false - - conc-list: [384] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_384.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_384.yaml" - decode: - num-worker: 8 - tp: 8 - ep: 1 - dp-attn: false - - conc-list: [16384] - prefill: - num-worker: 2 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx2_gen1_dp8_batch1024_eplb0_mtp0_16384.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx2_gen1_dp8_batch1024_eplb0_mtp0_16384.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - # 8k1k MTP configs - - isl: 8192 - osl: 1024 - search-space: - - spec-decoding: "mtp" - conc-list: [40] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen2_tp8_batch16_eplb0_mtp3_40.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen2_tp8_batch16_eplb0_mtp3_40.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [8] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch1_eplb0_mtp3_8.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch1_eplb0_mtp3_8.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 1 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [20] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch4_eplb0_mtp3_20.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch4_eplb0_mtp3_20.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 1 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [72] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen1_dp8_batch8_eplb0_mtp3_72.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen1_dp8_batch8_eplb0_mtp3_72.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [144] - prefill: - num-worker: 2 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx2_gen1_dp8_batch16_eplb0_mtp3_144.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx2_gen1_dp8_batch16_eplb0_mtp3_144.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [512] - prefill: - num-worker: 4 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx4_gen1_dp8_batch64_eplb0_mtp2_512.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx4_gen1_dp8_batch64_eplb0_mtp2_512.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - # 8k1k STP configs - - isl: 8192 - osl: 1024 - search-space: - - conc-list: [64] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen4_tp8_batch16_eplb0_mtp0_64.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen4_tp8_batch16_eplb0_mtp0_64.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 1 - dp-attn: false - - 
conc-list: [16] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen8_tp8_batch2_eplb0_mtp0_16.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen8_tp8_batch2_eplb0_mtp0_16.yaml" - decode: - num-worker: 8 - tp: 8 - ep: 1 - dp-attn: false - - conc-list: [256] - prefill: - num-worker: 2 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx2_gen1_dp8_batch32_eplb0_mtp0_256.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx2_gen1_dp8_batch32_eplb0_mtp0_256.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - - conc-list: [512] - prefill: - num-worker: 3 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen1_dp8_batch64_eplb0_mtp0_512.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen1_dp8_batch64_eplb0_mtp0_512.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - - conc-list: [256] - prefill: - num-worker: 3 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen5_tp8_batch64_eplb0_mtp0_256.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen5_tp8_batch64_eplb0_mtp0_256.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 1 - dp-attn: false - - conc-list: [1075] - prefill: - num-worker: 5 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx5_gen1_dp8_batch128_eplb0_mtp0_1075.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx5_gen1_dp8_batch128_eplb0_mtp0_1075.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - - conc-list: [3072] 
- prefill: - num-worker: 7 - tp: 4 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx7_gen1_dep8_batch384_eplb0_mtp0_3072.yaml - - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx7_gen1_dep8_batch384_eplb0_mtp0_3072.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - + scenarios: + fixed-seq-len: + # 1k1k MTP configs + - isl: 1024 + osl: 1024 + search-space: + - spec-decoding: "mtp" + conc-list: [10] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_10.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch1_eplb0_mtp3_10.yaml" + decode: + num-worker: 8 + tp: 8 + ep: 1 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [160] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch16_eplb0_mtp3_160.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen8_tp8_batch16_eplb0_mtp3_160.yaml" + decode: + num-worker: 8 + tp: 8 + ep: 1 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [3072] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen1_dp8_batch256_eplb0_mtp1_3072.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen1_dp8_batch256_eplb0_mtp1_3072.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [2560] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen2_dep8_batch128_eplb0_mtp1_2560.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen2_dep8_batch128_eplb0_mtp1_2560.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [720] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp2_720.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx1_gen5_dep8_batch16_eplb0_mtp2_720.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [11264] + prefill: + num-worker: 3 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/mtp/ctx3_gen2_dp8_batch512_eplb0_mtp1_11264.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/mtp/ctx3_gen2_dp8_batch512_eplb0_mtp1_11264.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: true + # 1k1k STP configs + - isl: 1024 + osl: 1024 + search-space: + - conc-list: [2112] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_2112.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen1_dep8_batch256_eplb0_mtp0_2112.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [3072] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen2_dp8_batch128_eplb0_mtp0_3072.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen2_dp8_batch128_eplb0_mtp0_3072.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 1 + 
dp-attn: true + - conc-list: [1280] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen3_dp8_batch48_eplb0_mtp0_1280.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen3_dp8_batch48_eplb0_mtp0_1280.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 1 + dp-attn: true + - conc-list: [12] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_12.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_12.yaml" + decode: + num-worker: 8 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: [128] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_128.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_128.yaml" + decode: + num-worker: 8 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: [384] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_384.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx1_gen8_tp8_batch64_eplb0_mtp0_384.yaml" + decode: + num-worker: 8 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: [16384] + prefill: + num-worker: 2 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/1k1k/stp/ctx2_gen1_dp8_batch1024_eplb0_mtp0_16384.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/1k1k/stp/ctx2_gen1_dp8_batch1024_eplb0_mtp0_16384.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 1 + 
dp-attn: true + # 8k1k MTP configs + - isl: 8192 + osl: 1024 + search-space: + - spec-decoding: "mtp" + conc-list: [40] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen2_tp8_batch16_eplb0_mtp3_40.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen2_tp8_batch16_eplb0_mtp3_40.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [8] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch1_eplb0_mtp3_8.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch1_eplb0_mtp3_8.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 1 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [20] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch4_eplb0_mtp3_20.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen4_tp8_batch4_eplb0_mtp3_20.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 1 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [72] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen1_dp8_batch8_eplb0_mtp3_72.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx1_gen1_dp8_batch8_eplb0_mtp3_72.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [144] + prefill: + num-worker: 2 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx2_gen1_dp8_batch16_eplb0_mtp3_144.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx2_gen1_dp8_batch16_eplb0_mtp3_144.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [512] + prefill: + num-worker: 4 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/mtp/ctx4_gen1_dp8_batch64_eplb0_mtp2_512.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/mtp/ctx4_gen1_dp8_batch64_eplb0_mtp2_512.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + # 8k1k STP configs + - isl: 8192 + osl: 1024 + search-space: + - conc-list: [64] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen4_tp8_batch16_eplb0_mtp0_64.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen4_tp8_batch16_eplb0_mtp0_64.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: [16] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen8_tp8_batch2_eplb0_mtp0_16.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx1_gen8_tp8_batch2_eplb0_mtp0_16.yaml" + decode: + num-worker: 8 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: [256] + prefill: + num-worker: 2 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx2_gen1_dp8_batch32_eplb0_mtp0_256.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx2_gen1_dp8_batch32_eplb0_mtp0_256.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + - conc-list: [512] + prefill: + num-worker: 
3 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen1_dp8_batch64_eplb0_mtp0_512.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen1_dp8_batch64_eplb0_mtp0_512.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + - conc-list: [256] + prefill: + num-worker: 3 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen5_tp8_batch64_eplb0_mtp0_256.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx3_gen5_tp8_batch64_eplb0_mtp0_256.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: [1075] + prefill: + num-worker: 5 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx5_gen1_dp8_batch128_eplb0_mtp0_1075.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx5_gen1_dp8_batch128_eplb0_mtp0_1075.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + - conc-list: [3072] + prefill: + num-worker: 7 + tp: 4 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/b300-fp8/8k1k/stp/ctx7_gen1_dep8_batch384_eplb0_mtp0_3072.yaml + - "CONFIG_FILE=recipes/trtllm/b300-fp8/8k1k/stp/ctx7_gen1_dep8_batch384_eplb0_mtp0_3072.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true dsr1-fp4-b200-sglang: image: lmsysorg/sglang:v0.5.9-cu130 model: nvidia/DeepSeek-R1-0528-FP4-V2 @@ -1657,17 +1677,23 @@ dsr1-fp4-b200-sglang: precision: fp4 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 } - - { tp: 8, ep: 8, conc-start: 4, conc-end: 128 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 } 
- - { tp: 8, ep: 8, conc-start: 4, conc-end: 16 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 128 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 16 } + agentic-coding: + - duration: 1800 + search-space: + - { tp: 4, ep: 4, offloading: none, conc-list: [1, 2, 4, 8, 12, 16, 24, 32, 48, 64, 128, 256] } + - { tp: 8, ep: 8, offloading: none, conc-list: [1, 2, 4, 8, 12, 16, 32, 64, 128, 256, 512] } dsv4-fp4-b200-sglang: image: lmsysorg/sglang:deepseek-v4-blackwell@sha256:df18bfc4aa9ecf59451002b49ba00cae58042de9e2a96378bbd21b404dd62c7b @@ -1686,25 +1712,26 @@ dsv4-fp4-b200-sglang: # only --max-running-requests scales with CONC. # ep is implicit in sglang: --moe-a2a-backend deepep forces ep_size=tp_size, # while low-latency leaves ep_size at the default of 1. - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # low-latency (DP_ATTENTION=false) - - { tp: 8, ep: 1, conc-start: 1, conc-end: 32 } - # DP-attention (DP_ATTENTION=true) — balanced CONC range - - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 128 } - # DP-attention (DP_ATTENTION=true) — max-throughput CONC range - - { tp: 8, ep: 8, dp-attn: true, conc-start: 256, conc-end: 1024 } - - isl: 8192 - osl: 1024 - search-space: - # low-latency (DP_ATTENTION=false) - - { tp: 8, ep: 1, conc-start: 1, conc-end: 32 } - # DP-attention (DP_ATTENTION=true) — balanced CONC range - - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 128 } - # DP-attention (DP_ATTENTION=true) — max-throughput CONC range - - { tp: 8, ep: 8, dp-attn: true, conc-start: 256, conc-end: 512 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # low-latency (DP_ATTENTION=false) + - { tp: 8, ep: 1, conc-start: 1, conc-end: 32 } + # DP-attention (DP_ATTENTION=true) — balanced CONC range + - { 
tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 128 } + # DP-attention (DP_ATTENTION=true) — max-throughput CONC range + - { tp: 8, ep: 8, dp-attn: true, conc-start: 256, conc-end: 1024 } + - isl: 8192 + osl: 1024 + search-space: + # low-latency (DP_ATTENTION=false) + - { tp: 8, ep: 1, conc-start: 1, conc-end: 32 } + # DP-attention (DP_ATTENTION=true) — balanced CONC range + - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 128 } + # DP-attention (DP_ATTENTION=true) — max-throughput CONC range + - { tp: 8, ep: 8, dp-attn: true, conc-start: 256, conc-end: 512 } dsv4-fp4-b200-vllm: image: vllm/vllm-openai:deepseekv4-cu130 @@ -1714,18 +1741,19 @@ dsv4-fp4-b200-vllm: precision: fp4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 1, conc-end: 64 } - - { tp: 8, ep: 8, conc-start: 128, conc-end: 128 } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 256, conc-end: 4096 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 1, conc-end: 32 } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 1024 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 1, conc-end: 64 } + - { tp: 8, ep: 8, conc-start: 128, conc-end: 128 } + - { tp: 8, ep: 8, dp-attn: true, conc-start: 256, conc-end: 4096 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 1, conc-end: 32 } + - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 1024 } # NOTE: At the time of submission, https://cookbook.sglang.io/autoregressive/DeepSeek/DeepSeek-R1 # does not have a B300-specific recipe, so this config reuses the existing DSR1 FP4 @@ -1738,17 +1766,18 @@ dsr1-fp4-b300-sglang: precision: fp4 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 } - - { tp: 8, ep: 8, conc-start: 4, conc-end: 128 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 
4, ep: 4, conc-start: 4, conc-end: 128 } - - { tp: 8, ep: 8, conc-start: 4, conc-end: 16 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 128 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 16 } dsr1-fp4-b200-trt: image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc6.post2 @@ -1758,29 +1787,30 @@ dsr1-fp4-b200-trt: precision: fp4 framework: trt multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # low concurrency cases use TP only - # concurrency 64 uses TP & EP - # high concurrency cases use TP & EP & DP-ATTN - - { tp: 4, conc-start: 4, conc-end: 16 } - - { tp: 4, ep: 4, dp-attn: true, conc-start: 256, conc-end: 256 } - - { tp: 8, conc-start: 4, conc-end: 4 } - - { tp: 8, ep: 8, conc-start: 64, conc-end: 64 } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 128, conc-end: 256 } - - isl: 8192 - osl: 1024 - search-space: - # low concurrency cases use TP only - # concurrency 32 uses TP & EP - # high concurrency cases use TP & EP & DP-ATTN - - { tp: 4, conc-start: 4, conc-end: 32 } - - { tp: 4, ep: 4, conc-start: 32, conc-end: 32 } - - { tp: 4, ep: 4, dp-attn: true, conc-start: 256, conc-end: 256 } - - { tp: 8, conc-start: 4, conc-end: 4 } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 128, conc-end: 256 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # low concurrency cases use TP only + # concurrency 64 uses TP & EP + # high concurrency cases use TP & EP & DP-ATTN + - { tp: 4, conc-start: 4, conc-end: 16 } + - { tp: 4, ep: 4, dp-attn: true, conc-start: 256, conc-end: 256 } + - { tp: 8, conc-start: 4, conc-end: 4 } + - { tp: 8, ep: 8, conc-start: 64, conc-end: 64 } + - { tp: 8, ep: 8, dp-attn: true, conc-start: 128, conc-end: 256 } + - isl: 8192 + osl: 1024 + search-space: + # low concurrency cases use TP 
only + # concurrency 32 uses TP & EP + # high concurrency cases use TP & EP & DP-ATTN + - { tp: 4, conc-start: 4, conc-end: 32 } + - { tp: 4, ep: 4, conc-start: 32, conc-end: 32 } + - { tp: 4, ep: 4, dp-attn: true, conc-start: 256, conc-end: 256 } + - { tp: 8, conc-start: 4, conc-end: 4 } + - { tp: 8, ep: 8, dp-attn: true, conc-start: 128, conc-end: 256 } dsr1-fp4-b200-trt-mtp: image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc6.post3 @@ -1790,28 +1820,29 @@ dsr1-fp4-b200-trt-mtp: precision: fp4 framework: trt multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # TP=4 configurations - - { tp: 4, conc-start: 4, conc-end: 8, spec-decoding: mtp } - - { tp: 4, ep: 4, dp-attn: true, conc-start: 256, conc-end: 256, spec-decoding: mtp } - # TP=8 configurations - - { tp: 8, conc-start: 4, conc-end: 4, spec-decoding: mtp } - - { tp: 8, conc-start: 128, conc-end: 128, spec-decoding: mtp } - - { tp: 8, ep: 8, conc-start: 32, conc-end: 128, spec-decoding: mtp } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 32, conc-end: 64, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: # TP=4 configurations - - { tp: 4, conc-start: 4, conc-end: 16, spec-decoding: mtp } - - { tp: 4, ep: 4, conc-start: 32, conc-end: 32, spec-decoding: mtp } - - { tp: 4, ep: 4, dp-attn: true, conc-start: 256, conc-end: 256, spec-decoding: mtp } + - { tp: 4, conc-start: 4, conc-end: 8, spec-decoding: mtp } + - { tp: 4, ep: 4, dp-attn: true, conc-start: 256, conc-end: 256, spec-decoding: mtp } # TP=8 configurations - - { tp: 8, conc-start: 4, conc-end: 4, spec-decoding: mtp } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 256, spec-decoding: mtp } + - { tp: 8, conc-start: 4, conc-end: 4, spec-decoding: mtp } + - { tp: 8, conc-start: 128, conc-end: 128, spec-decoding: mtp } + - { tp: 8, ep: 8, conc-start: 32, conc-end: 128, spec-decoding: mtp } + - { tp: 8, ep: 8, dp-attn: true, 
conc-start: 32, conc-end: 64, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + # TP=4 configurations + - { tp: 4, conc-start: 4, conc-end: 16, spec-decoding: mtp } + - { tp: 4, ep: 4, conc-start: 32, conc-end: 32, spec-decoding: mtp } + - { tp: 4, ep: 4, dp-attn: true, conc-start: 256, conc-end: 256, spec-decoding: mtp } + # TP=8 configurations + - { tp: 8, conc-start: 4, conc-end: 4, spec-decoding: mtp } + - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 256, spec-decoding: mtp } dsr1-fp8-b200-sglang: image: lmsysorg/sglang:v0.5.9-cu130 @@ -1821,20 +1852,21 @@ dsr1-fp8-b200-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 32 } - -# NOTE: At the time of submission, https://cookbook.sglang.io/autoregressive/DeepSeek/DeepSeek-R1 -# does not have a B300-specific recipe, so this config reuses the existing DSR1 FP8 -# B200 SGLang recipe as-is until B300-specific tuning is available. + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 32 } + + # NOTE: At the time of submission, https://cookbook.sglang.io/autoregressive/DeepSeek/DeepSeek-R1 + # does not have a B300-specific recipe, so this config reuses the existing DSR1 FP8 + # B200 SGLang recipe as-is until B300-specific tuning is available. 
dsr1-fp8-b300-sglang: image: lmsysorg/sglang:v0.5.10.post1-cu130 model: deepseek-ai/DeepSeek-R1-0528 @@ -1843,16 +1875,17 @@ dsr1-fp8-b300-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 32 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 32 } # NOTE: https://docs.sglang.io/cookbook/autoregressive/DeepSeek/DeepSeek-V4 # lists B200 (not B300) as the Blackwell target. This config reuses the @@ -1875,29 +1908,30 @@ dsv4-fp4-b300-sglang: # Split so result filenames (ep=, dpa=) accurately reflect the recipe. # ep is implicit in sglang: --moe-a2a-backend deepep forces ep_size=tp_size, # while low-latency leaves ep_size at the default of 1. - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 1, conc-end: 1 } - - { tp: 4, ep: 1, conc-start: 32, conc-end: 32 } - - { tp: 4, ep: 4, dp-attn: true, conc-start: 512, conc-end: 512 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 1, conc-end: 1 } - - { tp: 4, ep: 1, conc-start: 32, conc-end: 32 } - - { tp: 4, ep: 4, dp-attn: true, conc-start: 512, conc-end: 512 } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 2048, conc-end: 2048 } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 4096, conc-end: 4096 } - -# DeepSeek-V4-Pro on B300 with EAGLE/MTP speculative decoding. 
Recipe is -# selected inside benchmarks/single_node/dsv4_fp4_b300_sglang_mtp.sh by -# DP_ATTENTION: -# dp-attn: false -> TP-only + flashinfer_mxfp4 + chunked-prefill 8192 -# + EAGLE (3,1,4) + mem-fraction 0.90 -# dp-attn: true -> DP-attn + flashinfer_mxfp4 + chunked-prefill 32768 -# + EAGLE (1,1,2) + mem-fraction 0.92 + max-running 256 + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 1, conc-end: 1 } + - { tp: 4, ep: 1, conc-start: 32, conc-end: 32 } + - { tp: 4, ep: 4, dp-attn: true, conc-start: 512, conc-end: 512 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 1, conc-end: 1 } + - { tp: 4, ep: 1, conc-start: 32, conc-end: 32 } + - { tp: 4, ep: 4, dp-attn: true, conc-start: 512, conc-end: 512 } + - { tp: 8, ep: 8, dp-attn: true, conc-start: 2048, conc-end: 2048 } + - { tp: 8, ep: 8, dp-attn: true, conc-start: 4096, conc-end: 4096 } + + # DeepSeek-V4-Pro on B300 with EAGLE/MTP speculative decoding. Recipe is + # selected inside benchmarks/single_node/dsv4_fp4_b300_sglang_mtp.sh by + # DP_ATTENTION: + # dp-attn: false -> TP-only + flashinfer_mxfp4 + chunked-prefill 8192 + # + EAGLE (3,1,4) + mem-fraction 0.90 + # dp-attn: true -> DP-attn + flashinfer_mxfp4 + chunked-prefill 32768 + # + EAGLE (1,1,2) + mem-fraction 0.92 + max-running 256 dsv4-fp4-b300-sglang-mtp: image: lmsysorg/sglang:deepseek-v4-b300@sha256:26e116bd211e300dbb76924d56c5cbe6cc3ee5ee2fe314859cb8774f5bc070f3 model: deepseek-ai/DeepSeek-V4-Pro @@ -1910,17 +1944,18 @@ dsv4-fp4-b300-sglang-mtp: # A: TP=8 ep=1 -- conc 1-8 EAGLE (3,1,4) TP-only fallback # B: TP=4 ep=1 -- conc 4-32 EAGLE (3,1,4) TP-only mid batch # C: TP=4 ep=1 dp-attn -- conc 16-256 EAGLE (1,1,2) DP-attn flashinfer - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 1, conc-end: 8, spec-decoding: mtp } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 32, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { 
tp: 8, ep: 1, conc-start: 1, conc-end: 8, spec-decoding: mtp } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 32, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 1, conc-end: 8, spec-decoding: mtp } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 32, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 1, conc-end: 8, spec-decoding: mtp } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 32, spec-decoding: mtp } qwen3.5-bf16-b200-sglang: image: lmsysorg/sglang:nightly-dev-20260216-d3bae71e @@ -1930,15 +1965,16 @@ qwen3.5-bf16-b200-sglang: precision: bf16 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } qwen3.5-bf16-b200-sglang-mtp: image: lmsysorg/sglang:nightly-dev-20260216-d3bae71e @@ -1948,15 +1984,16 @@ qwen3.5-bf16-b200-sglang-mtp: precision: bf16 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 64, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 64, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64, spec-decoding: mtp } qwen3.5-fp8-b200-sglang: image: lmsysorg/sglang:v0.5.9-cu130-amd64 @@ -1966,17 +2003,18 @@ qwen3.5-fp8-b200-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - 
isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 } - - { tp: 4, ep: 4, conc-start: 16, conc-end: 128 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 } - - { tp: 4, ep: 4, conc-start: 16, conc-end: 128 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 } + - { tp: 4, ep: 4, conc-start: 16, conc-end: 128 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 } + - { tp: 4, ep: 4, conc-start: 16, conc-end: 128 } qwen3.5-fp4-b200-sglang: image: lmsysorg/sglang:nightly-dev-20260402-d7256eb6 @@ -1986,15 +2024,16 @@ qwen3.5-fp4-b200-sglang: precision: fp4 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } qwen3.5-fp4-b200-sglang-mtp: image: lmsysorg/sglang:nightly-dev-20260402-d7256eb6 @@ -2004,15 +2043,16 @@ qwen3.5-fp4-b200-sglang-mtp: precision: fp4 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 128, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 128, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 128, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 128, spec-decoding: mtp } glm5-fp8-b200-sglang: image: lmsysorg/sglang:nightly-dev-cu13-20260317-1eea7448 @@ -2022,15 +2062,16 @@ 
glm5-fp8-b200-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } glm5-fp8-b200-sglang-mtp: image: lmsysorg/sglang:nightly-dev-cu13-20260317-1eea7448 @@ -2040,19 +2081,20 @@ glm5-fp8-b200-sglang-mtp: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } - -# NOTE: At the time of submission, https://cookbook.sglang.io/autoregressive/GLM/GLM-5.1 -# does not have a B300-specific recipe, so this config reuses the existing GLM5 FP8 -# B200 SGLang recipe as-is until B300-specific tuning is available. + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + + # NOTE: At the time of submission, https://cookbook.sglang.io/autoregressive/GLM/GLM-5.1 + # does not have a B300-specific recipe, so this config reuses the existing GLM5 FP8 + # B200 SGLang recipe as-is until B300-specific tuning is available. 
glm5-fp8-b300-sglang: image: lmsysorg/sglang:v0.5.10.post1-cu130 model: zai-org/GLM-5-FP8 @@ -2061,15 +2103,16 @@ glm5-fp8-b300-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } glm5-fp8-b300-sglang-mtp: image: lmsysorg/sglang:v0.5.10.post1-cu130 @@ -2079,15 +2122,16 @@ glm5-fp8-b300-sglang-mtp: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } glm5-fp4-b200-sglang: image: lmsysorg/sglang:v0.5.10.post1-cu130 @@ -2097,17 +2141,18 @@ glm5-fp4-b200-sglang: precision: fp4 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 
4 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } glm5-fp4-b200-sglang-mtp: image: lmsysorg/sglang:v0.5.10.post1-cu130 @@ -2117,21 +2162,22 @@ glm5-fp4-b200-sglang-mtp: precision: fp4 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4, spec-decoding: mtp } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4, spec-decoding: mtp } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } - -# NOTE: At the time of submission, https://cookbook.sglang.io/autoregressive/GLM/GLM-5 -# does not have a B300-specific recipe, so this config reuses the existing -# GLM-5 FP4 B200 SGLang recipe as-is until B300-specific tuning is available. + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4, spec-decoding: mtp } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4, spec-decoding: mtp } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + + # NOTE: At the time of submission, https://cookbook.sglang.io/autoregressive/GLM/GLM-5 + # does not have a B300-specific recipe, so this config reuses the existing + # GLM-5 FP4 B200 SGLang recipe as-is until B300-specific tuning is available. 
glm5-fp4-b300-sglang: image: lmsysorg/sglang:v0.5.10.post1-cu130 model: nvidia/GLM-5-NVFP4 @@ -2140,17 +2186,18 @@ glm5-fp4-b300-sglang: precision: fp4 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } glm5-fp4-b300-sglang-mtp: image: lmsysorg/sglang:v0.5.10.post1-cu130 @@ -2160,17 +2207,18 @@ glm5-fp4-b300-sglang-mtp: precision: fp4 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4, spec-decoding: mtp } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4, spec-decoding: mtp } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4, spec-decoding: mtp } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4, spec-decoding: mtp } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } qwen3.5-fp8-b200-sglang-mtp: image: lmsysorg/sglang:v0.5.9-cu130 @@ -2180,15 +2228,16 @@ qwen3.5-fp8-b200-sglang-mtp: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, ep: 1, 
conc-start: 4, conc-end: 256, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } qwen3.5-fp8-b300-sglang-mtp: @@ -2199,15 +2248,16 @@ qwen3.5-fp8-b300-sglang-mtp: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } qwen3.5-fp8-b300-sglang: image: lmsysorg/sglang:v0.5.10.post1-cu130 @@ -2217,15 +2267,16 @@ qwen3.5-fp8-b300-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } qwen3.5-fp4-b300-sglang: image: lmsysorg/sglang:v0.5.10.post1-cu130 @@ -2235,17 +2286,18 @@ qwen3.5-fp4-b300-sglang: precision: fp4 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } - - { tp: 2, ep: 2, conc-start: 4, conc-end: 128 } - - isl: 8192 - osl: 1024 - 
search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } - - { tp: 2, ep: 2, conc-start: 4, conc-end: 128 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } + - { tp: 2, ep: 2, conc-start: 4, conc-end: 128 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } + - { tp: 2, ep: 2, conc-start: 4, conc-end: 128 } qwen3.5-fp4-b300-sglang-mtp: image: lmsysorg/sglang:v0.5.10.post1-cu130 @@ -2255,17 +2307,18 @@ qwen3.5-fp4-b300-sglang-mtp: precision: fp4 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 128, spec-decoding: mtp } - - { tp: 2, ep: 2, conc-start: 4, conc-end: 128, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 128, spec-decoding: mtp } - - { tp: 2, ep: 2, conc-start: 4, conc-end: 128, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 128, spec-decoding: mtp } + - { tp: 2, ep: 2, conc-start: 4, conc-end: 128, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 4, ep: 1, conc-start: 4, conc-end: 128, spec-decoding: mtp } + - { tp: 2, ep: 2, conc-start: 4, conc-end: 128, spec-decoding: mtp } qwen3.5-bf16-b300-sglang: image: lmsysorg/sglang:v0.5.10.post1-cu130 @@ -2275,17 +2328,18 @@ qwen3.5-bf16-b300-sglang: precision: bf16 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } + - { tp: 
4, ep: 1, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 } qwen3.5-bf16-b300-sglang-mtp: image: lmsysorg/sglang:v0.5.10.post1-cu130 @@ -2295,17 +2349,18 @@ qwen3.5-bf16-b300-sglang-mtp: precision: bf16 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 64, spec-decoding: mtp } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 64, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 64, spec-decoding: mtp } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 64, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64, spec-decoding: mtp } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 64, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64, spec-decoding: mtp } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 64, spec-decoding: mtp } kimik2.5-int4-b200-vllm: image: vllm/vllm-openai:v0.15.1 @@ -2315,15 +2370,16 @@ kimik2.5-int4-b200-vllm: precision: int4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } kimik2.5-int4-h200-vllm: image: vllm/vllm-openai:v0.16.0 @@ -2333,15 +2389,16 @@ kimik2.5-int4-h200-vllm: precision: int4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, 
conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } kimik2.5-fp4-b200-vllm: image: vllm/vllm-openai:v0.17.0 @@ -2351,17 +2408,18 @@ kimik2.5-fp4-b200-vllm: precision: fp4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 } # NOTE: At the time of submission, https://docs.vllm.ai/projects/recipes/en/latest/moonshotai/Kimi-K2.5.html # does not have a B300-specific recipe, so this config reuses the existing @@ -2374,17 +2432,18 @@ kimik2.5-fp4-b300-vllm: precision: fp4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } - - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 } dsr1-fp8-b200-sglang-mtp: image: lmsysorg/sglang:v0.5.9-cu130 @@ -2394,20 +2453,21 @@ dsr1-fp8-b200-sglang-mtp: 
precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 512, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 512, spec-decoding: mtp } - -# NOTE: At the time of submission, https://cookbook.sglang.io/autoregressive/DeepSeek/DeepSeek-R1 -# does not have a B300-specific recipe, so this config reuses the existing DSR1 FP8 -# B200 SGLang MTP recipe as-is until B300-specific tuning is available. Image bumped -# to v0.5.10.post1-cu130 to match the standard B300 SGLang image used by other B300 configs. + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 512, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 512, spec-decoding: mtp } + + # NOTE: At the time of submission, https://cookbook.sglang.io/autoregressive/DeepSeek/DeepSeek-R1 + # does not have a B300-specific recipe, so this config reuses the existing DSR1 FP8 + # B200 SGLang MTP recipe as-is until B300-specific tuning is available. Image bumped + # to v0.5.10.post1-cu130 to match the standard B300 SGLang image used by other B300 configs. 
dsr1-fp8-b300-sglang-mtp: image: lmsysorg/sglang:v0.5.10.post1-cu130 model: deepseek-ai/DeepSeek-R1-0528 @@ -2416,15 +2476,16 @@ dsr1-fp8-b300-sglang-mtp: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 512, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 4, conc-end: 512, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 512, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 4, conc-end: 512, spec-decoding: mtp } dsr1-fp8-b200-trt: image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc6.post2 @@ -2434,19 +2495,20 @@ dsr1-fp8-b200-trt: precision: fp8 framework: trt multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 64, conc-end: 128 } - - { tp: 4, ep: 1, conc-start: 8, conc-end: 16 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 8 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 1, conc-start: 64, conc-end: 256 } - - { tp: 4, ep: 1, conc-start: 8, conc-end: 32 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 8 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 64, conc-end: 128 } + - { tp: 4, ep: 1, conc-start: 8, conc-end: 16 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 8 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 1, conc-start: 64, conc-end: 256 } + - { tp: 4, ep: 1, conc-start: 8, conc-end: 32 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 8 } dsr1-fp8-b200-trt-mtp: image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc6.post3 @@ -2456,20 +2518,21 @@ dsr1-fp8-b200-trt-mtp: precision: fp8 framework: trt multinode: false - seq-len-configs: - # For all sequence lengths, MTP=3 (or MTP=1 when DP_ATTN=true) - - isl: 1024 - osl: 1024 - search-space: - # 
mostly TP8 - # If CONC == 256, then TP8, EP8, DP_ATTN=true - - { tp: 8, ep: 1, conc-start: 4, conc-end: 128, spec-decoding: mtp } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 256, conc-end: 256, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - # TP8 for all points - - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } + scenarios: + fixed-seq-len: + # For all sequence lengths, MTP=3 (or MTP=1 when DP_ATTN=true) + - isl: 1024 + osl: 1024 + search-space: + # mostly TP8 + # If CONC == 256, then TP8, EP8, DP_ATTN=true + - { tp: 8, ep: 1, conc-start: 4, conc-end: 128, spec-decoding: mtp } + - { tp: 8, ep: 8, dp-attn: true, conc-start: 256, conc-end: 256, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + # TP8 for all points + - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } dsr1-fp8-h200-sglang: image: lmsysorg/sglang:v0.5.9-cu130 @@ -2479,15 +2542,16 @@ dsr1-fp8-h200-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } # DeepSeek-V4-Pro H200 recipe from https://vllm.ai/blog/deepseek-v4 # Uses the cu129 image. H200 has no FP4 path, so the FP4 indexer cache @@ -2500,20 +2564,21 @@ dsv4-fp8-h200-vllm: precision: fp8 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 8, dp-attn: true, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 8, dp-attn: true, conc-start: 4, conc-end: 64 } - -# DeepSeek-V4-Pro B300 single-node aggregate recipe from the submitted B300 -# pareto sweep. 
The single-node schema has no explicit data-parallel-size -# field, so dp-attn=true is used as the existing vLLM script switch for DP4 -# layouts on 4 allocated GPUs. + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 8, dp-attn: true, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 8, dp-attn: true, conc-start: 4, conc-end: 64 } + + # DeepSeek-V4-Pro B300 single-node aggregate recipe from the submitted B300 + # pareto sweep. The single-node schema has no explicit data-parallel-size + # field, so dp-attn=true is used as the existing vLLM script switch for DP4 + # layouts on 4 allocated GPUs. dsv4-fp4-b300-vllm: image: vllm/vllm-openai:deepseekv4-cu130 model: deepseek-ai/DeepSeek-V4-Pro @@ -2522,22 +2587,23 @@ dsv4-fp4-b300-vllm: precision: fp4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, conc-start: 1, conc-end: 128 } - - { tp: 8, conc-start: 1, conc-end: 128 } - - { tp: 4, ep: 4, dp-attn: true, conc-start: 256, conc-end: 512 } - - { tp: 4, ep: 4, dp-attn: true, conc-start: 2048, conc-end: 2048 } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 4096, conc-end: 8192 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 4, conc-start: 1, conc-end: 64 } - - { tp: 8, conc-start: 1, conc-end: 64 } - - { tp: 4, ep: 4, dp-attn: true, conc-start: 256, conc-end: 1024 } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 2048, conc-end: 2048 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, conc-start: 1, conc-end: 128 } + - { tp: 8, conc-start: 1, conc-end: 128 } + - { tp: 4, ep: 4, dp-attn: true, conc-start: 256, conc-end: 512 } + - { tp: 4, ep: 4, dp-attn: true, conc-start: 2048, conc-end: 2048 } + - { tp: 8, ep: 8, dp-attn: true, conc-start: 4096, conc-end: 8192 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 4, conc-start: 1, conc-end: 64 } + - { tp: 8, conc-start: 1, conc-end: 64 } + - { tp: 4, 
ep: 4, dp-attn: true, conc-start: 256, conc-end: 1024 } + - { tp: 8, ep: 8, dp-attn: true, conc-start: 2048, conc-end: 2048 } qwen3.5-fp8-h200-sglang: image: lmsysorg/sglang:v0.5.9-cu129-amd64 @@ -2547,15 +2613,16 @@ qwen3.5-fp8-h200-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 } qwen3.5-fp8-h200-sglang-mtp: image: lmsysorg/sglang:v0.5.10.post1 @@ -2565,15 +2632,16 @@ qwen3.5-fp8-h200-sglang-mtp: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, ep: 8, conc-start: 4, conc-end: 128, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, ep: 8, conc-start: 4, conc-end: 128, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, ep: 8, conc-start: 4, conc-end: 128, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, ep: 8, conc-start: 4, conc-end: 128, spec-decoding: mtp } glm5-fp8-h200-sglang: image: lmsysorg/sglang:glm5-hopper @@ -2583,15 +2651,16 @@ glm5-fp8-h200-sglang: precision: fp8 framework: sglang multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 64 } dsr1-fp8-h200-trt: image: nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc2.post2 
@@ -2602,18 +2671,19 @@ dsr1-fp8-h200-trt: framework: trt multinode: false # For all sequence lengths, EP=TP - seq-len-configs: - - isl: 1024 - osl: 1024 - # If CONC > 64, then DP_ATTN=true - search-space: - - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - # If CONC > 32, then DP_ATTN=true - search-space: - - { tp: 8, ep: 8, conc-start: 4, conc-end: 32 } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + # If CONC > 64, then DP_ATTN=true + search-space: + - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + # If CONC > 32, then DP_ATTN=true + search-space: + - { tp: 8, ep: 8, conc-start: 4, conc-end: 32 } + - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 64 } dsr1-fp8-h200-trt-mtp: image: nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc2.post2 @@ -2624,19 +2694,20 @@ dsr1-fp8-h200-trt-mtp: framework: trt multinode: false # For all sequence lengths, EP=TP, MOE_BACKEND=CUTLASS, MTP=3 (or MTP=1 when DP_ATTN=true) - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # If CONC >= 128, then DP_ATTN=true, MTP=1 - - { tp: 8, ep: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 128, conc-end: 256, spec-decoding: mtp } - - isl: 8192 - osl: 1024 - search-space: - # If CONC >= 64, then DP_ATTN=true, MTP=1 - - { tp: 8, ep: 8, conc-start: 4, conc-end: 32, spec-decoding: mtp } - - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 256, spec-decoding: mtp } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # If CONC >= 128, then DP_ATTN=true, MTP=1 + - { tp: 8, ep: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp } + - { tp: 8, ep: 8, dp-attn: true, conc-start: 128, conc-end: 256, spec-decoding: mtp } + - isl: 8192 + osl: 1024 + search-space: + # If CONC >= 64, then DP_ATTN=true, MTP=1 + - { tp: 8, ep: 8, conc-start: 4, conc-end: 32, spec-decoding: mtp } + - { 
tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 256, spec-decoding: mtp } dsr1-fp8-h200-dynamo-trt: image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post1 @@ -2647,539 +2718,540 @@ dsr1-fp8-h200-dynamo-trt: framework: dynamo-trt multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # MTP configurations - - spec-decoding: "mtp" - conc-list: [1] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c1_ctx1_gen11_tep8_batch1_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c1_ctx1_gen11_tep8_batch1_eplb0_mtp3.yaml" - decode: - num-worker: 11 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [4] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c4_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c4_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml" - decode: - num-worker: 11 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [8] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c8_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c8_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml" - decode: - num-worker: 11 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [16] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c16_ctx1_gen9_tep8_batch128_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c16_ctx1_gen9_tep8_batch128_eplb0_mtp3.yaml" - decode: - num-worker: 9 - tp: 8 - ep: 8 - 
dp-attn: false - - spec-decoding: "mtp" - conc-list: [32] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c32_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c32_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml" - decode: - num-worker: 11 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [64] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c64_ctx1_gen8_dep8_batch128_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c64_ctx1_gen8_dep8_batch128_eplb0_mtp3.yaml" - decode: - num-worker: 8 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [128] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c128_ctx1_gen7_dep8_batch128_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c128_ctx1_gen7_dep8_batch128_eplb0_mtp3.yaml" - decode: - num-worker: 7 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [256] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c256_ctx1_gen4_dep8_batch128_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c256_ctx1_gen4_dep8_batch128_eplb0_mtp3.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [512] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c512_ctx1_gen2_dep8_batch256_eplb0_mtp1.yaml - - 
"CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c512_ctx1_gen2_dep8_batch256_eplb0_mtp1.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - # Non-MTP configurations (STP) - - conc-list: [1] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c1_ctx1_gen9_tep8_batch1_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c1_ctx1_gen9_tep8_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 9 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [4] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c4_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c4_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 9 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [8] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c8_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c8_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 9 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [16] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c16_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c16_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 9 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [32] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c32_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml - - 
"CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c32_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 9 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [64] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c64_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c64_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 9 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [128] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c128_ctx1_gen9_dep8_batch512_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c128_ctx1_gen9_dep8_batch512_eplb0_mtp0.yaml" - decode: - num-worker: 9 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [256] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c256_ctx1_gen6_dep8_batch512_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c256_ctx1_gen6_dep8_batch512_eplb0_mtp0.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [512] - prefill: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c512_ctx2_gen7_dep8_batch512_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c512_ctx2_gen7_dep8_batch512_eplb0_mtp0.yaml" - decode: - num-worker: 7 - tp: 8 - ep: 8 - dp-attn: true - - isl: 8192 - osl: 1024 - search-space: - # MTP configurations - - spec-decoding: "mtp" - conc-list: [1] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp3.yaml" - decode: - num-worker: 7 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [4] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp3.yaml" - decode: - num-worker: 7 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [8] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c8_ctx1_gen6_tep8_batch32_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c8_ctx1_gen6_tep8_batch32_eplb0_mtp3.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [16] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp2.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp2.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [32] - prefill: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c32_ctx3_gen5_tep8_batch32_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c32_ctx3_gen5_tep8_batch32_eplb0_mtp3.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [64] - prefill: - num-worker: 1 
- tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c64_ctx1_gen1_dep8_batch32_eplb0_mtp2.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c64_ctx1_gen1_dep8_batch32_eplb0_mtp2.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [128] - prefill: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c128_ctx2_gen1_dep8_batch32_eplb0_mtp2.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c128_ctx2_gen1_dep8_batch32_eplb0_mtp2.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [256] - prefill: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c256_ctx3_gen1_dep8_batch32_eplb0_mtp2.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c256_ctx3_gen1_dep8_batch32_eplb0_mtp2.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [512] - prefill: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c512_ctx3_gen1_dep8_batch64_eplb0_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c512_ctx3_gen1_dep8_batch64_eplb0_mtp1.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - # Non-MTP configurations (STP) - - conc-list: [1] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 7 - tp: 8 - ep: 8 - 
dp-attn: false - - conc-list: [4] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 7 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [8] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c8_ctx1_gen6_tep8_batch16_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c8_ctx1_gen6_tep8_batch16_eplb0_mtp0.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [16] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [32] - prefill: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c32_ctx2_gen5_tep8_batch128_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c32_ctx2_gen5_tep8_batch128_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [64] - prefill: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c64_ctx2_gen3_dep8_batch128_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c64_ctx2_gen3_dep8_batch128_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [128] - prefill: 
- num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c128_ctx1_gen1_dep8_batch256_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c128_ctx1_gen1_dep8_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [256] - prefill: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c256_ctx5_gen3_dep8_batch256_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c256_ctx5_gen3_dep8_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [512] - prefill: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c512_ctx3_gen1_dep8_batch512_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c512_ctx3_gen1_dep8_batch512_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # MTP configurations + - spec-decoding: "mtp" + conc-list: [1] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c1_ctx1_gen11_tep8_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c1_ctx1_gen11_tep8_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 11 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [4] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c4_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml + - 
"CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c4_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml" + decode: + num-worker: 11 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [8] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c8_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c8_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml" + decode: + num-worker: 11 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [16] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c16_ctx1_gen9_tep8_batch128_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c16_ctx1_gen9_tep8_batch128_eplb0_mtp3.yaml" + decode: + num-worker: 9 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [32] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c32_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c32_ctx1_gen11_tep8_batch128_eplb0_mtp3.yaml" + decode: + num-worker: 11 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [64] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c64_ctx1_gen8_dep8_batch128_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c64_ctx1_gen8_dep8_batch128_eplb0_mtp3.yaml" + decode: + num-worker: 8 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [128] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c128_ctx1_gen7_dep8_batch128_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c128_ctx1_gen7_dep8_batch128_eplb0_mtp3.yaml" + decode: + num-worker: 7 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [256] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c256_ctx1_gen4_dep8_batch128_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c256_ctx1_gen4_dep8_batch128_eplb0_mtp3.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [512] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/mtp/c512_ctx1_gen2_dep8_batch256_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/mtp/c512_ctx1_gen2_dep8_batch256_eplb0_mtp1.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + # Non-MTP configurations (STP) + - conc-list: [1] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c1_ctx1_gen9_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c1_ctx1_gen9_tep8_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 9 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [4] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c4_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c4_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 9 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [8] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + 
dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c8_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c8_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 9 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [16] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c16_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c16_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 9 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [32] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c32_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c32_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 9 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [64] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c64_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c64_ctx1_gen9_tep8_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 9 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [128] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c128_ctx1_gen9_dep8_batch512_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c128_ctx1_gen9_dep8_batch512_eplb0_mtp0.yaml" + decode: + num-worker: 9 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [256] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + 
# https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c256_ctx1_gen6_dep8_batch512_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c256_ctx1_gen6_dep8_batch512_eplb0_mtp0.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [512] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/1k1k/stp/c512_ctx2_gen7_dep8_batch512_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/1k1k/stp/c512_ctx2_gen7_dep8_batch512_eplb0_mtp0.yaml" + decode: + num-worker: 7 + tp: 8 + ep: 8 + dp-attn: true + - isl: 8192 + osl: 1024 + search-space: + # MTP configurations + - spec-decoding: "mtp" + conc-list: [1] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 7 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [4] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp3.yaml" + decode: + num-worker: 7 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [8] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c8_ctx1_gen6_tep8_batch32_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c8_ctx1_gen6_tep8_batch32_eplb0_mtp3.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: 
"mtp" + conc-list: [16] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp2.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp2.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [32] + prefill: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c32_ctx3_gen5_tep8_batch32_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c32_ctx3_gen5_tep8_batch32_eplb0_mtp3.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [64] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c64_ctx1_gen1_dep8_batch32_eplb0_mtp2.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c64_ctx1_gen1_dep8_batch32_eplb0_mtp2.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [128] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c128_ctx2_gen1_dep8_batch32_eplb0_mtp2.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c128_ctx2_gen1_dep8_batch32_eplb0_mtp2.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [256] + prefill: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c256_ctx3_gen1_dep8_batch32_eplb0_mtp2.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c256_ctx3_gen1_dep8_batch32_eplb0_mtp2.yaml" + 
decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [512] + prefill: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/mtp/c512_ctx3_gen1_dep8_batch64_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/mtp/c512_ctx3_gen1_dep8_batch64_eplb0_mtp1.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + # Non-MTP configurations (STP) + - conc-list: [1] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c1_ctx1_gen7_tep8_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 7 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [4] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c4_ctx1_gen7_tep8_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 7 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [8] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c8_ctx1_gen6_tep8_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c8_ctx1_gen6_tep8_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [16] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c16_ctx1_gen3_tep8_batch32_eplb0_mtp0.yaml" 
+ decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [32] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c32_ctx2_gen5_tep8_batch128_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c32_ctx2_gen5_tep8_batch128_eplb0_mtp0.yaml" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [64] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c64_ctx2_gen3_dep8_batch128_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c64_ctx2_gen3_dep8_batch128_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [128] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c128_ctx1_gen1_dep8_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c128_ctx1_gen1_dep8_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [256] + prefill: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c256_ctx5_gen3_dep8_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c256_ctx5_gen3_dep8_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [512] + prefill: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h200/8k1k/stp/c512_ctx3_gen1_dep8_batch512_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h200/8k1k/stp/c512_ctx3_gen1_dep8_batch512_eplb0_mtp0.yaml" + decode: + num-worker: 1 + 
tp: 8 + ep: 8 + dp-attn: true dsr1-fp8-h100-dynamo-trt: image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post3 @@ -3190,440 +3262,441 @@ dsr1-fp8-h100-dynamo-trt: framework: dynamo-trt multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # MTP configurations - - spec-decoding: "mtp" - conc-list: [6] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [9] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [30] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [60] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch16_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch16_eplb0_mtp3.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - spec-decoding: "mtp" 
- conc-list: [117] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch32_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch32_eplb0_mtp3.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [231] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_dep16_batch4_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_dep16_batch4_eplb0_mtp3.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [462] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [615] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [1229] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1.yaml - - 
"CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - # Non-MTP configurations (STP) - - conc-list: [6] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - conc-list: [9] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - conc-list: [30] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - conc-list: [60] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch16_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch16_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - conc-list: [231] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch4_eplb0_mtp0.yaml 
- - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch4_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [462] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch8_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch8_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [924] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch16_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch16_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [1845] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch32_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [4916] - prefill: - num-worker: 2 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx2_gen1_dep16_batch256_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx2_gen1_dep16_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - isl: 8192 - osl: 1024 - search-space: - # MTP configurations (6 points) - - spec-decoding: "mtp" - conc-list: [6] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [9] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [30] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [77] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen1_dep16_batch4_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen1_dep16_batch4_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - # commenting out cuz it persistently causes problems - # https://github.com/InferenceMAX/InferenceMAX/actions/runs/21769314582/job/62813105509 - # - spec-decoding: "mtp" - # conc-list: [78] - # prefill: - # num-worker: 1 - # tp: 16 - # ep: 16 - # dp-attn: true - # additional-settings: - # # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml - # - 
"CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml" - # decode: - # num-worker: 2 - # tp: 16 - # ep: 16 - # dp-attn: false - - spec-decoding: "mtp" - conc-list: [154] - prefill: - num-worker: 2 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx2_gen1_dep16_batch8_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx2_gen1_dep16_batch8_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - # STP configurations (5 points) - - conc-list: [6] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - conc-list: [9] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - conc-list: [30] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 16 - ep: 16 - dp-attn: false - - conc-list: [154] - prefill: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen2_tep16_batch64_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen2_tep16_batch64_eplb0_mtp0.yaml" - decode: - num-worker: 2 - tp: 16 - ep: 16 - dp-attn: false - - conc-list: [308] - prefill: - num-worker: 2 - tp: 16 - ep: 16 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx2_gen1_dep16_batch16_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx2_gen1_dep16_batch16_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # MTP configurations + - spec-decoding: "mtp" + conc-list: [6] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [9] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [30] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 
16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [60] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch16_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch16_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [117] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch32_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch32_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [231] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_dep16_batch4_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_dep16_batch4_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [462] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen3_tep16_batch128_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [615] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml + - 
"CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp2.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1229] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + # Non-MTP configurations (STP) + - conc-list: [6] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [9] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [30] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [60] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_tep16_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [231] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch4_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch4_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [462] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch8_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch8_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [924] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [1845] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx1_gen3_dep16_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [4916] + prefill: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/1k1k/stp/ctx2_gen1_dep16_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/1k1k/stp/ctx2_gen1_dep16_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - isl: 8192 + osl: 1024 + search-space: + # MTP configurations (6 points) + - spec-decoding: "mtp" + conc-list: [6] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [9] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch2_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [30] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen3_tep16_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [77] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen1_dep16_batch4_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen1_dep16_batch4_eplb0_mtp3.yaml" + decode: + num-worker: 1 + 
tp: 16 + ep: 16 + dp-attn: true + # commenting out cuz it persistently causes problems + # https://github.com/InferenceMAX/InferenceMAX/actions/runs/21769314582/job/62813105509 + # - spec-decoding: "mtp" + # conc-list: [78] + # prefill: + # num-worker: 1 + # tp: 16 + # ep: 16 + # dp-attn: true + # additional-settings: + # # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml + # - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx1_gen2_tep16_batch32_eplb0_mtp3.yaml" + # decode: + # num-worker: 2 + # tp: 16 + # ep: 16 + # dp-attn: false + - spec-decoding: "mtp" + conc-list: [154] + prefill: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/mtp/ctx2_gen1_dep16_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/mtp/ctx2_gen1_dep16_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + # STP configurations (5 points) + - conc-list: [6] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [9] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch2_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [30] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen3_tep16_batch8_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [154] + prefill: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen2_tep16_batch64_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx1_gen2_tep16_batch64_eplb0_mtp0.yaml" + decode: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: false + - conc-list: [308] + prefill: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/h100-fp8/8k1k/stp/ctx2_gen1_dep16_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/h100-fp8/8k1k/stp/ctx2_gen1_dep16_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true gptoss-fp4-b200-trt: image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2.post2 @@ -3633,25 +3706,26 @@ gptoss-fp4-b200-trt: precision: fp4 framework: trt multinode: false - seq-len-configs: - # Low ==> high TP from Left to Right of pareto - - isl: 1024 - osl: 1024 - search-space: - - { tp: 1, conc-start: 256, conc-end: 256 } - - { tp: 2, ep: 2, dp-attn: true, conc-start: 256, conc-end: 256 } - - { tp: 2, conc-start: 4, conc-end: 256 } - - { tp: 4, ep: 4, dp-attn: true, conc-start: 64, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 4 } - - { tp: 8, conc-start: 4, conc-end: 4 } - # Low ==> high TP from Left to Right of pareto - - isl: 8192 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 256} - - { tp: 2, conc-start: 4, conc-end: 256} - - { tp: 4, conc-start: 4, conc-end: 32} - - { tp: 8, conc-start: 4, conc-end: 4} + scenarios: + fixed-seq-len: + # Low ==> high TP from Left to Right of 
pareto + - isl: 1024 + osl: 1024 + search-space: + - { tp: 1, conc-start: 256, conc-end: 256 } + - { tp: 2, ep: 2, dp-attn: true, conc-start: 256, conc-end: 256 } + - { tp: 2, conc-start: 4, conc-end: 256 } + - { tp: 4, ep: 4, dp-attn: true, conc-start: 64, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 4 } + - { tp: 8, conc-start: 4, conc-end: 4 } + # Low ==> high TP from Left to Right of pareto + - isl: 8192 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 256} + - { tp: 2, conc-start: 4, conc-end: 256} + - { tp: 4, conc-start: 4, conc-end: 32} + - { tp: 8, conc-start: 4, conc-end: 4} gptoss-fp4-b200-vllm: image: vllm/vllm-openai:v0.15.1 @@ -3661,21 +3735,22 @@ gptoss-fp4-b200-vllm: precision: fp4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 128 } - - { tp: 2, conc-start: 4, conc-end: 128 } - - { tp: 4, conc-start: 4, conc-end: 64 } - - { tp: 8, conc-start: 4, conc-end: 8 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 128 } - - { tp: 2, conc-start: 4, conc-end: 128 } - - { tp: 4, conc-start: 4, conc-end: 64 } - - { tp: 8, conc-start: 4, conc-end: 4 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 128 } + - { tp: 2, conc-start: 4, conc-end: 128 } + - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 8 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 128 } + - { tp: 2, conc-start: 4, conc-end: 128 } + - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 4 } minimaxm2.5-fp8-b200-vllm: image: vllm/vllm-openai:v0.19.0-cu130 @@ -3685,22 +3760,23 @@ minimaxm2.5-fp8-b200-vllm: precision: fp8 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 2, ep: 2, conc-start: 512, conc-end: 512 } - - { tp: 4, conc-start: 4, conc-end: 128 } - - 
{ tp: 4, ep: 4, conc-start: 256, conc-end: 512 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 2, conc-start: 4, conc-end: 512 } - - { tp: 4, conc-start: 4, conc-end: 512 } - -# NOTE: At the time of submission, https://docs.vllm.ai/projects/recipes/en/latest/MiniMax/MiniMax-M2.html -# does not have a B300-specific recipe, so this config reuses the existing -# MiniMax-M2.5 FP8 B200 vLLM recipe as-is until B300-specific tuning is available. + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 2, ep: 2, conc-start: 512, conc-end: 512 } + - { tp: 4, conc-start: 4, conc-end: 128 } + - { tp: 4, ep: 4, conc-start: 256, conc-end: 512 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 512 } + - { tp: 4, conc-start: 4, conc-end: 512 } + + # NOTE: At the time of submission, https://docs.vllm.ai/projects/recipes/en/latest/MiniMax/MiniMax-M2.html + # does not have a B300-specific recipe, so this config reuses the existing + # MiniMax-M2.5 FP8 B200 vLLM recipe as-is until B300-specific tuning is available. 
minimaxm2.5-fp8-b300-vllm: image: vllm/vllm-openai:v0.19.0-cu130 model: MiniMaxAI/MiniMax-M2.5 @@ -3709,20 +3785,21 @@ minimaxm2.5-fp8-b300-vllm: precision: fp8 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 4, conc-start: 4, conc-end: 128 } - - { tp: 4, ep: 4, conc-start: 256, conc-end: 512 } - - { tp: 2, ep: 2, conc-start: 512, conc-end: 1024 } - - { tp: 2, ep: 2, dp-attn: true, conc-start: 1024, conc-end: 1024 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 16 } - - { tp: 2, conc-start: 64, conc-end: 256 } - - { tp: 4, conc-start: 4, conc-end: 8 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 4, conc-start: 4, conc-end: 128 } + - { tp: 4, ep: 4, conc-start: 256, conc-end: 512 } + - { tp: 2, ep: 2, conc-start: 512, conc-end: 1024 } + - { tp: 2, ep: 2, dp-attn: true, conc-start: 1024, conc-end: 1024 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 16 } + - { tp: 2, conc-start: 64, conc-end: 256 } + - { tp: 4, conc-start: 4, conc-end: 8 } minimaxm2.5-fp4-b200-vllm: image: vllm/vllm-openai:v0.19.0-cu130 @@ -3732,29 +3809,30 @@ minimaxm2.5-fp4-b200-vllm: precision: fp4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 16 } - - { tp: 2, conc-start: 16, conc-end: 16 } - - { tp: 2, ep: 2, conc-start: 128, conc-end: 128 } - - { tp: 2, ep: 2, dp-attn: true, conc-start: 256, conc-end: 1024 } - - { tp: 4, conc-start: 4, conc-end: 16 } - - { tp: 4, ep: 4, conc-start: 64, conc-end: 128 } - - { tp: 8, conc-start: 4, conc-end: 8 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 32 } - - { tp: 1, conc-start: 256, conc-end: 256 } - - { tp: 2, ep: 2, conc-start: 128, conc-end: 512 } - - { tp: 4, conc-start: 4, conc-end: 8 } - - { tp: 8, conc-start: 4, conc-end: 4 } - -# NOTE: At the time of submission, 
https://docs.vllm.ai/projects/recipes/en/latest/MiniMax/MiniMax-M2.html -# does not have a B300-specific recipe, so this config reuses the existing -# MiniMax-M2.5 FP4 B200 vLLM recipe as-is until B300-specific tuning is available. + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 16 } + - { tp: 2, conc-start: 16, conc-end: 16 } + - { tp: 2, ep: 2, conc-start: 128, conc-end: 128 } + - { tp: 2, ep: 2, dp-attn: true, conc-start: 256, conc-end: 1024 } + - { tp: 4, conc-start: 4, conc-end: 16 } + - { tp: 4, ep: 4, conc-start: 64, conc-end: 128 } + - { tp: 8, conc-start: 4, conc-end: 8 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 32 } + - { tp: 1, conc-start: 256, conc-end: 256 } + - { tp: 2, ep: 2, conc-start: 128, conc-end: 512 } + - { tp: 4, conc-start: 4, conc-end: 8 } + - { tp: 8, conc-start: 4, conc-end: 4 } + + # NOTE: At the time of submission, https://docs.vllm.ai/projects/recipes/en/latest/MiniMax/MiniMax-M2.html + # does not have a B300-specific recipe, so this config reuses the existing + # MiniMax-M2.5 FP4 B200 vLLM recipe as-is until B300-specific tuning is available. 
minimaxm2.5-fp4-b300-vllm: image: vllm/vllm-openai:v0.19.0-cu130 model: nvidia/MiniMax-M2.5-NVFP4 @@ -3763,46 +3841,47 @@ minimaxm2.5-fp4-b300-vllm: precision: fp4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 8 } - - { tp: 2, ep: 2, conc-start: 128, conc-end: 128 } - - { tp: 2, ep: 2, dp-attn: true, conc-start: 256, conc-end: 2048 } - - { tp: 4, conc-start: 8, conc-end: 8 } - - { tp: 4, ep: 4, conc-start: 64, conc-end: 128 } - - { tp: 8, conc-start: 4, conc-end: 8 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 256 } - - { tp: 2, ep: 2, dp-attn: true, conc-start: 512, conc-end: 512 } - - { tp: 4, conc-start: 4, conc-end: 8 } - - { tp: 8, conc-start: 4, conc-end: 4 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 8 } + - { tp: 2, ep: 2, conc-start: 128, conc-end: 128 } + - { tp: 2, ep: 2, dp-attn: true, conc-start: 256, conc-end: 2048 } + - { tp: 4, conc-start: 8, conc-end: 8 } + - { tp: 4, ep: 4, conc-start: 64, conc-end: 128 } + - { tp: 8, conc-start: 4, conc-end: 8 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 256 } + - { tp: 2, ep: 2, dp-attn: true, conc-start: 512, conc-end: 512 } + - { tp: 4, conc-start: 4, conc-end: 8 } + - { tp: 8, conc-start: 4, conc-end: 4 } gptoss-fp4-h100-vllm: - image: vllm/vllm-openai:v0.18.0 + image: vllm/vllm-openai:v0.19.1 model: openai/gpt-oss-120b model-prefix: gptoss runner: h100 precision: fp4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } - - { tp: 8, conc-start: 4, conc-end: 16 } - + scenarios: + fixed-seq-len: + 
- isl: 1024 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 16 } minimaxm2.5-fp8-h100-vllm: image: vllm/vllm-openai:v0.18.0 model: MiniMaxAI/MiniMax-M2.5 @@ -3811,17 +3890,18 @@ minimaxm2.5-fp8-h100-vllm: precision: fp8 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 } - - { tp: 4, ep: 4, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - # - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 } - - { tp: 4, ep: 4, conc-start: 4, conc-end: 64 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 } + - { tp: 4, ep: 4, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + # - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 } + - { tp: 4, ep: 4, conc-start: 4, conc-end: 64 } dsr1-fp8-h100-dynamo-sglang: image: lmsysorg/sglang:v0.5.8-cu130 @@ -3832,129 +3912,130 @@ dsr1-fp8-h100-dynamo-sglang: framework: dynamo-sglang multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # # STP: Max throughput TEP (1 prefill, 2 decode) - # - conc-list: [1, 2, 4, 8, 16, 32, 64, 128] - # prefill: - # num-worker: 1 - # tp: 16 - # ep: 1 - # dp-attn: false - # additional-settings: - # - "CONFIG_FILE=recipes/h100/1k1k/stp/h100-fp8-1p2d-max-tp.yaml" - # decode: - # num-worker: 2 - # tp: 16 - # ep: 1 - # dp-attn: false - # # STP: Max throughput DEP (1 prefill, 1 decode, dp-attention) - # - conc-list: [1, 2, 4, 8, 16, 32, 64] - # prefill: - # num-worker: 1 - # tp: 16 - # ep: 1 - # dp-attn: false - # additional-settings: - # - "CONFIG_FILE=recipes/h100/1k1k/stp/h100-fp8-1p1d-max-dep.yaml" - # 
decode: - # num-worker: 1 - # tp: 16 - # ep: 16 - # dp-attn: true - # MTP: Max throughput TEP (1 prefill, 2 decode) - - spec-decoding: "mtp" - conc-list: [1, 2, 4, 8, 16, 32, 64, 128] - prefill: - num-worker: 1 - tp: 16 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h100/1k1k/mtp/h100-fp8-1p2d-max-tp-mtp.yaml" - decode: - num-worker: 2 - tp: 16 - ep: 1 - dp-attn: false - # MTP: Max throughput DEP (1 prefill, 1 decode, dp-attention) - - spec-decoding: "mtp" - conc-list: [1, 2, 4, 8, 16, 32, 64] - prefill: - num-worker: 1 - tp: 16 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h100/1k1k/mtp/h100-fp8-1p1d-max-dep-mtp.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - isl: 8192 - osl: 1024 - search-space: - # # STP: Max throughput TEP (1 prefill, 1 decode) - # - conc-list: [1, 2, 4, 8, 16, 32, 64, 128] - # prefill: - # num-worker: 1 - # tp: 16 - # ep: 1 - # dp-attn: false - # additional-settings: - # - "CONFIG_FILE=recipes/h100/8k1k/stp/h100-fp8-1p1d-max-tp.yaml" - # decode: - # num-worker: 1 - # tp: 16 - # ep: 1 - # dp-attn: false - # # STP: Max throughput DEP (1 prefill, 1 decode, dp-attention) - # - conc-list: [1, 2, 4, 8, 16, 32, 64] - # prefill: - # num-worker: 1 - # tp: 16 - # ep: 1 - # dp-attn: false - # additional-settings: - # - "CONFIG_FILE=recipes/h100/8k1k/stp/h100-fp8-1p1d-max-dep.yaml" - # decode: - # num-worker: 1 - # tp: 16 - # ep: 16 - # dp-attn: true - # MTP: Max throughput TEP (1 prefill, 1 decode) - - spec-decoding: "mtp" - conc-list: [1, 2, 4, 8, 16, 32, 64, 128] - prefill: - num-worker: 1 - tp: 16 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h100/8k1k/mtp/h100-fp8-1p1d-max-tp-mtp.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 1 - dp-attn: false - # MTP: Max throughput DEP (1 prefill, 1 decode, dp-attention) - - spec-decoding: "mtp" - conc-list: [1, 2, 4, 8, 16, 32, 64] - prefill: - num-worker: 1 - tp: 16 - ep: 1 - dp-attn: false - 
additional-settings: - - "CONFIG_FILE=recipes/h100/8k1k/mtp/h100-fp8-1p1d-max-dep-mtp.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # # STP: Max throughput TEP (1 prefill, 2 decode) + # - conc-list: [1, 2, 4, 8, 16, 32, 64, 128] + # prefill: + # num-worker: 1 + # tp: 16 + # ep: 1 + # dp-attn: false + # additional-settings: + # - "CONFIG_FILE=recipes/h100/1k1k/stp/h100-fp8-1p2d-max-tp.yaml" + # decode: + # num-worker: 2 + # tp: 16 + # ep: 1 + # dp-attn: false + # # STP: Max throughput DEP (1 prefill, 1 decode, dp-attention) + # - conc-list: [1, 2, 4, 8, 16, 32, 64] + # prefill: + # num-worker: 1 + # tp: 16 + # ep: 1 + # dp-attn: false + # additional-settings: + # - "CONFIG_FILE=recipes/h100/1k1k/stp/h100-fp8-1p1d-max-dep.yaml" + # decode: + # num-worker: 1 + # tp: 16 + # ep: 16 + # dp-attn: true + # MTP: Max throughput TEP (1 prefill, 2 decode) + - spec-decoding: "mtp" + conc-list: [1, 2, 4, 8, 16, 32, 64, 128] + prefill: + num-worker: 1 + tp: 16 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h100/1k1k/mtp/h100-fp8-1p2d-max-tp-mtp.yaml" + decode: + num-worker: 2 + tp: 16 + ep: 1 + dp-attn: false + # MTP: Max throughput DEP (1 prefill, 1 decode, dp-attention) + - spec-decoding: "mtp" + conc-list: [1, 2, 4, 8, 16, 32, 64] + prefill: + num-worker: 1 + tp: 16 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h100/1k1k/mtp/h100-fp8-1p1d-max-dep-mtp.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - isl: 8192 + osl: 1024 + search-space: + # # STP: Max throughput TEP (1 prefill, 1 decode) + # - conc-list: [1, 2, 4, 8, 16, 32, 64, 128] + # prefill: + # num-worker: 1 + # tp: 16 + # ep: 1 + # dp-attn: false + # additional-settings: + # - "CONFIG_FILE=recipes/h100/8k1k/stp/h100-fp8-1p1d-max-tp.yaml" + # decode: + # num-worker: 1 + # tp: 16 + # ep: 1 + # dp-attn: false + # # STP: Max throughput DEP (1 prefill, 1 
decode, dp-attention) + # - conc-list: [1, 2, 4, 8, 16, 32, 64] + # prefill: + # num-worker: 1 + # tp: 16 + # ep: 1 + # dp-attn: false + # additional-settings: + # - "CONFIG_FILE=recipes/h100/8k1k/stp/h100-fp8-1p1d-max-dep.yaml" + # decode: + # num-worker: 1 + # tp: 16 + # ep: 16 + # dp-attn: true + # MTP: Max throughput TEP (1 prefill, 1 decode) + - spec-decoding: "mtp" + conc-list: [1, 2, 4, 8, 16, 32, 64, 128] + prefill: + num-worker: 1 + tp: 16 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h100/8k1k/mtp/h100-fp8-1p1d-max-tp-mtp.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 1 + dp-attn: false + # MTP: Max throughput DEP (1 prefill, 1 decode, dp-attention) + - spec-decoding: "mtp" + conc-list: [1, 2, 4, 8, 16, 32, 64] + prefill: + num-worker: 1 + tp: 16 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h100/8k1k/mtp/h100-fp8-1p1d-max-dep-mtp.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true gptoss-fp4-h200-trt: image: nvcr.io#nvidia/tensorrt-llm/release:1.3.0rc11 @@ -3965,46 +4046,47 @@ gptoss-fp4-h200-trt: framework: trt multinode: false # For all sequence lengths, EP=TP, DP_ATTENTION=false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 1, ep: 1, dp-attn: false, conc-start: 4, conc-end: 64 } - - { tp: 2, ep: 2, dp-attn: false, conc-start: 4, conc-end: 64 } - - { tp: 4, ep: 4, dp-attn: false, conc-start: 4, conc-end: 32 } - - { tp: 8, ep: 8, dp-attn: false, conc-start: 4, conc-end: 8 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 1, ep: 1, dp-attn: false, conc-start: 4, conc-end: 64 } - - { tp: 2, ep: 2, dp-attn: false, conc-start: 4, conc-end: 64 } - - { tp: 4, ep: 4, dp-attn: false, conc-start: 4, conc-end: 64 } - - { tp: 8, ep: 8, dp-attn: false, conc-start: 4, conc-end: 8 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 1, ep: 1, dp-attn: false, conc-start: 4, conc-end: 64 } + - { tp: 2, ep: 2, dp-attn: false, conc-start: 4, 
conc-end: 64 } + - { tp: 4, ep: 4, dp-attn: false, conc-start: 4, conc-end: 32 } + - { tp: 8, ep: 8, dp-attn: false, conc-start: 4, conc-end: 8 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 1, ep: 1, dp-attn: false, conc-start: 4, conc-end: 64 } + - { tp: 2, ep: 2, dp-attn: false, conc-start: 4, conc-end: 64 } + - { tp: 4, ep: 4, dp-attn: false, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 8, dp-attn: false, conc-start: 4, conc-end: 8 } gptoss-fp4-h200-vllm: - image: vllm/vllm-openai:v0.18.0 + image: vllm/vllm-openai:v0.19.1 model: openai/gpt-oss-120b model-prefix: gptoss runner: h200 precision: fp4 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 4 } - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } - - { tp: 8, conc-start: 4, conc-end: 64 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 1, conc-start: 4, conc-end: 64 } - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } - - { tp: 8, conc-start: 4, conc-end: 32 } - + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 4 } + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 64 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 1, conc-start: 4, conc-end: 64 } + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 32 } minimaxm2.5-fp8-h200-vllm: image: vllm/vllm-openai:v0.18.0 model: MiniMaxAI/MiniMax-M2.5 @@ -4013,15 +4095,16 @@ minimaxm2.5-fp8-h200-vllm: precision: fp8 framework: vllm multinode: false - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 128 } - - isl: 8192 - osl: 1024 - search-space: - - { tp: 8, conc-start: 4, conc-end: 128 } + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + 
search-space: + - { tp: 8, conc-start: 4, conc-end: 128 } + - isl: 8192 + osl: 1024 + search-space: + - { tp: 8, conc-start: 4, conc-end: 128 } dsr1-fp4-gb200-dynamo-trt: image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 @@ -4032,354 +4115,354 @@ dsr1-fp4-gb200-dynamo-trt: framework: dynamo-trt multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # MTP configurations (spec_decoding="mtp") - - spec-decoding: "mtp" - conc-list: [ 180 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [ 4, 8, 12, 24, 48 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [ 4301 ] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx2_gen1_dep16_batch256_eplb256_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx2_gen1_dep16_batch256_eplb256_mtp1.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [ 2253 ] - prefill: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen1_dep32_batch64_eplb288_mtp1.yaml - - 
"CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen1_dep32_batch64_eplb288_mtp1.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [ 16130 ] - prefill: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch768_eplb0_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch768_eplb0_mtp1.yaml" - decode: - num-worker: 5 - tp: 4 - ep: 4 - dp-attn: true - - - # Non-MTP configurations (default spec_decoding="none") - - conc-list: [ 4301 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [ 666 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep32_batch16_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep32_batch16_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [ 6144 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen2_dep4_batch768_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen2_dep4_batch768_eplb0_mtp0.yaml" - decode: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - - conc-list: [ 12, 24, 48, 96, 192 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [ 5 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [ 4301 ] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep16_batch256_eplb256_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep16_batch256_eplb256_mtp0.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [ 2253 ] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep32_batch64_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep32_batch64_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - - isl: 8192 - osl: 1024 - search-space: - # MTP configurations (spec_decoding="mtp") - - spec-decoding: "mtp" - conc-list: [ 4, 8, 12, 24, 48 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false 
- - spec-decoding: "mtp" - conc-list: [ 180 ] - prefill: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx3_gen1_dep32_batch4_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx3_gen1_dep32_batch4_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [ 1229 ] - prefill: - num-worker: 7 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx7_gen1_dep16_batch64_eplb256_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx7_gen1_dep16_batch64_eplb256_mtp1.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [ 666 ] - prefill: - num-worker: 8 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx8_gen1_dep32_batch16_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx8_gen1_dep32_batch16_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [ 4301 ] - prefill: - num-worker: 11 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx11_gen1_dep16_batch256_eplb256_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx11_gen1_dep16_batch256_eplb256_mtp1.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - # Non-MTP configurations (default spec_decoding="none") - - conc-list: [ 12, 44, 76 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [ 5 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [ 333 ] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [ 1229 ] - prefill: - num-worker: 7 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx7_gen1_dep32_batch32_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx7_gen1_dep32_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [ 2253 ] - prefill: - num-worker: 8 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx8_gen1_dep16_batch128_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx8_gen1_dep16_batch128_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [ 4096 ] - prefill: - num-worker: 10 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx10_gen1_dep16_batch256_eplb256_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx10_gen1_dep16_batch256_eplb256_mtp0.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # MTP configurations (spec_decoding="mtp") + - spec-decoding: "mtp" + conc-list: [ 180 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [ 4, 8, 12, 24, 48 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [ 4301 ] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx2_gen1_dep16_batch256_eplb256_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx2_gen1_dep16_batch256_eplb256_mtp1.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [ 2253 ] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen1_dep32_batch64_eplb288_mtp1.yaml + - 
"CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen1_dep32_batch64_eplb288_mtp1.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [ 16130 ] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch768_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/mtp/ctx3_gen5_dep4_batch768_eplb0_mtp1.yaml" + decode: + num-worker: 5 + tp: 4 + ep: 4 + dp-attn: true + + + # Non-MTP configurations (default spec_decoding="none") + - conc-list: [ 4301 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [ 666 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep32_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen1_dep32_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [ 6144 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen2_dep4_batch768_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen2_dep4_batch768_eplb0_mtp0.yaml" + decode: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + - conc-list: [ 12, 24, 48, 96, 192 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [ 5 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [ 4301 ] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep16_batch256_eplb256_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep16_batch256_eplb256_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [ 2253 ] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep32_batch64_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/1k1k/stp/ctx2_gen1_dep32_batch64_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + + - isl: 8192 + osl: 1024 + search-space: + # MTP configurations (spec_decoding="mtp") + - spec-decoding: "mtp" + conc-list: [ 4, 8, 12, 24, 48 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false 
+ - spec-decoding: "mtp" + conc-list: [ 180 ] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx3_gen1_dep32_batch4_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx3_gen1_dep32_batch4_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [ 1229 ] + prefill: + num-worker: 7 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx7_gen1_dep16_batch64_eplb256_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx7_gen1_dep16_batch64_eplb256_mtp1.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [ 666 ] + prefill: + num-worker: 8 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx8_gen1_dep32_batch16_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx8_gen1_dep32_batch16_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [ 4301 ] + prefill: + num-worker: 11 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/mtp/ctx11_gen1_dep16_batch256_eplb256_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/mtp/ctx11_gen1_dep16_batch256_eplb256_mtp1.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + + # Non-MTP configurations (default spec_decoding="none") + - conc-list: [ 12, 44, 76 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [ 5 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [ 333 ] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [ 1229 ] + prefill: + num-worker: 7 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx7_gen1_dep32_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx7_gen1_dep32_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [ 2253 ] + prefill: + num-worker: 8 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx8_gen1_dep16_batch128_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx8_gen1_dep16_batch128_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [ 4096 ] + prefill: + num-worker: 10 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp4/8k1k/stp/ctx10_gen1_dep16_batch256_eplb256_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx10_gen1_dep16_batch256_eplb256_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true dsr1-fp8-gb200-dynamo-trt: image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 @@ -4390,423 +4473,424 @@ dsr1-fp8-gb200-dynamo-trt: framework: dynamo-trt multinode: true disagg: true - seq-len-configs: - # 1k1k MTP configs - - isl: 1024 - osl: 1024 - search-space: - - spec-decoding: "mtp" - conc-list: [4301] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch512_eplb0_mtp1_4301.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch512_eplb0_mtp1_4301.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [2151] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch256_eplb0_mtp1_2151.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch256_eplb0_mtp1_2151.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [1229] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1_1229.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1_1229.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [615] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_615.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_615.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [36] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3_36.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3_36.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [18] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_18.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_18.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [9] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_9.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_9.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - # 1k1k STP configs - - conc-list: [6144] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch768_eplb0_mtp0_6144.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch768_eplb0_mtp0_6144.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [4301] 
- prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4301.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4301.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [2151] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep16_batch128_eplb0_mtp0_2151.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep16_batch128_eplb0_mtp0_2151.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [1127] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0_1127.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0_1127.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [256] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch8_eplb0_mtp0_256.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch8_eplb0_mtp0_256.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [27] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch8_eplb0_mtp0_27.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch8_eplb0_mtp0_27.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - 
- conc-list: [3] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_3.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_3.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - # 8k1k MTP configs - - isl: 8192 - osl: 1024 - search-space: - - spec-decoding: "mtp" - conc-list: [666] - prefill: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3_666.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3_666.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [666] - prefill: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx5_gen1_dep16_batch32_eplb0_mtp3_666.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx5_gen1_dep16_batch32_eplb0_mtp3_666.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [333] - prefill: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep16_batch16_eplb0_mtp3_333.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep16_batch16_eplb0_mtp3_333.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [333] - prefill: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx4_gen1_dep32_batch8_eplb0_mtp3_333.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx4_gen1_dep32_batch8_eplb0_mtp3_333.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [90] - prefill: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx2_gen1_dep32_batch2_eplb0_mtp3_90.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx2_gen1_dep32_batch2_eplb0_mtp3_90.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [15] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_15.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_15.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [6] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_6.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_6.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - # 8k1k STP configs - - conc-list: [1229] - prefill: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx5_gen1_dep16_batch64_eplb0_mtp0_1229.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx5_gen1_dep16_batch64_eplb0_mtp0_1229.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: 
[666] - prefill: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx4_gen1_dep32_batch16_eplb0_mtp0_666.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx4_gen1_dep32_batch16_eplb0_mtp0_666.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [615] - prefill: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx3_gen1_dep16_batch32_eplb0_mtp0_615.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx3_gen1_dep16_batch32_eplb0_mtp0_615.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [333] - prefill: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0_333.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0_333.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [63] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [18] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch4_eplb0_mtp0_18.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch4_eplb0_mtp0_18.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - 
conc-list: [6] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_6.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_6.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false + scenarios: + fixed-seq-len: + # 1k1k MTP configs + - isl: 1024 + osl: 1024 + search-space: + - spec-decoding: "mtp" + conc-list: [4301] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch512_eplb0_mtp1_4301.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch512_eplb0_mtp1_4301.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [2151] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch256_eplb0_mtp1_2151.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep8_batch256_eplb0_mtp1_2151.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1229] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1_1229.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep16_batch64_eplb0_mtp1_1229.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [615] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_615.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_615.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [36] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3_36.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3_36.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [18] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_18.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_18.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [9] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_9.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_9.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + # 1k1k STP configs + - conc-list: [6144] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch768_eplb0_mtp0_6144.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch768_eplb0_mtp0_6144.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [4301] 
+ prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4301.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep8_batch512_eplb0_mtp0_4301.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [2151] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep16_batch128_eplb0_mtp0_2151.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep16_batch128_eplb0_mtp0_2151.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [1127] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0_1127.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0_1127.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [256] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch8_eplb0_mtp0_256.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen1_dep32_batch8_eplb0_mtp0_256.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [27] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch8_eplb0_mtp0_27.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch8_eplb0_mtp0_27.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + 
- conc-list: [3] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_3.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/1k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_3.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + # 8k1k MTP configs + - isl: 8192 + osl: 1024 + search-space: + - spec-decoding: "mtp" + conc-list: [666] + prefill: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3_666.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep8_batch64_eplb0_mtp3_666.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [666] + prefill: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx5_gen1_dep16_batch32_eplb0_mtp3_666.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx5_gen1_dep16_batch32_eplb0_mtp3_666.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [333] + prefill: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep16_batch16_eplb0_mtp3_333.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx3_gen1_dep16_batch16_eplb0_mtp3_333.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [333] + prefill: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx4_gen1_dep32_batch8_eplb0_mtp3_333.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx4_gen1_dep32_batch8_eplb0_mtp3_333.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [90] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx2_gen1_dep32_batch2_eplb0_mtp3_90.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx2_gen1_dep32_batch2_eplb0_mtp3_90.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [15] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_15.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch4_eplb0_mtp3_15.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [6] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_6.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/mtp/ctx1_gen3_tep8_batch2_eplb0_mtp3_6.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + # 8k1k STP configs + - conc-list: [1229] + prefill: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx5_gen1_dep16_batch64_eplb0_mtp0_1229.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx5_gen1_dep16_batch64_eplb0_mtp0_1229.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: 
[666] + prefill: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx4_gen1_dep32_batch16_eplb0_mtp0_666.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx4_gen1_dep32_batch16_eplb0_mtp0_666.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [615] + prefill: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx3_gen1_dep16_batch32_eplb0_mtp0_615.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx3_gen1_dep16_batch32_eplb0_mtp0_615.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [333] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0_333.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx2_gen1_dep32_batch8_eplb0_mtp0_333.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [63] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [18] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch4_eplb0_mtp0_18.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch4_eplb0_mtp0_18.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - 
conc-list: [6] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_6.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0_6.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false dsr1-fp8-gb200-dynamo-sglang: @@ -4818,124 +4902,125 @@ dsr1-fp8-gb200-dynamo-sglang: framework: dynamo-sglang multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # "Low latency" (1 prefill worker at TP4 and 1 decode worker at TP4) - - conc-list: [4, 8] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/1k1k/low-latency.yaml - - "CONFIG_FILE=recipes/gb200-fp8/1k1k/low-latency.yaml" - decode: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - - # "Mid curve" (3 prefill workers at DEP8 and 1 decode worker at DEP48) - - conc-list: [1024, 2048, 4096] - prefill: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/1k1k/mid-curve.yaml - - "CONFIG_FILE=recipes/gb200-fp8/1k1k/mid-curve.yaml" - decode: - num-worker: 1 - tp: 48 - ep: 48 - dp-attn: true - - # "Max throughput" (2 prefill workers at DEP8 and 1 decode worker at DEP32) - - conc-list: [1024, 2048, 4096, 6144] - prefill: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/1k1k/max-tpt.yaml - - "CONFIG_FILE=recipes/gb200-fp8/1k1k/max-tpt.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - # "Ultra throughput" (1 prefill workers at DEP8 and 1 decode worker at DEP8) - - conc-list: [4096] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - 
additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/1k1k/ultra-tpt.yaml - - "CONFIG_FILE=recipes/gb200-fp8/1k1k/ultra-tpt.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - - isl: 8192 - osl: 1024 - search-space: - # "Low latency" (1 prefill worker at TP8 and 1 decode worker at TP8) - - conc-list: [4, 8, 16] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/8k1k/low-latency.yaml - - "CONFIG_FILE=recipes/gb200-fp8/8k1k/low-latency.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - - # "Mid curve" (5 prefill workers at DEP8 and 1 decode worker at DEP32) - - conc-list: [512, 1024, 2048, 6144] - prefill: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/8k1k/mid-curve.yaml - - "CONFIG_FILE=recipes/gb200-fp8/8k1k/mid-curve.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - # "Max throughput" (6 prefill workers at DEP8 and 1 decode worker at DEP24) - - conc-list: [2048, 4096, 6144] - prefill: - num-worker: 6 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/8k1k/max_tpt.yaml - - "CONFIG_FILE=recipes/gb200-fp8/8k1k/max_tpt.yaml" - decode: - num-worker: 1 - tp: 24 - ep: 24 - dp-attn: true + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # "Low latency" (1 prefill worker at TP4 and 1 decode worker at TP4) + - conc-list: [4, 8] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/1k1k/low-latency.yaml + - "CONFIG_FILE=recipes/gb200-fp8/1k1k/low-latency.yaml" + decode: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + + # 
"Mid curve" (3 prefill workers at DEP8 and 1 decode worker at DEP48) + - conc-list: [1024, 2048, 4096] + prefill: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/1k1k/mid-curve.yaml + - "CONFIG_FILE=recipes/gb200-fp8/1k1k/mid-curve.yaml" + decode: + num-worker: 1 + tp: 48 + ep: 48 + dp-attn: true + + # "Max throughput" (2 prefill workers at DEP8 and 1 decode worker at DEP32) + - conc-list: [1024, 2048, 4096, 6144] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/1k1k/max-tpt.yaml + - "CONFIG_FILE=recipes/gb200-fp8/1k1k/max-tpt.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + + # "Ultra throughput" (1 prefill workers at DEP8 and 1 decode worker at DEP8) + - conc-list: [4096] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/1k1k/ultra-tpt.yaml + - "CONFIG_FILE=recipes/gb200-fp8/1k1k/ultra-tpt.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + + - isl: 8192 + osl: 1024 + search-space: + # "Low latency" (1 prefill worker at TP8 and 1 decode worker at TP8) + - conc-list: [4, 8, 16] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/8k1k/low-latency.yaml + - "CONFIG_FILE=recipes/gb200-fp8/8k1k/low-latency.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + + # "Mid curve" (5 prefill workers at DEP8 and 1 decode worker at DEP32) + - conc-list: [512, 1024, 2048, 6144] + prefill: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/8k1k/mid-curve.yaml + - 
"CONFIG_FILE=recipes/gb200-fp8/8k1k/mid-curve.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + + # "Max throughput" (6 prefill workers at DEP8 and 1 decode worker at DEP24) + - conc-list: [2048, 4096, 6144] + prefill: + num-worker: 6 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb200-fp8/8k1k/max_tpt.yaml + - "CONFIG_FILE=recipes/gb200-fp8/8k1k/max_tpt.yaml" + decode: + num-worker: 1 + tp: 24 + ep: 24 + dp-attn: true dsr1-fp8-gb300-dynamo-sglang: image: lmsysorg/sglang:v0.5.8.post1-cu130 @@ -4946,108 +5031,109 @@ dsr1-fp8-gb300-dynamo-sglang: framework: dynamo-sglang multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # "Low latency" (1 prefill worker at TP4 and 4 decode workers at TP4) - - conc-list: [4, 8, 16, 32] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/1k1k/stp/low-latency.yaml - - "CONFIG_FILE=recipes/gb300-fp8/1k1k/stp/low-latency.yaml" - decode: - num-worker: 4 - tp: 4 - ep: 1 - dp-attn: false - - # "Mid curve" (2 prefill workers at DEP8 and 1 decode worker at DEP32) - - conc-list: [1024, 2048, 4096, 6144] - prefill: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/1k1k/stp/mid.yaml - - "CONFIG_FILE=recipes/gb300-fp8/1k1k/stp/mid.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - # "Max throughput" (1 prefill worker at DEP8 and 1 decode worker at DEP8) - - conc-list: [4096, 7168, 7680] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/1k1k/stp/max.yaml - - "CONFIG_FILE=recipes/gb300-fp8/1k1k/stp/max.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - 
dp-attn: true - - - isl: 8192 - osl: 1024 - search-space: - # "Low latency" (1 prefill worker at TP4 and 1 decode worker at TP4) - - conc-list: [4, 8] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/8k1k/stp/low-latency.yaml - - "CONFIG_FILE=recipes/gb300-fp8/8k1k/stp/low-latency.yaml" - decode: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - - # "Mid curve" (5 prefill workers at DEP8 and 1 decode worker at DEP32) - - conc-list: [128, 256, 512, 1024] - prefill: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/8k1k/stp/mid.yaml - - "CONFIG_FILE=recipes/gb300-fp8/8k1k/stp/mid.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - # "Max throughput" (6 prefill workers at DEP8 and 1 decode worker at DEP24) - - conc-list: [2048, 4096] - prefill: - num-worker: 6 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/8k1k/stp/max.yaml - - "CONFIG_FILE=recipes/gb300-fp8/8k1k/stp/max.yaml" - decode: - num-worker: 1 - tp: 24 - ep: 24 - dp-attn: true + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # "Low latency" (1 prefill worker at TP4 and 4 decode workers at TP4) + - conc-list: [4, 8, 16, 32] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/1k1k/stp/low-latency.yaml + - "CONFIG_FILE=recipes/gb300-fp8/1k1k/stp/low-latency.yaml" + decode: + num-worker: 4 + tp: 4 + ep: 1 + dp-attn: false + + # "Mid curve" (2 prefill workers at DEP8 and 1 decode worker at DEP32) + - conc-list: [1024, 2048, 4096, 6144] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/1k1k/stp/mid.yaml + - "CONFIG_FILE=recipes/gb300-fp8/1k1k/stp/mid.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + + # "Max throughput" (1 prefill worker at DEP8 and 1 decode worker at DEP8) + - conc-list: [4096, 7168, 7680] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/1k1k/stp/max.yaml + - "CONFIG_FILE=recipes/gb300-fp8/1k1k/stp/max.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + + - isl: 8192 + osl: 1024 + search-space: + # "Low latency" (1 prefill worker at TP4 and 1 decode worker at TP4) + - conc-list: [4, 8] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/8k1k/stp/low-latency.yaml + - "CONFIG_FILE=recipes/gb300-fp8/8k1k/stp/low-latency.yaml" + decode: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + + # "Mid curve" (5 prefill workers at DEP8 and 1 decode worker at DEP32) + - conc-list: [128, 256, 512, 1024] + prefill: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/8k1k/stp/mid.yaml + - "CONFIG_FILE=recipes/gb300-fp8/8k1k/stp/mid.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + + # "Max throughput" (6 prefill workers at DEP8 and 1 decode worker at DEP24) + - conc-list: [2048, 4096] + prefill: + num-worker: 6 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/gb300-fp8/8k1k/stp/max.yaml + - "CONFIG_FILE=recipes/gb300-fp8/8k1k/stp/max.yaml" + decode: + num-worker: 1 + tp: 24 + ep: 24 + dp-attn: true dsr1-fp4-gb200-dynamo-sglang: image: "lmsysorg/sglang:v0.5.8-cu130" @@ -5058,110 +5144,111 @@ 
dsr1-fp4-gb200-dynamo-sglang: framework: dynamo-sglang multinode: true disagg: true - seq-len-configs: - # 1k1k configurations - - isl: 1024 - osl: 1024 - search-space: - # Low latency (1 prefill node, 2 decode nodes) - - spec-decoding: "none" - conc-list: [ 4, 8, 32 ] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/gb200-fp4/1k1k/low-latency.yaml" - decode: - num-worker: 2 - tp: 4 - ep: 1 - dp-attn: false - - # Mid curve (4 prefill nodes, 8 decode nodes) - - spec-decoding: "none" - conc-list: [ 512, 2048, 4096, 8192 ] - prefill: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/gb200-fp4/1k1k/mid-curve.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - # Max throughput (4 prefill nodes, 12 decode nodes) - - spec-decoding: "none" - conc-list: [ 2048, 4096 ] - prefill: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/gb200-fp4/1k1k/max-tpt.yaml" - decode: - num-worker: 1 - tp: 48 - ep: 48 - dp-attn: true - - # 8k1k configurations - - isl: 8192 - osl: 1024 - search-space: - # Low latency (1 prefill node, 4 decode nodes) - - spec-decoding: "none" - conc-list: [ 4, 8 ] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/gb200-fp4/8k1k/low-latency.yaml" - decode: - num-worker: 4 - tp: 4 - ep: 1 - dp-attn: false - - # Mid curve (6 prefill nodes, 12 decode nodes) - - spec-decoding: "none" - conc-list: [ 512, 2048, 4096 ] - prefill: - num-worker: 6 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/gb200-fp4/8k1k/mid-curve.yaml" - decode: - num-worker: 1 - tp: 48 - ep: 48 - dp-attn: true - - # Max throughput (10 prefill nodes, 8 decode nodes) - - spec-decoding: "none" - conc-list: [ 2048 ] - prefill: - num-worker: 10 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - 
"CONFIG_FILE=recipes/gb200-fp4/8k1k/max-tpt.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true + scenarios: + fixed-seq-len: + # 1k1k configurations + - isl: 1024 + osl: 1024 + search-space: + # Low latency (1 prefill node, 2 decode nodes) + - spec-decoding: "none" + conc-list: [ 4, 8, 32 ] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/gb200-fp4/1k1k/low-latency.yaml" + decode: + num-worker: 2 + tp: 4 + ep: 1 + dp-attn: false + + # Mid curve (4 prefill nodes, 8 decode nodes) + - spec-decoding: "none" + conc-list: [ 512, 2048, 4096, 8192 ] + prefill: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/gb200-fp4/1k1k/mid-curve.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + + # Max throughput (4 prefill nodes, 12 decode nodes) + - spec-decoding: "none" + conc-list: [ 2048, 4096 ] + prefill: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/gb200-fp4/1k1k/max-tpt.yaml" + decode: + num-worker: 1 + tp: 48 + ep: 48 + dp-attn: true + + # 8k1k configurations + - isl: 8192 + osl: 1024 + search-space: + # Low latency (1 prefill node, 4 decode nodes) + - spec-decoding: "none" + conc-list: [ 4, 8 ] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/gb200-fp4/8k1k/low-latency.yaml" + decode: + num-worker: 4 + tp: 4 + ep: 1 + dp-attn: false + + # Mid curve (6 prefill nodes, 12 decode nodes) + - spec-decoding: "none" + conc-list: [ 512, 2048, 4096 ] + prefill: + num-worker: 6 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/gb200-fp4/8k1k/mid-curve.yaml" + decode: + num-worker: 1 + tp: 48 + ep: 48 + dp-attn: true + + # Max throughput (10 prefill nodes, 8 decode nodes) + - spec-decoding: "none" + conc-list: [ 2048 ] + prefill: + num-worker: 10 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - 
"CONFIG_FILE=recipes/gb200-fp4/8k1k/max-tpt.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true dsr1-fp4-gb300-dynamo-trt: image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 @@ -5172,424 +5259,424 @@ dsr1-fp4-gb300-dynamo-trt: framework: dynamo-trt multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # MTP configurations - - spec-decoding: "mtp" - conc-list: [3226] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep4_batch768_eplb0_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep4_batch768_eplb0_mtp1.yaml" - decode: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [333] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep32_batch8_eplb0_mtp.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep32_batch8_eplb0_mtp.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [5] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [8, 12, 24, 48] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml" 
- decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [2253] - prefill: - num-worker: 3 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep16_batch128_eplb256_mtp1.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep16_batch128_eplb256_mtp1.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [1229] - prefill: - num-worker: 3 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep32_batch32_eplb288_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep32_batch32_eplb288_mtp3.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - # Non-MTP configurations (default spec_decoding="none") - - conc-list: [5] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [12, 48, 96, 192] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [8192] - prefill: - num-worker: 2 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep8_batch1024_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep8_batch1024_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [1229] - prefill: - num-worker: 2 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [4301] - prefill: - num-worker: 3 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep16_batch256_eplb256_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep16_batch256_eplb256_mtp0.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [2253] - prefill: - num-worker: 3 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep32_batch64_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep32_batch64_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - isl: 8192 - osl: 1024 - search-space: - # MTP configurations (spec_decoding="mtp") - - spec-decoding: "mtp" - conc-list: [33] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - 
spec-decoding: "mtp" - conc-list: [5] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [12, 24] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [180] - prefill: - num-worker: 4 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx4_gen1_dep32_batch4_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx4_gen1_dep32_batch4_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [308] - prefill: - num-worker: 8 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx8_gen1_dep32_batch8_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx8_gen1_dep32_batch8_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [2253] - prefill: - num-worker: 10 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml - - 
"CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [666] - prefill: - num-worker: 10 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep16_batch32_eplb0_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep16_batch32_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [1127] - prefill: - num-worker: 13 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx13_gen1_dep16_batch64_eplb256_mtp3.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx13_gen1_dep16_batch64_eplb256_mtp3.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - # Non-MTP configurations (default spec_decoding="none") - - conc-list: [72] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [5] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [12] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch2_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch2_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [5, 15, 30] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 4 - ep: 4 - dp-attn: false - - conc-list: [666] - prefill: - num-worker: 7 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx7_gen1_dep32_batch16_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx7_gen1_dep32_batch16_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [1229] - prefill: - num-worker: 9 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx9_gen1_dep16_batch64_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx9_gen1_dep16_batch64_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [3228] - prefill: - num-worker: 11 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx11_gen3_dep4_batch256_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx11_gen3_dep4_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: true - - conc-list: [2253] - prefill: - num-worker: 14 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx14_gen1_dep16_batch128_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx14_gen1_dep16_batch128_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # MTP configurations + - spec-decoding: "mtp" + conc-list: [3226] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep4_batch768_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep4_batch768_eplb0_mtp1.yaml" + decode: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [333] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep32_batch8_eplb0_mtp.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep32_batch8_eplb0_mtp.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [5] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [8, 12, 24, 48] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml" 
+ decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [2253] + prefill: + num-worker: 3 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep16_batch128_eplb256_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep16_batch128_eplb256_mtp1.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1229] + prefill: + num-worker: 3 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep32_batch32_eplb288_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep32_batch32_eplb288_mtp3.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + # Non-MTP configurations (default spec_decoding="none") + - conc-list: [5] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [12, 48, 96, 192] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [8192] + prefill: + num-worker: 2 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep8_batch1024_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep8_batch1024_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [1229] + prefill: + num-worker: 2 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [4301] + prefill: + num-worker: 3 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep16_batch256_eplb256_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep16_batch256_eplb256_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [2253] + prefill: + num-worker: 3 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep32_batch64_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep32_batch64_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - isl: 8192 + osl: 1024 + search-space: + # MTP configurations (spec_decoding="mtp") + - spec-decoding: "mtp" + conc-list: [33] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - 
spec-decoding: "mtp" + conc-list: [5] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [12, 24] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [180] + prefill: + num-worker: 4 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx4_gen1_dep32_batch4_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx4_gen1_dep32_batch4_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [308] + prefill: + num-worker: 8 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx8_gen1_dep32_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx8_gen1_dep32_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [2253] + prefill: + num-worker: 10 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml + - 
"CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [666] + prefill: + num-worker: 10 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep16_batch32_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep16_batch32_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1127] + prefill: + num-worker: 13 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx13_gen1_dep16_batch64_eplb256_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx13_gen1_dep16_batch64_eplb256_mtp3.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + # Non-MTP configurations (default spec_decoding="none") + - conc-list: [72] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [5] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [12] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch2_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch2_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [5, 15, 30] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml" + decode: + num-worker: 5 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [666] + prefill: + num-worker: 7 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx7_gen1_dep32_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx7_gen1_dep32_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [1229] + prefill: + num-worker: 9 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx9_gen1_dep16_batch64_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx9_gen1_dep16_batch64_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [3228] + prefill: + num-worker: 11 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx11_gen3_dep4_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx11_gen3_dep4_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + - conc-list: [2253] + prefill: + num-worker: 14 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx14_gen1_dep16_batch128_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx14_gen1_dep16_batch128_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true dsr1-fp4-gb300-dynamo-sglang: image: "lmsysorg/sglang:v0.5.8.post1-cu130-runtime" model: nvidia/DeepSeek-R1-0528-NVFP4-v2 @@ -5599,110 +5686,111 @@ dsr1-fp4-gb300-dynamo-sglang: framework: dynamo-sglang multinode: true disagg: true - seq-len-configs: - # 1k1k configurations - - isl: 1024 - osl: 1024 - search-space: - # Low latency (1 prefill node, 2 decode nodes) - - spec-decoding: "none" - conc-list: [ 4, 8, 32 ] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/gb300-fp4/1k1k/low_latency.yaml" - decode: - num-worker: 2 - tp: 4 - ep: 1 - dp-attn: false - - # Mid curve (4 prefill nodes, 8 decode nodes) - - spec-decoding: "none" - conc-list: [ 512, 2048, 4096, 8192 ] - prefill: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/gb300-fp4/1k1k/mid_curve.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - # Max throughput (4 prefill nodes, 12 decode nodes) - - spec-decoding: "none" - conc-list: [ 512, 2048, 4096, 8192 ] - prefill: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/gb300-fp4/1k1k/max_tpt.yaml" - decode: - num-worker: 1 - tp: 48 - ep: 48 - dp-attn: true - - # 8k1k configurations - - isl: 8192 - osl: 1024 - search-space: - # Low latency (1 prefill node, 4 decode nodes) - - spec-decoding: "none" - conc-list: [ 4, 8, 32, 64 ] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/gb300-fp4/8k1k/low_latency.yaml" - decode: - num-worker: 4 - tp: 4 - ep: 1 - dp-attn: false - - # Mid curve (6 prefill nodes, 12 decode nodes) - - spec-decoding: "none" - 
conc-list: [ 512, 2048, 4096 ] - prefill: - num-worker: 6 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/gb300-fp4/8k1k/mid_curve.yaml" - decode: - num-worker: 1 - tp: 48 - ep: 48 - dp-attn: true - - # Max throughput (10 prefill nodes, 8 decode nodes) - - spec-decoding: "none" - conc-list: [ 2048 ] - prefill: - num-worker: 10 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/gb300-fp4/8k1k/max_tpt.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true + scenarios: + fixed-seq-len: + # 1k1k configurations + - isl: 1024 + osl: 1024 + search-space: + # Low latency (1 prefill node, 2 decode nodes) + - spec-decoding: "none" + conc-list: [ 4, 8, 32 ] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/gb300-fp4/1k1k/low_latency.yaml" + decode: + num-worker: 2 + tp: 4 + ep: 1 + dp-attn: false + + # Mid curve (4 prefill nodes, 8 decode nodes) + - spec-decoding: "none" + conc-list: [ 512, 2048, 4096, 8192 ] + prefill: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/gb300-fp4/1k1k/mid_curve.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + + # Max throughput (4 prefill nodes, 12 decode nodes) + - spec-decoding: "none" + conc-list: [ 512, 2048, 4096, 8192 ] + prefill: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/gb300-fp4/1k1k/max_tpt.yaml" + decode: + num-worker: 1 + tp: 48 + ep: 48 + dp-attn: true + + # 8k1k configurations + - isl: 8192 + osl: 1024 + search-space: + # Low latency (1 prefill node, 4 decode nodes) + - spec-decoding: "none" + conc-list: [ 4, 8, 32, 64 ] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/gb300-fp4/8k1k/low_latency.yaml" + decode: + num-worker: 4 + tp: 4 + ep: 1 + dp-attn: false + + # Mid curve (6 prefill nodes, 12 decode nodes) + - 
spec-decoding: "none" + conc-list: [ 512, 2048, 4096 ] + prefill: + num-worker: 6 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/gb300-fp4/8k1k/mid_curve.yaml" + decode: + num-worker: 1 + tp: 48 + ep: 48 + dp-attn: true + + # Max throughput (10 prefill nodes, 8 decode nodes) + - spec-decoding: "none" + conc-list: [ 2048 ] + prefill: + num-worker: 10 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/gb300-fp4/8k1k/max_tpt.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true dsr1-fp8-gb300-dynamo-trt: image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 @@ -5713,408 +5801,409 @@ dsr1-fp8-gb300-dynamo-trt: framework: dynamo-trt multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # MTP configurations (spec_decoding="mtp") - - spec-decoding: "mtp" - conc-list: [8] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [24] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [180] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3_180.yaml - - 
"CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3_180.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [564] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep32_batch16_eplb0_mtp3_564.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep32_batch16_eplb0_mtp3_564.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [666] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp3_666.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp3_666.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [2253] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep16_batch128_eplb0_mtp1_2253.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep16_batch128_eplb0_mtp1_2253.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [8192] - prefill: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx3_gen2_dep8_batch512_eplb0_mtp1_8192.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx3_gen2_dep8_batch512_eplb0_mtp1_8192.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - # STP configurations (no spec_decoding) - - conc-list: [4] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - 
additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [24] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [84] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0_84.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0_84.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [1229] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0_1229.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0_1229.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [2253] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep16_batch128_eplb0_mtp0_2253.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep16_batch128_eplb0_mtp0_2253.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [8602] - prefill: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: 
true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch512_eplb0_mtp0_8602.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch512_eplb0_mtp0_8602.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [12288] - prefill: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch768_eplb0_mtp0_12288.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch768_eplb0_mtp0_12288.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - isl: 8192 - osl: 1024 - search-space: - # MTP configurations (spec_decoding="mtp") - - spec-decoding: "mtp" - conc-list: [8] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [24] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [333] - prefill: - num-worker: 6 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx6_gen1_dep32_batch8_eplb0_mtp3_333.yaml - - 
"CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx6_gen1_dep32_batch8_eplb0_mtp3_333.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [666] - prefill: - num-worker: 8 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx8_gen1_dep16_batch32_eplb0_mtp3_666.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx8_gen1_dep16_batch32_eplb0_mtp3_666.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [1229] - prefill: - num-worker: 10 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx10_gen1_dep16_batch64_eplb0_mtp1_1229.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx10_gen1_dep16_batch64_eplb0_mtp1_1229.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [1229] - prefill: - num-worker: 7 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx7_gen1_dep8_batch128_eplb0_mtp1_1229.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx7_gen1_dep8_batch128_eplb0_mtp1_1229.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - # STP configurations (no spec_decoding) - - conc-list: [4] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [24] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [36] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [512] - prefill: - num-worker: 6 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx6_gen1_dep32_batch16_eplb0_mtp0_512.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx6_gen1_dep32_batch16_eplb0_mtp0_512.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [666] - prefill: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx4_gen1_dep16_batch32_eplb0_mtp0_666.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx4_gen1_dep16_batch32_eplb0_mtp0_666.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [1229] - prefill: - num-worker: 7 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep16_batch64_eplb0_mtp0_1229.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep16_batch64_eplb0_mtp0_1229.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [2151] - prefill: - num-worker: 7 - tp: 4 - ep: 4 - dp-attn: true - 
additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep8_batch256_eplb0_mtp0_2151.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep8_batch256_eplb0_mtp0_2151.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # MTP configurations (spec_decoding="mtp") + - spec-decoding: "mtp" + conc-list: [8] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [24] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [180] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3_180.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3_180.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [564] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep32_batch16_eplb0_mtp3_564.yaml + - 
"CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep32_batch16_eplb0_mtp3_564.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [666] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp3_666.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp3_666.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [2253] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep16_batch128_eplb0_mtp1_2253.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep16_batch128_eplb0_mtp1_2253.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [8192] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx3_gen2_dep8_batch512_eplb0_mtp1_8192.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx3_gen2_dep8_batch512_eplb0_mtp1_8192.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + # STP configurations (no spec_decoding) + - conc-list: [4] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [24] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [84] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0_84.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0_84.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [1229] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0_1229.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0_1229.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [2253] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep16_batch128_eplb0_mtp0_2253.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep16_batch128_eplb0_mtp0_2253.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [8602] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch512_eplb0_mtp0_8602.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch512_eplb0_mtp0_8602.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [12288] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + 
additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch768_eplb0_mtp0_12288.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch768_eplb0_mtp0_12288.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - isl: 8192 + osl: 1024 + search-space: + # MTP configurations (spec_decoding="mtp") + - spec-decoding: "mtp" + conc-list: [8] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [24] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [333] + prefill: + num-worker: 6 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx6_gen1_dep32_batch8_eplb0_mtp3_333.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx6_gen1_dep32_batch8_eplb0_mtp3_333.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [666] + prefill: + num-worker: 8 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx8_gen1_dep16_batch32_eplb0_mtp3_666.yaml + - 
"CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx8_gen1_dep16_batch32_eplb0_mtp3_666.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1229] + prefill: + num-worker: 10 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx10_gen1_dep16_batch64_eplb0_mtp1_1229.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx10_gen1_dep16_batch64_eplb0_mtp1_1229.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1229] + prefill: + num-worker: 7 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx7_gen1_dep8_batch128_eplb0_mtp1_1229.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx7_gen1_dep8_batch128_eplb0_mtp1_1229.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + # STP configurations (no spec_decoding) + - conc-list: [4] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [24] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [36] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [512] + prefill: + num-worker: 6 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx6_gen1_dep32_batch16_eplb0_mtp0_512.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx6_gen1_dep32_batch16_eplb0_mtp0_512.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [666] + prefill: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx4_gen1_dep16_batch32_eplb0_mtp0_666.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx4_gen1_dep16_batch32_eplb0_mtp0_666.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [1229] + prefill: + num-worker: 7 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep16_batch64_eplb0_mtp0_1229.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep16_batch64_eplb0_mtp0_1229.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [2151] + prefill: + num-worker: 7 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep8_batch256_eplb0_mtp0_2151.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep8_batch256_eplb0_mtp0_2151.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true gptoss-fp4-gb200-dynamo-trt: image: 
nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.7.0.post2 model: openai/gpt-oss-120b @@ -6124,266 +6213,267 @@ gptoss-fp4-gb200-dynamo-trt: framework: dynamo-trt multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - #Right of pareto - #P: 1xTP1 D:1xTP4 - - spec-decoding: "none" - conc-list: [ 1, 2, 4, 16, 32, 64, 128 ] - prefill: - num-worker: 1 - tp: 1 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - "PREFILL_MAX_NUM_TOKENS=20000" - - "PREFILL_MAX_BATCH_SIZE=32" - decode: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MAX_NUM_TOKENS=20000" - - "DECODE_MAX_BATCH_SIZE=256" - - "DECODE_GPU_MEM_FRACTION=0.9" - -# P: 1xTP1 D:4xTP2 - - spec-decoding: "none" - conc-list: [ 16 ] - prefill: - num-worker: 1 - tp: 1 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - "PREFILL_MAX_NUM_TOKENS=20000" - - "PREFILL_MAX_BATCH_SIZE=32" - decode: - num-worker: 4 - tp: 2 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MAX_NUM_TOKENS=20000" - - "DECODE_MAX_BATCH_SIZE=32" - - "DECODE_GPU_MEM_FRACTION=0.9" - - # P: 1xTP1 D:1xDEP2 - - spec-decoding: "none" - conc-list: [ 256, 512, 1024, 2048, 2560 ] - prefill: - num-worker: 1 - tp: 1 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - "PREFILL_MAX_NUM_TOKENS=20000" - - "PREFILL_MAX_BATCH_SIZE=32" - decode: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MAX_NUM_TOKENS=20000" - - "DECODE_MAX_BATCH_SIZE=1536" - - "DECODE_GPU_MEM_FRACTION=0.9" - - # P: 1xTP1 D:2xDEP2 - - spec-decoding: "none" - conc-list: [ 512, 1024, 2048, 2560 ] - prefill: - num-worker: 1 - tp: 1 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - "PREFILL_MAX_NUM_TOKENS=20000" - - "PREFILL_MAX_BATCH_SIZE=32" - decode: - num-worker: 2 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - 
"DECODE_NODES=1" - - "DECODE_MAX_NUM_TOKENS=20000" - - "DECODE_MAX_BATCH_SIZE=1536" - - "DECODE_GPU_MEM_FRACTION=0.9" - - # P: 1xTP1 D:1xDEP4 - - spec-decoding: "none" - conc-list: [ 256, 1024, 1536 ] - prefill: - num-worker: 1 - tp: 1 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - "PREFILL_MAX_NUM_TOKENS=20000" - - "PREFILL_MAX_BATCH_SIZE=32" - decode: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MAX_NUM_TOKENS=20000" - - "DECODE_MAX_BATCH_SIZE=512" - - "DECODE_GPU_MEM_FRACTION=0.9" - -# P: 1xTP1 D:3xDEP4 - - spec-decoding: "none" - conc-list: [ 3072 ] - prefill: - num-worker: 1 - tp: 1 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - "PREFILL_MAX_NUM_TOKENS=20000" - - "PREFILL_MAX_BATCH_SIZE=32" - decode: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MAX_NUM_TOKENS=20000" - - "DECODE_MAX_BATCH_SIZE=1024" - - "DECODE_GPU_MEM_FRACTION=0.9" - - - isl: 8192 - osl: 1024 - search-space: - # Right side of pareto - - spec-decoding: "none" - conc-list: [1] - prefill: - num-worker: 1 - tp: 1 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - "PREFILL_MAX_NUM_TOKENS=20000" - - "PREFILL_MAX_BATCH_SIZE=32" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=2" - - "DECODE_MAX_NUM_TOKENS=20000" - - "DECODE_MAX_BATCH_SIZE=4" - - "DECODE_GPU_MEM_FRACTION=0.9" - - - spec-decoding: "none" - conc-list: [2, 4, 8, 16, 32, 64] - prefill: - num-worker: 1 - tp: 1 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - "PREFILL_MAX_NUM_TOKENS=20000" - - "PREFILL_MAX_BATCH_SIZE=32" - decode: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MAX_NUM_TOKENS=20000" - - "DECODE_MAX_BATCH_SIZE=128" - - "DECODE_GPU_MEM_FRACTION=0.9" - -# Middle of pareto -# P: 2xTP1 D:1xTP4 - - 
spec-decoding: "none" - conc-list: [128, 512] - prefill: - num-worker: 2 - tp: 1 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - "PREFILL_MAX_NUM_TOKENS=20000" - - "PREFILL_MAX_BATCH_SIZE=32" - decode: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MAX_NUM_TOKENS=20000" - - "DECODE_MAX_BATCH_SIZE=1024" - - "DECODE_GPU_MEM_FRACTION=0.9" - -# P: 2xTP1 D:1xTP2 - - spec-decoding: "none" - conc-list: [256, 384] - prefill: - num-worker: 2 - tp: 1 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - "PREFILL_MAX_NUM_TOKENS=20000" - - "PREFILL_MAX_BATCH_SIZE=32" - decode: - num-worker: 1 - tp: 2 - ep: 1 - dp-attn: false - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MAX_NUM_TOKENS=20000" - - "DECODE_MAX_BATCH_SIZE=512" - - "DECODE_GPU_MEM_FRACTION=0.9" - -# P: 2xTP1 D:1xDEP2 - - spec-decoding: "none" - conc-list: [128, 512] - prefill: - num-worker: 2 - tp: 1 - ep: 1 - dp-attn: false - additional-settings: - - "PREFILL_NODES=1" - - "PREFILL_MAX_NUM_TOKENS=20000" - - "PREFILL_MAX_BATCH_SIZE=32" - decode: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "DECODE_NODES=1" - - "DECODE_MAX_NUM_TOKENS=20000" - - "DECODE_MAX_BATCH_SIZE=512" - - "DECODE_GPU_MEM_FRACTION=0.9" + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + #Right of pareto + #P: 1xTP1 D:1xTP4 + - spec-decoding: "none" + conc-list: [ 1, 2, 4, 16, 32, 64, 128 ] + prefill: + num-worker: 1 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + - "PREFILL_MAX_NUM_TOKENS=20000" + - "PREFILL_MAX_BATCH_SIZE=32" + decode: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MAX_NUM_TOKENS=20000" + - "DECODE_MAX_BATCH_SIZE=256" + - "DECODE_GPU_MEM_FRACTION=0.9" + + # P: 1xTP1 D:4xTP2 + - spec-decoding: "none" + conc-list: [ 16 ] + prefill: + num-worker: 1 + tp: 1 + ep: 1 + dp-attn: false 
+ additional-settings: + - "PREFILL_NODES=1" + - "PREFILL_MAX_NUM_TOKENS=20000" + - "PREFILL_MAX_BATCH_SIZE=32" + decode: + num-worker: 4 + tp: 2 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MAX_NUM_TOKENS=20000" + - "DECODE_MAX_BATCH_SIZE=32" + - "DECODE_GPU_MEM_FRACTION=0.9" + + # P: 1xTP1 D:1xDEP2 + - spec-decoding: "none" + conc-list: [ 256, 512, 1024, 2048, 2560 ] + prefill: + num-worker: 1 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + - "PREFILL_MAX_NUM_TOKENS=20000" + - "PREFILL_MAX_BATCH_SIZE=32" + decode: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MAX_NUM_TOKENS=20000" + - "DECODE_MAX_BATCH_SIZE=1536" + - "DECODE_GPU_MEM_FRACTION=0.9" + + # P: 1xTP1 D:2xDEP2 + - spec-decoding: "none" + conc-list: [ 512, 1024, 2048, 2560 ] + prefill: + num-worker: 1 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + - "PREFILL_MAX_NUM_TOKENS=20000" + - "PREFILL_MAX_BATCH_SIZE=32" + decode: + num-worker: 2 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MAX_NUM_TOKENS=20000" + - "DECODE_MAX_BATCH_SIZE=1536" + - "DECODE_GPU_MEM_FRACTION=0.9" + + # P: 1xTP1 D:1xDEP4 + - spec-decoding: "none" + conc-list: [ 256, 1024, 1536 ] + prefill: + num-worker: 1 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + - "PREFILL_MAX_NUM_TOKENS=20000" + - "PREFILL_MAX_BATCH_SIZE=32" + decode: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MAX_NUM_TOKENS=20000" + - "DECODE_MAX_BATCH_SIZE=512" + - "DECODE_GPU_MEM_FRACTION=0.9" + + # P: 1xTP1 D:3xDEP4 + - spec-decoding: "none" + conc-list: [ 3072 ] + prefill: + num-worker: 1 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + - "PREFILL_MAX_NUM_TOKENS=20000" + - "PREFILL_MAX_BATCH_SIZE=32" + decode: + num-worker: 3 + tp: 4 + ep: 4 + 
dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MAX_NUM_TOKENS=20000" + - "DECODE_MAX_BATCH_SIZE=1024" + - "DECODE_GPU_MEM_FRACTION=0.9" + + - isl: 8192 + osl: 1024 + search-space: + # Right side of pareto + - spec-decoding: "none" + conc-list: [1] + prefill: + num-worker: 1 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + - "PREFILL_MAX_NUM_TOKENS=20000" + - "PREFILL_MAX_BATCH_SIZE=32" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=2" + - "DECODE_MAX_NUM_TOKENS=20000" + - "DECODE_MAX_BATCH_SIZE=4" + - "DECODE_GPU_MEM_FRACTION=0.9" + + - spec-decoding: "none" + conc-list: [2, 4, 8, 16, 32, 64] + prefill: + num-worker: 1 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + - "PREFILL_MAX_NUM_TOKENS=20000" + - "PREFILL_MAX_BATCH_SIZE=32" + decode: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MAX_NUM_TOKENS=20000" + - "DECODE_MAX_BATCH_SIZE=128" + - "DECODE_GPU_MEM_FRACTION=0.9" + + # Middle of pareto + # P: 2xTP1 D:1xTP4 + - spec-decoding: "none" + conc-list: [128, 512] + prefill: + num-worker: 2 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + - "PREFILL_MAX_NUM_TOKENS=20000" + - "PREFILL_MAX_BATCH_SIZE=32" + decode: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MAX_NUM_TOKENS=20000" + - "DECODE_MAX_BATCH_SIZE=1024" + - "DECODE_GPU_MEM_FRACTION=0.9" + + # P: 2xTP1 D:1xTP2 + - spec-decoding: "none" + conc-list: [256, 384] + prefill: + num-worker: 2 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + - "PREFILL_MAX_NUM_TOKENS=20000" + - "PREFILL_MAX_BATCH_SIZE=32" + decode: + num-worker: 1 + tp: 2 + ep: 1 + dp-attn: false + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MAX_NUM_TOKENS=20000" + - "DECODE_MAX_BATCH_SIZE=512" + - "DECODE_GPU_MEM_FRACTION=0.9" 
+ + # P: 2xTP1 D:1xDEP2 + - spec-decoding: "none" + conc-list: [128, 512] + prefill: + num-worker: 2 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "PREFILL_NODES=1" + - "PREFILL_MAX_NUM_TOKENS=20000" + - "PREFILL_MAX_BATCH_SIZE=32" + decode: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MAX_NUM_TOKENS=20000" + - "DECODE_MAX_BATCH_SIZE=512" + - "DECODE_GPU_MEM_FRACTION=0.9" dsr1-fp8-h200-dynamo-sglang: @@ -6395,254 +6485,254 @@ dsr1-fp8-h200-dynamo-sglang: framework: dynamo-sglang multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # STP: Low latency (1 prefill, 9 decode, TEP) - - spec-decoding: "none" - conc-list: [1, 4, 8, 16, 32, 64, 128, 256] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/1k1k/low-latency-1p9d.yaml" - decode: - num-worker: 9 - tp: 8 - ep: 1 - dp-attn: false - # STP: High throughput TEP (1 prefill, 6 decode) - - spec-decoding: "none" - conc-list: [512, 1024, 2048] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/1k1k/bs256-1p6d-tp.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 1 - dp-attn: false - # STP: High throughput DEP (1 prefill, 6 decode, dp-attention) - - spec-decoding: "none" - conc-list: [128, 256, 512, 1024, 2048] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/h200/1k1k/bs256-1p6d-dep.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 8 - dp-attn: true - # MTP: Low latency (1 prefill, 9 decode, TEP) - - spec-decoding: "mtp" - conc-list: [1, 4, 8, 16, 32, 64, 128, 256] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/1k1k/low-latency-1p9d-mtp.yaml" - decode: - num-worker: 9 - tp: 8 - ep: 1 - dp-attn: false - # MTP: High throughput TEP (1 prefill, 6 decode) - - spec-decoding: "mtp" - 
conc-list: [512, 1024, 2048] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/1k1k/bs256-1p6d-tp-mtp.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 1 - dp-attn: false - # MTP: High throughput DEP (1 prefill, 6 decode, dp-attention) - - spec-decoding: "mtp" - conc-list: [128, 256, 512, 1024, 2048] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/h200/1k1k/bs256-1p6d-dep-mtp.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 8 - dp-attn: true - - isl: 8192 - osl: 1024 - search-space: - # STP: Low latency TEP (1 prefill, 7 decode) - - spec-decoding: "none" - conc-list: [1, 4, 8] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/8k1k/bs4-1p7d.yaml" - decode: - num-worker: 7 - tp: 8 - ep: 1 - dp-attn: false - # STP: TEP (1 prefill, 6 decode) - - spec-decoding: "none" - conc-list: [4, 8, 16] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/8k1k/bs8-1p6d.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 1 - dp-attn: false - # STP: TEP (1 prefill, 3 decode) - - spec-decoding: "none" - conc-list: [8, 16, 32] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/8k1k/bs16-1p3d.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 1 - dp-attn: false - # STP: TEP (2 prefill, 3 decode) - - spec-decoding: "none" - conc-list: [32, 64, 128] - prefill: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/8k1k/bs64-2p3d.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 1 - dp-attn: false - # STP: High throughput DEP (1 prefill, 1 decode, dp-attention) - - spec-decoding: "none" - conc-list: [64, 128, 256] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/8k1k/bs128-1p1d-dep.yaml" - decode: - 
num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - # MTP: Low latency TEP (1 prefill, 7 decode) - - spec-decoding: "mtp" - conc-list: [1, 4, 8] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/8k1k/bs4-1p7d-mtp.yaml" - decode: - num-worker: 7 - tp: 8 - ep: 1 - dp-attn: false - # MTP: TEP (1 prefill, 6 decode) - - spec-decoding: "mtp" - conc-list: [2, 4, 8, 16, 32] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/8k1k/bs8-1p6d-mtp.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 1 - dp-attn: false - # MTP: TEP (1 prefill, 3 decode) - - spec-decoding: "mtp" - conc-list: [4, 8, 16, 32, 64] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/8k1k/bs16-1p3d-mtp.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 1 - dp-attn: false - # MTP: TEP (2 prefill, 3 decode) - - spec-decoding: "mtp" - conc-list: [32, 64, 128] - prefill: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/8k1k/bs64-2p3d-mtp.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 1 - dp-attn: false - # MTP: High throughput DEP (1 prefill, 1 decode, dp-attention) - - spec-decoding: "mtp" - conc-list: [32, 64, 128, 256, 512] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/h200/8k1k/bs128-1p1d-dep-mtp.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # STP: Low latency (1 prefill, 9 decode, TEP) + - spec-decoding: "none" + conc-list: [1, 4, 8, 16, 32, 64, 128, 256] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h200/1k1k/low-latency-1p9d.yaml" + decode: + num-worker: 9 + tp: 8 + ep: 1 + dp-attn: false + # STP: High throughput TEP (1 prefill, 6 decode) + - spec-decoding: "none" + 
conc-list: [512, 1024, 2048] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h200/1k1k/bs256-1p6d-tp.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 1 + dp-attn: false + # STP: High throughput DEP (1 prefill, 6 decode, dp-attention) + - spec-decoding: "none" + conc-list: [128, 256, 512, 1024, 2048] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/h200/1k1k/bs256-1p6d-dep.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 8 + dp-attn: true + # MTP: Low latency (1 prefill, 9 decode, TEP) + - spec-decoding: "mtp" + conc-list: [1, 4, 8, 16, 32, 64, 128, 256] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h200/1k1k/low-latency-1p9d-mtp.yaml" + decode: + num-worker: 9 + tp: 8 + ep: 1 + dp-attn: false + # MTP: High throughput TEP (1 prefill, 6 decode) + - spec-decoding: "mtp" + conc-list: [512, 1024, 2048] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h200/1k1k/bs256-1p6d-tp-mtp.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 1 + dp-attn: false + # MTP: High throughput DEP (1 prefill, 6 decode, dp-attention) + - spec-decoding: "mtp" + conc-list: [128, 256, 512, 1024, 2048] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/h200/1k1k/bs256-1p6d-dep-mtp.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 8 + dp-attn: true + - isl: 8192 + osl: 1024 + search-space: + # STP: Low latency TEP (1 prefill, 7 decode) + - spec-decoding: "none" + conc-list: [1, 4, 8] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h200/8k1k/bs4-1p7d.yaml" + decode: + num-worker: 7 + tp: 8 + ep: 1 + dp-attn: false + # STP: TEP (1 prefill, 6 decode) + - spec-decoding: "none" + conc-list: [4, 8, 16] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + 
additional-settings: + - "CONFIG_FILE=recipes/h200/8k1k/bs8-1p6d.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 1 + dp-attn: false + # STP: TEP (1 prefill, 3 decode) + - spec-decoding: "none" + conc-list: [8, 16, 32] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h200/8k1k/bs16-1p3d.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 1 + dp-attn: false + # STP: TEP (2 prefill, 3 decode) + - spec-decoding: "none" + conc-list: [32, 64, 128] + prefill: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h200/8k1k/bs64-2p3d.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 1 + dp-attn: false + # STP: High throughput DEP (1 prefill, 1 decode, dp-attention) + - spec-decoding: "none" + conc-list: [64, 128, 256] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h200/8k1k/bs128-1p1d-dep.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + # MTP: Low latency TEP (1 prefill, 7 decode) + - spec-decoding: "mtp" + conc-list: [1, 4, 8] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h200/8k1k/bs4-1p7d-mtp.yaml" + decode: + num-worker: 7 + tp: 8 + ep: 1 + dp-attn: false + # MTP: TEP (1 prefill, 6 decode) + - spec-decoding: "mtp" + conc-list: [2, 4, 8, 16, 32] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h200/8k1k/bs8-1p6d-mtp.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 1 + dp-attn: false + # MTP: TEP (1 prefill, 3 decode) + - spec-decoding: "mtp" + conc-list: [4, 8, 16, 32, 64] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h200/8k1k/bs16-1p3d-mtp.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 1 + dp-attn: false + # MTP: TEP (2 prefill, 3 decode) + - spec-decoding: "mtp" + conc-list: [32, 64, 128] + prefill: + num-worker: 2 + tp: 8 + 
ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h200/8k1k/bs64-2p3d-mtp.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 1 + dp-attn: false + # MTP: High throughput DEP (1 prefill, 1 decode, dp-attention) + - spec-decoding: "mtp" + conc-list: [32, 64, 128, 256, 512] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/h200/8k1k/bs128-1p1d-dep-mtp.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true dsr1-fp4-b200-dynamo-sglang: image: lmsysorg/sglang:v0.5.8.post1-cu130-runtime model: deepseek-r1-fp4 @@ -6652,133 +6742,133 @@ dsr1-fp4-b200-dynamo-sglang: framework: dynamo-sglang multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # Non-MTP configurations - - conc-list: [16, 128] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_stp_lowlat[0]" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [32, 64, 256] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_stp_lowlat[1]" - decode: - num-worker: 6 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [512] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_stp_maxtpt[0]" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [512] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_stp_maxtpt[1]" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - isl: 8192 - osl: 1024 - search-space: - # Non-MTP configurations - - conc-list: [64, 128] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_stp_lowlat[0]" - decode: - 
num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [8] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_stp_lowlat[1]" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [4, 128] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_stp_lowlat[2]" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [4, 8, 16, 64] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:override_stp_tp4" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - - conc-list: [1024, 2048] - prefill: - num-worker: 7 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:override_stp_maxtpt_7p2d" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # Non-MTP configurations + - conc-list: [16, 128] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_stp_lowlat[0]" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [32, 64, 256] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_stp_lowlat[1]" + decode: + num-worker: 6 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [512] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_stp_maxtpt[0]" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [512] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_stp_maxtpt[1]" + decode: + 
num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - isl: 8192 + osl: 1024 + search-space: + # Non-MTP configurations + - conc-list: [64, 128] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_stp_lowlat[0]" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [8] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_stp_lowlat[1]" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [4, 128] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_stp_lowlat[2]" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [4, 8, 16, 64] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:override_stp_tp4" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: [1024, 2048] + prefill: + num-worker: 7 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:override_stp_maxtpt_7p2d" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true dsr1-fp8-b200-dynamo-sglang: image: lmsysorg/sglang:v0.5.8.post1-cu130-amd64 model: deepseek-ai/DeepSeek-R1-0528 @@ -6788,166 +6878,167 @@ dsr1-fp8-b200-dynamo-sglang: framework: dynamo-sglang multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # Non-MTP configurations - - conc-list: [4] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_stp_lowlat[0]" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [16, 32, 64, 128, 256] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - - 
"CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_stp_lowlat[1]" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [1024, 2048, 4096] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_stp_maxtpt[0]" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [2048, 4096] - prefill: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_stp_maxtpt[1]" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: true - - isl: 8192 - osl: 1024 - search-space: - # STP low-latency: resolved from 8k1k.yaml zip_override_stp_lowlat - - conc-list: [128] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_lowlat_0.yaml - - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_lowlat_0.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 1 - dp-attn: false - - conc-list: [128] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_lowlat_1.yaml - - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_lowlat_1.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 1 - dp-attn: false - - conc-list: [8, 16, 32, 64, 128] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_lowlat_2.yaml - - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_lowlat_2.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 1 - dp-attn: false - # STP max-throughput: resolved from 8k1k.yaml zip_override_stp_maxtpt - - conc-list: [288] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_maxtpt_0.yaml - - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_maxtpt_0.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [160, 288] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_maxtpt_1.yaml - - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_maxtpt_1.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [512] - prefill: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_maxtpt_2.yaml - - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_maxtpt_2.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [1024] - prefill: - num-worker: 3 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_maxtpt_3.yaml - - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_maxtpt_3.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # Non-MTP configurations + - conc-list: [4] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_stp_lowlat[0]" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [16, 32, 64, 128, 256] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_stp_lowlat[1]" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [1024, 2048, 4096] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_stp_maxtpt[0]" + decode: + num-worker: 
5 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [2048, 4096] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_stp_maxtpt[1]" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + - isl: 8192 + osl: 1024 + search-space: + # STP low-latency: resolved from 8k1k.yaml zip_override_stp_lowlat + - conc-list: [128] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_lowlat_0.yaml + - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_lowlat_0.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: [128] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_lowlat_1.yaml + - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_lowlat_1.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 1 + dp-attn: false + - conc-list: [8, 16, 32, 64, 128] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_lowlat_2.yaml + - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_lowlat_2.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 1 + dp-attn: false + # STP max-throughput: resolved from 8k1k.yaml zip_override_stp_maxtpt + - conc-list: [288] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_maxtpt_0.yaml + - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_maxtpt_0.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [160, 288] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_maxtpt_1.yaml + - 
"CONFIG_FILE=recipes/b200-fp8/8k1k_stp_maxtpt_1.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [512] + prefill: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_maxtpt_2.yaml + - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_maxtpt_2.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [1024] + prefill: + num-worker: 3 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_stp_maxtpt_3.yaml + - "CONFIG_FILE=recipes/b200-fp8/8k1k_stp_maxtpt_3.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true dsr1-fp8-b200-dynamo-sglang-mtp: image: lmsysorg/sglang:v0.5.8.post1-cu130-amd64 @@ -6958,195 +7049,196 @@ dsr1-fp8-b200-dynamo-sglang-mtp: framework: dynamo-sglang multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # MTP low-latency: 1P1D - - spec-decoding: "mtp" - conc-list: [4, 64] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_mtp_lowlat[0]" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - # MTP low-latency: 1P3D - - spec-decoding: "mtp" - conc-list: [4, 8, 16, 32, 128] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_mtp_lowlat[1]" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - # MTP max-tpt: 1P5D - - spec-decoding: "mtp" - conc-list: [512, 4096] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_mtp_maxtpt[1]" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: true - # MTP max-tpt: 2P5D - - spec-decoding: "mtp" - conc-list: [1024, 2048, 4096] - prefill: - num-worker: 2 - tp: 
8 - ep: 8 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_mtp_maxtpt[2]" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: true - # MTP max-tpt: 1P2D - - spec-decoding: "mtp" - conc-list: [512, 1024, 2048] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:override_mtp_maxtpt_1p2d" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - isl: 8192 - osl: 1024 - search-space: - # MTP low-latency: resolved from 8k1k.yaml zip_override_mtp_lowlat - - spec-decoding: "mtp" - conc-list: [128] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_lowlat_0.yaml - - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_lowlat_0.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 1 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [128] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_lowlat_1.yaml - - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_lowlat_1.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 1 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [8, 16, 32, 64, 128] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_lowlat_2.yaml - - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_lowlat_2.yaml" - decode: - num-worker: 6 - tp: 8 - ep: 1 - dp-attn: false - # MTP max-throughput: resolved from 8k1k.yaml zip_override_mtp_maxtpt - - spec-decoding: "mtp" - conc-list: [288] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_0.yaml - - 
"CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_maxtpt_0.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [160, 288] - prefill: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_1.yaml - - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_maxtpt_1.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [512] - prefill: - num-worker: 2 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_2.yaml - - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_maxtpt_2.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [1024] - prefill: - num-worker: 3 - tp: 8 - ep: 1 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_3.yaml - - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_maxtpt_3.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # MTP low-latency: 1P1D + - spec-decoding: "mtp" + conc-list: [4, 64] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_mtp_lowlat[0]" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + # MTP low-latency: 1P3D + - spec-decoding: "mtp" + conc-list: [4, 8, 16, 32, 128] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_mtp_lowlat[1]" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + # MTP max-tpt: 1P5D + - spec-decoding: "mtp" + conc-list: [512, 4096] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - 
"CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_mtp_maxtpt[1]" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + # MTP max-tpt: 2P5D + - spec-decoding: "mtp" + conc-list: [1024, 2048, 4096] + prefill: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:zip_override_mtp_maxtpt[2]" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: true + # MTP max-tpt: 1P2D + - spec-decoding: "mtp" + conc-list: [512, 1024, 2048] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/b200-fp8/1k1k.yaml:override_mtp_maxtpt_1p2d" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - isl: 8192 + osl: 1024 + search-space: + # MTP low-latency: resolved from 8k1k.yaml zip_override_mtp_lowlat + - spec-decoding: "mtp" + conc-list: [128] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_lowlat_0.yaml + - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_lowlat_0.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 1 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [128] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_lowlat_1.yaml + - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_lowlat_1.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 1 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [8, 16, 32, 64, 128] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_lowlat_2.yaml + - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_lowlat_2.yaml" + decode: + num-worker: 6 + tp: 8 + ep: 1 + dp-attn: false + # MTP max-throughput: resolved from 8k1k.yaml zip_override_mtp_maxtpt + - spec-decoding: "mtp" + conc-list: [288] + 
prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_0.yaml + - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_maxtpt_0.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [160, 288] + prefill: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_1.yaml + - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_maxtpt_1.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [512] + prefill: + num-worker: 2 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_2.yaml + - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_maxtpt_2.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1024] + prefill: + num-worker: 3 + tp: 8 + ep: 1 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp8/8k1k_mtp_maxtpt_3.yaml + - "CONFIG_FILE=recipes/b200-fp8/8k1k_mtp_maxtpt_3.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true dsr1-fp4-b200-dynamo-sglang-mtp: image: "lmsysorg/sglang:v0.5.8.post1-cu130" @@ -7157,136 +7249,136 @@ dsr1-fp4-b200-dynamo-sglang-mtp: framework: dynamo-sglang multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - spec-decoding: "mtp" - conc-list: [16, 512] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/1k1k.yaml - - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_lowlat[0]" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: 
[32, 64, 256, 512] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/1k1k.yaml - - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_lowlat[1]" - decode: - num-worker: 6 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [512, 1024] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/1k1k.yaml - - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_maxtpt[0]" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [512] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/1k1k.yaml - - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_maxtpt[1]" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - - - - isl: 8192 - osl: 1024 - search-space: - - spec-decoding: "mtp" - conc-list: [64, 128] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/8k1k.yaml - - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_mtp_lowlat[0]" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [8] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/8k1k.yaml - - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_mtp_lowlat[1]" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [4, 128] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/8k1k.yaml - - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_mtp_lowlat[2]" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [4, 8, 16, 64] - prefill: - num-worker: 1 - tp: 4 - ep: 1 - dp-attn: false - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/8k1k.yaml - - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:override_mtp_tp4" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - spec-decoding: "mtp" + conc-list: [16, 512] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/1k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_lowlat[0]" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [32, 64, 256, 512] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/1k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_lowlat[1]" + decode: + num-worker: 6 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [512, 1024] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/1k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_maxtpt[0]" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [512] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/1k1k.yaml + - 
"CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_maxtpt[1]" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + + + - isl: 8192 + osl: 1024 + search-space: + - spec-decoding: "mtp" + conc-list: [64, 128] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/8k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_mtp_lowlat[0]" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [8] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/8k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_mtp_lowlat[1]" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [4, 128] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/8k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:zip_override_mtp_lowlat[2]" + decode: + num-worker: 5 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [4, 8, 16, 64] + prefill: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/b200-fp4/8k1k.yaml + - "CONFIG_FILE=recipes/b200-fp4/8k1k.yaml:override_mtp_tp4" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false kimik2.5-fp4-gb200-dynamo-trt: image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:1.1.0-dev.2 @@ -7297,212 +7389,213 @@ kimik2.5-fp4-gb200-dynamo-trt: framework: dynamo-trt multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - # Non-MTP configurations (default spec_decoding="none") - - conc-list: [ 4, 192, 360, 668 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - 
additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen4tep8_batch128_allconc_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen4tep8_batch128_allconc_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [ 5, 15, 30, 55 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen5tep4_batch8_allconc_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen5tep4_batch8_allconc_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 4 - ep: 4 - dp-attn: false - - conc-list: [ 666 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep16_batch32_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep16_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [ 2253 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep32_batch64_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep32_batch64_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [ 4301, 6452 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep8_batch768_allconc_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep8_batch768_allconc_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [ 4301 ] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx2dep4_gen1dep16_batch256_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx2dep4_gen1dep16_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [ 4301 ] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx2dep4_gen1dep32_batch128_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx2dep4_gen1dep32_batch128_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - - isl: 8192 - osl: 1024 - search-space: - # Non-MTP configurations (default spec_decoding="none") - - conc-list: [ 4 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx1dep4_gen4tep8_batch1_allconc_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx1dep4_gen4tep8_batch1_allconc_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [ 156 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx1dep4_gen4tep4_batch32_allconc_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx1dep4_gen4tep4_batch32_allconc_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: false - - conc-list: [ 5, 15, 30, 60, 105 ] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx1dep4_gen5tep4_batch16_allconc_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx1dep4_gen5tep4_batch16_allconc_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 4 - ep: 4 - dp-attn: false - - conc-list: [ 333 ] - prefill: - num-worker: 2 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx2dep4_gen1dep16_batch16_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx2dep4_gen1dep16_batch16_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [ 615 ] - prefill: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx3dep4_gen1dep16_batch32_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx3dep4_gen1dep16_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [ 2151 ] - prefill: - num-worker: 5 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx5dep4_gen1dep8_batch256_allconc_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx5dep4_gen1dep8_batch256_allconc_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [ 2253 ] - prefill: - num-worker: 7 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx7dep4_gen1dep16_batch128_eplb0_mtp0.yaml - - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx7dep4_gen1dep16_batch128_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + # Non-MTP configurations (default spec_decoding="none") + - conc-list: [ 4, 192, 360, 668 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen4tep8_batch128_allconc_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen4tep8_batch128_allconc_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [ 5, 15, 30, 55 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen5tep4_batch8_allconc_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen5tep4_batch8_allconc_eplb0_mtp0.yaml" + decode: + num-worker: 5 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [ 666 ] + prefill: + num-worker: 1 + tp: 
4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep16_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep16_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [ 2253 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep32_batch64_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep32_batch64_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [ 4301, 6452 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep8_batch768_allconc_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep8_batch768_allconc_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [ 4301 ] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx2dep4_gen1dep16_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx2dep4_gen1dep16_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [ 4301 ] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # 
https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx2dep4_gen1dep32_batch128_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx2dep4_gen1dep32_batch128_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + + - isl: 8192 + osl: 1024 + search-space: + # Non-MTP configurations (default spec_decoding="none") + - conc-list: [ 4 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx1dep4_gen4tep8_batch1_allconc_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx1dep4_gen4tep8_batch1_allconc_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [ 156 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx1dep4_gen4tep4_batch32_allconc_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx1dep4_gen4tep4_batch32_allconc_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [ 5, 15, 30, 60, 105 ] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx1dep4_gen5tep4_batch16_allconc_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx1dep4_gen5tep4_batch16_allconc_eplb0_mtp0.yaml" + decode: + num-worker: 5 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [ 333 ] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + 
additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx2dep4_gen1dep16_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx2dep4_gen1dep16_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [ 615 ] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx3dep4_gen1dep16_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx3dep4_gen1dep16_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [ 2151 ] + prefill: + num-worker: 5 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx5dep4_gen1dep8_batch256_allconc_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx5dep4_gen1dep8_batch256_allconc_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [ 2253 ] + prefill: + num-worker: 7 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx7dep4_gen1dep16_batch128_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL8K_OSL1K/STP/ctx7dep4_gen1dep16_batch128_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true kimik2.5-fp4-gb200-dynamo-vllm: image: vllm/vllm-openai:v0.18.0-cu130 @@ -7513,97 +7606,98 @@ kimik2.5-fp4-gb200-dynamo-vllm: framework: dynamo-vllm multinode: true disagg: true - seq-len-configs: - - isl: 
1024 - osl: 1024 - search-space: - - conc-list: [256, 512, 1024, 2048, 3072, 4096] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p1d-dep4-dep16.yaml - - "CONFIG_FILE=recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p1d-dep4-dep16.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [4, 8, 16, 32, 64, 128] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p4d-dep4-tep4.yaml - - "CONFIG_FILE=recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p4d-dep4-tep4.yaml" - decode: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: false - - isl: 8192 - osl: 1024 - search-space: - - conc-list: [4, 8, 16, 32, 128] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-1p4d-dep4-tep4.yaml - - "CONFIG_FILE=recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-1p4d-dep4-tep4.yaml" - decode: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: false - - conc-list: [512, 1024] - prefill: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-3p1d-dep4-dep16.yaml - - "CONFIG_FILE=recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-3p1d-dep4-dep16.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [2048] - prefill: - num-worker: 5 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-5p1d-dep4-dep8.yaml - - "CONFIG_FILE=recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-5p1d-dep4-dep8.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - 
conc-list: [3072, 4096] - prefill: - num-worker: 6 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-6p1d-dep4-dep16.yaml - - "CONFIG_FILE=recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-6p1d-dep4-dep16.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - conc-list: [256, 512, 1024, 2048, 3072, 4096] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p1d-dep4-dep16.yaml + - "CONFIG_FILE=recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p1d-dep4-dep16.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [4, 8, 16, 32, 64, 128] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p4d-dep4-tep4.yaml + - "CONFIG_FILE=recipes/vllm/kimi-k2.5/1k1k/disagg-gb200-1p4d-dep4-tep4.yaml" + decode: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: false + - isl: 8192 + osl: 1024 + search-space: + - conc-list: [4, 8, 16, 32, 128] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-1p4d-dep4-tep4.yaml + - "CONFIG_FILE=recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-1p4d-dep4-tep4.yaml" + decode: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [512, 1024] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-3p1d-dep4-dep16.yaml + - "CONFIG_FILE=recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-3p1d-dep4-dep16.yaml" + decode: + 
num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [2048] + prefill: + num-worker: 5 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-5p1d-dep4-dep8.yaml + - "CONFIG_FILE=recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-5p1d-dep4-dep8.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [3072, 4096] + prefill: + num-worker: 6 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-6p1d-dep4-dep16.yaml + - "CONFIG_FILE=recipes/vllm/kimi-k2.5/8k1k/disagg-gb200-6p1d-dep4-dep16.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true dsv4-fp4-gb200-dynamo-vllm: image: vllm/vllm-openai:deepseekv4-cu130 @@ -7614,105 +7708,106 @@ dsv4-fp4-gb200-dynamo-vllm: framework: dynamo-vllm multinode: true disagg: true - seq-len-configs: - # 1k/1k — extrapolated from kimi-k2.5 1k/1k topologies, scaled to DSV4-Pro's - # DP>=8 constraint. No upstream NVIDIA reference for DSV4-Pro vLLM disagg - # at this seq-len yet (PR #67 only publishes 8k/1k). - - isl: 1024 - osl: 1024 - search-space: - # Low-concurrency / interactivity: 1 prefill (DP=8) + 1 decode (TP=8). - # 4 nodes total. Mirrors NVIDIA aflowers/gb200-dsv4-recipes branch - # 1p1d-dep8-tep8.yaml (offload + numa-bind stripped — see recipe header). - - conc-list: [1, 4, 8, 16, 32, 64] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/vllm/deepseek-v4/1k1k/disagg-gb200-1p1d-dep8-tep8.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - # Mid throughput: 1 prefill (DP=8) + 1 wide decode (DP=16). - # 6 nodes. Single prefill is plenty for 1k prompts up to ~conc 4096. 
- - conc-list: [128, 256, 1024, 2048, 4096] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/vllm/deepseek-v4/1k1k/disagg-gb200-1p1d-dep8-dep16.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - # High throughput: 3 prefills (DP=8) + 1 wide decode (DP=16). 10 nodes. - # The 4096 overlap with the 1p1d block gives a crossover point. 8192 - # would saturate 1p1d's prefill, so this topology takes over there. - - conc-list: [4096, 8192] - prefill: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/vllm/deepseek-v4/1k1k/disagg-gb200-3p1d-dep8-dep16.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - - - isl: 8192 - osl: 1024 - search-space: - # Low-concurrency / interactivity: 1 prefill (DP=8) + 1 decode (TP=8). - # 4 nodes total. Mirrors NVIDIA aflowers/gb200-dsv4-recipes branch. - - conc-list: [1, 4, 8, 16, 32, 64] - prefill: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb200-1p1d-dep8-tep8.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 1 - dp-attn: false - # Mid: 3 prefills (DP=8) + 1 wide decode (DP=16). 10 nodes total. - - conc-list: [512, 1024] - prefill: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb200-3p1d-dep8-dep16.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true - # Max throughput: 7 prefills (DP=8) + 1 wide decode (DP=16). 18 nodes - # (full cluster). Mirrors NVIDIA/srt-slurm PR #67. 
- - conc-list: [4096, 8192] - prefill: - num-worker: 7 - tp: 8 - ep: 8 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb200-7p1d-dep8-dep16.yaml" - decode: - num-worker: 1 - tp: 16 - ep: 16 - dp-attn: true + scenarios: + fixed-seq-len: + # 1k/1k — extrapolated from kimi-k2.5 1k/1k topologies, scaled to DSV4-Pro's + # DP>=8 constraint. No upstream NVIDIA reference for DSV4-Pro vLLM disagg + # at this seq-len yet (PR #67 only publishes 8k/1k). + - isl: 1024 + osl: 1024 + search-space: + # Low-concurrency / interactivity: 1 prefill (DP=8) + 1 decode (TP=8). + # 4 nodes total. Mirrors NVIDIA aflowers/gb200-dsv4-recipes branch + # 1p1d-dep8-tep8.yaml (offload + numa-bind stripped — see recipe header). + - conc-list: [1, 4, 8, 16, 32, 64] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/vllm/deepseek-v4/1k1k/disagg-gb200-1p1d-dep8-tep8.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + # Mid throughput: 1 prefill (DP=8) + 1 wide decode (DP=16). + # 6 nodes. Single prefill is plenty for 1k prompts up to ~conc 4096. + - conc-list: [128, 256, 1024, 2048, 4096] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/vllm/deepseek-v4/1k1k/disagg-gb200-1p1d-dep8-dep16.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + # High throughput: 3 prefills (DP=8) + 1 wide decode (DP=16). 10 nodes. + # The 4096 overlap with the 1p1d block gives a crossover point. 8192 + # would saturate 1p1d's prefill, so this topology takes over there. + - conc-list: [4096, 8192] + prefill: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/vllm/deepseek-v4/1k1k/disagg-gb200-3p1d-dep8-dep16.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + + - isl: 8192 + osl: 1024 + search-space: + # Low-concurrency / interactivity: 1 prefill (DP=8) + 1 decode (TP=8). 
+ # 4 nodes total. Mirrors NVIDIA aflowers/gb200-dsv4-recipes branch. + - conc-list: [1, 4, 8, 16, 32, 64] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb200-1p1d-dep8-tep8.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 1 + dp-attn: false + # Mid: 3 prefills (DP=8) + 1 wide decode (DP=16). 10 nodes total. + - conc-list: [512, 1024] + prefill: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb200-3p1d-dep8-dep16.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + # Max throughput: 7 prefills (DP=8) + 1 wide decode (DP=16). 18 nodes + # (full cluster). Mirrors NVIDIA/srt-slurm PR #67. + - conc-list: [4096, 8192] + prefill: + num-worker: 7 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "CONFIG_FILE=recipes/vllm/deepseek-v4/8k1k/disagg-gb200-7p1d-dep8-dep16.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true diff --git a/.github/workflows/benchmark-multinode-tmpl.yml b/.github/workflows/benchmark-multinode-tmpl.yml index 75036a986..43b42c88e 100644 --- a/.github/workflows/benchmark-multinode-tmpl.yml +++ b/.github/workflows/benchmark-multinode-tmpl.yml @@ -91,6 +91,31 @@ on: type: string required: false default: "" + scenario-type: + description: "Scenario type (fixed-seq-len or agentic-coding)" + type: string + required: false + default: fixed-seq-len + conc: + description: "Concurrency for agentic-coding scenarios (single value per matrix entry)" + type: string + required: false + default: "" + duration: + description: "Agentic trace replay duration in seconds" + type: string + required: false + default: "1800" + offloading: + description: "KV offload backend for agentic scenarios (none/cpu/ssd)" + required: false + type: string + default: 'none' + total-cpu-dram-gb: + description: "Total CPU DRAM in GB for KV offloading" + required: false + type: string + 
default: '600' ref: description: "Git ref (branch/sha) to checkout" required: false @@ -113,6 +138,13 @@ env: RUN_EVAL: ${{ inputs.run-eval }} EVAL_ONLY: ${{ inputs.eval-only }} EVAL_CONC: ${{ inputs.eval-conc }} + SCENARIO_TYPE: ${{ inputs.scenario-type }} + SCENARIO_SUBDIR: ${{ inputs.scenario-type == 'agentic-coding' && 'agentic/' || '' }} + CONC: ${{ inputs.conc }} + USERS: ${{ inputs.conc }} + DURATION: ${{ inputs.duration }} + OFFLOADING: ${{ inputs.offloading }} + TOTAL_CPU_DRAM_GB: ${{ inputs.total-cpu-dram-gb }} PYTHONDONTWRITEBYTECODE: '1' PYTHONPYCACHEPREFIX: /tmp/inferencex-pycache @@ -152,7 +184,8 @@ jobs: token: ${{ secrets.REPO_PAT }} fetch-depth: 0 ref: ${{ inputs.ref || github.sha }} - clean: false + clean: true + submodules: true - name: Cleanup stale eval outputs (pre-run) if: ${{ inputs.run-eval || inputs.eval-only }} @@ -182,6 +215,13 @@ jobs: echo "Eval-only run failed: no results*.json files found." >&2 exit 1 fi + elif [ "${{ inputs.scenario-type }}" = "agentic-coding" ]; then + if [ -f "${RESULT_FILENAME}.json" ]; then + echo "Found agentic result file: ${RESULT_FILENAME}.json" + else + echo "Run failed: Agentic benchmark result ${RESULT_FILENAME}.json not found." 
>&2 + exit 1 + fi else # Check if at least one result file was created if ls ${RESULT_FILENAME}_*.json 1> /dev/null 2>&1; then @@ -194,7 +234,7 @@ jobs: fi - name: Process result - if: ${{ !inputs.eval-only }} + if: ${{ !inputs.eval-only && inputs.scenario-type != 'agentic-coding' }} env: RUNNER_TYPE: ${{ inputs.runner }} run: | @@ -215,7 +255,7 @@ jobs: done - name: Upload result - if: ${{ !inputs.eval-only }} + if: ${{ !inputs.eval-only && inputs.scenario-type != 'agentic-coding' }} uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: bmk_${{ env.RESULT_FILENAME }} @@ -229,6 +269,27 @@ jobs: path: multinode_server_logs.tar.gz if-no-files-found: ignore + - name: Upload agentic aggregated result + if: ${{ !inputs.eval-only && inputs.scenario-type == 'agentic-coding' }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: bmk_agentic_${{ env.RESULT_FILENAME }} + path: ${{ env.RESULT_FILENAME }}.json + + - name: Upload agentic raw results + if: ${{ always() && inputs.scenario-type == 'agentic-coding' }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: agentic_${{ env.RESULT_FILENAME }} + path: | + LOGS/agentic/benchmark.log + LOGS/agentic/benchmark_command.txt + LOGS/agentic/workload_distribution_summary.txt + LOGS/agentic/workload_distribution_plots.png + LOGS/agentic/trace_replay/detailed_results.csv + LOGS/agentic/trace_replay/debug_trace.jsonl + if-no-files-found: ignore + - name: Upload eval results (if any) if: ${{ always() && (env.RUN_EVAL == 'true' || inputs.eval-only) }} uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml index c38082cbe..ef74abd0b 100644 --- a/.github/workflows/benchmark-tmpl.yml +++ b/.github/workflows/benchmark-tmpl.yml @@ -67,7 +67,26 @@ on: description: "Git ref (branch/sha) to checkout" 
required: false type: string - + scenario-type: + description: "Scenario type (fixed-seq-len or agentic-coding)" + required: false + type: string + default: 'fixed-seq-len' + offloading: + description: "KV offload backend for agentic scenarios (none/cpu/ssd)" + required: false + type: string + default: 'none' + total-cpu-dram-gb: + description: "Total CPU DRAM in GB for KV offloading" + required: false + type: string + default: '600' + duration: + description: "Benchmark duration in seconds" + required: false + type: string + default: '1800' env: RANDOM_RANGE_RATIO: 0.8 HF_TOKEN: ${{ secrets.HF_TOKEN }} @@ -89,6 +108,13 @@ env: DISAGG: ${{ inputs.disagg }} RUN_EVAL: ${{ inputs.run-eval }} EVAL_ONLY: ${{ inputs.eval-only }} + SCENARIO_TYPE: ${{ inputs.scenario-type }} + SCENARIO_SUBDIR: ${{ inputs.scenario-type == 'agentic-coding' && 'agentic/' || '' }} + USERS: ${{ inputs.conc }} + OFFLOADING: ${{ inputs.offloading }} + TOTAL_CPU_DRAM_GB: ${{ inputs.total-cpu-dram-gb }} + DURATION: ${{ inputs.duration }} + RESULT_DIR: /workspace/results PYTHONDONTWRITEBYTECODE: '1' PYTHONPYCACHEPREFIX: /tmp/inferencex-pycache @@ -124,12 +150,19 @@ jobs: done fi + # Cleanup results/ from a prior job on this runner. Agentic jobs + # write to fixed subpaths (trace_replay/, metrics_*, etc.), so stale + # data from a previous job would otherwise be picked up as this + # job's output when replay fails early. 
+ rm -rf "${{ github.workspace }}/results" 2>/dev/null || true + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_PAT }} fetch-depth: 0 ref: ${{ inputs.ref || github.sha }} - clean: false + clean: true + submodules: true - name: Cleanup stale eval outputs (pre-run) if: ${{ inputs.run-eval || inputs.eval-only }} @@ -178,25 +211,53 @@ jobs: fi - name: Process result - if: ${{ !inputs.eval-only }} + if: ${{ !inputs.eval-only && inputs.scenario-type != 'agentic-coding' }} env: RUNNER_TYPE: ${{ inputs.runner }} run: | python3 utils/process_result.py - name: Upload result - if: ${{ !inputs.eval-only }} + if: ${{ !inputs.eval-only && inputs.scenario-type != 'agentic-coding' }} uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: bmk_${{ env.RESULT_FILENAME }} path: agg_${{ env.RESULT_FILENAME }}.json + - name: Upload agentic aggregated result + if: ${{ inputs.scenario-type == 'agentic-coding' }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: bmk_agentic_${{ env.RESULT_FILENAME }} + path: ${{ env.RESULT_FILENAME }}.json + + - name: Upload agentic raw results + if: ${{ always() && inputs.scenario-type == 'agentic-coding' }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: agentic_${{ env.RESULT_FILENAME }} + path: | + results/server.log + results/metrics_server_metrics.csv + results/metrics_plots.png + results/metrics_workload.png + results/metrics_client_metrics.csv + results/benchmark.log + results/config.yaml + results/vllm_command.txt + results/benchmark_command.txt + results/workload_distribution_summary.txt + results/workload_distribution_plots.png + results/trace_replay/detailed_results.csv + results/trace_replay/debug_trace.jsonl + if-no-files-found: ignore + - name: Upload server logs if: always() uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # 
v7.0.1 with: name: ${{ inputs.eval-only && 'eval_server_logs_' || 'server_logs_' }}${{ env.RESULT_FILENAME }} - path: server.log + path: ${{ inputs.scenario-type == 'agentic-coding' && 'results/server.log' || 'server.log' }} if-no-files-found: ignore - name: Upload GPU metrics diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml index 74d4889f3..4f3a6da6c 100644 --- a/.github/workflows/e2e-tests.yml +++ b/.github/workflows/e2e-tests.yml @@ -16,6 +16,11 @@ on: description: "Ref (branch/sha) to checkout for generating configs" required: false type: string + duration-override: + description: "Override matrix.config.duration (seconds). Empty = use matrix value." + required: false + type: string + default: "" workflow_call: inputs: generate-cli-command: @@ -30,6 +35,11 @@ on: description: "Ref (branch/sha) to checkout for generating configs" required: false type: string + duration-override: + description: "Override matrix.config.duration (seconds). Empty = use matrix value." 
+ required: false + type: string + default: "" jobs: get-jobs: @@ -39,6 +49,8 @@ jobs: multi-node-config: ${{ steps.get-jobs.outputs.multi-node-config }} eval-config: ${{ steps.get-jobs.outputs.eval-config }} multi-node-eval-config: ${{ steps.get-jobs.outputs.multi-node-eval-config }} + agentic-config: ${{ steps.get-jobs.outputs.agentic-config }} + multi-node-agentic-config: ${{ steps.get-jobs.outputs.multi-node-agentic-config }} steps: - name: Checkout code (ref) if: ${{ inputs.ref && inputs.ref != '' }} @@ -57,10 +69,14 @@ jobs: pip install pydantic CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py \ ${{ inputs.generate-cli-command || github.event.inputs.generate-cli-command }}) - SINGLE=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' not in x and not x.get('eval-only', False)]))") - MULTI=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' in x and not x.get('eval-only', False)]))") - EVALS=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' not in x and x.get('run-eval', False)]))") + AGENTIC=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if x.get('scenario-type') == 'agentic-coding' and 'prefill' not in x]))") + MULTI_AGENTIC=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if x.get('scenario-type') == 'agentic-coding' and 'prefill' in x]))") + SINGLE=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' not in x and x.get('scenario-type') != 'agentic-coding' and not x.get('eval-only', False)]))") + MULTI=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' in x and 
x.get('scenario-type') != 'agentic-coding' and not x.get('eval-only', False)]))") + EVALS=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' not in x and x.get('scenario-type') != 'agentic-coding' and x.get('run-eval', False)]))") MULTI_EVAL=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' in x and x.get('run-eval', False)]))") + echo "agentic-config=$AGENTIC" >> $GITHUB_OUTPUT + echo "multi-node-agentic-config=$MULTI_AGENTIC" >> $GITHUB_OUTPUT echo "single-node-config=$SINGLE" >> $GITHUB_OUTPUT echo "multi-node-config=$MULTI" >> $GITHUB_OUTPUT echo "eval-config=$EVALS" >> $GITHUB_OUTPUT @@ -146,6 +162,79 @@ jobs: eval-conc: ${{ matrix.config.eval-conc }} ref: ${{ inputs.ref }} + test-sweep-agentic: + needs: get-jobs + if: ${{ needs.get-jobs.outputs.agentic-config != '[]' }} + uses: ./.github/workflows/benchmark-tmpl.yml + name: agentic / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.get-jobs.outputs.agentic-config) }} + secrets: inherit + with: + exp-name: ${{ matrix.config.exp-name }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + tp: ${{ matrix.config.tp }} + ep: ${{ matrix.config.ep }} + dp-attn: ${{ matrix.config.dp-attn }} + conc: ${{ matrix.config.users }} + offloading: ${{ matrix.config.offloading }} + duration: ${{ inputs.duration-override != '' && inputs.duration-override || matrix.config.duration }} + isl: '0' + osl: '0' + max-model-len: '0' + spec-decoding: 'none' + disagg: 'false' + run-eval: false + scenario-type: agentic-coding + ref: ${{ inputs.ref }} + + test-sweep-multi-node-agentic: + needs: get-jobs + if: ${{ needs.get-jobs.outputs.multi-node-agentic-config != '[]' }} + uses: 
./.github/workflows/benchmark-multinode-tmpl.yml + name: multi-node agentic / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.get-jobs.outputs.multi-node-agentic-config) }} + secrets: inherit + with: + exp-name: ${{ matrix.config.exp-name }} + isl: '0' + osl: '0' + max-model-len: '0' + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + conc-list: ${{ toJson(matrix.config.conc) }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + prefill-num-worker: ${{ matrix.config.prefill.num-worker }} + prefill-tp: ${{ matrix.config.prefill.tp }} + prefill-ep: ${{ matrix.config.prefill.ep }} + prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }} + prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }} + decode-num-worker: ${{ matrix.config.decode.num-worker }} + decode-tp: ${{ matrix.config.decode.tp }} + decode-ep: ${{ matrix.config.decode.ep }} + decode-dp-attn: ${{ matrix.config.decode.dp-attn }} + decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} + conc: ${{ matrix.config.users }} + duration: ${{ inputs.duration-override != '' && inputs.duration-override || matrix.config.duration }} + run-eval: false + scenario-type: agentic-coding + ref: ${{ inputs.ref }} + test-sweep-single-node: needs: get-jobs if: ${{ needs.get-jobs.outputs.single-node-config != '[]' }} @@ -208,8 +297,8 @@ jobs: ref: ${{ inputs.ref }} collect-results: - needs: [test-sweep-multi-node, test-sweep-single-node] - if: ${{ always() && (needs.test-sweep-multi-node.result != 'skipped' || needs.test-sweep-single-node.result != 'skipped') }} + needs: [test-sweep-multi-node, test-sweep-single-node, test-sweep-agentic, test-sweep-multi-node-agentic] + if: ${{ always() && 
(needs.test-sweep-multi-node.result != 'skipped' || needs.test-sweep-single-node.result != 'skipped' || needs.test-sweep-agentic.result != 'skipped' || needs.test-sweep-multi-node-agentic.result != 'skipped') }} uses: ./.github/workflows/collect-results.yml secrets: inherit with: @@ -221,8 +310,42 @@ jobs: uses: ./.github/workflows/collect-evals.yml secrets: inherit + collect-agentic-results: + needs: [test-sweep-agentic, test-sweep-multi-node-agentic] + if: ${{ always() && (needs.test-sweep-agentic.result != 'skipped' || needs.test-sweep-multi-node-agentic.result != 'skipped') }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + submodules: true + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: pip install pandas matplotlib numpy + + - name: Download agentic artifacts + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + pattern: 'agentic_*' + path: results/ + + - name: Run aggregation + env: + PYTHONPATH: utils/agentic-benchmark/scripts:utils/agentic-benchmark/analysis + run: | + python utils/agentic-benchmark/scripts/collect_sweep_results.py results/ aggregated/ + + - name: Upload aggregated results + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: agentic_aggregated + path: aggregated/ + calc-success-rate: - needs: [collect-results, collect-evals] + needs: [collect-results, collect-evals, collect-agentic-results] if: ${{ always() }} runs-on: ubuntu-latest diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index fd1fa91be..a46ba5797 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -193,6 +193,77 @@ jobs: secrets: inherit with: *single-node-inputs + sweep-agentic: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null' }} + 
uses: ./.github/workflows/benchmark-tmpl.yml + name: agentic / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['agentic'] }} + secrets: inherit + with: + exp-name: ${{ matrix.config.exp-name }} + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + tp: ${{ matrix.config.tp }} + ep: ${{ matrix.config.ep }} + dp-attn: ${{ matrix.config.dp-attn }} + conc: ${{ matrix.config.users }} + offloading: ${{ matrix.config.offloading }} + duration: ${{ matrix.config.duration }} + isl: '0' + osl: '0' + max-model-len: '0' + spec-decoding: 'none' + disagg: 'false' + run-eval: false + scenario-type: agentic-coding + + sweep-multi-node-agentic: + needs: setup + if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null' }} + uses: ./.github/workflows/benchmark-multinode-tmpl.yml + name: multi-node agentic / + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['agentic'] }} + secrets: inherit + with: + exp-name: ${{ matrix.config.exp-name }} + isl: '0' + osl: '0' + max-model-len: '0' + runner: ${{ matrix.config.runner }} + image: ${{ matrix.config.image }} + model: ${{ matrix.config.model }} + model-prefix: ${{ matrix.config.model-prefix }} + framework: ${{ matrix.config.framework }} + precision: ${{ matrix.config.precision }} + conc-list: ${{ toJson(matrix.config.conc) }} + spec-decoding: ${{ matrix.config.spec-decoding }} + disagg: ${{ matrix.config.disagg }} + prefill-num-worker: ${{ matrix.config.prefill.num-worker }} + prefill-tp: ${{ matrix.config.prefill.tp }} + prefill-ep: ${{ matrix.config.prefill.ep }} + prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }} + prefill-additional-settings: ${{ 
toJson(matrix.config.prefill.additional-settings) }} + decode-num-worker: ${{ matrix.config.decode.num-worker }} + decode-tp: ${{ matrix.config.decode.tp }} + decode-ep: ${{ matrix.config.decode.ep }} + decode-dp-attn: ${{ matrix.config.decode.dp-attn }} + decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} + users: ${{ matrix.config.users }} + duration: ${{ matrix.config.duration }} + run-eval: false + scenario-type: agentic-coding + sweep-evals: needs: setup if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' && toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null' }} @@ -266,8 +337,10 @@ jobs: [ sweep-single-node-1k1k, sweep-single-node-8k1k, + sweep-agentic, sweep-multi-node-1k1k, sweep-multi-node-8k1k, + sweep-multi-node-agentic, setup, ] if: >- diff --git a/.gitignore b/.gitignore index 03d36472a..9ef909acc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ **/__pycache__/** -**/.coverage \ No newline at end of file +**/.coverage +experimental/multiturn/vllm_benchmark/results/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..e6da39b79 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "utils/trace-replay"] + path = utils/trace-replay + url = https://github.com/callanjfox/kv-cache-tester.git + branch = agentx-minimized diff --git a/AGENTS.md b/AGENTS.md index 969b95c37..c5a72fe77 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -231,12 +231,13 @@ dsr1-fp8-h200-dynamo-sglang: framework: dynamo-sglang multinode: true disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - conc-list: [1, 4, 16, 32, 64, 128, 256, 512] - prefill: + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - conc-list: [1, 4, 16, 32, 64, 128, 256, 512] + prefill: num-worker: 1 tp: 8 ep: 1 diff --git a/benchmarks/benchmark_lib.sh b/benchmarks/benchmark_lib.sh index 268745735..d5a41cd62 100644 --- a/benchmarks/benchmark_lib.sh +++ 
b/benchmarks/benchmark_lib.sh @@ -73,7 +73,7 @@ check_env_vars() { local missing_vars=() for var_name in "$@"; do - if [[ -z "${!var_name}" ]]; then + if [[ -z "${!var_name:-}" ]]; then missing_vars+=("$var_name") fi done @@ -862,3 +862,92 @@ run_eval() { fi return $eval_rc } + + +# -------------------------------- +# Agentic trace replay helpers +# -------------------------------- + +INFMAX_CONTAINER_WORKSPACE="${INFMAX_CONTAINER_WORKSPACE:-/workspace}" +AGENTIC_DIR="${AGENTIC_DIR:-${INFMAX_CONTAINER_WORKSPACE}/utils/agentic-benchmark}" +TRACE_REPLAY_DIR="${TRACE_REPLAY_DIR:-${INFMAX_CONTAINER_WORKSPACE}/utils/trace-replay}" + +agentic_pip_install() { + local pip_install=(python3 -m pip install) + if python3 -m pip install --help 2>/dev/null | grep -q -- "--break-system-packages"; then + pip_install+=(--break-system-packages) + fi + + "${pip_install[@]}" "$@" +} + +ensure_hf_cli() { + if command -v hf >/dev/null 2>&1; then + return 0 + fi + + # Some lean runtime images used by multinode SGLang include Python but not + # the Hugging Face CLI. Install just the hub CLI before prefetching traces. + agentic_pip_install --quiet "huggingface_hub[cli]>=0.25.0" +} + +resolve_trace_source() { + local dataset="semianalysisai/cc-traces-weka-042026" + TRACE_SOURCE_FLAG="--hf-dataset $dataset" + echo "Loading traces from Hugging Face dataset: $dataset" + # Pre-download the dataset into the shared HF_HUB_CACHE (same mount used + # for model weights) so datasets.load_dataset() reads from cache on + # subsequent runs instead of re-downloading every job. + ensure_hf_cli + hf download --repo-type dataset "$dataset" +} + +install_agentic_deps() { + agentic_pip_install --quiet urllib3 requests 2>/dev/null || true + agentic_pip_install -q -r "$AGENTIC_DIR/requirements.txt" + agentic_pip_install -q -r "$TRACE_REPLAY_DIR/requirements.txt" + # Force-upgrade datasets: containers often ship an older version without + # the `Json` feature type used by the HF traces dataset. 
`Json` was added + # in datasets 4.7.0 (March 2025). Unpinned installs won't upgrade an + # already-present package. + agentic_pip_install --upgrade "datasets>=4.7.0" +} + +build_replay_cmd() { + local result_dir="$1" + local duration="${DURATION:-1800}" + local max_delay="${MAX_DELAY:-60}" + local advance_min="${ADVANCE_MIN:-0.0}" + local advance_max="${ADVANCE_MAX:-0.7}" + + REPLAY_CMD="python3 $TRACE_REPLAY_DIR/trace_replay_tester.py" + REPLAY_CMD+=" --api-endpoint http://localhost:$PORT" + REPLAY_CMD+=" $TRACE_SOURCE_FLAG" + REPLAY_CMD+=" --output-dir $result_dir/trace_replay" + REPLAY_CMD+=" --start-users $USERS" + REPLAY_CMD+=" --max-users $USERS" + REPLAY_CMD+=" --test-duration $duration" + REPLAY_CMD+=" --recycle" + REPLAY_CMD+=" --max-delay $max_delay" + REPLAY_CMD+=" --max-concurrent-requests 0" + REPLAY_CMD+=" --advance-min $advance_min" + REPLAY_CMD+=" --advance-max $advance_max" + REPLAY_CMD+=" --warmup-enabled" + REPLAY_CMD+=" --seed 42" + if [ "${HASH_BLOCK_MODE:-false}" = "true" ]; then + REPLAY_CMD+=" --hash-block-mode" + fi + if [ "${DEBUG_TRACE:-false}" = "true" ]; then + REPLAY_CMD+=" --debug-trace" + fi + REPLAY_CMD+=" --metrics-output-prefix $result_dir/metrics" +} + +write_agentic_result_json() { + # Aggregate detailed_results.csv + metrics_server_metrics.csv into + # $INFMAX_CONTAINER_WORKSPACE/$RESULT_FILENAME.json. The workflow's + # existing retry-based existence check is the single success gate. + local result_dir="$1" + RESULT_DIR="$result_dir" AGENTIC_OUTPUT_DIR="${AGENTIC_OUTPUT_DIR:-$INFMAX_CONTAINER_WORKSPACE}" \ + python3 "$INFMAX_CONTAINER_WORKSPACE/utils/process_agentic_result.py" +} diff --git a/benchmarks/multi_node/agentic_srt.sh b/benchmarks/multi_node/agentic_srt.sh new file mode 100644 index 000000000..6e0d50f55 --- /dev/null +++ b/benchmarks/multi_node/agentic_srt.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +# Client-only agentic trace replay for srt-slurm multinode jobs. 
+# srt-slurm owns server startup; this script runs as benchmark.type=custom +# against the already-ready frontend on the head node. + +INFMAX_CONTAINER_WORKSPACE="${INFMAX_CONTAINER_WORKSPACE:-/infmax-workspace}" +source "$INFMAX_CONTAINER_WORKSPACE/benchmarks/benchmark_lib.sh" + +check_env_vars MODEL MODEL_PREFIX FRAMEWORK PRECISION USERS RESULT_FILENAME + +PORT="${PORT:-8000}" +RESULT_DIR="${RESULT_DIR:-/logs/agentic}" +DURATION="${DURATION:-1800}" +MAX_DELAY="${MAX_DELAY:-60}" +ADVANCE_MIN="${ADVANCE_MIN:-0.0}" +ADVANCE_MAX="${ADVANCE_MAX:-0.7}" + +mkdir -p "$RESULT_DIR" + +resolve_trace_source +install_agentic_deps + +build_replay_cmd "$RESULT_DIR" +echo "$REPLAY_CMD" > "$RESULT_DIR/benchmark_command.txt" + +set +e +$REPLAY_CMD 2>&1 | tee "$RESULT_DIR/benchmark.log" +REPLAY_RC=${PIPESTATUS[0]} +set -e + +write_agentic_result_json "$RESULT_DIR" + +python3 "$AGENTIC_DIR/scripts/analyze_benchmark_distributions.py" \ + "$RESULT_DIR/trace_replay" -o "$RESULT_DIR" 2>&1 || true + +if [ "$REPLAY_RC" -ne 0 ]; then + echo "WARNING: agentic trace replay exited with code $REPLAY_RC after writing available results" >&2 +fi diff --git a/benchmarks/single_node/agentic/dsr1_fp4_b200.sh b/benchmarks/single_node/agentic/dsr1_fp4_b200.sh new file mode 100644 index 000000000..6d21f1fd9 --- /dev/null +++ b/benchmarks/single_node/agentic/dsr1_fp4_b200.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +# Agentic trace replay benchmark for DSR1 FP4 on B200 using SGLang. 
+# +# Required env vars: +# MODEL, TP, USERS, RESULT_DIR + +source "$(dirname "$0")/../../benchmark_lib.sh" + +check_env_vars MODEL TP USERS RESULT_DIR + +PORT=${PORT:-8888} +DURATION=${DURATION:-1800} +MAX_DELAY=${MAX_DELAY:-60} +ADVANCE_MIN=${ADVANCE_MIN:-0.0} +ADVANCE_MAX=${ADVANCE_MAX:-0.7} +EP_SIZE=${EP_SIZE:-1} +SCHEDULER_RECV_INTERVAL=${SCHEDULER_RECV_INTERVAL:-5} + +if [[ -n "${SLURM_JOB_ID:-}" ]]; then + echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}" +fi + +if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi +nvidia-smi + +# ---- Resolve traces and install deps ---------------------------------------- +resolve_trace_source +install_agentic_deps + +# ---- Start SGLang server ---------------------------------------------------- +SERVER_LOG="$RESULT_DIR/server.log" +mkdir -p "$RESULT_DIR" + +echo "Starting SGLang server..." +export TORCH_CUDA_ARCH_LIST="10.0" +export PYTHONNOUSERSITE=1 + +python3 -m sglang.launch_server \ +--model-path $MODEL \ +--host 0.0.0.0 \ +--port $PORT \ +--trust-remote-code \ +--tensor-parallel-size=$TP \ +--data-parallel-size=1 \ +--cuda-graph-max-bs $USERS \ +--max-running-requests $USERS \ +--mem-fraction-static 0.85 \ +--kv-cache-dtype fp8_e4m3 \ +--chunked-prefill-size 16384 \ +--ep-size $EP_SIZE \ +--quantization modelopt_fp4 \ +--enable-flashinfer-allreduce-fusion \ +--scheduler-recv-interval $SCHEDULER_RECV_INTERVAL \ +--enable-symm-mem \ +--attention-backend trtllm_mla \ +--moe-runner-backend flashinfer_trtllm \ +--stream-interval 10 \ +--enable-metrics > "$SERVER_LOG" 2>&1 & +SERVER_PID=$! 
+echo "Server PID: $SERVER_PID" + +wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID" + +# ---- Run benchmark ---------------------------------------------------------- +build_replay_cmd "$RESULT_DIR" + +echo "$REPLAY_CMD" > "$RESULT_DIR/benchmark_command.txt" + +set -x +$REPLAY_CMD 2>&1 | tee "$RESULT_DIR/benchmark.log" || true +set +x + +write_agentic_result_json "$RESULT_DIR" + +# ---- Post-processing -------------------------------------------------------- +python3 "$AGENTIC_DIR/scripts/analyze_benchmark_distributions.py" \ + "$RESULT_DIR/trace_replay" -o "$RESULT_DIR" 2>&1 || true diff --git a/benchmarks/single_node/agentic/dsr1_fp4_mi355x.sh b/benchmarks/single_node/agentic/dsr1_fp4_mi355x.sh new file mode 100755 index 000000000..cdc8b8e73 --- /dev/null +++ b/benchmarks/single_node/agentic/dsr1_fp4_mi355x.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +# Agentic trace replay benchmark for DSR1 FP4 on MI355X using SGLang. +# +# Required env vars: +# MODEL, TP, USERS, RESULT_DIR + +source "$(dirname "$0")/../../benchmark_lib.sh" + +check_env_vars MODEL TP USERS RESULT_DIR + +PORT=${PORT:-8888} +DURATION=${DURATION:-1800} +MAX_DELAY=${MAX_DELAY:-60} +ADVANCE_MIN=${ADVANCE_MIN:-0.0} +ADVANCE_MAX=${ADVANCE_MAX:-0.7} + +if [[ -n "${SLURM_JOB_ID:-}" ]]; then + echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}" +fi + +if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi +rocm-smi + +# ---- Resolve traces and install deps ---------------------------------------- +resolve_trace_source +install_agentic_deps + +# ---- Start SGLang server ---------------------------------------------------- +SERVER_LOG="$RESULT_DIR/server.log" +mkdir -p "$RESULT_DIR" + +echo "Starting SGLang server..." 
+export SGLANG_USE_AITER=1 +export ROCM_QUICK_REDUCE_QUANTIZATION=INT4 +export PYTHONNOUSERSITE=1 + +python3 -m sglang.launch_server \ +--model-path=$MODEL \ +--host=0.0.0.0 \ +--port=$PORT \ +--trust-remote-code \ +--tensor-parallel-size=$TP \ +--chunked-prefill-size=16384 \ +--mem-fraction-static=0.8 \ +--num-continuous-decode-steps=4 \ +--cuda-graph-max-bs=$USERS \ +--max-running-requests=$USERS \ +--attention-backend aiter \ +--kv-cache-dtype fp8_e4m3 \ +--enable-metrics > "$SERVER_LOG" 2>&1 & +SERVER_PID=$! +echo "Server PID: $SERVER_PID" + +wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID" + +# ---- Run benchmark ---------------------------------------------------------- +build_replay_cmd "$RESULT_DIR" + +echo "$REPLAY_CMD" > "$RESULT_DIR/benchmark_command.txt" + +set -x +$REPLAY_CMD 2>&1 | tee "$RESULT_DIR/benchmark.log" || true +set +x + +write_agentic_result_json "$RESULT_DIR" + +# ---- Post-processing -------------------------------------------------------- +python3 "$AGENTIC_DIR/scripts/analyze_benchmark_distributions.py" \ + "$RESULT_DIR/trace_replay" -o "$RESULT_DIR" 2>&1 || true diff --git a/runners/launch_b200-dgxc.sh b/runners/launch_b200-dgxc.sh index edf5db957..fce9a8813 100644 --- a/runners/launch_b200-dgxc.sh +++ b/runners/launch_b200-dgxc.sh @@ -36,9 +36,8 @@ if [[ "$IS_MULTINODE" == "true" ]]; then rm -rf "$SRT_REPO_DIR" fi - git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" + git clone --branch cam/sa-submission-q2-2026 --single-branch https://github.com/cquil11/srt-slurm-nv.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" || exit 1 - git checkout sa-submission-q2-2026 echo "Installing srtctl..." 
export UV_INSTALL_DIR="$GITHUB_WORKSPACE/.local/bin" @@ -111,7 +110,7 @@ EOF fi # Override the job name in the config file with the runner name - sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE" + sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "${CONFIG_FILE%%:*}" # Bump recipe health-check timeout from 360×10s=3600s to 720×10s=7200s # so large-model loads (e.g. DSR1-FP8 ~680GB off shared FS) finish in time. # Uses ${CONFIG_FILE%%:*} because CONFIG_FILE may carry an :override[N] suffix. @@ -249,8 +248,7 @@ EOF else - HF_HUB_CACHE_MOUNT="/scratch/fsw/gharunners/hf-hub-cache" - SQUASH_FILE="/home/sa-shared/containers/$(echo "$IMAGE" | sed 's/[\/:@#]/_/g').sqsh" + HF_HUB_CACHE_MOUNT="/scratch/fsw/gharunners/hf-hub-cache" SQUASH_FILE="/home/sa-shared/containers/$(echo "$IMAGE" | sed 's/[\/:@#]/_/g').sqsh" FRAMEWORK_SUFFIX=$([[ "$FRAMEWORK" == "trt" ]] && printf '_trt' || printf '') SPEC_SUFFIX=$([[ "$SPEC_DECODING" == "mtp" ]] && printf '_mtp' || printf '') # Prefer a framework-tagged script (e.g. 
dsv4_fp4_b200_vllm.sh) so models diff --git a/runners/launch_b200-nb.sh b/runners/launch_b200-nb.sh index e0c8d92fb..2d699f0c4 100644 --- a/runners/launch_b200-nb.sh +++ b/runners/launch_b200-nb.sh @@ -35,4 +35,4 @@ srun --partition=$PARTITION --gres=gpu:$TP --exclusive --job-name="$RUNNER_NAME" --container-writable \ --container-workdir=$CONTAINER_MOUNT_DIR \ --no-container-entrypoint --export=ALL,PORT=8888,UCX_NET_DEVICES=$UCX_NET_DEVICES \ -bash "$BENCH_SCRIPT" \ No newline at end of file +bash "$BENCH_SCRIPT" diff --git a/runners/launch_b300-nv.sh b/runners/launch_b300-nv.sh index 3c855e805..f47905a21 100644 --- a/runners/launch_b300-nv.sh +++ b/runners/launch_b300-nv.sh @@ -37,9 +37,8 @@ if [ -d "$SRT_REPO_DIR" ]; then rm -rf "$SRT_REPO_DIR" fi -git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" +git clone --branch cam/sa-submission-q2-2026 --single-branch https://github.com/cquil11/srt-slurm-nv.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" || exit 1 -git checkout sa-submission-q2-2026 echo "Installing srtctl..." 
export UV_INSTALL_DIR="$GITHUB_WORKSPACE/.local/bin" @@ -114,7 +113,7 @@ if [[ -z "$CONFIG_FILE" ]]; then fi # Override the job name in the config file with the runner name -sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE" +sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "${CONFIG_FILE%%:*}" SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "b300,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1) echo "$SRTCTL_OUTPUT" @@ -310,5 +309,4 @@ else --container-workdir=$CONTAINER_MOUNT_DIR \ --no-container-entrypoint --export=ALL,PORT=8888 \ bash "$BENCH_SCRIPT" - fi diff --git a/runners/launch_gb200-nv.sh b/runners/launch_gb200-nv.sh index 224c3a928..2c3460fd4 100755 --- a/runners/launch_gb200-nv.sh +++ b/runners/launch_gb200-nv.sh @@ -159,9 +159,8 @@ elif [[ $FRAMEWORK == "dynamo-trt" && $MODEL_PREFIX == "kimik2.5" ]]; then cd "$SRT_REPO_DIR" git checkout sa-submission-q2-2026 else - git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR" + git clone --branch cam/sa-submission-q2-2026 --single-branch https://github.com/cquil11/srt-slurm-nv.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" - git checkout sa-submission-q1-2026 fi echo "Installing srtctl..." @@ -219,7 +218,7 @@ export INFMAX_WORKSPACE="$GITHUB_WORKSPACE" echo "Submitting job with srtctl..." 
# Override the job name in the config file with the runner name -sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE" +sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "${CONFIG_FILE%%:*}" if [[ "$FRAMEWORK" == "dynamo-sglang" ]]; then SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "gb200,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" --setup-script install-torchao.sh 2>&1) diff --git a/runners/launch_gb300-nv.sh b/runners/launch_gb300-nv.sh index 5f48ddcec..7066089f5 100644 --- a/runners/launch_gb300-nv.sh +++ b/runners/launch_gb300-nv.sh @@ -4,19 +4,58 @@ set -x -export SLURM_PARTITION="batch" +export SLURM_PARTITION="batch_1" export SLURM_ACCOUNT="benchmark" +export SLURM_EXCLUDED_NODELIST="${SLURM_EXCLUDED_NODELIST:-im-gb300-r01-c011}" export ENROOT_ROOTFS_WRITABLE=1 export MODEL_PATH=$MODEL +resolve_model_path() { + local selected="" + for candidate in "$@"; do + if [[ -d "$candidate" ]]; then + selected="$candidate" + break + fi + done + + if [[ -z "$selected" ]]; then + echo "ERROR: None of the candidate model paths exist:" >&2 + for candidate in "$@"; do + echo " - $candidate" >&2 + done + echo "Common model directories:" >&2 + ls -la /data/models /raid/shared/models /mnt/lustre01/models /home/sa-shared/models /data/home/sa-shared/models >&2 || true + return 1 + fi + + echo "$selected" +} + if [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp4" ]]; then export SERVED_MODEL_NAME="deepseek-r1-fp4" - export MODEL_PATH=/raid/shared/models/deepseek-r1-0528-fp4-v2 + MODEL_PATH=$(resolve_model_path \ + /data/models/dsr1-fp4 \ + /data/models/deepseek-r1-0528-fp4-v2 \ + /data/models/DeepSeek-R1-0528-NVFP4-v2 \ + /raid/shared/models/deepseek-r1-0528-fp4-v2 \ + /mnt/lustre01/models/deepseek-r1-0528-fp4-v2 \ + /home/sa-shared/models/deepseek-r1-0528-fp4-v2 \ + /data/home/sa-shared/models/deepseek-r1-0528-fp4-v2) || exit 1 + export MODEL_PATH export SRT_SLURM_MODEL_PREFIX="dsr1" elif [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp8" ]]; 
then export SERVED_MODEL_NAME="deepseek-r1-fp8" - export MODEL_PATH=/raid/shared/models/deepseek-r1-0528 + MODEL_PATH=$(resolve_model_path \ + /data/models/dsr1-fp8 \ + /data/models/deepseek-r1-0528 \ + /data/models/DeepSeek-R1-0528 \ + /raid/shared/models/deepseek-r1-0528 \ + /mnt/lustre01/models/deepseek-r1-0528 \ + /home/sa-shared/models/deepseek-r1-0528 \ + /data/home/sa-shared/models/deepseek-r1-0528) || exit 1 + export MODEL_PATH export SRT_SLURM_MODEL_PREFIX="dsr1-fp8" else echo "Unsupported model: $MODEL_PREFIX-$PRECISION. Supported models are: dsr1-fp4, dsr1-fp8" @@ -25,11 +64,81 @@ fi NGINX_IMAGE="nginx:1.27.4" -SQUASH_FILE="/home/sa-shared/squash/$(echo "$IMAGE" | sed 's/[\/:@#]/_/g').sqsh" -NGINX_SQUASH_FILE="/home/sa-shared/squash/$(echo "$NGINX_IMAGE" | sed 's/[\/:@#]/_/g').sqsh" +select_squash_dir() { + local candidates=( + "${SQUASH_DIR:-}" + "/data/squash" + "/data/home/sa-shared/squash" + "/home/sa-shared/squash" + ) + + for candidate in "${candidates[@]}"; do + if [[ -n "$candidate" ]] && mkdir -p "$candidate" 2>/dev/null && [[ -w "$candidate" ]]; then + echo "$candidate" + return 0 + fi + done -srun --partition=$SLURM_PARTITION --exclusive --time=180 bash -c "enroot import -o $SQUASH_FILE docker://$IMAGE" -srun --partition=$SLURM_PARTITION --exclusive --time=180 bash -c "enroot import -o $NGINX_SQUASH_FILE docker://$NGINX_IMAGE" + echo "ERROR: No writable shared squash directory found" >&2 + printf 'Checked:\n' >&2 + printf ' - %s\n' "${candidates[@]}" >&2 + return 1 +} + +SQUASH_DIR=$(select_squash_dir) || exit 1 +SQUASH_FILE="${SQUASH_DIR}/$(echo "$IMAGE" | sed 's/[\/:@#]/_/g').sqsh" +NGINX_SQUASH_FILE="${SQUASH_DIR}/$(echo "$NGINX_IMAGE" | sed 's/[\/:@#]/_/g').sqsh" + +cleanup_broken_squash_symlink() { + local squash_file="$1" + if [[ -L "$squash_file" && ! -e "$squash_file" ]]; then + echo "Removing broken squash symlink: $squash_file" + rm -f "$squash_file" + elif [[ -L "$squash_file" ]] && ! 
readlink -f "$squash_file" >/dev/null 2>&1; then + echo "Removing unresolvable squash symlink: $squash_file" + rm -f "$squash_file" + fi +} + +cleanup_broken_squash_symlink "$SQUASH_FILE" +cleanup_broken_squash_symlink "$NGINX_SQUASH_FILE" + +import_container() { + local image="$1" + local squash_file="$2" + + if [[ -f "$squash_file" ]] && unsquashfs -l "$squash_file" >/dev/null 2>&1; then + echo "Using existing squash image: $squash_file" + return 0 + fi + + echo "Importing $image to $squash_file" + rm -f "$squash_file" + srun -N 1 -A "$SLURM_ACCOUNT" -p "$SLURM_PARTITION" --exclusive --time=180 \ + bash -lc "mkdir -p '$(dirname "$squash_file")' && enroot import -o '$squash_file' 'docker://$image' && test -f '$squash_file' && unsquashfs -l '$squash_file' >/dev/null" + + # /data/squash can lag briefly after enroot writes from the import node. + for _ in {1..30}; do + if [[ -f "$squash_file" ]] && unsquashfs -l "$squash_file" >/dev/null 2>&1; then + echo "Imported squash image is visible: $squash_file" + return 0 + fi + sleep 2 + done + + if [[ ! -f "$squash_file" ]]; then + echo "ERROR: Container image path does not exist after import: $squash_file" >&2 + ls -la "$(dirname "$squash_file")" >&2 || true + exit 1 + fi + + echo "ERROR: Container image exists but failed unsquashfs validation: $squash_file" >&2 + ls -la "$squash_file" >&2 || true + exit 1 +} + +import_container "$IMAGE" "$SQUASH_FILE" +import_container "$NGINX_IMAGE" "$NGINX_SQUASH_FILE" export EVAL_ONLY="${EVAL_ONLY:-false}" @@ -43,9 +152,8 @@ if [ -d "$SRT_REPO_DIR" ]; then rm -rf "$SRT_REPO_DIR" fi -git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" +git clone --branch cam/sa-submission-q2-2026 --single-branch https://github.com/cquil11/srt-slurm-nv.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" -git checkout sa-submission-q2-2026 echo "Installing srtctl..." 
export UV_INSTALL_DIR="$GITHUB_WORKSPACE/.local/bin" @@ -84,6 +192,7 @@ srtctl_root: "${SRTCTL_ROOT}" # Model path aliases model_paths: "${SRT_SLURM_MODEL_PREFIX}": "${MODEL_PATH}" + "dsfp4": "${MODEL_PATH}" containers: dynamo-trtllm: ${SQUASH_FILE} dynamo-sglang: ${SQUASH_FILE} @@ -109,9 +218,26 @@ if [[ -z "$CONFIG_FILE" ]]; then fi # Override the job name in the config file with the runner name -sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE" +CONFIG_PATH="${CONFIG_FILE%%:*}" +sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_PATH" + +if [[ -n "$SLURM_EXCLUDED_NODELIST" ]]; then + if grep -q "^sbatch_directives:" "$CONFIG_PATH"; then + if grep -q "^ exclude:" "$CONFIG_PATH"; then + sed -i "s/^ exclude:.*/ exclude: \"${SLURM_EXCLUDED_NODELIST}\"/" "$CONFIG_PATH" + else + sed -i "/^sbatch_directives:/a\\ exclude: \"${SLURM_EXCLUDED_NODELIST}\"" "$CONFIG_PATH" + fi + else + sed -i "/^name:.*/a sbatch_directives:\\n exclude: \"${SLURM_EXCLUDED_NODELIST}\"" "$CONFIG_PATH" + fi +fi -SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "gb300,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1) +if [[ "$FRAMEWORK" == "dynamo-sglang" ]]; then + SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "gb300,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" --setup-script install-torchao.sh 2>&1) +else + SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "gb300,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1) +fi echo "$SRTCTL_OUTPUT" JOB_ID=$(echo "$SRTCTL_OUTPUT" | grep -oP '✅ Job \K[0-9]+' || echo "$SRTCTL_OUTPUT" | grep -oP 'Job \K[0-9]+') @@ -129,6 +255,7 @@ echo "Extracted JOB_ID: $JOB_ID" # srtctl creates logs in outputs/JOB_ID/logs/ LOGS_DIR="outputs/$JOB_ID/logs" LOG_FILE="$LOGS_DIR/sweep_${JOB_ID}.log" +mkdir -p "$LOGS_DIR" # Wait for log file to appear (also check job is still alive) while ! 
ls "$LOG_FILE" &>/dev/null; do diff --git a/runners/launch_h100-cr.sh b/runners/launch_h100-cr.sh index 5100419b9..a8bdf11ca 100644 --- a/runners/launch_h100-cr.sh +++ b/runners/launch_h100-cr.sh @@ -15,4 +15,4 @@ docker run --rm --network=host --name=$server_name \ -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e TORCH_CUDA_ARCH_LIST="9.0" -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \ --entrypoint=/bin/bash \ $IMAGE \ -benchmarks/single_node/"${EXP_NAME%%_*}_${PRECISION}_h100.sh" +benchmarks/single_node/${SCENARIO_SUBDIR}"${EXP_NAME%%_*}_${PRECISION}_h100.sh" diff --git a/runners/launch_h100-cw.sh b/runners/launch_h100-cw.sh index f3198ca8c..eb6cdafbb 100644 --- a/runners/launch_h100-cw.sh +++ b/runners/launch_h100-cw.sh @@ -31,7 +31,7 @@ srun --jobid=$JOB_ID \ --container-mount-home \ --container-workdir=/workspace/ \ --no-container-entrypoint --export=ALL,PORT=8888 \ -bash benchmarks/single_node/${EXP_NAME%%_*}_${PRECISION}_h100.sh +bash benchmarks/single_node/${SCENARIO_SUBDIR}${EXP_NAME%%_*}_${PRECISION}_h100.sh rmdir $SAGEMAKER_SHM_PATH scancel $JOB_ID diff --git a/runners/launch_h100-dgxc-slurm.sh b/runners/launch_h100-dgxc-slurm.sh index 5a2ab64d2..851381ece 100644 --- a/runners/launch_h100-dgxc-slurm.sh +++ b/runners/launch_h100-dgxc-slurm.sh @@ -41,9 +41,8 @@ if [[ "$IS_MULTINODE" == "true" ]]; then rm -rf "$SRT_REPO_DIR" fi - git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" + git clone --branch cam/sa-submission-q2-2026 --single-branch https://github.com/cquil11/srt-slurm-nv.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" - git checkout sa-submission-q2-2026 echo "Installing srtctl..." 
export UV_INSTALL_DIR="/mnt/nfs/sa-shared/.uv/bin" @@ -135,8 +134,7 @@ EOF sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE" sed -i "/^name:.*/a sbatch_directives:\n exclude: \"${SLURM_EXCLUDED_NODELIST}\"" "$CONFIG_FILE" # Raise sglang's torch-distributed TCPStore timeout from the 600s gloo default - sed -i '/^ watchdog-timeout:/a\ dist-timeout: 1800' "${CONFIG_FILE%%:*}" - SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "h100,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1) + sed -i '/^ watchdog-timeout:/a\ dist-timeout: 1800' "${CONFIG_FILE%%:*}" SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "h100,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1) echo "$SRTCTL_OUTPUT" # Extract JOB_ID from srtctl output @@ -288,7 +286,7 @@ else --no-container-mount-home \ --container-workdir=/workspace/ \ --no-container-entrypoint --export=ALL,PORT=8888 \ - bash benchmarks/single_node/${EXP_NAME%%_*}_${PRECISION}_h100.sh + bash benchmarks/single_node/${SCENARIO_SUBDIR}${EXP_NAME%%_*}_${PRECISION}_h100.sh scancel $JOB_ID diff --git a/runners/launch_h200-cw.sh b/runners/launch_h200-cw.sh index 84b40480c..1486c4fa6 100644 --- a/runners/launch_h200-cw.sh +++ b/runners/launch_h200-cw.sh @@ -44,7 +44,7 @@ srun --jobid=$JOB_ID \ --container-mount-home \ --container-workdir=/workspace/ \ --no-container-entrypoint --export=ALL \ -bash benchmarks/single_node/${MODEL_CODE}_${PRECISION}_h200${FRAMEWORK_SUFFIX}${SPEC_SUFFIX}.sh +bash benchmarks/single_node/${SCENARIO_SUBDIR}${MODEL_CODE}_${PRECISION}_h200${FRAMEWORK_SUFFIX}${SPEC_SUFFIX}.sh rmdir $SAGEMAKER_SHM_PATH scancel $JOB_ID diff --git a/runners/launch_h200-dgxc-slurm.sh b/runners/launch_h200-dgxc-slurm.sh index e11ca7b20..b082cdcba 100755 --- a/runners/launch_h200-dgxc-slurm.sh +++ b/runners/launch_h200-dgxc-slurm.sh @@ -40,9 +40,8 @@ if [[ "$IS_MULTINODE" == "true" ]]; then rm -rf "$SRT_REPO_DIR" fi - git clone https://github.com/NVIDIA/srt-slurm.git 
"$SRT_REPO_DIR" + git clone --branch cam/sa-submission-q2-2026 --single-branch https://github.com/cquil11/srt-slurm-nv.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" - git checkout sa-submission-q2-2026 echo "Installing srtctl..." curl -LsSf https://astral.sh/uv/install.sh | sh @@ -127,8 +126,7 @@ EOF # Override the job name in the config file with the runner name sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE" sed -i '/^health_check:/,/^[^ ]/{ /^health_check:/d; /^ /d; }' "${CONFIG_FILE%%:*}" - printf '\nhealth_check:\n max_attempts: 720\n interval_seconds: 10\n' >> "${CONFIG_FILE%%:*}" - SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "h200,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1) + printf '\nhealth_check:\n max_attempts: 720\n interval_seconds: 10\n' >> "${CONFIG_FILE%%:*}" SRTCTL_OUTPUT=$(srtctl apply -f "$CONFIG_FILE" --tags "h200,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1) echo "$SRTCTL_OUTPUT" # Extract JOB_ID from srtctl output @@ -292,7 +290,7 @@ else --no-container-mount-home \ --container-workdir=/workspace/ \ --no-container-entrypoint --export=ALL,PORT=8888 \ - bash benchmarks/single_node/${EXP_NAME%%_*}_${PRECISION}_h200$([[ "$FRAMEWORK" == "trt" ]] && printf '_trt')$([[ "$SPEC_DECODING" == "mtp" ]] && printf '_mtp').sh + bash benchmarks/single_node/${SCENARIO_SUBDIR}${EXP_NAME%%_*}_${PRECISION}_h200$([[ "$FRAMEWORK" == "trt" ]] && printf '_trt')$([[ "$SPEC_DECODING" == "mtp" ]] && printf '_mtp').sh scancel $JOB_ID diff --git a/runners/launch_h200-nb.sh b/runners/launch_h200-nb.sh index 9d157a858..158c30792 100644 --- a/runners/launch_h200-nb.sh +++ b/runners/launch_h200-nb.sh @@ -19,4 +19,4 @@ srun --partition=$PARTITION --gres=gpu:$TP --exclusive --job-name="$RUNNER_NAME" --container-mount-home \ --container-workdir=/workspace/ \ --no-container-entrypoint --export=ALL \ -bash benchmarks/single_node/${MODEL_CODE}_${PRECISION}_h200${FRAMEWORK_SUFFIX}${SPEC_SUFFIX}.sh +bash 
benchmarks/single_node/${SCENARIO_SUBDIR}${MODEL_CODE}_${PRECISION}_h200${FRAMEWORK_SUFFIX}${SPEC_SUFFIX}.sh diff --git a/runners/launch_mi300x-amds.sh b/runners/launch_mi300x-amds.sh index b654c515a..20addccf4 100644 --- a/runners/launch_mi300x-amds.sh +++ b/runners/launch_mi300x-amds.sh @@ -35,6 +35,6 @@ srun --jobid=$JOB_ID \ --container-remap-root \ --container-workdir=/workspace/ \ --no-container-entrypoint --export=ALL \ -bash benchmarks/single_node/${EXP_NAME%%_*}_${PRECISION}_mi300x.sh +bash benchmarks/single_node/${SCENARIO_SUBDIR}${EXP_NAME%%_*}_${PRECISION}_mi300x.sh scancel $JOB_ID \ No newline at end of file diff --git a/runners/launch_mi325x-amds.sh b/runners/launch_mi325x-amds.sh index 67f93a309..144b54646 100644 --- a/runners/launch_mi325x-amds.sh +++ b/runners/launch_mi325x-amds.sh @@ -35,6 +35,6 @@ srun --jobid=$JOB_ID \ --container-remap-root \ --container-workdir=/workspace/ \ --no-container-entrypoint --export=ALL \ -bash benchmarks/single_node/${EXP_NAME%%_*}_${PRECISION}_mi325x.sh +bash benchmarks/single_node/${SCENARIO_SUBDIR}${EXP_NAME%%_*}_${PRECISION}_mi325x.sh scancel $JOB_ID diff --git a/runners/launch_mi355x-amds.sh b/runners/launch_mi355x-amds.sh index 152745d4e..ec0881bdd 100644 --- a/runners/launch_mi355x-amds.sh +++ b/runners/launch_mi355x-amds.sh @@ -213,8 +213,8 @@ else fi SCRIPT_BASE="${EXP_NAME%%_*}_${PRECISION}_mi355x" - SCRIPT_FW="benchmarks/single_node/${SCRIPT_BASE}_${FRAMEWORK}${SPEC_SUFFIX}.sh" - SCRIPT_FALLBACK="benchmarks/single_node/${SCRIPT_BASE}${FRAMEWORK_SUFFIX}${SPEC_SUFFIX}.sh" + SCRIPT_FW="benchmarks/single_node/${SCENARIO_SUBDIR:-}${SCRIPT_BASE}_${FRAMEWORK}${SPEC_SUFFIX}.sh" + SCRIPT_FALLBACK="benchmarks/single_node/${SCENARIO_SUBDIR:-}${SCRIPT_BASE}${FRAMEWORK_SUFFIX}${SPEC_SUFFIX}.sh" if [[ -f "$SCRIPT_FW" ]]; then BENCHMARK_SCRIPT="$SCRIPT_FW" else diff --git a/utils/agentic-benchmark/bench/__init__.py b/utils/agentic-benchmark/bench/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/utils/agentic-benchmark/bench/metrics_collector.py b/utils/agentic-benchmark/bench/metrics_collector.py new file mode 100644 index 000000000..af4890f93 --- /dev/null +++ b/utils/agentic-benchmark/bench/metrics_collector.py @@ -0,0 +1,897 @@ +""" +Metrics collector for inference servers during benchmarks. +Polls /metrics endpoint and generates visualizations. +Supports vLLM and sglang backends (auto-detected from metrics prefix). +""" + +import asyncio +import csv +import re +import time +from dataclasses import dataclass, field +from pathlib import Path + +import aiohttp +import matplotlib.pyplot as plt + + +@dataclass +class MetricsSnapshot: + timestamp: float + kv_cache_usage: float = 0.0 + cpu_kv_cache_usage: float = 0.0 + num_requests_running: int = 0 + num_requests_waiting: int = 0 + prefix_cache_hits: int = 0 + prefix_cache_queries: int = 0 + cpu_prefix_cache_hits: int = 0 + cpu_prefix_cache_queries: int = 0 + prompt_tokens: int = 0 + generation_tokens: int = 0 + num_preemptions: int = 0 + request_success: int = 0 + # KV offload transfer metrics (cumulative) + kv_offload_bytes_gpu_to_cpu: float = 0.0 + kv_offload_bytes_cpu_to_gpu: float = 0.0 + kv_offload_time_gpu_to_cpu: float = 0.0 + kv_offload_time_cpu_to_gpu: float = 0.0 + # Prompt tokens by source (cumulative) + prompt_tokens_local_compute: int = 0 + prompt_tokens_local_cache_hit: int = 0 + prompt_tokens_external_kv_transfer: int = 0 + # Prefill KV computed tokens (cumulative sum from histogram) + prefill_kv_computed_tokens_sum: int = 0 + prefill_kv_computed_tokens_count: int = 0 + + +# ============================================================================= +# Metrics Parsers — one per backend +# ============================================================================= + +def _get_value(text: str, pattern: str, default: float = 0.0) -> float: + """Extract a gauge/counter value from Prometheus text using a regex.""" + match = re.search(pattern, text) + return float(match.group(1)) if match 
else default + + +class VLLMMetricsParser: + """Parse vLLM Prometheus metrics (prefix: vllm:).""" + + def parse(self, text: str) -> MetricsSnapshot: + snapshot = MetricsSnapshot(timestamp=time.time()) + g = lambda p, d=0.0: _get_value(text, p, d) + + # KV cache usage (0-1 scale) + snapshot.kv_cache_usage = g(r'vllm:gpu_cache_usage_perc\{[^}]*\}\s+([\d.e+-]+)') + if snapshot.kv_cache_usage == 0.0: + snapshot.kv_cache_usage = g(r'vllm:kv_cache_usage_perc\{[^}]*\}\s+([\d.e+-]+)') + + snapshot.cpu_kv_cache_usage = g(r'vllm:cpu_cache_usage_perc\{[^}]*\}\s+([\d.e+-]+)') + + snapshot.num_requests_running = int(g(r'vllm:num_requests_running\{[^}]*\}\s+([\d.e+-]+)')) + snapshot.num_requests_waiting = int(g(r'vllm:num_requests_waiting\{[^}]*\}\s+([\d.e+-]+)')) + + snapshot.prefix_cache_hits = int(g(r'vllm:prefix_cache_hits_total\{[^}]*\}\s+([\d.e+-]+)')) + snapshot.prefix_cache_queries = int(g(r'vllm:prefix_cache_queries_total\{[^}]*\}\s+([\d.e+-]+)')) + + snapshot.cpu_prefix_cache_hits = int(g(r'vllm:external_prefix_cache_hits_total\{[^}]*\}\s+([\d.e+-]+)')) + snapshot.cpu_prefix_cache_queries = int(g(r'vllm:external_prefix_cache_queries_total\{[^}]*\}\s+([\d.e+-]+)')) + + snapshot.prompt_tokens = int(g(r'vllm:prompt_tokens_total\{[^}]*\}\s+([\d.e+-]+)')) + snapshot.generation_tokens = int(g(r'vllm:generation_tokens_total\{[^}]*\}\s+([\d.e+-]+)')) + + snapshot.num_preemptions = int(g(r'vllm:num_preemptions_total\{[^}]*\}\s+([\d.e+-]+)')) + + for match in re.finditer( + r'vllm:request_success_total\{[^}]*finished_reason="[^"]*"[^}]*\}\s+([\d.e+-]+)', text + ): + snapshot.request_success += int(float(match.group(1))) + + snapshot.kv_offload_bytes_gpu_to_cpu = g(r'vllm:kv_offload_total_bytes_total\{[^}]*transfer_type="GPU_to_CPU"[^}]*\}\s+([\d.e+-]+)') + snapshot.kv_offload_bytes_cpu_to_gpu = g(r'vllm:kv_offload_total_bytes_total\{[^}]*transfer_type="CPU_to_GPU"[^}]*\}\s+([\d.e+-]+)') + snapshot.kv_offload_time_gpu_to_cpu = 
g(r'vllm:kv_offload_total_time_total\{[^}]*transfer_type="GPU_to_CPU"[^}]*\}\s+([\d.e+-]+)') + snapshot.kv_offload_time_cpu_to_gpu = g(r'vllm:kv_offload_total_time_total\{[^}]*transfer_type="CPU_to_GPU"[^}]*\}\s+([\d.e+-]+)') + + snapshot.prompt_tokens_local_compute = int(g(r'vllm:prompt_tokens_by_source_total\{[^}]*source="local_compute"[^}]*\}\s+([\d.e+-]+)')) + snapshot.prompt_tokens_local_cache_hit = int(g(r'vllm:prompt_tokens_by_source_total\{[^}]*source="local_cache_hit"[^}]*\}\s+([\d.e+-]+)')) + snapshot.prompt_tokens_external_kv_transfer = int(g(r'vllm:prompt_tokens_by_source_total\{[^}]*source="external_kv_transfer"[^}]*\}\s+([\d.e+-]+)')) + + snapshot.prefill_kv_computed_tokens_sum = int(g(r'vllm:request_prefill_kv_computed_tokens_sum\{[^}]*\}\s+([\d.e+-]+)')) + snapshot.prefill_kv_computed_tokens_count = int(g(r'vllm:request_prefill_kv_computed_tokens_count\{[^}]*\}\s+([\d.e+-]+)')) + + return snapshot + + +class SGLangMetricsParser: + """Parse sglang Prometheus metrics (prefix: sglang:).""" + + def parse(self, text: str) -> MetricsSnapshot: + snapshot = MetricsSnapshot(timestamp=time.time()) + g = lambda p, d=0.0: _get_value(text, p, d) + + # KV cache usage — sglang reports token_usage as a ratio (0-1) + snapshot.kv_cache_usage = g(r'sglang:token_usage\{[^}]*\}\s+([\d.e+-]+)') + # Fallback: compute from num_used_tokens / max_total_num_tokens + if snapshot.kv_cache_usage == 0.0: + used = g(r'sglang:num_used_tokens\{[^}]*\}\s+([\d.e+-]+)') + total = g(r'sglang:max_total_num_tokens\{[^}]*\}\s+([\d.e+-]+)') + if total > 0: + snapshot.kv_cache_usage = used / total + + snapshot.num_requests_running = int(g(r'sglang:num_running_reqs\{[^}]*\}\s+([\d.e+-]+)')) + snapshot.num_requests_waiting = int(g(r'sglang:num_queue_reqs\{[^}]*\}\s+([\d.e+-]+)')) + + snapshot.prompt_tokens = int(g(r'sglang:prompt_tokens_total\{[^}]*\}\s+([\d.e+-]+)')) + snapshot.generation_tokens = int(g(r'sglang:generation_tokens_total\{[^}]*\}\s+([\d.e+-]+)')) + + # Preemptions — sglang 
calls them "retractions" + snapshot.num_preemptions = int(g(r'sglang:num_retracted_reqs\{[^}]*\}\s+([\d.e+-]+)')) + + snapshot.request_success = int(g(r'sglang:num_requests_total\{[^}]*\}\s+([\d.e+-]+)')) + + # Token source breakdown from realtime_tokens_total (cumulative) + snapshot.prompt_tokens_local_compute = int(g( + r'sglang:realtime_tokens_total\{[^}]*mode="prefill_compute"[^}]*\}\s+([\d.e+-]+)')) + snapshot.prompt_tokens_local_cache_hit = int(g( + r'sglang:realtime_tokens_total\{[^}]*mode="prefill_cache"[^}]*\}\s+([\d.e+-]+)')) + + # Derive cumulative hits/queries from the per-source token counters. + # This is the correct cumulative cache hit ratio — unlike sglang's + # instantaneous `cache_hit_rate` gauge, which is 0 during decode-only + # periods and thus yielded spurious 0% hit rates when sampled at + # benchmark shutdown. + snapshot.prefix_cache_hits = snapshot.prompt_tokens_local_cache_hit + snapshot.prefix_cache_queries = ( + snapshot.prompt_tokens_local_cache_hit + + snapshot.prompt_tokens_local_compute + ) + + return snapshot + + +def detect_backend(text: str) -> str: + """Auto-detect backend from metrics text.""" + if 'vllm:' in text: + return 'vllm' + elif 'sglang:' in text: + return 'sglang' + return 'unknown' + + +def get_parser(backend: str): + """Get the appropriate parser for the backend.""" + if backend == 'sglang': + return SGLangMetricsParser() + return VLLMMetricsParser() # default + + +@dataclass +class MetricsCollector: + base_url: str + poll_interval: float = 1.0 + snapshots: list[MetricsSnapshot] = field(default_factory=list) + _running: bool = False + _task: asyncio.Task | None = None + _parser: VLLMMetricsParser | SGLangMetricsParser | None = None + _backend: str = "" + gpu_transfer_collector: object = None + + def _parse_metrics(self, text: str) -> MetricsSnapshot: + """Parse Prometheus metrics text, auto-detecting backend on first call.""" + if self._parser is None: + self._backend = detect_backend(text) + self._parser = 
get_parser(self._backend) + if self._backend != 'unknown': + print(f"Auto-detected metrics backend: {self._backend}") + return self._parser.parse(text) + + async def _poll_loop(self) -> None: + """Background polling loop.""" + metrics_url = f"{self.base_url}/metrics" + async with aiohttp.ClientSession() as session: + while self._running: + try: + async with session.get(metrics_url, timeout=aiohttp.ClientTimeout(total=5)) as resp: + if resp.status == 200: + text = await resp.text() + snapshot = self._parse_metrics(text) + self.snapshots.append(snapshot) + except Exception as e: + print(f"Metrics poll error: {e}") + + await asyncio.sleep(self.poll_interval) + + def start(self) -> None: + """Start background metrics collection.""" + if self._running: + return + self._running = True + self.snapshots = [] + self._task = asyncio.create_task(self._poll_loop()) + + async def stop(self) -> None: + """Stop metrics collection.""" + self._running = False + if self._task: + self._task.cancel() + try: + await self._task + except asyncio.CancelledError: + pass + + def _trim_idle_prefix(self) -> None: + """Drop leading snapshots where the server was idle (no running requests + and no prompt tokens processed). Keeps plot x-axis starting at the first + real activity instead of showing a long zero-flat prefix.""" + first_active = next( + ( + i for i, s in enumerate(self.snapshots) + if s.num_requests_running > 0 or s.prompt_tokens > 0 + ), + None, + ) + if first_active is not None and first_active > 0: + dropped = first_active + self.snapshots = self.snapshots[first_active:] + print(f"Trimmed {dropped} idle leading snapshots before output") + + def generate_plots( + self, + output_prefix: str = "metrics", + client_metrics: list | None = None, + ) -> None: + """Generate visualization plots from collected metrics. 
+ + Args: + output_prefix: Prefix for output file names + client_metrics: Optional list of RequestStats from benchmark clients + """ + self._trim_idle_prefix() + + if len(self.snapshots) < 2: + print("Not enough data points for plots") + return + + # Convert to relative time (seconds from start) + start_time = self.snapshots[0].timestamp + times = [(s.timestamp - start_time) for s in self.snapshots] + + # Create figure with subplots + num_rows = 6 if client_metrics else 4 + fig, axes = plt.subplots(num_rows, 2, figsize=(14, 4 * num_rows)) + fig.suptitle("vLLM Server Metrics During Benchmark", fontsize=14) + + # 1. KV Cache Usage vs Time + ax = axes[0, 0] + kv_usage = [min(s.kv_cache_usage * 100, 100.0) for s in self.snapshots] + ax.scatter(times, kv_usage, alpha=0.15, s=2, c='blue') + kv_window = min(50, len(kv_usage) // 10) if len(kv_usage) > 10 else 1 + if kv_window > 1: + rolling_kv = [ + sum(kv_usage[max(0, i - kv_window):i + 1]) / len(kv_usage[max(0, i - kv_window):i + 1]) + for i in range(len(kv_usage)) + ] + ax.plot(times, rolling_kv, 'b-', label=f'GPU (avg n={kv_window})', linewidth=2) + else: + ax.plot(times, kv_usage, 'b-', label='GPU', linewidth=2) + # Add external cache if available + cpu_kv_usage = [s.cpu_kv_cache_usage * 100 for s in self.snapshots] + if any(v > 0 for v in cpu_kv_usage): + ax.plot(times, cpu_kv_usage, 'r--', label='External', linewidth=1.5) + ax.legend(fontsize=8) + ax.set_xlabel("Time (s)") + ax.set_ylabel("KV Cache Usage (%)") + ax.set_title("KV Cache Utilization Over Time") + ax.set_ylim(0, 105) + ax.grid(True, alpha=0.3) + + # 2. 
Running & Waiting Requests vs Time (smoothed + total) + ax = axes[0, 1] + running = [s.num_requests_running for s in self.snapshots] + waiting = [s.num_requests_waiting for s in self.snapshots] + total_queue = [r + w for r, w in zip(running, waiting)] + q_window = min(30, len(running) // 10) if len(running) > 10 else 1 + if q_window > 1: + rolling_running = [ + sum(running[max(0, i - q_window):i + 1]) / len(running[max(0, i - q_window):i + 1]) + for i in range(len(running)) + ] + rolling_waiting = [ + sum(waiting[max(0, i - q_window):i + 1]) / len(waiting[max(0, i - q_window):i + 1]) + for i in range(len(waiting)) + ] + rolling_total = [ + sum(total_queue[max(0, i - q_window):i + 1]) / len(total_queue[max(0, i - q_window):i + 1]) + for i in range(len(total_queue)) + ] + ax.plot(times, rolling_running, 'g-', label=f'Running (avg n={q_window})', linewidth=1.5) + ax.plot(times, rolling_waiting, 'r-', label=f'Waiting (avg n={q_window})', linewidth=1.5) + ax.plot(times, rolling_total, 'b-', label=f'Total (avg n={q_window})', linewidth=1.5) + else: + ax.plot(times, running, 'g-', label='Running', linewidth=1.5) + ax.plot(times, waiting, 'r-', label='Waiting', linewidth=1.5) + ax.plot(times, total_queue, 'b-', label='Total', linewidth=1.5) + ax.set_xlabel("Time (s)") + ax.set_ylabel("Requests") + ax.set_title("Request Queue Depth") + ax.legend(fontsize=8) + ax.grid(True, alpha=0.3) + + # 3. 
Cache Hit Rate vs Time (computed from deltas between polling intervals) + ax = axes[1, 0] + gpu_hit_rates = [] + ext_hit_rates = [] + combined_hit_rates = [] + has_ext_cache = any(s.cpu_prefix_cache_queries > 0 for s in self.snapshots) + for i in range(1, len(self.snapshots)): + # GPU (HBM) cache hit rate for this interval + gpu_delta_hits = self.snapshots[i].prefix_cache_hits - self.snapshots[i-1].prefix_cache_hits + gpu_delta_queries = self.snapshots[i].prefix_cache_queries - self.snapshots[i-1].prefix_cache_queries + if gpu_delta_queries > 0: + gpu_hit_rates.append(100.0 * gpu_delta_hits / gpu_delta_queries) + else: + gpu_hit_rates.append(gpu_hit_rates[-1] if gpu_hit_rates else 0) + + # External cache hit rate for this interval + if has_ext_cache: + ext_delta_hits = self.snapshots[i].cpu_prefix_cache_hits - self.snapshots[i-1].cpu_prefix_cache_hits + ext_delta_queries = self.snapshots[i].cpu_prefix_cache_queries - self.snapshots[i-1].cpu_prefix_cache_queries + if ext_delta_queries > 0: + ext_hit_rates.append(100.0 * ext_delta_hits / ext_delta_queries) + else: + ext_hit_rates.append(ext_hit_rates[-1] if ext_hit_rates else 0) + + # Combined hit rate: (gpu_hits + ext_hits) / (gpu_queries + ext_queries) + total_hits = gpu_delta_hits + ext_delta_hits + total_queries = gpu_delta_queries + ext_delta_queries + if total_queries > 0: + combined_hit_rates.append(100.0 * total_hits / total_queries) + else: + combined_hit_rates.append(combined_hit_rates[-1] if combined_hit_rates else 0) + + # Rolling window size + window = min(50, len(gpu_hit_rates) // 10) if len(gpu_hit_rates) > 10 else 1 + + # Scatter plot for GPU (HBM) cache hit rate + ax.scatter(times[1:], gpu_hit_rates, alpha=0.3, s=5, c='purple', label='GPU (HBM)') + if window > 1: + rolling_gpu = [ + sum(gpu_hit_rates[max(0, i - window):i + 1]) / len(gpu_hit_rates[max(0, i - window):i + 1]) + for i in range(len(gpu_hit_rates)) + ] + ax.plot(times[1:], rolling_gpu, 'purple', linewidth=1.5, label=f'GPU avg 
(n={window})') + + # External cache scatter + rolling (if available) + if has_ext_cache and ext_hit_rates: + ax.scatter(times[1:], ext_hit_rates, alpha=0.3, s=5, c='orange', label='External') + if window > 1: + rolling_ext = [ + sum(ext_hit_rates[max(0, i - window):i + 1]) / len(ext_hit_rates[max(0, i - window):i + 1]) + for i in range(len(ext_hit_rates)) + ] + ax.plot(times[1:], rolling_ext, 'orange', linewidth=1.5, label=f'External avg (n={window})') + + # Combined/total hit rate (only if external exists) + ax.scatter(times[1:], combined_hit_rates, alpha=0.2, s=3, c='green', label='Combined') + if window > 1: + rolling_combined = [ + sum(combined_hit_rates[max(0, i - window):i + 1]) / len(combined_hit_rates[max(0, i - window):i + 1]) + for i in range(len(combined_hit_rates)) + ] + ax.plot(times[1:], rolling_combined, 'green', linewidth=2, label=f'Combined avg (n={window})') + + ax.legend(loc='best', fontsize=8) + ax.set_xlabel("Time (s)") + ax.set_ylabel("Hit Rate (%)") + ax.set_title("Prefix Cache Hit Rate Per Interval (tokens hit / tokens queried)") + ax.set_ylim(0, 105) + ax.grid(True, alpha=0.3) + + # 4. 
Throughput vs Time (tokens/sec) with rolling average — decode + total + ax = axes[1, 1] + decode_throughputs = [] + total_throughputs = [] + for i in range(1, len(self.snapshots)): + delta_gen = self.snapshots[i].generation_tokens - self.snapshots[i-1].generation_tokens + delta_prompt = self.snapshots[i].prompt_tokens - self.snapshots[i-1].prompt_tokens + delta_time = self.snapshots[i].timestamp - self.snapshots[i-1].timestamp + if delta_time > 0: + decode_throughputs.append(delta_gen / delta_time) + total_throughputs.append((delta_gen + delta_prompt) / delta_time) + else: + decode_throughputs.append(0) + total_throughputs.append(0) + # Cumulative running average total throughput (total tokens / elapsed time) + cumulative_total_avg = [] + t0 = self.snapshots[0].timestamp + tokens0 = self.snapshots[0].generation_tokens + self.snapshots[0].prompt_tokens + for i in range(1, len(self.snapshots)): + elapsed = self.snapshots[i].timestamp - t0 + total_tokens = (self.snapshots[i].generation_tokens + self.snapshots[i].prompt_tokens) - tokens0 + cumulative_total_avg.append(total_tokens / elapsed if elapsed > 0 else 0) + + window = min(30, len(decode_throughputs) // 10) if len(decode_throughputs) > 10 else 1 + if window > 1: + rolling_decode = [ + sum(decode_throughputs[max(0, i - window):i + 1]) / len(decode_throughputs[max(0, i - window):i + 1]) + for i in range(len(decode_throughputs)) + ] + rolling_total = [ + sum(total_throughputs[max(0, i - window):i + 1]) / len(total_throughputs[max(0, i - window):i + 1]) + for i in range(len(total_throughputs)) + ] + ax.plot(times[1:], rolling_total, 'steelblue', linewidth=1.5, label=f'Total (avg n={window})') + ax.plot(times[1:], rolling_decode, 'orange', linewidth=1.5, label=f'Decode (avg n={window})') + ax.legend(fontsize=8) + else: + ax.plot(times[1:], total_throughputs, 'steelblue', linewidth=1, alpha=0.8, label='Total') + ax.plot(times[1:], decode_throughputs, 'orange', linewidth=1, alpha=0.8, label='Decode') + 
ax.legend(fontsize=8) + ax.plot(times[1:], cumulative_total_avg, 'red', linewidth=2, label='Total Running Avg') + ax.legend(fontsize=8) + ax.set_xlabel("Time (s)") + ax.set_ylabel("Tokens/sec") + ax.set_title("Throughput (Total & Decode)") + ax.grid(True, alpha=0.3) + + # 5. KV Offload Transfer Rate (from vLLM metrics) + ax = axes[2, 0] + gpu_to_cpu_rates = [] + cpu_to_gpu_rates = [] + for i in range(1, len(self.snapshots)): + dt = self.snapshots[i].timestamp - self.snapshots[i-1].timestamp + if dt > 0: + delta_g2c = self.snapshots[i].kv_offload_bytes_gpu_to_cpu - self.snapshots[i-1].kv_offload_bytes_gpu_to_cpu + delta_c2g = self.snapshots[i].kv_offload_bytes_cpu_to_gpu - self.snapshots[i-1].kv_offload_bytes_cpu_to_gpu + gpu_to_cpu_rates.append(delta_g2c / dt / 1e6) # MB/s + cpu_to_gpu_rates.append(delta_c2g / dt / 1e6) # MB/s + else: + gpu_to_cpu_rates.append(0) + cpu_to_gpu_rates.append(0) + if any(r > 0 for r in gpu_to_cpu_rates) or any(r > 0 for r in cpu_to_gpu_rates): + ax.scatter(times[1:], gpu_to_cpu_rates, alpha=0.15, s=3, c='blue') + ax.scatter(times[1:], cpu_to_gpu_rates, alpha=0.15, s=3, c='red') + xfer_window = min(30, len(gpu_to_cpu_rates) // 10) if len(gpu_to_cpu_rates) > 10 else 1 + if xfer_window > 1: + rolling_g2c = [ + sum(gpu_to_cpu_rates[max(0, i - xfer_window):i + 1]) / len(gpu_to_cpu_rates[max(0, i - xfer_window):i + 1]) + for i in range(len(gpu_to_cpu_rates)) + ] + rolling_c2g = [ + sum(cpu_to_gpu_rates[max(0, i - xfer_window):i + 1]) / len(cpu_to_gpu_rates[max(0, i - xfer_window):i + 1]) + for i in range(len(cpu_to_gpu_rates)) + ] + ax.plot(times[1:], rolling_g2c, 'b-', linewidth=1.5, label=f'GPU→CPU (avg n={xfer_window})') + ax.plot(times[1:], rolling_c2g, 'r-', linewidth=1.5, label=f'CPU→GPU (avg n={xfer_window})') + else: + ax.plot(times[1:], gpu_to_cpu_rates, 'b-', linewidth=1, alpha=0.8, label='GPU→CPU') + ax.plot(times[1:], cpu_to_gpu_rates, 'r-', linewidth=1, alpha=0.8, label='CPU→GPU') + ax.legend(fontsize=8) + ax.set_xlabel("Time 
(s)") + ax.set_ylabel("Transfer Rate (MB/s)") + ax.set_title("KV Offload Transfer Rate") + ax.grid(True, alpha=0.3) + + # 6. Prompt Token Sources Over Time (cumulative percentage) + ax = axes[2, 1] + initial = self.snapshots[0] + cum_compute_pct = [] + cum_cache_pct = [] + cum_ext_pct = [] + for s in self.snapshots: + c = s.prompt_tokens_local_compute - initial.prompt_tokens_local_compute + h = s.prompt_tokens_local_cache_hit - initial.prompt_tokens_local_cache_hit + e = s.prompt_tokens_external_kv_transfer - initial.prompt_tokens_external_kv_transfer + total = c + h + e + if total > 0: + cum_compute_pct.append(100.0 * c / total) + cum_cache_pct.append(100.0 * h / total) + cum_ext_pct.append(100.0 * e / total) + else: + cum_compute_pct.append(0) + cum_cache_pct.append(0) + cum_ext_pct.append(0) + if any(v > 0 for v in cum_compute_pct): + ax.stackplot(times, cum_compute_pct, cum_cache_pct, cum_ext_pct, + labels=['Prefill', 'HBM Cache Hit', 'Offload Cache Hit'], + colors=['coral', 'steelblue', 'mediumseagreen'], alpha=0.8) + ax.legend(fontsize=8, loc='lower left') + ax.set_xlabel("Time (s)") + ax.set_ylabel("% of Prefill Tokens") + ax.set_title("Cumulative Prefill Token Source Breakdown") + ax.set_ylim(0, 105) + ax.grid(True, alpha=0.3) + + # 7. 
Cumulative KV Offload Transfers + initial = self.snapshots[0] + # GPU → CPU cumulative + ax = axes[3, 0] + cum_g2c = [(s.kv_offload_bytes_gpu_to_cpu - initial.kv_offload_bytes_gpu_to_cpu) / 1e9 + for s in self.snapshots] + if any(v > 0 for v in cum_g2c): + ax.plot(times, cum_g2c, 'b-', linewidth=1.5) + ax.fill_between(times, cum_g2c, alpha=0.2, color='blue') + ax.set_xlabel("Time (s)") + ax.set_ylabel("Cumulative Transfer (GB)") + ax.set_title("KV Offload: GPU → CPU (Cumulative)") + ax.grid(True, alpha=0.3) + + # CPU → GPU cumulative + ax = axes[3, 1] + cum_c2g = [(s.kv_offload_bytes_cpu_to_gpu - initial.kv_offload_bytes_cpu_to_gpu) / 1e9 + for s in self.snapshots] + if any(v > 0 for v in cum_c2g): + ax.plot(times, cum_c2g, 'r-', linewidth=1.5) + ax.fill_between(times, cum_c2g, alpha=0.2, color='red') + ax.set_xlabel("Time (s)") + ax.set_ylabel("Cumulative Transfer (GB)") + ax.set_title("KV Offload: CPU → GPU (Cumulative)") + ax.grid(True, alpha=0.3) + + # 8 & 9. Client metrics plots (TTFT and Latency vs Time) + if client_metrics and len(client_metrics) > 0: + # Sort by start time + sorted_metrics = sorted(client_metrics, key=lambda x: x.start_time_ms) + # Convert to relative time (seconds from first request) + first_start = sorted_metrics[0].start_time_ms + request_times = [(m.start_time_ms - first_start) / 1000.0 for m in sorted_metrics] + ttfts = [m.ttft_ms for m in sorted_metrics] + latencies = [m.latency_ms for m in sorted_metrics] + + # 8. 
TTFT vs Time + ax = axes[4, 0] + ax.scatter(request_times, ttfts, alpha=0.3, s=5, c='blue') + # Add rolling average + window = min(50, len(ttfts) // 10) if len(ttfts) > 10 else 1 + if window > 1: + rolling_ttft = [ + sum(ttfts[max(0, i - window):i + 1]) / len(ttfts[max(0, i - window):i + 1]) + for i in range(len(ttfts)) + ] + ax.plot(request_times, rolling_ttft, 'r-', linewidth=1.5, label=f'Rolling avg (n={window})') + ax.legend() + ax.set_xlabel("Time (s)") + ax.set_ylabel("TTFT (ms)") + ax.set_title("Time to First Token vs Time") + ax.grid(True, alpha=0.3) + + # 9. Latency vs Time + ax = axes[4, 1] + ax.scatter(request_times, latencies, alpha=0.3, s=5, c='green') + # Add rolling average + if window > 1: + rolling_latency = [ + sum(latencies[max(0, i - window):i + 1]) / len(latencies[max(0, i - window):i + 1]) + for i in range(len(latencies)) + ] + ax.plot(request_times, rolling_latency, 'r-', linewidth=1.5, label=f'Rolling avg (n={window})') + ax.legend() + ax.set_xlabel("Time (s)") + ax.set_ylabel("Latency (ms)") + ax.set_title("Request Latency vs Time") + ax.grid(True, alpha=0.3) + + # 10. Interactivity (1/TPOT = tokens/sec) vs Time + ax = axes[5, 0] + # Filter out zero TPOT values to avoid division by zero + tpots = [m.tpot_ms for m in sorted_metrics] + interactivity = [1000.0 / t if t > 0 else 0 for t in tpots] # Convert to tokens/sec + ax.scatter(request_times, interactivity, alpha=0.3, s=5, c='purple') + # Add rolling average + if window > 1: + rolling_inter = [ + sum(interactivity[max(0, i - window):i + 1]) / len(interactivity[max(0, i - window):i + 1]) + for i in range(len(interactivity)) + ] + ax.plot(request_times, rolling_inter, 'r-', linewidth=1.5, label=f'Rolling avg (n={window})') + ax.legend() + ax.set_xlabel("Time (s)") + ax.set_ylabel("Interactivity (tokens/sec)") + ax.set_title("Decode Speed (1/TPOT) vs Time") + ax.grid(True, alpha=0.3) + + # 11. 
Preemptions over time + ax = axes[5, 1] + preemption_rates = [] + for i in range(1, len(self.snapshots)): + dt = self.snapshots[i].timestamp - self.snapshots[i-1].timestamp + delta = self.snapshots[i].num_preemptions - self.snapshots[i-1].num_preemptions + preemption_rates.append(delta / dt if dt > 0 else 0) + if any(r > 0 for r in preemption_rates): + ax.scatter(times[1:], preemption_rates, alpha=0.15, s=3, c='red') + preempt_window = min(30, len(preemption_rates) // 10) if len(preemption_rates) > 10 else 1 + if preempt_window > 1: + rolling_preempt = [ + sum(preemption_rates[max(0, i - preempt_window):i + 1]) / len(preemption_rates[max(0, i - preempt_window):i + 1]) + for i in range(len(preemption_rates)) + ] + ax.plot(times[1:], rolling_preempt, 'r-', linewidth=1.5, label=f'Rolling avg (n={preempt_window})') + # Cumulative on secondary axis + ax2 = ax.twinx() + cumulative = [self.snapshots[i].num_preemptions - self.snapshots[0].num_preemptions + for i in range(1, len(self.snapshots))] + ax2.plot(times[1:], cumulative, 'b--', linewidth=1, alpha=0.5, label='Cumulative') + ax2.set_ylabel("Cumulative Preemptions", color='blue') + ax2.tick_params(axis='y', labelcolor='blue') + ax.set_xlabel("Time (s)") + ax.set_ylabel("Preemptions/sec", color='red') + ax.tick_params(axis='y', labelcolor='red') + ax.set_title("Preemptions Over Time") + ax.grid(True, alpha=0.3) + + plt.tight_layout() + plt.savefig(f"{output_prefix}_plots.png", dpi=150) + print(f"Saved plots to {output_prefix}_plots.png") + plt.close() + + # Also generate a summary + self._print_summary() + + def _print_summary(self) -> None: + """Print summary statistics.""" + if len(self.snapshots) < 2: + return + + duration = self.snapshots[-1].timestamp - self.snapshots[0].timestamp + total_gen_tokens = self.snapshots[-1].generation_tokens - self.snapshots[0].generation_tokens + total_prompt_tokens = self.snapshots[-1].prompt_tokens - self.snapshots[0].prompt_tokens + + final = self.snapshots[-1] + initial = 
self.snapshots[0] + + print("\n" + "="*60) + print("METRICS SUMMARY") + print("="*60) + print(f"Duration: {duration:.1f}s") + print(f"Total prompt tokens: {total_prompt_tokens:,}") + print(f"Total generation tokens: {total_gen_tokens:,}") + print(f"Avg generation throughput: {total_gen_tokens/duration:.1f} tok/s") + print(f"Peak KV cache usage: {max(s.kv_cache_usage for s in self.snapshots)*100:.1f}%") + print(f"Peak running requests: {max(s.num_requests_running for s in self.snapshots)}") + print(f"Peak waiting requests: {max(s.num_requests_waiting for s in self.snapshots)}") + print(f"Total preemptions: {final.num_preemptions - initial.num_preemptions}") + + if final.prefix_cache_queries > initial.prefix_cache_queries: + delta_hits = final.prefix_cache_hits - initial.prefix_cache_hits + delta_queries = final.prefix_cache_queries - initial.prefix_cache_queries + hit_rate = 100.0 * delta_hits / delta_queries + print(f"Overall GPU cache hit rate: {hit_rate:.1f}%") + print(f" - Cache hits: {delta_hits:,} tokens") + print(f" - Cache queries: {delta_queries:,} tokens") + + # External/offloaded cache stats if available + if final.cpu_prefix_cache_queries > initial.cpu_prefix_cache_queries: + cpu_delta_hits = final.cpu_prefix_cache_hits - initial.cpu_prefix_cache_hits + cpu_delta_queries = final.cpu_prefix_cache_queries - initial.cpu_prefix_cache_queries + cpu_hit_rate = 100.0 * cpu_delta_hits / cpu_delta_queries + print(f"Overall external cache hit rate: {cpu_hit_rate:.1f}%") + print(f" - Cache hits: {cpu_delta_hits:,} tokens") + print(f" - Cache queries: {cpu_delta_queries:,} tokens") + + # Prompt tokens by source + total_compute = final.prompt_tokens_local_compute - initial.prompt_tokens_local_compute + total_cache_hit = final.prompt_tokens_local_cache_hit - initial.prompt_tokens_local_cache_hit + total_ext = final.prompt_tokens_external_kv_transfer - initial.prompt_tokens_external_kv_transfer + total_by_source = total_compute + total_cache_hit + total_ext + if 
total_by_source > 0: + print(f"Prompt token sources:") + print(f" - Prefill: {total_compute:>12,} ({100*total_compute/total_by_source:.1f}%)") + print(f" - HBM cache hit: {total_cache_hit:>12,} ({100*total_cache_hit/total_by_source:.1f}%)") + print(f" - Offload cache hit: {total_ext:>12,} ({100*total_ext/total_by_source:.1f}%)") + + # KV offload transfer stats + g2c_bytes = final.kv_offload_bytes_gpu_to_cpu - initial.kv_offload_bytes_gpu_to_cpu + c2g_bytes = final.kv_offload_bytes_cpu_to_gpu - initial.kv_offload_bytes_cpu_to_gpu + g2c_time = final.kv_offload_time_gpu_to_cpu - initial.kv_offload_time_gpu_to_cpu + c2g_time = final.kv_offload_time_cpu_to_gpu - initial.kv_offload_time_cpu_to_gpu + if g2c_bytes > 0 or c2g_bytes > 0: + print(f"KV offload transfers:") + print(f" GPU→CPU: {g2c_bytes/1e9:.2f} GB in {g2c_time:.2f}s ({g2c_bytes/g2c_time/1e9:.1f} GB/s)" if g2c_time > 0 else f" GPU→CPU: {g2c_bytes/1e9:.2f} GB") + print(f" CPU→GPU: {c2g_bytes/1e9:.2f} GB in {c2g_time:.2f}s ({c2g_bytes/c2g_time/1e9:.1f} GB/s)" if c2g_time > 0 else f" CPU→GPU: {c2g_bytes/1e9:.2f} GB") + + # Prefill KV computed tokens + delta_kv_sum = final.prefill_kv_computed_tokens_sum - initial.prefill_kv_computed_tokens_sum + delta_kv_count = final.prefill_kv_computed_tokens_count - initial.prefill_kv_computed_tokens_count + if delta_kv_count > 0: + print(f"Prefill KV computed tokens (excluding cached):") + print(f" Total: {delta_kv_sum:,} tokens across {delta_kv_count:,} requests") + print(f" Avg per request: {delta_kv_sum/delta_kv_count:.0f} tokens") + + print("="*60 + "\n") + + def export_csv( + self, + output_prefix: str = "metrics", + client_metrics: list | None = None, + ) -> None: + """Export all time series data to CSV files. 
+ + Args: + output_prefix: Prefix for output file names + client_metrics: Optional list of RequestStats from benchmark clients + + Generates: + - {output_prefix}_server_metrics.csv: vLLM server metrics over time + - {output_prefix}_gpu_transfer.csv: GPU PCIe transfer stats + - {output_prefix}_client_metrics.csv: Per-request client metrics (if provided) + """ + self._trim_idle_prefix() + + output_dir = Path(output_prefix).parent + if output_dir and not output_dir.exists(): + output_dir.mkdir(parents=True, exist_ok=True) + + # 1. Export server metrics (from /metrics endpoint) + if self.snapshots: + server_csv = f"{output_prefix}_server_metrics.csv" + start_time = self.snapshots[0].timestamp + + with open(server_csv, 'w', newline='') as f: + writer = csv.writer(f) + # Header + writer.writerow([ + 'timestamp_sec', + 'relative_time_sec', + 'kv_cache_usage_pct', + 'cpu_kv_cache_usage_pct', + 'num_requests_running', + 'num_requests_waiting', + 'prefix_cache_hits', + 'prefix_cache_queries', + 'cpu_prefix_cache_hits', + 'cpu_prefix_cache_queries', + 'prompt_tokens_total', + 'generation_tokens_total', + 'num_preemptions_total', + 'request_success_total', + # KV offload metrics + 'kv_offload_bytes_gpu_to_cpu', + 'kv_offload_bytes_cpu_to_gpu', + 'kv_offload_time_gpu_to_cpu', + 'kv_offload_time_cpu_to_gpu', + # Prompt tokens by source + 'prompt_tokens_local_compute', + 'prompt_tokens_local_cache_hit', + 'prompt_tokens_external_kv_transfer', + # Prefill KV computed + 'prefill_kv_computed_tokens_sum', + 'prefill_kv_computed_tokens_count', + # Computed per-interval metrics + 'interval_cache_hit_rate_pct', + 'interval_throughput_tok_per_sec', + ]) + + for i, s in enumerate(self.snapshots): + relative_time = s.timestamp - start_time + + # Compute per-interval metrics + cache_hit_rate = 0.0 + throughput = 0.0 + if i > 0: + prev = self.snapshots[i - 1] + delta_hits = s.prefix_cache_hits - prev.prefix_cache_hits + delta_queries = s.prefix_cache_queries - prev.prefix_cache_queries + if 
delta_queries > 0: + cache_hit_rate = 100.0 * delta_hits / delta_queries + + delta_gen = s.generation_tokens - prev.generation_tokens + delta_time = s.timestamp - prev.timestamp + if delta_time > 0: + throughput = delta_gen / delta_time + + writer.writerow([ + f"{s.timestamp:.3f}", + f"{relative_time:.3f}", + f"{s.kv_cache_usage * 100:.2f}", + f"{s.cpu_kv_cache_usage * 100:.2f}", + s.num_requests_running, + s.num_requests_waiting, + s.prefix_cache_hits, + s.prefix_cache_queries, + s.cpu_prefix_cache_hits, + s.cpu_prefix_cache_queries, + s.prompt_tokens, + s.generation_tokens, + s.num_preemptions, + s.request_success, + f"{s.kv_offload_bytes_gpu_to_cpu:.0f}", + f"{s.kv_offload_bytes_cpu_to_gpu:.0f}", + f"{s.kv_offload_time_gpu_to_cpu:.6f}", + f"{s.kv_offload_time_cpu_to_gpu:.6f}", + s.prompt_tokens_local_compute, + s.prompt_tokens_local_cache_hit, + s.prompt_tokens_external_kv_transfer, + s.prefill_kv_computed_tokens_sum, + s.prefill_kv_computed_tokens_count, + f"{cache_hit_rate:.2f}", + f"{throughput:.2f}", + ]) + + print(f"Exported server metrics to {server_csv}") + + # 2. 
Export GPU transfer stats (DEPRECATED - kept for backward compat) + if self.gpu_transfer_collector and self.gpu_transfer_collector.snapshots: + gpu_csv = f"{output_prefix}_gpu_transfer.csv" + gpu_snaps = self.gpu_transfer_collector.snapshots + gpu_start = gpu_snaps[0].timestamp + + with open(gpu_csv, 'w', newline='') as f: + writer = csv.writer(f) + writer.writerow([ + 'timestamp_sec', + 'relative_time_sec', + 'gpu_id', + 'tx_pci_mb_per_sec', + 'rx_pci_mb_per_sec', + 'cumulative_tx_gb', + 'cumulative_rx_gb', + ]) + + cumulative_tx = 0.0 + cumulative_rx = 0.0 + for i, s in enumerate(gpu_snaps): + relative_time = s.timestamp - gpu_start + if i > 0: + dt = s.timestamp - gpu_snaps[i - 1].timestamp + cumulative_tx += s.tx_pci * dt / 1024 # MB to GB + cumulative_rx += s.rx_pci * dt / 1024 + + writer.writerow([ + f"{s.timestamp:.3f}", + f"{relative_time:.3f}", + s.gpu_id, + f"{s.tx_pci:.2f}", + f"{s.rx_pci:.2f}", + f"{cumulative_tx:.4f}", + f"{cumulative_rx:.4f}", + ]) + + print(f"Exported GPU transfer metrics to {gpu_csv}") + + # 3. 
Export client metrics (per-request stats) + if client_metrics and len(client_metrics) > 0: + client_csv = f"{output_prefix}_client_metrics.csv" + sorted_metrics = sorted(client_metrics, key=lambda x: x.start_time_ms) + first_start = sorted_metrics[0].start_time_ms + + with open(client_csv, 'w', newline='') as f: + writer = csv.writer(f) + writer.writerow([ + 'start_time_ms', + 'relative_time_sec', + 'ttft_ms', + 'tpot_ms', + 'latency_ms', + 'input_num_turns', + 'input_num_tokens', + 'output_num_tokens', + 'output_num_chunks', + 'output_num_first_chunk_tokens', + 'approx_cached_percent', + 'conversation_id', + 'client_id', + 'interactivity_tok_per_sec', + ]) + + for m in sorted_metrics: + relative_time = (m.start_time_ms - first_start) / 1000.0 + interactivity = 1000.0 / m.tpot_ms if m.tpot_ms > 0 else 0 + + writer.writerow([ + f"{m.start_time_ms:.3f}", + f"{relative_time:.3f}", + f"{m.ttft_ms:.3f}", + f"{m.tpot_ms:.3f}", + f"{m.latency_ms:.3f}", + m.input_num_turns, + m.input_num_tokens, + m.output_num_tokens, + m.output_num_chunks, + m.output_num_first_chunk_tokens, + f"{m.approx_cached_percent:.2f}", + m.conversation_id, + m.client_id, + f"{interactivity:.2f}", + ]) + + print(f"Exported client metrics to {client_csv}") diff --git a/utils/agentic-benchmark/bench/run_metrics_collector.py b/utils/agentic-benchmark/bench/run_metrics_collector.py new file mode 100644 index 000000000..ddf605324 --- /dev/null +++ b/utils/agentic-benchmark/bench/run_metrics_collector.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +""" +Standalone metrics collector for vLLM server. + +Polls the vLLM /metrics endpoint and generates server-side plots. +Designed to run alongside any benchmark client (aiperf, custom, etc.). 
+ +Usage: + # Start collecting, run your benchmark, then Ctrl+C or kill to stop: + python -m bench.run_metrics_collector \ + --url http://localhost:8888 \ + --output-prefix results/metrics \ + --duration 600 + + # Or run in background and signal when done: + python -m bench.run_metrics_collector \ + --url http://localhost:8888 \ + --output-prefix results/metrics \ + --pid-file /tmp/metrics_collector.pid +""" + +import argparse +import asyncio +import os +import signal +import sys + +from bench.metrics_collector import MetricsCollector + + +async def run(args): + collector = MetricsCollector( + base_url=args.url, + poll_interval=args.poll_interval, + ) + + collector.start() + print(f"Metrics collector started (polling {args.url}/metrics every {args.poll_interval}s)") + + if args.pid_file: + with open(args.pid_file, "w") as f: + f.write(str(os.getpid())) + print(f"PID written to {args.pid_file}") + + # Set up graceful shutdown + stop_event = asyncio.Event() + + def handle_signal(*_): + print("\nStopping metrics collector...") + stop_event.set() + + loop = asyncio.get_event_loop() + for sig in (signal.SIGINT, signal.SIGTERM): + loop.add_signal_handler(sig, handle_signal) + + # Wait for duration or signal + if args.duration: + try: + await asyncio.wait_for(stop_event.wait(), timeout=args.duration) + except asyncio.TimeoutError: + print(f"Duration limit reached ({args.duration}s)") + else: + await stop_event.wait() + + await collector.stop() + + # Generate outputs + if len(collector.snapshots) < 2: + print("Not enough data points collected") + sys.exit(1) + + print(f"Collected {len(collector.snapshots)} snapshots") + + # Generate plots (without client metrics — server-only) + collector.generate_plots(output_prefix=args.output_prefix) + + # Export CSV + collector.export_csv(output_prefix=args.output_prefix) + + # Clean up PID file + if args.pid_file and os.path.exists(args.pid_file): + os.remove(args.pid_file) + + print("Done") + + +def main(): + parser = 
argparse.ArgumentParser( + description="Standalone vLLM metrics collector" + ) + parser.add_argument( + "--url", "-u", + default="http://localhost:8888", + help="vLLM server base URL (default: http://localhost:8888)", + ) + parser.add_argument( + "--output-prefix", "-o", + default="metrics", + help="Output file prefix (default: metrics)", + ) + parser.add_argument( + "--poll-interval", + type=float, + default=1.0, + help="Polling interval in seconds (default: 1.0)", + ) + parser.add_argument( + "--duration", "-d", + type=float, + default=None, + help="Max collection duration in seconds (default: unlimited, stop with signal)", + ) + parser.add_argument( + "--pid-file", + default=None, + help="Write PID to this file for external signaling", + ) + args = parser.parse_args() + + asyncio.run(run(args)) + + +if __name__ == "__main__": + main() diff --git a/utils/agentic-benchmark/requirements.txt b/utils/agentic-benchmark/requirements.txt new file mode 100644 index 000000000..2b1739577 --- /dev/null +++ b/utils/agentic-benchmark/requirements.txt @@ -0,0 +1,4 @@ +numpy>=1.24 +pandas>=2.0.0 +aiohttp>=3.10 +matplotlib diff --git a/utils/agentic-benchmark/scripts/analyze_benchmark_distributions.py b/utils/agentic-benchmark/scripts/analyze_benchmark_distributions.py new file mode 100644 index 000000000..aa4b639ca --- /dev/null +++ b/utils/agentic-benchmark/scripts/analyze_benchmark_distributions.py @@ -0,0 +1,395 @@ +#!/usr/bin/env python3 +"""Analyze ISL/OSL/turn distributions from AIPerf benchmark results. + +Reads profile_export.jsonl and produces summary stats + distribution plots +to verify the benchmark workload matches the intended Qwen trace profile. 
+ +Usage: + python analyze_benchmark_distributions.py path/to/aiperf_artifacts/ -o output_dir/ +""" + +from __future__ import annotations + +import argparse +import json +import math +from collections import Counter, defaultdict +from pathlib import Path + + +def load_records(artifacts_dir: Path) -> list[dict]: + """Load per-request records from profile_export.jsonl.""" + jsonl_path = artifacts_dir / "profile_export.jsonl" + records = [] + with open(jsonl_path) as f: + for line in f: + line = line.strip() + if line: + records.append(json.loads(line)) + return records + + +def load_trace_replay_records(trace_replay_dir: Path) -> list[dict]: + """Load per-request records from trace_replay detailed_results.csv. + + Converts to the same format as AIPerf JSONL records so the analyze() + function can process both formats identically. + """ + import csv + import sys + csv.field_size_limit(sys.maxsize) + + csv_path = trace_replay_dir / "detailed_results.csv" + records = [] + with open(csv_path) as f: + reader = csv.DictReader(f) + for row in reader: + if row.get("success") != "True": + continue + records.append({ + "metadata": { + "x_correlation_id": row["trace_id"], + "conversation_id": row["trace_id"], + "turn_index": int(row["request_idx"]), + "benchmark_phase": "profiling", + }, + "metrics": { + "input_sequence_length": {"value": int(row["input_tokens"])}, + "output_sequence_length": {"value": int(row["output_tokens_actual"])}, + }, + }) + return records + + +def analyze(records: list[dict], output_dir: Path) -> None: + """Run distribution analysis and save results.""" + output_dir.mkdir(parents=True, exist_ok=True) + + # Group by conversation + convos: dict[str, list[dict]] = defaultdict(list) + for r in records: + metrics = r.get("metrics", {}) + if "input_sequence_length" not in metrics or "output_sequence_length" not in metrics: + continue + # Use x_correlation_id (unique per session) not conversation_id (template, reused) + cid = 
r["metadata"].get("x_correlation_id") or r["metadata"]["conversation_id"] + ti = r["metadata"]["turn_index"] + isl = metrics["input_sequence_length"]["value"] + osl = metrics["output_sequence_length"]["value"] + convos[cid].append({"turn": ti, "isl": isl, "osl": osl}) + + # Sort turns within each conversation + for v in convos.values(): + v.sort(key=lambda x: x["turn"]) + + # Turn count distribution + turn_counts = Counter(len(v) for v in convos.values()) + total_convos = len(convos) + total_requests = len(records) + + lines = [] + lines.append("=" * 70) + lines.append("BENCHMARK WORKLOAD DISTRIBUTION ANALYSIS") + lines.append("=" * 70) + lines.append(f"Total conversations: {total_convos:,}") + lines.append(f"Total requests: {total_requests:,}") + lines.append(f"Avg turns/conv: {total_requests / total_convos:.2f}") + lines.append("") + + lines.append("TURN COUNT DISTRIBUTION:") + lines.append(f" {'Turns':>5s} {'Count':>6s} {'Pct':>6s} Target") + target = {1: 59, 2: 20, 3: 10, 4: 5, 5: 3, 6: 2, 7: 1} + for k in sorted(turn_counts.keys()): + pct = 100 * turn_counts[k] / total_convos + tgt = f"{target.get(k, 0):.0f}%" if k in target else "" + lines.append(f" {k:5d} {turn_counts[k]:6,} {pct:5.1f}% {tgt}") + + # ISL/OSL by turn index + lines.append("") + lines.append("ISL BY TURN INDEX:") + lines.append( + f" {'Turn':>4s} {'N':>6s} {'Mean':>8s} {'Median':>8s} {'Std':>8s} {'P5':>8s} {'P95':>8s}" + ) + max_turn = max(t["turn"] for v in convos.values() for t in v) + for ti in range(max_turn + 1): + vals = sorted(t["isl"] for v in convos.values() for t in v if t["turn"] == ti) + if not vals: + continue + n = len(vals) + mean = sum(vals) / n + std = math.sqrt(sum((v - mean) ** 2 for v in vals) / n) + median = vals[n // 2] + p5 = vals[int(n * 0.05)] + p95 = vals[int(n * 0.95)] + lines.append( + f" {ti:4d} {n:6,} {mean:8.0f} {median:8.0f} {std:8.0f} {p5:8.0f} {p95:8.0f}" + ) + + lines.append("") + lines.append("OSL BY TURN INDEX:") + lines.append( + f" {'Turn':>4s} {'N':>6s} 
{'Mean':>8s} {'Median':>8s} {'Std':>8s} {'P5':>8s} {'P95':>8s}" + ) + for ti in range(max_turn + 1): + vals = sorted(t["osl"] for v in convos.values() for t in v if t["turn"] == ti) + if not vals: + continue + n = len(vals) + mean = sum(vals) / n + std = math.sqrt(sum((v - mean) ** 2 for v in vals) / n) + median = vals[n // 2] + p5 = vals[int(n * 0.05)] + p95 = vals[int(n * 0.95)] + lines.append( + f" {ti:4d} {n:6,} {mean:8.0f} {median:8.0f} {std:8.0f} {p5:8.0f} {p95:8.0f}" + ) + + # Overall ISL/OSL stats + all_isl = sorted(t["isl"] for v in convos.values() for t in v) + all_osl = sorted(t["osl"] for v in convos.values() for t in v) + n = len(all_isl) + isl_mean = sum(all_isl) / n + osl_mean = sum(all_osl) / n + lines.append("") + lines.append("ALL REQUESTS ISL:") + lines.append( + f" n={n:,} mean={isl_mean:.0f} median={all_isl[n//2]} " + f"p5={all_isl[int(n*0.05)]} p95={all_isl[int(n*0.95)]}" + ) + lines.append("ALL REQUESTS OSL:") + lines.append( + f" n={n:,} mean={osl_mean:.0f} median={all_osl[n//2]} " + f"p5={all_osl[int(n*0.05)]} p95={all_osl[int(n*0.95)]}" + ) + + # Per-conversation stats + conv_max_isl = sorted(max(t["isl"] for t in v) for v in convos.values()) + conv_total_osl = sorted(sum(t["osl"] for t in v) for v in convos.values()) + nc = len(conv_max_isl) + lines.append("") + lines.append("PER-CONVERSATION MAX ISL (final context size):") + lines.append( + f" n={nc:,} mean={sum(conv_max_isl)/nc:.0f} median={conv_max_isl[nc//2]} " + f"p5={conv_max_isl[int(nc*0.05)]} p95={conv_max_isl[int(nc*0.95)]}" + ) + lines.append("PER-CONVERSATION TOTAL OSL:") + lines.append( + f" n={nc:,} mean={sum(conv_total_osl)/nc:.0f} median={conv_total_osl[nc//2]} " + f"p5={conv_total_osl[int(nc*0.05)]} p95={conv_total_osl[int(nc*0.95)]}" + ) + + # ISL context growth (shows accumulation across turns) + lines.append("") + lines.append("ISL CONTEXT GROWTH (sample multi-turn conversations):") + multi = [(cid, v) for cid, v in convos.items() if len(v) >= 3][:10] + for cid, turns 
in multi: + isls = " -> ".join(str(t["isl"]) for t in turns) + lines.append(f" {cid}: {isls}") + + lines.append("=" * 70) + + summary_text = "\n".join(lines) + print(summary_text) + + # Save summary + (output_dir / "workload_distribution_summary.txt").write_text(summary_text) + + # Try to generate plots (matplotlib may not be available) + try: + _generate_plots(convos, records, output_dir) + except ImportError: + print("matplotlib not available, skipping plots") + + +def _generate_plots( + convos: dict[str, list[dict]], records: list[dict], output_dir: Path +) -> None: + """Generate distribution plots.""" + import matplotlib + + matplotlib.use("Agg") + import matplotlib.pyplot as plt + + fig, axes = plt.subplots(3, 3, figsize=(18, 15)) + fig.suptitle("Benchmark Workload Distribution Analysis", fontsize=14) + + # (0,0) Turn count distribution + ax = axes[0, 0] + turn_counts = Counter(len(v) for v in convos.values()) + turns = sorted(turn_counts.keys()) + counts = [turn_counts[t] for t in turns] + total = sum(counts) + bars = ax.bar(turns, [100 * c / total for c in counts], edgecolor="black", alpha=0.7) + for bar, t in zip(bars, turns): + ax.text( + bar.get_x() + bar.get_width() / 2, + bar.get_height(), + f"{bar.get_height():.0f}%", + ha="center", + va="bottom", + fontsize=8, + ) + ax.set_xlabel("Number of Turns") + ax.set_ylabel("% of Conversations") + ax.set_title(f"Turn Count Distribution (n={total:,})") + ax.grid(True, alpha=0.3, axis="y") + + # (0,1) All requests ISL histogram + ax = axes[0, 1] + all_isl = [t["isl"] for v in convos.values() for t in v] + clip = int(sorted(all_isl)[int(len(all_isl) * 0.99)] * 1.2) + ax.hist([v for v in all_isl if v <= clip], bins=80, edgecolor="black", alpha=0.7, color="steelblue") + all_isl_sorted = sorted(all_isl) + median_isl = all_isl_sorted[len(all_isl) // 2] + mean_isl = sum(all_isl) / len(all_isl) + ax.axvline(median_isl, color="red", linestyle="--", label=f"Median: {median_isl:,}") + ax.axvline(mean_isl, color="orange", 
linestyle="--", label=f"Mean: {mean_isl:,.0f}") + ax.set_xlabel("Input Sequence Length") + ax.set_ylabel("Count") + ax.set_title(f"All Requests ISL (n={len(all_isl):,})") + ax.legend(fontsize=8) + ax.grid(True, alpha=0.3, axis="y") + + # (0,2) All requests OSL histogram + ax = axes[0, 2] + all_osl = [t["osl"] for v in convos.values() for t in v] + clip = min(3000, int(sorted(all_osl)[int(len(all_osl) * 0.99)] * 1.2)) + ax.hist([v for v in all_osl if v <= clip], bins=80, edgecolor="black", alpha=0.7, color="coral") + all_osl_sorted = sorted(all_osl) + median_osl = all_osl_sorted[len(all_osl) // 2] + mean_osl = sum(all_osl) / len(all_osl) + ax.axvline(median_osl, color="red", linestyle="--", label=f"Median: {median_osl:,}") + ax.axvline(mean_osl, color="orange", linestyle="--", label=f"Mean: {mean_osl:,.0f}") + ax.set_xlabel("Output Sequence Length") + ax.set_ylabel("Count") + ax.set_title(f"All Requests OSL (n={len(all_osl):,})") + ax.legend(fontsize=8) + ax.grid(True, alpha=0.3, axis="y") + + # (1,0) Average new prefill tokens by turn index (ISL delta per turn) + ax = axes[1, 0] + # Collect deltas grouped by turn index + deltas_by_turn: dict[int, list[int]] = defaultdict(list) + for v in convos.values(): + for i, t in enumerate(v): + if i == 0: + deltas_by_turn[t["turn"]].append(t["isl"]) + else: + deltas_by_turn[t["turn"]].append(max(0, t["isl"] - v[i - 1]["isl"])) + if deltas_by_turn: + turn_indices = sorted(deltas_by_turn.keys()) + means = [sum(deltas_by_turn[ti]) / len(deltas_by_turn[ti]) for ti in turn_indices] + ns = [len(deltas_by_turn[ti]) for ti in turn_indices] + ax.plot(turn_indices, means, marker="o", markersize=3, linewidth=1, color="mediumseagreen") + ax.fill_between(turn_indices, 0, means, alpha=0.2, color="mediumseagreen") + # Label first and last points + if len(turn_indices) > 0: + ax.annotate(f"{means[0]:,.0f}", (turn_indices[0], means[0]), fontsize=7, ha="left", va="bottom") + if len(turn_indices) > 1: + 
ax.annotate(f"{means[-1]:,.0f}\n(n={ns[-1]})", (turn_indices[-1], means[-1]), fontsize=7, ha="right", va="bottom") + # Overall mean/median across all deltas + all_deltas = [d for dlist in deltas_by_turn.values() for d in dlist] + if all_deltas: + overall_mean = sum(all_deltas) / len(all_deltas) + all_deltas_sorted = sorted(all_deltas) + overall_median = all_deltas_sorted[len(all_deltas) // 2] + ax.axhline(overall_mean, color="orange", linestyle="--", linewidth=1, label=f"Mean: {overall_mean:,.0f}") + ax.axhline(overall_median, color="red", linestyle="--", linewidth=1, label=f"Median: {overall_median:,}") + ax.legend(fontsize=7) + ax.set_xlabel("Turn Index") + ax.set_ylabel("Mean New Prefill Tokens") + ax.set_title("Avg New Prefill Tokens by Turn") + ax.grid(True, alpha=0.3) + + # (1,1) ISL vs OSL scatter + ax = axes[1, 1] + ax.scatter(all_isl, all_osl, alpha=0.15, s=3, c="purple") + ax.set_xlabel("ISL (tokens)") + ax.set_ylabel("OSL (tokens)") + ax.set_title("ISL vs OSL (all requests)") + ax.grid(True, alpha=0.3) + + # (1,2) Per-conversation max ISL vs num turns scatter + ax = axes[1, 2] + conv_turns = [len(v) for v in convos.values()] + conv_max_isl_list = [max(t["isl"] for t in v) for v in convos.values()] + ax.scatter(conv_turns, conv_max_isl_list, alpha=0.3, s=8, c="steelblue") + ax.set_xlabel("Number of Turns") + ax.set_ylabel("Max ISL (tokens)") + ax.set_title("Final Context Size vs Turn Count") + ax.grid(True, alpha=0.3) + + # (2,0) Per-conversation max ISL (final context size per conversation) + ax = axes[2, 0] + conv_max_isl = [max(t["isl"] for t in v) for v in convos.values()] + clip = int(sorted(conv_max_isl)[int(len(conv_max_isl) * 0.99)] * 1.2) + ax.hist([v for v in conv_max_isl if v <= clip], bins=60, edgecolor="black", alpha=0.7, color="steelblue") + conv_max_isl_sorted = sorted(conv_max_isl) + median_max = conv_max_isl_sorted[len(conv_max_isl) // 2] + mean_max = sum(conv_max_isl) / len(conv_max_isl) + ax.axvline(median_max, color="red", 
linestyle="--", label=f"Median: {median_max:,}") + ax.axvline(mean_max, color="orange", linestyle="--", label=f"Mean: {mean_max:,.0f}") + ax.set_xlabel("Max ISL per Conversation (tokens)") + ax.set_ylabel("Count") + ax.set_title(f"Per-Conversation Final Context Size (n={len(conv_max_isl):,})") + ax.legend(fontsize=8) + ax.grid(True, alpha=0.3, axis="y") + + # (2,1) Per-conversation total OSL (sum of all output tokens across turns) + ax = axes[2, 1] + conv_total_osl = [sum(t["osl"] for t in v) for v in convos.values()] + clip = int(sorted(conv_total_osl)[int(len(conv_total_osl) * 0.99)] * 1.2) + ax.hist([v for v in conv_total_osl if v <= clip], bins=60, edgecolor="black", alpha=0.7, color="coral") + conv_total_osl_sorted = sorted(conv_total_osl) + median_tosl = conv_total_osl_sorted[len(conv_total_osl) // 2] + mean_tosl = sum(conv_total_osl) / len(conv_total_osl) + ax.axvline(median_tosl, color="red", linestyle="--", label=f"Median: {median_tosl:,}") + ax.axvline(mean_tosl, color="orange", linestyle="--", label=f"Mean: {mean_tosl:,.0f}") + ax.set_xlabel("Total OSL per Conversation (tokens)") + ax.set_ylabel("Count") + ax.set_title(f"Per-Conversation Total Output Tokens (n={len(conv_total_osl):,})") + ax.legend(fontsize=8) + ax.grid(True, alpha=0.3, axis="y") + + # (2,2) is empty — already placed scatter at (1,2) + axes[2, 2].axis("off") + + plt.tight_layout() + out = output_dir / "workload_distribution_plots.png" + plt.savefig(out, dpi=150, bbox_inches="tight") + plt.close() + print(f"Saved plots to {out}") + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Analyze benchmark workload distributions" + ) + parser.add_argument("artifacts_dir", help="Path to aiperf_artifacts/ or trace_replay/ directory") + parser.add_argument( + "-o", "--output", default=None, help="Output directory (default: same as artifacts_dir)" + ) + args = parser.parse_args() + + artifacts_dir = Path(args.artifacts_dir) + output_dir = Path(args.output) if args.output else 
artifacts_dir + + # Auto-detect format + trace_replay_csv = artifacts_dir / "detailed_results.csv" + aiperf_jsonl = artifacts_dir / "profile_export.jsonl" + + if trace_replay_csv.exists(): + records = load_trace_replay_records(artifacts_dir) + print(f"Loaded {len(records):,} records from {artifacts_dir} (trace replay)") + elif aiperf_jsonl.exists(): + records = load_records(artifacts_dir) + print(f"Loaded {len(records):,} records from {artifacts_dir} (AIPerf)") + else: + print(f"No recognized data files in {artifacts_dir}") + return + + analyze(records, output_dir) + + +if __name__ == "__main__": + main() diff --git a/utils/agentic-benchmark/scripts/collect_sweep_results.py b/utils/agentic-benchmark/scripts/collect_sweep_results.py new file mode 100644 index 000000000..91a9619d4 --- /dev/null +++ b/utils/agentic-benchmark/scripts/collect_sweep_results.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +""" +Collect and aggregate multi-turn benchmark sweep results from GitHub Actions +artifacts. + +Expects a directory of artifact subdirectories named: + multiturn_tp{N}_users{M}_offload{mode}/ +each containing metrics CSVs, status.txt, etc. + +Produces: + - summary.csv with per-experiment aggregated metrics + - throughput-vs-concurrency and workload-consistency overview plots + +Usage: + python collect_sweep_results.py +""" + +import json +import sys +from pathlib import Path + +import pandas as pd +import numpy as np + + +def _load_custom_client_csv(client_csv: Path, exp_dir: Path) -> pd.DataFrame | None: + """Load per-request metrics from custom benchmark client CSV.""" + df = pd.read_csv(client_csv) + if len(df) == 0: + return None + # Columns expected: start_time_ms, ttft_ms, tpot_ms, latency_ms, + # input_num_tokens, output_num_tokens, ... + return df + + +def _load_aiperf_summary_csv(csv_path: Path) -> dict | None: + """Load aggregate metrics directly from aiperf's profile_export_aiperf.csv. 
+ + Returns a dict with pre-computed metrics matching the result schema, + or None if the file can't be parsed. + """ + # The CSV has multiple sections with different column counts. + # Read raw lines and split into per-metric and scalar sections. + lines = csv_path.read_text().strip().split('\n') + if len(lines) < 2: + return None + + # Section 1: per-metric stats (header + data rows with 14 columns) + header = lines[0].split(',') + per_metric = {} + scalars = {} + for line in lines[1:]: + if not line.strip(): + continue + parts = line.split(',') + if len(parts) == len(header): + # Per-metric row + per_metric[parts[0]] = {h: parts[i] for i, h in enumerate(header)} + elif len(parts) == 2: + # Scalar row (Metric, Value) + scalars[parts[0]] = parts[1] + else: + # Different section (GPU metrics) — stop + break + + def metric_stat(metric_name, stat): + if metric_name in per_metric: + try: + return float(per_metric[metric_name].get(stat, 0)) + except (ValueError, TypeError): + return 0 + return 0 + + def scalar_val(metric_name): + if metric_name in scalars: + try: + return float(scalars[metric_name]) + except (ValueError, TypeError): + return 0 + return 0 + + return { + "num_requests": int(scalar_val("Request Count")), + "throughput_rps": scalar_val("Request Throughput (requests/sec)"), + "output_throughput_tps": scalar_val("Output Token Throughput (tokens/sec)"), + "total_throughput_tps": scalar_val("Total Token Throughput (tokens/sec)"), + "input_throughput_tps": scalar_val("Total Token Throughput (tokens/sec)") - scalar_val("Output Token Throughput (tokens/sec)"), + "mean_ttft_ms": metric_stat("Time to First Token (ms)", "avg"), + "p50_ttft_ms": metric_stat("Time to First Token (ms)", "p50"), + "p90_ttft_ms": metric_stat("Time to First Token (ms)", "p90"), + "p99_ttft_ms": metric_stat("Time to First Token (ms)", "p99"), + "mean_tpot_ms": metric_stat("Inter Token Latency (ms)", "avg"), + "p50_tpot_ms": metric_stat("Inter Token Latency (ms)", "p50"), + "p90_tpot_ms": 
metric_stat("Inter Token Latency (ms)", "p90"), + "p99_tpot_ms": metric_stat("Inter Token Latency (ms)", "p99"), + "mean_latency_ms": metric_stat("Request Latency (ms)", "avg"), + "p50_latency_ms": metric_stat("Request Latency (ms)", "p50"), + "p90_latency_ms": metric_stat("Request Latency (ms)", "p90"), + "p99_latency_ms": metric_stat("Request Latency (ms)", "p99"), + } + + +def _load_trace_replay_csv(csv_path: Path) -> pd.DataFrame | None: + """Load per-request metrics from trace_replay detailed_results.csv.""" + df = pd.read_csv(csv_path) + if len(df) == 0: + return None + + # Filter to successful requests only + df = df[df["success"] == True].copy() + if len(df) == 0: + return None + + # Convert to the same schema as _load_aiperf_jsonl + latency_s = df["request_complete_time"] - df["request_start_time"] + return pd.DataFrame({ + "start_time_ms": df["request_start_time"] * 1000, + "ttft_ms": df["ttft"] * 1000, + "tpot_ms": df["itl"] * 1000, + "latency_ms": latency_s * 1000, + "input_num_tokens": df["input_tokens"], + "output_num_tokens": df["output_tokens_actual"], + }) + + +def load_experiment(exp_dir: Path) -> dict | None: + """Load metrics from a single experiment artifact directory.""" + client_csv = exp_dir / "metrics_client_metrics.csv" + server_csv = exp_dir / "metrics_server_metrics.csv" + + # No more status.txt: an experiment is considered SUCCESS iff its + # trace_replay/detailed_results.csv has at least one successful row. + # Failed / missing jobs show up as FAILED in the summary. 
+ trace_replay_csv = exp_dir / "trace_replay" / "detailed_results.csv" + status = "FAILED" + if trace_replay_csv.exists(): + try: + import csv as _csv + import sys as _sys + _csv.field_size_limit(_sys.maxsize) + with open(trace_replay_csv) as _f: + if any(r.get('success') == 'True' for r in _csv.DictReader(_f)): + status = "SUCCESS" + except Exception: + pass + + # Check for aiperf summary CSV (preferred) or per-record JSONL (fallback) + aiperf_summary_csv = None + aiperf_artifacts = exp_dir / "aiperf_artifacts" + if aiperf_artifacts.exists(): + candidate = aiperf_artifacts / "profile_export_aiperf.csv" + if candidate.exists(): + aiperf_summary_csv = candidate + + # Check for trace replay output + trace_replay_csv = exp_dir / "trace_replay" / "detailed_results.csv" + + if not client_csv.exists() and aiperf_summary_csv is None and not trace_replay_csv.exists(): + return None + + # Parse experiment name from directory. + # Supports formats: + # multiturn_tp{N}_users{M}_offload{mode} + # tp{N}_users{M}_offload{mode} + # agentic_{model}_tp{N}_users{M}_offload{mode}_{extra...} + import re + name = exp_dir.name + match = re.search(r'tp(\d+)_users(\d+)_offload(on|off)', name) + if not match: + print(f"Warning: cannot parse experiment name '{exp_dir.name}', skipping") + return None + + tp = int(match.group(1)) + users = int(match.group(2)) + offload = match.group(3) + + result = { + "exp_name": name, + "tp": tp, + "users": users, + "offload": offload, + "status": status, + } + + if status != "SUCCESS": + return result + + try: + # Determine data source: aiperf summary CSV (preferred), custom client CSV, or trace replay CSV + if aiperf_summary_csv is not None: + aiperf_metrics = _load_aiperf_summary_csv(aiperf_summary_csv) + if aiperf_metrics is None: + return result + result.update(aiperf_metrics) + elif client_csv.exists(): + df = _load_custom_client_csv(client_csv, exp_dir) + if df is None or len(df) == 0: + return result + + # Prefer benchmark_metadata.json for precise 
wall-clock duration + metadata_file = exp_dir / "benchmark_metadata.json" + total_time_sec = None + if metadata_file.exists(): + try: + with open(metadata_file) as f: + metadata = json.load(f) + total_time_sec = metadata.get("benchmark_runtime_sec") + except Exception: + pass + + if not total_time_sec or total_time_sec <= 0: + first_start_ms = df["start_time_ms"].min() + last_finish_ms = (df["start_time_ms"] + df["latency_ms"]).max() + total_time_sec = (last_finish_ms - first_start_ms) / 1000.0 + if total_time_sec <= 0: + total_time_sec = df["latency_ms"].sum() / 1000 + + num_requests = len(df) + result.update({ + "num_requests": num_requests, + "throughput_rps": num_requests / total_time_sec if total_time_sec > 0 else 0, + "input_throughput_tps": df["input_num_tokens"].sum() / total_time_sec if total_time_sec > 0 else 0, + "output_throughput_tps": df["output_num_tokens"].sum() / total_time_sec if total_time_sec > 0 else 0, + "total_throughput_tps": (df["input_num_tokens"].sum() + df["output_num_tokens"].sum()) / total_time_sec if total_time_sec > 0 else 0, + "mean_ttft_ms": df["ttft_ms"].mean(), + "p50_ttft_ms": df["ttft_ms"].median(), + "p90_ttft_ms": df["ttft_ms"].quantile(0.9), + "p99_ttft_ms": df["ttft_ms"].quantile(0.99), + "mean_tpot_ms": df["tpot_ms"].mean(), + "p50_tpot_ms": df["tpot_ms"].median(), + "p90_tpot_ms": df["tpot_ms"].quantile(0.9), + "p99_tpot_ms": df["tpot_ms"].quantile(0.99), + "mean_latency_ms": df["latency_ms"].mean(), + "p50_latency_ms": df["latency_ms"].median(), + "p90_latency_ms": df["latency_ms"].quantile(0.9), + "p99_latency_ms": df["latency_ms"].quantile(0.99), + }) + elif trace_replay_csv.exists(): + df = _load_trace_replay_csv(trace_replay_csv) + if df is None or len(df) == 0: + return result + + metadata_file = exp_dir / "benchmark_metadata.json" + total_time_sec = None + if metadata_file.exists(): + try: + with open(metadata_file) as f: + metadata = json.load(f) + total_time_sec = metadata.get("benchmark_runtime_sec") + except 
Exception: + pass + + if not total_time_sec or total_time_sec <= 0: + first_start_ms = df["start_time_ms"].min() + last_finish_ms = (df["start_time_ms"] + df["latency_ms"]).max() + total_time_sec = (last_finish_ms - first_start_ms) / 1000.0 + if total_time_sec <= 0: + total_time_sec = df["latency_ms"].sum() / 1000 + + num_requests = len(df) + result.update({ + "num_requests": num_requests, + "throughput_rps": num_requests / total_time_sec if total_time_sec > 0 else 0, + "input_throughput_tps": df["input_num_tokens"].sum() / total_time_sec if total_time_sec > 0 else 0, + "output_throughput_tps": df["output_num_tokens"].sum() / total_time_sec if total_time_sec > 0 else 0, + "total_throughput_tps": (df["input_num_tokens"].sum() + df["output_num_tokens"].sum()) / total_time_sec if total_time_sec > 0 else 0, + "mean_ttft_ms": df["ttft_ms"].mean(), + "p50_ttft_ms": df["ttft_ms"].median(), + "p90_ttft_ms": df["ttft_ms"].quantile(0.9), + "p99_ttft_ms": df["ttft_ms"].quantile(0.99), + "mean_tpot_ms": df["tpot_ms"].mean(), + "p50_tpot_ms": df["tpot_ms"].median(), + "p90_tpot_ms": df["tpot_ms"].quantile(0.9), + "p99_tpot_ms": df["tpot_ms"].quantile(0.99), + "mean_latency_ms": df["latency_ms"].mean(), + "p50_latency_ms": df["latency_ms"].median(), + "p90_latency_ms": df["latency_ms"].quantile(0.9), + "p99_latency_ms": df["latency_ms"].quantile(0.99), + }) + else: + return result + + # Cache hit rates from server metrics + if server_csv.exists(): + try: + sdf = pd.read_csv(server_csv) + if len(sdf) > 0: + final = sdf.iloc[-1] + if final.get("prefix_cache_queries", 0) > 0: + result["gpu_hit_rate"] = 100 * final["prefix_cache_hits"] / final["prefix_cache_queries"] + if final.get("cpu_prefix_cache_queries", 0) > 0: + result["cpu_hit_rate"] = 100 * final["cpu_prefix_cache_hits"] / final["cpu_prefix_cache_queries"] + except Exception as e: + print(f"Warning: failed to load server metrics for {exp_dir.name}: {e}") + + except Exception as e: + print(f"Warning: failed to load client 
def main() -> None:
    """Aggregate per-experiment artifact dirs into summary.csv + overview plots.

    argv[1]: artifacts directory containing one subdirectory per experiment.
    argv[2]: output directory (created if missing).
    Exits 1 on bad arguments, 0 when no experiments are found.
    """
    if len(sys.argv) < 3:
        # Fix: the usage string had lost its argument placeholders.
        print(f"Usage: {sys.argv[0]} <artifacts_dir> <output_dir>")
        sys.exit(1)

    artifacts_dir = Path(sys.argv[1])
    output_dir = Path(sys.argv[2])
    output_dir.mkdir(parents=True, exist_ok=True)

    if not artifacts_dir.is_dir():
        print(f"Error: {artifacts_dir} is not a directory")
        sys.exit(1)

    # Load all experiments (one subdirectory each; non-dirs are ignored).
    experiments = []
    for subdir in sorted(artifacts_dir.iterdir()):
        if not subdir.is_dir():
            continue
        result = load_experiment(subdir)
        if result is not None:
            experiments.append(result)

    if not experiments:
        print("No experiments found.")
        sys.exit(0)

    # Write summary CSV
    summary_path = output_dir / "summary.csv"
    df = pd.DataFrame(experiments)
    df.to_csv(summary_path, index=False)
    print(f"Summary written to {summary_path} ({len(experiments)} experiments)")

    # Print status summary
    success = sum(1 for e in experiments if e.get("status") == "SUCCESS")
    failed = sum(1 for e in experiments if e.get("status") == "FAILED")
    other = len(experiments) - success - failed
    print(f" SUCCESS: {success}, FAILED: {failed}, OTHER: {other}")

    # Run overview plots (throughput vs concurrency, workload consistency);
    # best-effort — plotting failures must not fail the aggregation.
    try:
        from plot_sweep_overview import plot_throughput_vs_concurrency, plot_workload_consistency
        pareto_input = output_dir / "pareto_input"
        summary_csv = pareto_input / "experiment_summary.csv"
        if summary_csv.exists():
            overview_df = pd.read_csv(summary_csv)
            plot_throughput_vs_concurrency(overview_df, output_dir)
            plot_workload_consistency(pareto_input, output_dir)
        else:
            print("Warning: No experiment_summary.csv found, skipping overview plots")
    except Exception as e:
        print(f"Warning: Overview plots failed: {e}")

    print(f"Aggregated results saved to {output_dir}")
def plot_throughput_vs_concurrency(df: pd.DataFrame, output_dir: Path) -> None:
    """Plot throughput (top row) and cache hit rate (bottom row) vs
    concurrent sessions, one column per TP value.

    Expects columns: tp, bs, offload, total_tps_per_gpu, gpu_hit_rate.
    cpu_hit_rate is optional and plotted only when present.
    Saves throughput_vs_concurrency.png into output_dir.
    """
    tps = sorted(df["tp"].unique())
    n = len(tps)
    if n == 0:
        return

    fig, axes = plt.subplots(2, n, figsize=(7 * n, 10))
    if n == 1:
        # subplots(2, 1) returns a 1-D array; normalize to 2-D indexing.
        axes = axes.reshape(2, 1)
    fig.suptitle("Throughput & Cache Hit Rate vs Concurrent Sessions", fontsize=15)

    for idx, tp in enumerate(tps):
        tp_df = df[df["tp"] == tp].sort_values("bs")
        off = tp_df[tp_df["offload"] == "off"].sort_values("bs")
        on = tp_df[tp_df["offload"] == "on"].sort_values("bs")

        # --- Top row: Throughput ---
        ax = axes[0, idx]
        if len(off) > 0:
            ax.plot(off["bs"], off["total_tps_per_gpu"], "o-", color="#d62728",
                    linewidth=2.5, markersize=7, label="Offload OFF")
        if len(on) > 0:
            ax.plot(on["bs"], on["total_tps_per_gpu"], "s-", color="#2ca02c",
                    linewidth=2.5, markersize=7, label="Offload ON")

        # Annotate max offload gain, but only when notable (> 20%).
        if len(off) > 0 and len(on) > 0:
            merged = pd.merge(off[["bs", "total_tps_per_gpu"]], on[["bs", "total_tps_per_gpu"]],
                              on="bs", suffixes=("_off", "_on"))
            if len(merged) > 0:
                merged["gain_pct"] = ((merged["total_tps_per_gpu_on"] - merged["total_tps_per_gpu_off"])
                                      / merged["total_tps_per_gpu_off"] * 100)
                max_row = merged.loc[merged["gain_pct"].idxmax()]
                if max_row["gain_pct"] > 20:
                    ax.annotate(f"+{max_row['gain_pct']:.0f}%",
                                xy=(max_row["bs"], max_row["total_tps_per_gpu_on"]),
                                xytext=(0, 15), textcoords="offset points",
                                fontsize=11, fontweight="bold", color="green", ha="center")

        ax.set_xlabel("Concurrent Sessions", fontsize=10)
        ax.set_ylabel("Throughput/GPU (tok/s)", fontsize=10)
        ax.set_title(f"TP{tp} — Throughput", fontsize=13, fontweight="bold")
        max_tput = df["total_tps_per_gpu"].max()
        ax.set_ylim(0, max_tput * 1.15 if max_tput > 0 else 15000)
        ax.legend(fontsize=9)
        ax.grid(True, alpha=0.2)

        # --- Bottom row: Cache hit rate ---
        ax = axes[1, idx]
        if len(off) > 0:
            ax.plot(off["bs"], off["gpu_hit_rate"], "o-", color="#d62728",
                    linewidth=2, markersize=6, label="GPU Hit — OFF")
        if len(on) > 0:
            ax.plot(on["bs"], on["gpu_hit_rate"], "s-", color="#2ca02c",
                    linewidth=2, markersize=6, label="GPU Hit — ON")
            # Fix: cpu_hit_rate is not a required summary column — guard
            # against KeyError when it is absent.
            if "cpu_hit_rate" in on.columns:
                cpu_hit = on["cpu_hit_rate"].fillna(0)
                if cpu_hit.max() > 1:
                    ax.plot(on["bs"], cpu_hit, "v--", color="#9467bd",
                            linewidth=2, markersize=6, label="CPU Hit — ON")

        ax.set_xlabel("Concurrent Sessions", fontsize=10)
        ax.set_ylabel("Cache Hit Rate (%)", fontsize=10)
        ax.set_title(f"TP{tp} — Cache Hit Rate", fontsize=13, fontweight="bold")
        ax.set_ylim(0, 105)
        ax.legend(fontsize=9)
        ax.grid(True, alpha=0.2)

    plt.tight_layout()
    out = output_dir / "throughput_vs_concurrency.png"
    plt.savefig(out, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"Saved {out}")
def plot_workload_consistency(pareto_input_dir: Path, output_dir: Path) -> None:
    """ISL distribution box plots per experiment to verify consistent workload.

    Scans pareto_input_dir for offload-off experiment directories named
    tp{N}_bs{M}_*, collects per-request input token counts from their
    trace_replay/detailed_results.csv, and saves workload_consistency.png
    into output_dir. (Removed the unused `tps` accumulator set — the plot
    derives TP values from data_by_tp.)
    """
    # trace_replay rows can carry very long fields (full prompts).
    csv.field_size_limit(sys.maxsize)

    data_by_tp: dict[int, list[tuple[int, str, list[float]]]] = defaultdict(list)

    for exp_dir in sorted(pareto_input_dir.iterdir()):
        if not exp_dir.is_dir() or not exp_dir.name.startswith("tp"):
            continue
        if "offloadon" in exp_dir.name:
            continue  # Only use offload-off for consistency check

        parts = exp_dir.name.split("_")
        try:
            tp = int(parts[0].replace("tp", ""))
            bs = int(parts[1].replace("bs", ""))
        except (IndexError, ValueError):
            continue

        # Try trace replay CSV
        csv_path = exp_dir / "trace_replay" / "detailed_results.csv"
        if not csv_path.exists():
            # Try aiperf JSONL
            continue

        isls = []
        try:
            with open(csv_path) as f:
                reader = csv.DictReader(f)
                for row in reader:
                    if row.get("success") == "True":
                        isls.append(int(row["input_tokens"]) / 1000)  # k tokens
        except Exception:
            continue

        if isls:
            data_by_tp[tp].append((bs, exp_dir.name, isls))

    if not data_by_tp:
        print("No workload data found for consistency plot")
        return

    sorted_tps = sorted(data_by_tp.keys())
    n = len(sorted_tps)

    fig, axes = plt.subplots(1, n, figsize=(7 * n, 6))
    if n == 1:
        # subplots(1, 1) returns a bare Axes; normalize to a sequence.
        axes = [axes]
    fig.suptitle("Workload Consistency — ISL Distribution Per Experiment (Offload OFF)", fontsize=14)

    for idx, tp in enumerate(sorted_tps):
        ax = axes[idx]
        entries = sorted(data_by_tp[tp], key=lambda x: x[0])

        box_data = [e[2] for e in entries]
        labels = [str(e[0]) for e in entries]
        means = [np.mean(e[2]) for e in entries]

        bp = ax.boxplot(box_data, tick_labels=labels, patch_artist=True,
                        showfliers=False, widths=0.6,
                        medianprops=dict(color="red", linewidth=2))
        for patch in bp["boxes"]:
            patch.set_facecolor("steelblue")
            patch.set_alpha(0.6)

        ax.plot(range(1, len(means) + 1), means, "o--", color="orange", linewidth=2,
                markersize=6, label=f"Mean ({np.mean(means):.0f}k ± {np.std(means):.0f}k)", zorder=5)

        overall_mean = np.mean(means)
        overall_std = np.std(means)
        ax.axhspan(overall_mean - overall_std, overall_mean + overall_std,
                   alpha=0.1, color="orange", label="±1σ band")
        ax.axhline(overall_mean, color="orange", linestyle=":", alpha=0.5)

        ax.set_xlabel("Concurrent Sessions", fontsize=11)
        ax.set_ylabel("ISL (k tokens)", fontsize=11)
        ax.set_title(f"TP{tp}", fontsize=13, fontweight="bold")
        ax.legend(fontsize=9)
        ax.grid(True, alpha=0.2, axis="y")
        ax.set_ylim(0, 140)

    plt.tight_layout()
    out = output_dir / "workload_consistency.png"
    plt.savefig(out, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"Saved {out}")
def main():
    """CLI entry: load the experiment summary CSV and emit overview plots.

    argv[1]: pareto input directory; argv[2] (optional): output directory,
    defaulting to the parent of argv[1].
    """
    if len(sys.argv) < 2:
        # Fix: the usage string had lost its argument placeholders.
        print(f"Usage: {sys.argv[0]} <pareto_input_dir> [<output_dir>]")
        sys.exit(1)

    pareto_input_dir = Path(sys.argv[1])
    output_dir = Path(sys.argv[2]) if len(sys.argv) > 2 else pareto_input_dir.parent
    output_dir.mkdir(parents=True, exist_ok=True)

    # Load experiment summary
    summary_csv = pareto_input_dir / "experiment_summary.csv"
    if not summary_csv.exists():
        # Try parent
        summary_csv = output_dir / "summary.csv"
        if not summary_csv.exists():
            print(f"No summary CSV found in {pareto_input_dir} or {output_dir}")
            return

    df = pd.read_csv(summary_csv)

    # Ensure required columns exist
    required = ["tp", "bs", "offload", "total_tps_per_gpu", "gpu_hit_rate"]
    missing = [c for c in required if c not in df.columns]
    if missing:
        print(f"Missing columns in summary: {missing}")
        return

    plot_throughput_vs_concurrency(df, output_dir)
    plot_workload_consistency(pareto_input_dir, output_dir)
load_config_files, load_runner_file, Fields @@ -121,8 +122,10 @@ def _max_eval_conc(ie): eval_concs = _eligible_eval_concs(best_entry) mn_eval_conc[best_idx] = eval_concs[len(eval_concs) // 2] - # Mark the selected entries + # Mark the selected entries (skip agentic entries which don't support evals) for i, entry in enumerate(matrix_values): + if entry.get(Fields.SCENARIO_TYPE.value) == 'agentic-coding': + continue entry[Fields.RUN_EVAL.value] = i in eval_indices if i in mn_eval_conc: entry[Fields.EVAL_CONC.value] = mn_eval_conc[i] @@ -181,7 +184,9 @@ def generate_full_sweep(args, all_config_data, runner_data): # Get disagg value, defaulting to False if not specified disagg = val.get(Fields.DISAGG.value, False) - seq_len_configs = val[Fields.SEQ_LEN_CONFIGS.value] + scenarios = val[Fields.SCENARIOS.value] + scenario_filter = set(args.scenario_type) if getattr(args, 'scenario_type', None) else None + seq_len_configs = scenarios.get(Fields.FIXED_SEQ_LEN.value, []) if (scenario_filter is None or 'fixed-seq-len' in scenario_filter) else [] image = val[Fields.IMAGE.value] model = val[Fields.MODEL.value] precision = val[Fields.PRECISION.value] @@ -373,6 +378,95 @@ def generate_full_sweep(args, all_config_data, runner_data): if conc > conc_end: conc = conc_end + # ---- Agentic-coding scenarios ---- + agentic_configs = scenarios.get(Fields.AGENTIC_CODING.value, []) if (scenario_filter is None or 'agentic-coding' in scenario_filter) else [] + + for agentic_config in agentic_configs: + bmk_space = agentic_config[Fields.SEARCH_SPACE.value] + duration = agentic_config.get(Fields.DURATION.value, 1800) + + for bmk in bmk_space: + if is_multinode: + prefill = bmk[Fields.PREFILL.value] + decode = bmk[Fields.DECODE.value] + spec_decoding = bmk.get(Fields.SPEC_DECODING.value, "none") + else: + tp = bmk[Fields.TP.value] + ep = bmk.get(Fields.EP.value) + dp_attn = bmk.get(Fields.DP_ATTN.value) + offloading = bmk.get(Fields.OFFLOADING.value, "none") + + # Get concurrency values + 
conc_list = bmk.get(Fields.CONC_LIST.value) + if conc_list: + conc_values = conc_list + else: + conc_start = bmk[Fields.CONC_START.value] + conc_end = bmk[Fields.CONC_END.value] + conc_values = [] + conc = conc_start + while conc <= conc_end: + conc_values.append(conc) + if conc == conc_end: + break + conc *= args.step_size + if conc > conc_end: + conc = conc_end + + # Apply conc filters + if args.min_conc is not None: + conc_values = [c for c in conc_values if c >= args.min_conc] + if args.max_conc is not None: + conc_values = [c for c in conc_values if c <= args.max_conc] + if not conc_values: + continue + + runners_for_entry = runner_nodes_to_use if runner_nodes_to_use else [runner] + + for users in conc_values: + for runner_value in runners_for_entry: + if is_multinode: + entry = { + Fields.IMAGE.value: image, + Fields.MODEL.value: model, + Fields.MODEL_PREFIX.value: model_code, + Fields.PRECISION.value: precision, + Fields.FRAMEWORK.value: framework, + Fields.RUNNER.value: runner_value, + Fields.SPEC_DECODING.value: spec_decoding, + Fields.PREFILL.value: prefill, + Fields.DECODE.value: decode, + Fields.USERS.value: users, + Fields.CONC.value: [users], + Fields.DURATION.value: duration, + Fields.EXP_NAME.value: ( + f"{model_code}_p{prefill[Fields.NUM_WORKER.value]}x{prefill[Fields.TP.value]}" + f"_d{decode[Fields.NUM_WORKER.value]}x{decode[Fields.TP.value]}_users{users}" + ), + Fields.DISAGG.value: disagg, + Fields.SCENARIO_TYPE.value: "agentic-coding", + } + else: + entry = { + Fields.IMAGE.value: image, + Fields.MODEL.value: model, + Fields.MODEL_PREFIX.value: model_code, + Fields.PRECISION.value: precision, + Fields.FRAMEWORK.value: framework, + Fields.RUNNER.value: runner_value, + Fields.TP.value: tp, + Fields.EP.value: ep if ep is not None else 1, + Fields.DP_ATTN.value: dp_attn if dp_attn is not None else False, + Fields.USERS.value: users, + Fields.OFFLOADING.value: offloading, + Fields.DURATION.value: duration, + Fields.EXP_NAME.value: 
f"{model_code}_tp{tp}_users{users}_offload{offloading}", + Fields.SCENARIO_TYPE.value: "agentic-coding", + } + + validate_agentic_matrix_entry(entry) + matrix_values.append(entry) + return matrix_values @@ -430,7 +524,7 @@ def generate_runner_model_sweep_config(args, all_config_data, runner_data): # Find 1k1k config target_config = None - for config in val[Fields.SEQ_LEN_CONFIGS.value]: + for config in val[Fields.SCENARIOS.value].get(Fields.FIXED_SEQ_LEN.value, []): if config[Fields.ISL.value] == 1024 and config[Fields.OSL.value] == 1024: target_config = config break @@ -564,7 +658,9 @@ def generate_test_config_sweep(args, all_config_data): if getattr(args, 'seq_lens', None): seq_lens_filter = {seq_len_stoi[s] for s in args.seq_lens} - for seq_len_config in val[Fields.SEQ_LEN_CONFIGS.value]: + scenario_filter = set(args.scenario_type) if getattr(args, 'scenario_type', None) else None + fixed_configs = val[Fields.SCENARIOS.value].get(Fields.FIXED_SEQ_LEN.value, []) if (scenario_filter is None or 'fixed-seq-len' in scenario_filter) else [] + for seq_len_config in fixed_configs: isl = seq_len_config[Fields.ISL.value] osl = seq_len_config[Fields.OSL.value] @@ -674,6 +770,84 @@ def generate_test_config_sweep(args, all_config_data): } matrix_values.append(validate_matrix_entry(entry, is_multinode=False)) + # ---- Agentic-coding scenarios ---- + agentic_configs = val[Fields.SCENARIOS.value].get(Fields.AGENTIC_CODING.value, []) if (scenario_filter is None or 'agentic-coding' in scenario_filter) else [] + for agentic_config in agentic_configs: + duration = agentic_config.get(Fields.DURATION.value, 1800) + + for bmk in agentic_config[Fields.SEARCH_SPACE.value]: + if is_multinode: + prefill = bmk[Fields.PREFILL.value] + decode = bmk[Fields.DECODE.value] + spec_decoding = bmk.get(Fields.SPEC_DECODING.value, "none") + else: + tp = bmk[Fields.TP.value] + ep = bmk.get(Fields.EP.value) + dp_attn = bmk.get(Fields.DP_ATTN.value) + offloading = bmk.get(Fields.OFFLOADING.value, 
"none") + + conc_list = bmk.get(Fields.CONC_LIST.value) + if conc_list: + conc_values = conc_list + else: + conc_start = bmk[Fields.CONC_START.value] + conc_end = bmk[Fields.CONC_END.value] + conc_values = [] + conc = conc_start + while conc <= conc_end: + conc_values.append(conc) + if conc == conc_end: + break + conc *= 2 + if conc > conc_end: + conc = conc_end + + if getattr(args, 'conc', None): + conc_values = [c for c in conc_values if c in args.conc] + if not conc_values: + continue + + for users in conc_values: + if is_multinode: + entry = { + Fields.IMAGE.value: image, + Fields.MODEL.value: model, + Fields.MODEL_PREFIX.value: model_code, + Fields.PRECISION.value: precision, + Fields.FRAMEWORK.value: framework, + Fields.RUNNER.value: runner, + Fields.SPEC_DECODING.value: spec_decoding, + Fields.PREFILL.value: prefill, + Fields.DECODE.value: decode, + Fields.USERS.value: users, + Fields.CONC.value: [users], + Fields.DURATION.value: duration, + Fields.EXP_NAME.value: ( + f"{model_code}_p{prefill[Fields.NUM_WORKER.value]}x{prefill[Fields.TP.value]}" + f"_d{decode[Fields.NUM_WORKER.value]}x{decode[Fields.TP.value]}_users{users}" + ), + Fields.DISAGG.value: disagg, + Fields.SCENARIO_TYPE.value: "agentic-coding", + } + else: + entry = { + Fields.IMAGE.value: image, + Fields.MODEL.value: model, + Fields.MODEL_PREFIX.value: model_code, + Fields.PRECISION.value: precision, + Fields.FRAMEWORK.value: framework, + Fields.RUNNER.value: runner, + Fields.TP.value: tp, + Fields.EP.value: ep if ep is not None else 1, + Fields.DP_ATTN.value: dp_attn if dp_attn is not None else False, + Fields.USERS.value: users, + Fields.OFFLOADING.value: offloading, + Fields.DURATION.value: duration, + Fields.EXP_NAME.value: f"{model_code}_tp{tp}_users{users}_offload{offloading}", + Fields.SCENARIO_TYPE.value: "agentic-coding", + } + matrix_values.append(validate_agentic_matrix_entry(entry)) + return matrix_values @@ -747,6 +921,13 @@ def main(): required=False, help='Filter runner nodes by 
substring match (e.g., "amd" to only include nodes containing that string). Expands each config to individual matching nodes.' ) + parent_parser.add_argument( + '--scenario-type', + nargs='+', + choices=['fixed-seq-len', 'agentic-coding'], + required=False, + help='Scenario type(s) to include. If not specified, all scenario types are generated.' + ) # Create main parser parser = argparse.ArgumentParser( diff --git a/utils/matrix_logic/validation.py b/utils/matrix_logic/validation.py index ce10840b5..e96f6bce3 100644 --- a/utils/matrix_logic/validation.py +++ b/utils/matrix_logic/validation.py @@ -20,9 +20,13 @@ class Fields(Enum): PRECISION = 'precision' FRAMEWORK = 'framework' RUNNER = 'runner' - SEQ_LEN_CONFIGS = 'seq-len-configs' + SCENARIOS = 'scenarios' MULTINODE = 'multinode' + # Scenario type keys + FIXED_SEQ_LEN = 'fixed-seq-len' + AGENTIC_CODING = 'agentic-coding' + # Seq-len-config fields ISL = 'isl' OSL = 'osl' @@ -45,11 +49,17 @@ class Fields(Enum): MAX_NUM_TOKENS = 'max-num-tokens' ADDITIONAL_SETTINGS = 'additional-settings' + # Agentic coding fields + OFFLOADING = 'offloading' + DURATION = 'duration' + # Matrix entry fields CONC = 'conc' MAX_MODEL_LEN = 'max-model-len' EXP_NAME = 'exp-name' DISAGG = 'disagg' + SCENARIO_TYPE = 'scenario-type' + USERS = 'users' # Eval RUN_EVAL = 'run-eval' @@ -133,6 +143,65 @@ class MultiNodeMatrixEntry(BaseModel): eval_conc: Optional[int] = Field(default=None, alias=Fields.EVAL_CONC.value) +class SingleNodeAgenticMatrixEntry(BaseModel): + """Pydantic model for validating single-node agentic coding matrix entries.""" + model_config = ConfigDict(extra='forbid', populate_by_name=True) + + image: str + model: str + model_prefix: str = Field(alias=Fields.MODEL_PREFIX.value) + precision: str + framework: str + runner: str + tp: int + ep: int + dp_attn: bool = Field(alias=Fields.DP_ATTN.value) + users: int + offloading: Literal["none", "cpu", "ssd"] = Field(alias=Fields.OFFLOADING.value) + duration: int = Field(default=1800, 
def validate_agentic_matrix_entry(entry: dict) -> dict:
    """Validate that an agentic matrix entry matches the expected structure."""
    # A prefill key marks a multinode entry; otherwise it is single-node.
    model_cls = (
        MultiNodeAgenticMatrixEntry
        if Fields.PREFILL.value in entry
        else SingleNodeAgenticMatrixEntry
    )
    try:
        model_cls(**entry)
    except ValidationError as e:
        raise ValueError(
            f"The following parsed agentic matrix entry failed validation:\n{pprint.pformat(entry)}\n{e}")
    return entry
@@ -260,6 +329,80 @@ class MultiNodeSeqLenConfig(BaseModel): alias=Fields.SEARCH_SPACE.value) +class AgenticCodingSearchSpaceEntry(BaseModel): + """Agentic coding search space configuration.""" + model_config = ConfigDict(extra='forbid', populate_by_name=True) + + tp: Optional[int] = None + ep: Optional[int] = None + dp_attn: Optional[bool] = Field(default=None, alias=Fields.DP_ATTN.value) + spec_decoding: Literal["mtp", "draft_model", "none"] = Field( + default="none", alias=Fields.SPEC_DECODING.value) + prefill: Optional[WorkerConfig] = None + decode: Optional[WorkerConfig] = None + offloading: Literal["none", "cpu", "ssd"] = Field(default="none", alias=Fields.OFFLOADING.value) + conc_start: Optional[int] = Field(default=None, alias=Fields.CONC_START.value) + conc_end: Optional[int] = Field(default=None, alias=Fields.CONC_END.value) + conc_list: Optional[List[int]] = Field(default=None, alias=Fields.CONC_LIST.value) + + @model_validator(mode='after') + def validate_conc_fields(self): + return _validate_conc_fields(self) + + @model_validator(mode='after') + def validate_topology_fields(self): + has_single_node = self.tp is not None + has_any_multinode_field = self.prefill is not None or self.decode is not None + has_complete_multinode = self.prefill is not None and self.decode is not None + if has_single_node: + valid = not has_any_multinode_field + else: + valid = has_complete_multinode + if not valid: + raise ValueError("Agentic search-space entries must specify either tp or both prefill and decode") + return self + + +class AgenticCodingConfig(BaseModel): + """Agentic coding scenario configuration for trace replay benchmarks.""" + model_config = ConfigDict(extra='forbid', populate_by_name=True) + + search_space: List[AgenticCodingSearchSpaceEntry] = Field(alias=Fields.SEARCH_SPACE.value) + duration: int = Field(default=1800, alias=Fields.DURATION.value) + + +class SingleNodeScenarios(BaseModel): + """Scenarios wrapper for single-node configs.""" + model_config 
= ConfigDict(extra='forbid', populate_by_name=True) + + fixed_seq_len: Optional[List[SingleNodeSeqLenConfig]] = Field( + default=None, alias=Fields.FIXED_SEQ_LEN.value) + agentic_coding: Optional[List[AgenticCodingConfig]] = Field( + default=None, alias=Fields.AGENTIC_CODING.value) + + @model_validator(mode='after') + def at_least_one_scenario(self): + if not self.fixed_seq_len and not self.agentic_coding: + raise ValueError("At least one scenario type must be specified") + return self + + +class MultiNodeScenarios(BaseModel): + """Scenarios wrapper for multinode configs.""" + model_config = ConfigDict(extra='forbid', populate_by_name=True) + + fixed_seq_len: Optional[List[MultiNodeSeqLenConfig]] = Field( + default=None, alias=Fields.FIXED_SEQ_LEN.value) + agentic_coding: Optional[List[AgenticCodingConfig]] = Field( + default=None, alias=Fields.AGENTIC_CODING.value) + + @model_validator(mode='after') + def at_least_one_scenario(self): + if not self.fixed_seq_len and not self.agentic_coding: + raise ValueError("At least one scenario type must be specified") + return self + + class SingleNodeMasterConfigEntry(BaseModel): """Top-level single node master configuration entry.""" model_config = ConfigDict(extra='forbid', populate_by_name=True) @@ -272,8 +415,7 @@ class SingleNodeMasterConfigEntry(BaseModel): runner: str multinode: Literal[False] disagg: bool = Field(default=False) - seq_len_configs: List[SingleNodeSeqLenConfig] = Field( - alias=Fields.SEQ_LEN_CONFIGS.value) + scenarios: SingleNodeScenarios class MultiNodeMasterConfigEntry(BaseModel): @@ -288,8 +430,7 @@ class MultiNodeMasterConfigEntry(BaseModel): runner: str multinode: Literal[True] disagg: bool = Field(default=False) - seq_len_configs: List[MultiNodeSeqLenConfig] = Field( - alias=Fields.SEQ_LEN_CONFIGS.value) + scenarios: MultiNodeScenarios def validate_master_config(master_configs: dict) -> List[dict]: @@ -343,6 +484,10 @@ class ChangelogEntry(BaseModel): description: list[str] = Field(min_length=1) 
pr_link: str = Field(alias="pr-link") evals_only: bool = Field(alias="evals-only", default=False) + scenario_type: Optional[List[str]] = Field( + alias="scenario-type", default=None, + description="Restrict to specific scenario types (e.g., ['fixed-seq-len', 'agentic-coding'])" + ) class ChangelogMetadata(BaseModel): @@ -361,9 +506,9 @@ class ChangelogMatrixEntry(BaseModel): """ model_config = ConfigDict(extra="forbid", populate_by_name=True) - single_node: dict[str, list[SingleNodeMatrixEntry] + single_node: dict[str, list[Union[SingleNodeMatrixEntry, SingleNodeAgenticMatrixEntry]] ] = Field(default_factory=dict) - multi_node: dict[str, list[MultiNodeMatrixEntry] + multi_node: dict[str, list[Union[MultiNodeMatrixEntry, MultiNodeAgenticMatrixEntry]] ] = Field(default_factory=dict) evals: list[SingleNodeMatrixEntry] = Field(default_factory=list) multinode_evals: list[MultiNodeMatrixEntry] = Field(default_factory=list) diff --git a/utils/process_agentic_result.py b/utils/process_agentic_result.py new file mode 100644 index 000000000..c84b79a64 --- /dev/null +++ b/utils/process_agentic_result.py @@ -0,0 +1,347 @@ +#!/usr/bin/env python3 +"""Process agentic trace replay benchmark results into an aggregated JSON file. + +Reads detailed_results.csv and metrics_server_metrics.csv from the benchmark +output directory and produces an agg_*.json file matching the naming convention +of fixed-seq-len results. + +Expected env vars: + RESULT_FILENAME - base name for output file (e.g., dsr1_tp4_users8_offloadcpu_...) 
+ MODEL, MODEL_PREFIX, FRAMEWORK, PRECISION, TP, EP_SIZE, DP_ATTENTION + USERS, OFFLOADING, RUNNER_TYPE +""" + +import csv +import json +import os +import sys +import statistics + +csv.field_size_limit(sys.maxsize) +from pathlib import Path + + +def percentile(data, p): + if not data: + return 0.0 + sorted_data = sorted(data) + k = (len(sorted_data) - 1) * (p / 100) + f = int(k) + c = f + 1 + if c >= len(sorted_data): + return sorted_data[f] + return sorted_data[f] + (k - f) * (sorted_data[c] - sorted_data[f]) + + +def load_detailed_results(path): + with open(path) as f: + return list(csv.DictReader(f)) + + +def load_server_metrics(path): + with open(path) as f: + return list(csv.DictReader(f)) + + +def env_int(name, default=0): + value = os.environ.get(name) + if value in (None, ""): + return default + return int(value) + + +def env_bool(name, default=False): + value = os.environ.get(name) + if value in (None, ""): + return default + return value.lower() in ("1", "true", "yes", "on") + + +def compute_qps_stats(rows): + """Compute QPS from request completion timestamps using consecutive (non-overlapping) 1-second windows.""" + if len(rows) < 2: + return {} + + complete_times = sorted(float(r['request_complete_time']) for r in rows if r.get('success') == 'True') + if len(complete_times) < 2: + return {} + + start = complete_times[0] + end = complete_times[-1] + duration = end - start + if duration <= 0: + return {} + + window = 1.0 + qps_values = [] + t = start + while t + window <= end: + count = sum(1 for ct in complete_times if t <= ct < t + window) + qps_values.append(count / window) + t += window + + if not qps_values: + overall_qps = len(complete_times) / duration + return {"mean_qps": overall_qps} + + return { + "mean_qps": statistics.mean(qps_values), + "median_qps": statistics.median(qps_values), + "p90_qps": percentile(qps_values, 90), + "p99_qps": percentile(qps_values, 99), + "p99.9_qps": percentile(qps_values, 99.9), + "std_qps": statistics.pstdev(qps_values) if 
len(qps_values) > 1 else 0.0, + } + + +def compute_latency_stats(rows): + """Emit the same keys fixed-seq-len emits (mean/median/std/p90/p99/p99.9 + for ttft, tpot, intvty, itl, e2el) so downstream consumers can treat + both scenarios identically. + + - ttft: time to first token (s) — direct from trace replay + - e2el: end-to-end request latency (s) — what trace replay calls ttlt + - itl: inter-token latency (s) — direct from trace replay + - tpot: time per output token (s) — same measure as itl; aliased for + fixed-seq-len compatibility + - intvty: interactivity (1/tpot) — tokens/s per-request decode rate + """ + ttfts = [float(r['ttft']) for r in rows if r.get('success') == 'True' and float(r['ttft']) > 0] + e2els = [float(r['ttlt']) for r in rows if r.get('success') == 'True' and float(r['ttlt']) > 0] + itls = [float(r['itl']) for r in rows if r.get('success') == 'True' and float(r['itl']) > 0] + + def stats_for(prefix, values): + if not values: + return {} + out = { + f"mean_{prefix}": statistics.mean(values), + f"median_{prefix}": statistics.median(values), + f"p90_{prefix}": percentile(values, 90), + f"p99_{prefix}": percentile(values, 99), + f"p99.9_{prefix}": percentile(values, 99.9), + } + out[f"std_{prefix}"] = statistics.pstdev(values) if len(values) > 1 else 0.0 + return out + + result = {} + result.update(stats_for("ttft", ttfts)) + result.update(stats_for("e2el", e2els)) + result.update(stats_for("itl", itls)) + # tpot = itl (agentic has no speculative-decoding distinction) + result.update(stats_for("tpot", itls)) + # intvty = 1 / tpot (tokens/second per-request decode rate) + if itls: + intvtys = [1.0 / v for v in itls if v > 0] + result.update(stats_for("intvty", intvtys)) + return result + + +def compute_workload_stats(rows): + input_tokens = [int(r['input_tokens']) for r in rows if r.get('success') == 'True'] + output_expected = [int(r['output_tokens_expected']) for r in rows if r.get('success') == 'True'] + output_actual = 
[int(r['output_tokens_actual']) for r in rows if r.get('success') == 'True'] + + result = {} + for name, values in [("input_tokens", input_tokens), ("output_tokens_expected", output_expected), ("output_tokens_actual", output_actual)]: + if values: + result[f"mean_{name}"] = statistics.mean(values) + result[f"median_{name}"] = statistics.median(values) + result[f"p90_{name}"] = percentile(values, 90) + result[f"p99_{name}"] = percentile(values, 99) + result[f"p99.9_{name}"] = percentile(values, 99.9) + result[f"std_{name}"] = statistics.pstdev(values) if len(values) > 1 else 0.0 + return result + + +def compute_cache_stats(rows, server_metrics): + """Compute cache hit rates from both detailed results and server metrics.""" + result = { + "theoretical_cache_hit_rate": None, + "server_gpu_cache_hit_rate": None, + "server_cpu_cache_hit_rate": None, + "kv_offload_bytes_gpu_to_cpu": None, + "kv_offload_bytes_cpu_to_gpu": None, + "kv_offload_time_gpu_to_cpu": None, + "kv_offload_time_cpu_to_gpu": None, + "cpu_kv_cache_usage_pct": None, + "total_prompt_tokens": None, + "total_generation_tokens": None, + "total_requests_completed": None, + } + + # Theoretical infinite-cache hit rate from detailed results. + # A block counts as a hit iff its hash_id was seen earlier in the session. 
+ total_hit_blocks = sum(int(r.get('cache_hit_blocks', 0)) for r in rows) + total_miss_blocks = sum(int(r.get('cache_miss_blocks', 0)) for r in rows) + total_blocks = total_hit_blocks + total_miss_blocks + if total_blocks > 0: + result["theoretical_cache_hit_rate"] = total_hit_blocks / total_blocks + + # From server metrics: actual prefix cache hit rate (last row) + if server_metrics: + last = server_metrics[-1] + hits = int(last.get('prefix_cache_hits', 0)) + queries = int(last.get('prefix_cache_queries', 0)) + if queries > 0: + result["server_gpu_cache_hit_rate"] = hits / queries + + cpu_hits = int(last.get('cpu_prefix_cache_hits', 0)) + cpu_queries = int(last.get('cpu_prefix_cache_queries', 0)) + if cpu_queries > 0: + result["server_cpu_cache_hit_rate"] = cpu_hits / cpu_queries + + offload_g2c = float(last.get('kv_offload_bytes_gpu_to_cpu', 0)) + offload_c2g = float(last.get('kv_offload_bytes_cpu_to_gpu', 0)) + if offload_g2c > 0 or offload_c2g > 0: + result["kv_offload_bytes_gpu_to_cpu"] = offload_g2c + result["kv_offload_bytes_cpu_to_gpu"] = offload_c2g + result["kv_offload_time_gpu_to_cpu"] = float(last.get('kv_offload_time_gpu_to_cpu', 0)) + result["kv_offload_time_cpu_to_gpu"] = float(last.get('kv_offload_time_cpu_to_gpu', 0)) + + cpu_cache_pct = float(last.get('cpu_kv_cache_usage_pct', 0)) + if cpu_cache_pct > 0: + result["cpu_kv_cache_usage_pct"] = cpu_cache_pct + + result["total_prompt_tokens"] = int(last.get('prompt_tokens_total', 0)) + result["total_generation_tokens"] = int(last.get('generation_tokens_total', 0)) + result["total_requests_completed"] = int(last.get('request_success_total', 0)) + + return result + + +def compute_throughput_stats(rows, server_metrics): + """Compute throughput from completed requests.""" + successful = [r for r in rows if r.get('success') == 'True'] + if len(successful) < 2: + return {} + + start = min(float(r['request_start_time']) for r in successful) + end = max(float(r['request_complete_time']) for r in successful) + 
duration = end - start + if duration <= 0: + return {} + + total_input = sum(int(r['input_tokens']) for r in successful) + total_output = sum(int(r['output_tokens_actual']) for r in successful) + + return { + "input_tput_tps": total_input / duration, + "output_tput_tps": total_output / duration, + "total_tput_tps": (total_input + total_output) / duration, + "duration_seconds": duration, + } + + +def main(): + result_filename = os.environ.get('RESULT_FILENAME', '') + if not result_filename: + print("ERROR: RESULT_FILENAME env var not set", file=sys.stderr) + sys.exit(1) + + # Result paths are relative to RESULT_DIR (set by the agentic script, e.g. + # /workspace/results). When run standalone from the repo root, fall back + # to ./results. + result_dir = Path(os.environ.get('RESULT_DIR', 'results')) + output_dir = Path(os.environ.get('AGENTIC_OUTPUT_DIR', '.')) + + detailed_path = result_dir / "trace_replay/detailed_results.csv" + metrics_path = result_dir / "metrics_server_metrics.csv" + + if not detailed_path.exists(): + print(f"ERROR: {detailed_path} not found", file=sys.stderr) + sys.exit(1) + + rows = load_detailed_results(detailed_path) + server_metrics = load_server_metrics(metrics_path) if metrics_path.exists() else [] + + successful = [r for r in rows if r.get('success') == 'True'] + + is_multinode = env_bool('IS_MULTINODE') + tp = env_int('TP', 1) + ep = env_int('EP_SIZE', 1) + dp_attention = os.environ.get('DP_ATTENTION', 'false') + num_gpus = tp + + if is_multinode: + prefill_num_workers = env_int('PREFILL_NUM_WORKERS') + prefill_tp = env_int('PREFILL_TP') + prefill_ep = env_int('PREFILL_EP', 1) + prefill_dp_attention = os.environ.get('PREFILL_DP_ATTN', 'false') + decode_num_workers = env_int('DECODE_NUM_WORKERS') + decode_tp = env_int('DECODE_TP') + decode_ep = env_int('DECODE_EP', 1) + decode_dp_attention = os.environ.get('DECODE_DP_ATTN', 'false') + num_prefill_gpu = prefill_num_workers * prefill_tp + num_decode_gpu = decode_num_workers * decode_tp + 
num_gpus = num_prefill_gpu + num_decode_gpu + # Keep legacy fields populated for consumers that have not split by topology yet. + tp = prefill_tp + decode_tp + ep = max(prefill_ep, decode_ep) + dp_attention = "true" if env_bool('PREFILL_DP_ATTN') or env_bool('DECODE_DP_ATTN') else "false" + + users = int(os.environ.get('USERS', '0')) + agg = { + "hw": os.environ.get('RUNNER_TYPE', ''), + # conc mirrors fixed-seq-len's field; users is the historical agentic + # name. Keep both so consumers can use either. + "conc": users, + "users": users, + "image": os.environ.get('IMAGE', ''), + "model": os.environ.get('MODEL', ''), + "infmax_model_prefix": os.environ.get('MODEL_PREFIX', ''), + "framework": os.environ.get('FRAMEWORK', ''), + "precision": os.environ.get('PRECISION', ''), + "spec_decoding": os.environ.get('SPEC_DECODING', 'none'), + "disagg": env_bool('DISAGG'), + "scenario_type": "agentic-coding", + "is_multinode": is_multinode, + "tp": tp, + "ep": ep, + "dp_attention": dp_attention, + "offloading": os.environ.get('OFFLOADING', 'none'), + "num_requests_total": len(rows), + "num_requests_successful": len(successful), + } + + if is_multinode: + agg.update({ + "prefill_num_workers": prefill_num_workers, + "prefill_tp": prefill_tp, + "prefill_ep": prefill_ep, + "prefill_dp_attention": prefill_dp_attention, + "num_prefill_gpu": num_prefill_gpu, + "decode_num_workers": decode_num_workers, + "decode_tp": decode_tp, + "decode_ep": decode_ep, + "decode_dp_attention": decode_dp_attention, + "num_decode_gpu": num_decode_gpu, + }) + + agg.update(compute_qps_stats(successful)) + agg.update(compute_latency_stats(successful)) + agg.update(compute_workload_stats(successful)) + agg.update(compute_cache_stats(successful, server_metrics)) + agg.update(compute_throughput_stats(successful, server_metrics)) + + # Per-GPU throughput + if "total_tput_tps" in agg and num_gpus > 0: + agg["tput_per_gpu"] = agg["total_tput_tps"] / num_gpus + agg["output_tput_per_gpu"] = 
agg.get("output_tput_tps", 0) / num_gpus + agg["input_tput_per_gpu"] = agg.get("input_tput_tps", 0) / num_gpus + + output_path = output_dir / f"{result_filename}.json" + with open(output_path, 'w') as f: + json.dump(agg, f, indent=2) + + print(f"Saved aggregated agentic result to {output_path}") + print(f" Requests: {len(successful)}/{len(rows)} successful") + if "mean_qps" in agg: + print(f" QPS: mean={agg['mean_qps']:.2f} median={agg.get('median_qps', 0):.2f} p99={agg.get('p99_qps', 0):.2f}") + if agg.get("server_gpu_cache_hit_rate") is not None: + print(f" GPU cache hit rate: {agg['server_gpu_cache_hit_rate']:.1%}") + if agg.get("tput_per_gpu") is not None: + print(f" Throughput per GPU: {agg['tput_per_gpu']:.0f} tok/s") + + +if __name__ == "__main__": + main() diff --git a/utils/process_changelog.py b/utils/process_changelog.py index a3d0f26f9..4c8c07864 100644 --- a/utils/process_changelog.py +++ b/utils/process_changelog.py @@ -161,6 +161,8 @@ def main(): *MASTER_CONFIGS, "--no-evals", ] + if entry.scenario_type: + base_cmd.extend(["--scenario-type", *entry.scenario_type]) try: result = subprocess.run( base_cmd, @@ -187,6 +189,8 @@ def main(): *MASTER_CONFIGS, "--evals-only", ] + if entry.scenario_type: + base_cmd.extend(["--scenario-type", *entry.scenario_type]) try: eval_result = subprocess.run( base_cmd, @@ -203,10 +207,16 @@ def main(): all_benchmark_results = trim_conc(all_benchmark_results) for result in all_benchmark_results: - seq_len_str = seq_len_to_str(result["isl"], result["osl"]) - if "prefill" in result and result["prefill"] is not None: + if result.get("scenario-type") == "agentic-coding": + if result.get("prefill") is not None: + final_results["multi_node"]["agentic"].append(result) + else: + final_results["single_node"]["agentic"].append(result) + elif "prefill" in result and result["prefill"] is not None: + seq_len_str = seq_len_to_str(result["isl"], result["osl"]) final_results["multi_node"][seq_len_str].append(result) else: + seq_len_str = 
seq_len_to_str(result["isl"], result["osl"]) final_results["single_node"][seq_len_str].append(result) final_results["evals"] = [e for e in all_eval_results if e.get("prefill") is None] diff --git a/utils/summarize.py b/utils/summarize.py index c99001728..2dfeaa419 100644 --- a/utils/summarize.py +++ b/utils/summarize.py @@ -73,8 +73,9 @@ def main(): if result and 'is_multinode' in result: results.append(result) - single_node_results = [r for r in results if not r['is_multinode']] - multinode_results = [r for r in results if r['is_multinode']] + single_node_results = [r for r in results if not r['is_multinode'] and r.get('scenario_type') != 'agentic-coding'] + multinode_results = [r for r in results if r['is_multinode'] and r.get('scenario_type') != 'agentic-coding'] + agentic_results = [r for r in results if r.get('scenario_type') == 'agentic-coding'] # Single-node and multi-node results have different fields and therefore need to be printed separately if single_node_results: @@ -191,4 +192,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/utils/trace-replay b/utils/trace-replay new file mode 160000 index 000000000..6560957a3 --- /dev/null +++ b/utils/trace-replay @@ -0,0 +1 @@ +Subproject commit 6560957a3936dc631b8b585e4fd8374c8954285c