From 19f7029e19a3b48c9887a596e770854364d2086f Mon Sep 17 00:00:00 2001
From: pinjie <pinjiex@nvidia.com>
Date: Mon, 15 Jun 2026 00:21:03 -0700
Subject: [PATCH] update accvlab perf

Signed-off-by: pinjie <pinjiex@nvidia.com>
---
 .../docs/_on_doc_generation.py                |  82 ++++
 .../docs/evaluation.rst                       | 227 +++++++---
 .../evaluation/plot_decoder_evaluation.py     | 408 ++++++++++++++++++
 .../hevc_gop30_random_access.csv              |   6 +
 .../cross_decoder/hevc_gop30_sequential.csv   |   6 +
 .../streampetr_training/setup_a.csv           |   3 +
 .../streampetr_training/setup_b.csv           |   3 +
 .../bframes_random_access.csv                 |   4 +
 .../video_config_sweep/bframes_sequential.csv |   4 +
 .../codec_random_access.csv                   |   3 +
 .../video_config_sweep/codec_sequential.csv   |   3 +
 .../video_config_sweep/gop_random_access.csv  |   6 +
 .../video_config_sweep/gop_sequential.csv     |   6 +
 13 files changed, 704 insertions(+), 57 deletions(-)
 create mode 100644 packages/on_demand_video_decoder/docs/_on_doc_generation.py
 create mode 100644 packages/on_demand_video_decoder/evaluation/plot_decoder_evaluation.py
 create mode 100644 packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_random_access.csv
 create mode 100644 packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_sequential.csv
 create mode 100644 packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_a.csv
 create mode 100644 packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_b.csv
 create mode 100644 packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_random_access.csv
 create mode 100644 packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_sequential.csv
 create mode 100644 packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_random_access.csv
 create mode 100644 packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_sequential.csv
 create mode 100644 packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_random_access.csv
 create mode 100644 packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_sequential.csv

diff --git a/packages/on_demand_video_decoder/docs/_on_doc_generation.py b/packages/on_demand_video_decoder/docs/_on_doc_generation.py
new file mode 100644
index 0000000..4e0c016
--- /dev/null
+++ b/packages/on_demand_video_decoder/docs/_on_doc_generation.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pathlib import Path
+import sys
+from typing import Any
+
+_RESULTS_SUBDIR = Path("evaluation_results")  # joined onto context.package_root
+_GENERATED_IMAGE_SUBDIR = Path("evaluation")  # joined onto context.generated_dir
+
+_REQUIRED_CSV_INPUTS = (  # paths relative to the results directory; all are checked before plotting
+    "cross_decoder/hevc_gop30_random_access.csv",
+    "cross_decoder/hevc_gop30_sequential.csv",
+    "video_config_sweep/gop_random_access.csv",
+    "video_config_sweep/gop_sequential.csv",
+    "video_config_sweep/bframes_random_access.csv",
+    "video_config_sweep/bframes_sequential.csv",
+    "video_config_sweep/codec_random_access.csv",
+    "video_config_sweep/codec_sequential.csv",
+    "streampetr_training/setup_a.csv",
+    "streampetr_training/setup_b.csv",
+)
+
+_REQUIRED_IMAGE_NAMES = (  # file names checked in the output directory after plotting
+    "cross_decoder.png",
+    "video_config_gop.png",
+    "video_config_bframes.png",
+    "video_config_codec.png",
+    "streampetr_training.png",
+)
+
+
+def _validate_csv_inputs(input_dir: Path) -> None:
+    if not input_dir.exists():  # fail early when the whole results directory is absent
+        raise FileNotFoundError(
+            "Required committed CSV input directory is missing for on_demand_video_decoder docs asset generation: "
+            f"{input_dir}."
+        )
+
+    missing = [input_dir / rel for rel in _REQUIRED_CSV_INPUTS if not (input_dir / rel).exists()]
+    if missing:  # report every missing file in one error instead of stopping at the first
+        missing_list = "\n".join(f"  - {p}" for p in missing)
+        raise FileNotFoundError(
+            "Missing required committed CSV input file(s) for on_demand_video_decoder docs asset generation:\n"
+            f"{missing_list}"
+        )
+
+
+def _validate_images(output_dir: Path) -> None:
+    missing = [output_dir / name for name in _REQUIRED_IMAGE_NAMES if not (output_dir / name).exists()]
+    if missing:  # at least one expected image is absent from the output directory
+        missing_list = "\n".join(f"  - {p}" for p in missing)
+        raise FileNotFoundError(
+            "on_demand_video_decoder docs asset generation did not produce all images referenced by evaluation.rst:\n"
+            f"{missing_list}"
+        )
+
+
+def generate_docs_assets(context: Any) -> None:
+    """Render the evaluation plots from the committed CSVs; raise FileNotFoundError on missing inputs/outputs."""
+    input_dir = context.package_root / _RESULTS_SUBDIR
+    output_dir = context.generated_dir / _GENERATED_IMAGE_SUBDIR
+    _validate_csv_inputs(input_dir)
+    evaluation_dir = str(context.package_root / "evaluation")
+    sys.path.insert(0, evaluation_dir)  # make evaluation/plot_decoder_evaluation.py importable by bare name
+    try:
+        import plot_decoder_evaluation
+    finally:
+        sys.path.remove(evaluation_dir)  # do not leak the search-path entry into the rest of the docs build
+    plot_decoder_evaluation.generate_all(input_root=input_dir, output_dir=output_dir)
+    _validate_images(output_dir)
diff --git a/packages/on_demand_video_decoder/docs/evaluation.rst b/packages/on_demand_video_decoder/docs/evaluation.rst
index 4a4c67c..79b0304 100644
--- a/packages/on_demand_video_decoder/docs/evaluation.rst
+++ b/packages/on_demand_video_decoder/docs/evaluation.rst
@@ -1,23 +1,170 @@
 Evaluation
 ==========
 
-The on-demand video decoder was used for training a StreamPETR model on the NuScenes mini dataset and 
-compared to the performance to both the 
+
+Decoder Throughput Benchmark (nuScenes)
+----------------------------------------
+
+This section benchmarks the standalone decoding throughput of multiple decoders across five GPU
+platforms using nuScenes video clips.  All results are **6-camera aggregate FPS** measured on a single GPU,
+for both random access (one frame drawn per iteration across all six cameras) and sequential access.
+
+Test Environment
+~~~~~~~~~~~~~~~~
+
+**Video clips**
+
+.. list-table::
+   :header-rows: 1
+
+   * - Property
+     - Value
+   * - Source dataset
+     - nuScenes
+   * - Resolution
+     - 1600 × 900
+   * - Frame rate
+     - 10 FPS
+   * - Frames per clip
+     - 235
+   * - Cameras
+     - 6 (CAM_FRONT, CAM_FRONT_LEFT, CAM_FRONT_RIGHT, CAM_BACK, CAM_BACK_LEFT, CAM_BACK_RIGHT)
+   * - Pixel format
+     - YUV 4:2:0
+
+**Hardware platforms**
+
+.. list-table::
+   :header-rows: 1
+
+   * - GPU
+     - Compute Capability
+     - Driver
+     - CPU
+     - CPU Cores
+   * - NVIDIA A100 80 GB PCIe
+     - CC 8.0 (Ampere)
+     - 595.58.03
+     - Intel Xeon Silver 4210R @ 2.40 GHz
+     - 10 physical / 20 logical
+   * - NVIDIA H200 NVL
+     - CC 9.0 (Hopper)
+     - 595.58.03
+     - AMD EPYC 9554
+     - 128 physical / 256 logical
+   * - NVIDIA B200
+     - CC 10.0 (Blackwell)
+     - 610.43.02
+     - Intel Xeon Platinum 8570
+     - 112 physical / 224 logical
+   * - NVIDIA B300
+     - CC 10.3 (Blackwell)
+     - 610.43.02
+     - Intel Xeon 6776P
+     - 128 physical / 256 logical
+   * - NVIDIA RTX PRO 6000 Blackwell Server Edition
+     - CC 12.0 (Blackwell)
+     - 595.58.03
+     - Intel Xeon Platinum 8480+
+     - 112 physical / 224 logical
+
+All nodes run CUDA 12.9 inside a ``nvcr.io/nvidia/pytorch:25.05-py3`` container.
+
+**Decoder versions**
+
+.. list-table::
+   :header-rows: 1
+
+   * - Decoder
+     - Library / Version
+     - Backend
+   * - ``accvlab_gpu``
+     - accvlab.on_demand_video_decoder (this package)
+     - NVDEC
+   * - ``pynvc_gpu``
+     - PyNvVideoCodec 2.1.0
+     - NVDEC
+   * - ``decord_gpu``
+     - decord 0.6.0
+     - NVDEC
+   * - ``decord_cpu``
+     - decord 0.6.0
+     - FFmpeg software decode
+   * - ``opencv_cpu``
+     - OpenCV 4.11.0
+     - FFmpeg software decode
+
+All GPU builds use FFmpeg 4.4.6 with nv-codec-headers n11.1.5.3.
+CPU decoders (``decord_cpu``, ``opencv_cpu``) run on the host CPU listed in the hardware table above.
+
+HEVC GOP=30 — Cross-Decoder Comparison
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Random-access and sequential FPS (6-camera total) for the ``hevc_gop30_bf0`` configuration.
+Hatched bars mark combinations where the decoder failed (``decord_gpu`` on B200 and RTX PRO 6000) due to a known decord 0.6 EOF-retry bug.
+
+.. Data source: evaluation_results/cross_decoder/hevc_gop30_random_access.csv
+..              evaluation_results/cross_decoder/hevc_gop30_sequential.csv
+
+.. figure:: _generated/evaluation/cross_decoder.png
+   :alt: Cross-decoder FPS comparison for HEVC GOP=30 (random and sequential access)
+   :align: center
+   :width: 100%
+
+On-demand Video Decoder - Across Video Configurations and Hardware
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+6-camera aggregate FPS for ``accvlab_gpu``.  Each figure (random-access and sequential panels) varies one
+encoding parameter while the other two are held at their defaults (HEVC, GOP = 30, B-frames = 0).
+
+**Effect of GOP size — HEVC, B-frames = 0**
+
+.. Data source: evaluation_results/video_config_sweep/gop_random_access.csv
+..              evaluation_results/video_config_sweep/gop_sequential.csv
+
+.. figure:: _generated/evaluation/video_config_gop.png
+   :alt: Effect of GOP size on FPS (random and sequential access)
+   :align: center
+   :width: 90%
+
+**Effect of B-frames — HEVC, GOP = 30**
+
+.. Data source: evaluation_results/video_config_sweep/bframes_random_access.csv
+..              evaluation_results/video_config_sweep/bframes_sequential.csv
+
+.. figure:: _generated/evaluation/video_config_bframes.png
+   :alt: Effect of B-frames on FPS (random and sequential access)
+   :align: center
+   :width: 90%
+
+**Effect of Codec — GOP = 30, B-frames = 0**
+
+.. Data source: evaluation_results/video_config_sweep/codec_random_access.csv
+..              evaluation_results/video_config_sweep/codec_sequential.csv
+
+.. figure:: _generated/evaluation/video_config_codec.png
+   :alt: Effect of codec choice on FPS (random and sequential access)
+   :align: center
+   :width: 90%
+
+
+StreamPETR Training Performance
+---------------------------------
+
+The on-demand video decoder was used for training a StreamPETR model on the NuScenes mini dataset, and
+its performance was compared to the
 `original StreamPETR implementation (with image-based training) <https://github.com/exiawsh/StreamPETR>`_,
 and in one case to OpenCV-based video training. The results are shown below.
 
 Setup
------
+~~~~~
 
-Experiment Setup
-~~~~~~~~~~~~~~~~
-
-For the video training, the demuxer-free approach is used (see 
-:doc:`pytorch_integration_examples/dataloader_demuxer_free_decode` for details on this approach). Here, the 
+For the video training, the demuxer-free approach is used (see
+:doc:`pytorch_integration_examples/dataloader_demuxer_free_decode` for details on this approach). Here, the
 GOP packets are extracted and stored prior to the training.
 
-In the video training, the frames are decoded in the training process, and consequently, pre-processing is 
-performed in the training process on the GPU. Note that this is not a viable optimization for the image-based 
+In the video training, the frames are decoded in the training process, and consequently, pre-processing is
+performed in the training process on the GPU. Note that this is not a viable optimization for the image-based
 training, as it adds significant overhead when passing the full-resolution images to the training process.
 
 
@@ -29,17 +176,17 @@ The training is performed for the NuScenes mini dataset, with the following conf
     - No B-frames
     - Including both samples and sweeps (resulting in ~12 frames per second)
     - 1600x900 resolution (same as images)
-    
+
   - Batch size of 16 per GPU
 
 .. note::
 
-  We are planning to add a demo for the On-Demand Video Decoder package in the future, including the 
+  We are planning to add a demo for the On-Demand Video Decoder package in the future, including the
   implementation of the experiments performed in this evaluation.
 
 
 Hardware Setup A
-~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^
 
 .. list-table:: System Configuration
    :header-rows: 1
@@ -51,7 +198,7 @@ Hardware Setup A
 
 
 Hardware Setup B
-~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^
 
 .. list-table:: System Configuration
    :header-rows: 1
@@ -63,54 +210,20 @@ Hardware Setup B
 
 
 Results & Discussion
---------------------
+~~~~~~~~~~~~~~~~~~~~
 
-Results
-~~~~~~~
-
-Results for both hardware systems are shown in the following tables.
-
-.. list-table:: Runtime Comparison for Hardware Setup A
-   :header-rows: 1
-
-   * - Configuration
-     - Image [ms]
-     - Video: OpenCV [ms]
-     - Video: Ours [ms]
-     - Speedup (vs. Image)
-   * - 1 GPU
-     - 725
-     - 1674
-     - **751**
-     - × 0.97
-   * - 8 GPU
-     - 1025
-     - 2663
-     - **908**
-     - × 1.13
-
-
-.. list-table:: Runtime Comparison for Hardware Setup B
-   :header-rows: 1
+Results for both hardware systems are shown below.
 
-   * - Configuration
-     - Image [ms]
-     - Video [ms]
-     - Speedup
-   * - 1 GPU
-     - 878
-     - **862**
-     - × 1.02
-   * - 8 GPU
-     - 1310
-     - **1070**
-     - × 1.22
+.. Data source: evaluation_results/streampetr_training/setup_a.csv
+..              evaluation_results/streampetr_training/setup_b.csv
 
+.. figure:: _generated/evaluation/streampetr_training.png
+   :alt: StreamPETR training iteration time comparison across hardware setups
+   :align: center
+   :width: 100%
 
-Discussion
-~~~~~~~~~~
 
 On both systems, the performance of the video-based training is comparable to the image-based training for
 the 1 GPU configuration. The video training outperforms the image training for the 8 GPU configuration,
 with the speedup depending on the system. However, please note that the main goal is to reduce the storage
-requirements while maintaining good performance.
\ No newline at end of file
+requirements while maintaining good performance.
diff --git a/packages/on_demand_video_decoder/evaluation/plot_decoder_evaluation.py b/packages/on_demand_video_decoder/evaluation/plot_decoder_evaluation.py
new file mode 100644
index 0000000..c3e088b
--- /dev/null
+++ b/packages/on_demand_video_decoder/evaluation/plot_decoder_evaluation.py
@@ -0,0 +1,408 @@
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Generate benchmark chart images for the on_demand_video_decoder evaluation docs."""
+
+import csv
+from pathlib import Path
+
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+import numpy as np
+
+# ---------------------------------------------------------------------------
+# Color palettes
+# ---------------------------------------------------------------------------
+
+_GPU_COLORS = {  # plot color per GPU column name of the sweep CSVs; unknown names fall back to grey at the call site
+    "A100":         "#4C72B0",
+    "H200 NVL":     "#55A868",
+    "B200":         "#C44E52",
+    "B300":         "#8172B2",
+    "RTX PRO 6000": "#CCB974",
+}
+
+_DECODER_COLORS = {  # bar color per decoder column name of the cross-decoder CSVs
+    "accvlab_gpu": "#55A868",
+    "pynvc_gpu":   "#4C72B0",
+    "decord_gpu":  "#C44E52",
+    "decord_cpu":  "#8172B2",
+    "opencv_cpu":  "#CCB974",
+}
+
+_DECODER_LABELS = {  # legend text per decoder column name
+    "accvlab_gpu": "accvlab_gpu (ours)",
+    "pynvc_gpu":   "pynvc_gpu",
+    "decord_gpu":  "decord_gpu",
+    "decord_cpu":  "decord_cpu",
+    "opencv_cpu":  "opencv_cpu",
+}
+
+_NA_HATCH = "////"  # hatch pattern of the placeholder bars drawn for empty CSV cells
+
+# ---------------------------------------------------------------------------
+# CSV helpers
+# ---------------------------------------------------------------------------
+
+
+def _read_csv(path: Path) -> tuple[list[str], list[list[str]]]:
+    """Return (header_row, data_rows) from a UTF-8 CSV file; a file with no rows raises IndexError."""
+    with open(path, newline="", encoding="utf-8") as f:
+        # csv.reader yields [] for blank lines; drop them so callers can index every row safely
+        rows = [row for row in csv.reader(f) if row]
+    return rows[0], rows[1:]
+
+
+def _parse_float(value: str):
+    """Return the cell as a float, or None when it is empty or whitespace-only."""
+    value = value.strip()
+    if value == "":
+        return None
+    return float(value)  # non-numeric text propagates float()'s ValueError
+
+
+# ---------------------------------------------------------------------------
+# Figure helpers
+# ---------------------------------------------------------------------------
+
+
+def _save(fig: plt.Figure, output_path: Path) -> None:
+    output_path.parent.mkdir(parents=True, exist_ok=True)  # create the output directory if it does not exist yet
+    fig.savefig(output_path, dpi=150, bbox_inches="tight")
+    plt.close(fig)  # close the figure once it has been written
+
+
+def _apply_style(ax: plt.Axes, title: str, xlabel: str, ylabel: str = "FPS (6-camera aggregate)") -> None:
+    ax.set_title(title, fontsize=11, fontweight="bold")  # title and axis labels shared by the FPS subplots
+    ax.set_xlabel(xlabel, fontsize=10)
+    ax.set_ylabel(ylabel, fontsize=10)
+    ax.yaxis.grid(True, linestyle="--", alpha=0.5)  # horizontal grid lines only
+    ax.set_axisbelow(True)  # keep the grid behind the plotted artists
+    ax.spines["top"].set_visible(False)  # hide the top and right frame lines
+    ax.spines["right"].set_visible(False)
+
+
+# ---------------------------------------------------------------------------
+# Plot: cross-decoder grouped bar chart
+# ---------------------------------------------------------------------------
+
+
+def plot_cross_decoder(random_csv: Path, sequential_csv: Path, output_path: Path) -> None:
+    """Draw grouped per-decoder FPS bars for each GPU, one subplot per access pattern.
+
+    Empty CSV cells become hatched placeholder bars as tall as that decoder's best measured value.
+    """
+    fig, axes = plt.subplots(2, 1, figsize=(10, 9), constrained_layout=True)
+    fig.suptitle("HEVC GOP=30 — Cross-Decoder Comparison\n(6-camera aggregate FPS, higher is better)", fontsize=12)
+
+    for ax, csv_path, access_label in zip(
+        axes,
+        [random_csv, sequential_csv],
+        ["Random Access", "Sequential (Stream) Access"],
+    ):
+        header, rows = _read_csv(csv_path)
+        # header: [gpu, decoder1, decoder2, ...]
+        gpu_names = [r[0] for r in rows]
+        decoders = header[1:]
+        n_gpus = len(gpu_names)
+        n_dec = len(decoders)
+
+        x = np.arange(n_gpus)
+        width = 0.8 / n_dec
+        offsets = np.linspace(-(n_dec - 1) / 2, (n_dec - 1) / 2, n_dec) * width
+
+        for i, decoder in enumerate(decoders):
+            vals = [_parse_float(r[i + 1]) for r in rows]
+            bar_x = x + offsets[i]
+            color = _DECODER_COLORS.get(decoder, "#888888")
+            label = _DECODER_LABELS.get(decoder, decoder)
+            peak = max((v for v in vals if v), default=1.0)  # this decoder's best result; 1.0 if it has none
+            for j, v in enumerate(vals):
+                if v is None:
+                    # draw an N/A hatched bar at this decoder's peak height so it stays visible on the FPS axis
+                    ax.bar(
+                        bar_x[j], peak, width=width * 0.9,
+                        color="white", edgecolor=color, linewidth=1,
+                        hatch=_NA_HATCH, alpha=0.6,
+                        label="_nolegend_",
+                    )
+                else:
+                    ax.bar(
+                        bar_x[j], v, width=width * 0.9,
+                        color=color, edgecolor="white", linewidth=0.5,
+                        label=label if j == 0 else "_nolegend_",
+                    )
+                    ax.text(
+                        bar_x[j], v + peak * 0.01,
+                        str(int(v)), ha="center", va="bottom", fontsize=6.5, rotation=90,
+                    )
+
+        ax.set_xticks(x)
+        ax.set_xticklabels(gpu_names, fontsize=9)
+        _apply_style(ax, access_label, "GPU")
+
+        # legend only on first subplot
+        if ax is axes[0]:
+            handles = [
+                mpatches.Patch(color=_DECODER_COLORS.get(d, "#888"), label=_DECODER_LABELS.get(d, d))
+                for d in decoders
+            ]
+            na_patch = mpatches.Patch(
+                facecolor="white", edgecolor="#888", hatch=_NA_HATCH, label="N/A (decoder failed)"
+            )
+            ax.legend(handles=handles + [na_patch], fontsize=8, loc="upper right")
+
+    _save(fig, output_path)
+
+
+# ---------------------------------------------------------------------------
+# Plot: line chart for a continuous config sweep (GOP, B-frames)
+# ---------------------------------------------------------------------------
+
+
+def plot_config_line(
+    random_csv: Path,
+    sequential_csv: Path,
+    output_path: Path,
+    xlabel: str,
+    title: str,
+    xscale: str = "linear",
+) -> None:
+    """Plot FPS against a numeric encoding parameter, one line per GPU and one subplot per access pattern.
+
+    Each CSV holds the swept parameter value in its first column and one FPS column per GPU.
+    Empty FPS cells are treated as missing and leave a gap in that GPU's line.
+
+    Args:
+        random_csv: Sweep results plotted in the "Random Access" subplot.
+        sequential_csv: Sweep results plotted in the "Sequential (Stream) Access" subplot.
+        output_path: Destination image file.
+        xlabel: Label of the x axis of both subplots.
+        title: First line of the figure title.
+        xscale: "log" for a logarithmic x axis; any other value keeps the default linear axis.
+    """
+    fig, axes = plt.subplots(2, 1, figsize=(9, 8), constrained_layout=True)
+    fig.suptitle(title + "\n(6-camera aggregate FPS, higher is better)", fontsize=12)
+
+    for ax, csv_path, access_label in zip(
+        axes,
+        [random_csv, sequential_csv],
+        ["Random Access", "Sequential (Stream) Access"],
+    ):
+        header, rows = _read_csv(csv_path)
+        x_vals = [_parse_float(r[0]) for r in rows]
+
+        for col_idx, gpu in enumerate(header[1:], start=1):
+            # None -> NaN: matplotlib breaks a line at NaN, so N/A cells leave a gap while the
+            # series stays one artist that always carries its legend label
+            y_vals = [np.nan if v is None else v for v in (_parse_float(r[col_idx]) for r in rows)]
+            ax.plot(
+                x_vals, y_vals, marker="o", linewidth=1.8, markersize=5,
+                color=_GPU_COLORS.get(gpu, "#888888"), label=gpu,
+            )
+
+        if xscale == "log":
+            ax.set_xscale("log")
+            ax.xaxis.set_minor_formatter(matplotlib.ticker.NullFormatter())
+        ax.set_xticks(x_vals)
+        ax.set_xticklabels([str(int(v)) for v in x_vals], fontsize=9)
+
+        _apply_style(ax, access_label, xlabel)
+
+        if ax is axes[0]:
+            ax.legend(fontsize=8, loc="upper right")
+
+    _save(fig, output_path)
+
+
+# ---------------------------------------------------------------------------
+# Plot: codec grouped bar chart
+# ---------------------------------------------------------------------------
+
+
+def plot_codec_bars(random_csv: Path, sequential_csv: Path, output_path: Path) -> None:
+    """Draw grouped per-GPU FPS bars for each codec, one subplot per access pattern.
+
+    Empty CSV cells are drawn as zero-height bars without a value label.
+    """
+    fig, axes = plt.subplots(2, 1, figsize=(9, 8), constrained_layout=True)
+    fig.suptitle("Effect of Codec — GOP=30, B-frames=0\n(6-camera aggregate FPS, higher is better)", fontsize=12)
+
+    for ax, csv_path, access_label in zip(
+        axes,
+        [random_csv, sequential_csv],
+        ["Random Access", "Sequential (Stream) Access"],
+    ):
+        header, rows = _read_csv(csv_path)
+        codec_names = [r[0] for r in rows]
+        gpu_names = header[1:]
+        n_codecs = len(codec_names)
+        n_gpus = len(gpu_names)
+
+        x = np.arange(n_codecs)
+        width = 0.8 / n_gpus
+        offsets = np.linspace(-(n_gpus - 1) / 2, (n_gpus - 1) / 2, n_gpus) * width
+
+        for i, gpu in enumerate(gpu_names):
+            col_idx = header.index(gpu)
+            vals = [_parse_float(r[col_idx]) for r in rows]
+            color = _GPU_COLORS.get(gpu, "#888888")
+            bar_x = x + offsets[i]
+            peak = max((v for v in vals if v), default=1.0)  # scales the gap between a bar and its value label
+            bars = ax.bar(
+                bar_x, [v or 0.0 for v in vals], width=width * 0.9,  # missing cells get a zero-height bar
+                color=color, edgecolor="white", linewidth=0.5,
+                label=gpu,
+            )
+            for bar, v in zip(bars, vals):
+                if v is not None:
+                    ax.text(
+                        bar.get_x() + bar.get_width() / 2,
+                        v + peak * 0.01,
+                        str(int(v)), ha="center", va="bottom", fontsize=8,
+                    )
+
+        ax.set_xticks(x)
+        ax.set_xticklabels(codec_names, fontsize=10)
+        _apply_style(ax, access_label, "Codec")
+
+        if ax is axes[0]:
+            ax.legend(fontsize=8, loc="upper right")
+
+    _save(fig, output_path)
+
+
+# ---------------------------------------------------------------------------
+# Plot: StreamPETR training horizontal bars
+# ---------------------------------------------------------------------------
+
+
+def plot_streampetr(setup_a_csv: Path, setup_b_csv: Path, output_path: Path) -> None:
+    """Draw horizontal iteration-time bars per hardware setup, annotated with the speedup over image training."""
+    fig, axes = plt.subplots(1, 2, figsize=(12, 4), constrained_layout=True)
+    fig.suptitle("StreamPETR Training Performance — Iteration Time (lower is better)", fontsize=12)
+
+    setup_configs = [
+        (axes[0], setup_a_csv, "Hardware Setup A\n(8× RTX 6000D, 2× EPYC 7742)",
+         ["Image", "Video: OpenCV", "Video: Ours"], ["image_ms", "video_opencv_ms", "video_ours_ms"],
+         ["#4C72B0", "#CCB974", "#55A868"]),
+        (axes[1], setup_b_csv, "Hardware Setup B\n(8× H20, 2× Xeon Platinum 8468V)",
+         ["Image", "Video: Ours"], ["image_ms", "video_ms"],
+         ["#4C72B0", "#55A868"]),
+    ]
+
+    for ax, csv_path, title, series_labels, col_names, colors in setup_configs:
+        header, rows = _read_csv(csv_path)
+        configs = [r[0] for r in rows]
+        n_configs = len(configs)
+        n_series = len(series_labels)
+
+        y = np.arange(n_configs)
+        height = 0.7 / n_series
+        offsets = np.linspace(-(n_series - 1) / 2, (n_series - 1) / 2, n_series) * height
+
+        for i, (col, label, color) in enumerate(zip(col_names, series_labels, colors)):
+            col_idx = header.index(col)
+            vals = [_parse_float(r[col_idx]) for r in rows]
+            peak = max((v for v in vals if v), default=1.0)  # scales the gap between a bar and its value label
+            bars = ax.barh(
+                y + offsets[i], [v or 0.0 for v in vals],  # missing cells get a zero-length bar
+                height=height * 0.9, color=color, label=label, edgecolor="white",
+            )
+            for bar, v in zip(bars, vals):
+                if v is not None:
+                    ax.text(
+                        v + peak * 0.01,
+                        bar.get_y() + bar.get_height() / 2,
+                        f"{int(v)} ms", va="center", fontsize=8,
+                    )
+
+        # annotate speedup for "Video: Ours" vs Image
+        image_col = "image_ms"
+        ours_col = col_names[-1]
+        if image_col in header and ours_col in header:
+            image_idx = header.index(image_col)
+            ours_idx = header.index(ours_col)
+            for j, row in enumerate(rows):
+                img_v = _parse_float(row[image_idx])
+                our_v = _parse_float(row[ours_idx])
+                if img_v and our_v:
+                    speedup = img_v / our_v
+                    ax.text(
+                        0.98, y[j] + offsets[-1],
+                        f"×{speedup:.2f} vs image",
+                        transform=ax.get_yaxis_transform(),
+                        ha="right", va="center", fontsize=7.5, style="italic",
+                        color="#333333",
+                    )
+
+        ax.set_yticks(y)
+        ax.set_yticklabels(configs, fontsize=9)
+        ax.set_xlabel("Iteration time [ms]", fontsize=10)
+        ax.set_title(title, fontsize=10, fontweight="bold")
+        ax.xaxis.grid(True, linestyle="--", alpha=0.5)
+        ax.set_axisbelow(True)
+        ax.spines["top"].set_visible(False)
+        ax.spines["right"].set_visible(False)
+        ax.legend(fontsize=8, loc="lower right")
+
+    _save(fig, output_path)
+
+
+# ---------------------------------------------------------------------------
+# Public entry point
+# ---------------------------------------------------------------------------
+
+
+def generate_all(input_root: Path, output_dir: Path) -> None:
+    cross = input_root / "cross_decoder"  # one sub-directory of input_root per result family
+    sweep = input_root / "video_config_sweep"
+    streampetr = input_root / "streampetr_training"
+
+    plot_cross_decoder(
+        random_csv=cross / "hevc_gop30_random_access.csv",
+        sequential_csv=cross / "hevc_gop30_sequential.csv",
+        output_path=output_dir / "cross_decoder.png",
+    )
+
+    plot_config_line(
+        random_csv=sweep / "gop_random_access.csv",
+        sequential_csv=sweep / "gop_sequential.csv",
+        output_path=output_dir / "video_config_gop.png",
+        xlabel="GOP size",
+        title="Effect of GOP Size — HEVC, B-frames=0",
+        xscale="log",  # the committed GOP sweep spans 1 to 250
+    )
+
+    plot_config_line(
+        random_csv=sweep / "bframes_random_access.csv",
+        sequential_csv=sweep / "bframes_sequential.csv",
+        output_path=output_dir / "video_config_bframes.png",
+        xlabel="Number of B-frames",
+        title="Effect of B-frames — HEVC, GOP=30",
+    )
+
+    plot_codec_bars(
+        random_csv=sweep / "codec_random_access.csv",
+        sequential_csv=sweep / "codec_sequential.csv",
+        output_path=output_dir / "video_config_codec.png",
+    )
+
+    plot_streampetr(
+        setup_a_csv=streampetr / "setup_a.csv",
+        setup_b_csv=streampetr / "setup_b.csv",
+        output_path=output_dir / "streampetr_training.png",
+    )
diff --git a/packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_random_access.csv b/packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_random_access.csv
new file mode 100644
index 0000000..f0cd76c
--- /dev/null
+++ b/packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_random_access.csv
@@ -0,0 +1,6 @@
+gpu,accvlab_gpu,pynvc_gpu,decord_gpu,decord_cpu,opencv_cpu
+A100,337,189,114,44,34
+H200 NVL,402,316,170,68,52
+B200,474,345,,56,47
+B300,416,324,126,40,32
+RTX PRO 6000,420,377,,25,41
diff --git a/packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_sequential.csv b/packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_sequential.csv
new file mode 100644
index 0000000..20c4b6a
--- /dev/null
+++ b/packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_sequential.csv
@@ -0,0 +1,6 @@
+gpu,accvlab_gpu,pynvc_gpu,decord_gpu,decord_cpu,opencv_cpu
+A100,1424,583,421,235,462
+H200 NVL,1795,1245,707,351,789
+B200,2158,1182,,302,707
+B300,2167,1191,396,205,469
+RTX PRO 6000,1881,1484,,139,595
diff --git a/packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_a.csv b/packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_a.csv
new file mode 100644
index 0000000..f378c32
--- /dev/null
+++ b/packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_a.csv
@@ -0,0 +1,3 @@
+config,image_ms,video_opencv_ms,video_ours_ms,speedup_vs_image
+1 GPU,725,1674,751,0.97
+8 GPU,1025,2663,908,1.13
diff --git a/packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_b.csv b/packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_b.csv
new file mode 100644
index 0000000..ba0f832
--- /dev/null
+++ b/packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_b.csv
@@ -0,0 +1,3 @@
+config,image_ms,video_ms,speedup
+1 GPU,878,862,1.02
+8 GPU,1310,1070,1.22
diff --git a/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_random_access.csv b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_random_access.csv
new file mode 100644
index 0000000..cb42f0a
--- /dev/null
+++ b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_random_access.csv
@@ -0,0 +1,4 @@
+bframes,A100,H200 NVL,B200,B300,RTX PRO 6000
+0,337,402,474,416,420
+2,284,445,548,441,466
+4,230,469,572,458,513
diff --git a/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_sequential.csv b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_sequential.csv
new file mode 100644
index 0000000..9c0acc6
--- /dev/null
+++ b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_sequential.csv
@@ -0,0 +1,4 @@
+bframes,A100,H200 NVL,B200,B300,RTX PRO 6000
+0,1424,1795,2158,2167,1881
+2,657,1665,2112,1749,1658
+4,1123,1627,1977,1812,1545
diff --git a/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_random_access.csv b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_random_access.csv
new file mode 100644
index 0000000..3023e23
--- /dev/null
+++ b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_random_access.csv
@@ -0,0 +1,3 @@
+codec,A100,H200 NVL,B200,B300,RTX PRO 6000
+H.264,186,217,484,468,486
+HEVC,337,402,474,416,420
diff --git a/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_sequential.csv b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_sequential.csv
new file mode 100644
index 0000000..fa72d4a
--- /dev/null
+++ b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_sequential.csv
@@ -0,0 +1,3 @@
+codec,A100,H200 NVL,B200,B300,RTX PRO 6000
+H.264,707,1002,2002,2188,2105
+HEVC,1424,1795,2158,2167,1881
diff --git a/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_random_access.csv b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_random_access.csv
new file mode 100644
index 0000000..ae22f66
--- /dev/null
+++ b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_random_access.csv
@@ -0,0 +1,6 @@
+gop_size,A100,H200 NVL,B200,B300,RTX PRO 6000
+1,531,1145,994,673,857
+12,641,708,842,641,778
+30,337,402,474,416,420
+60,197,240,296,298,245
+250,61,78,100,101,82
diff --git a/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_sequential.csv b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_sequential.csv
new file mode 100644
index 0000000..8763bf3
--- /dev/null
+++ b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_sequential.csv
@@ -0,0 +1,6 @@
+gop_size,A100,H200 NVL,B200,B300,RTX PRO 6000
+1,1598,2244,2286,2130,2221
+12,1397,1710,1967,1941,1691
+30,1424,1795,2158,2167,1881
+60,1383,1779,2116,2194,1816
+250,1320,1753,2089,2102,1770