From 19f7029e19a3b48c9887a596e770854364d2086f Mon Sep 17 00:00:00 2001 From: pinjie Date: Mon, 15 Jun 2026 00:21:03 -0700 Subject: [PATCH] update accvlab perf Signed-off-by: pinjie --- .../docs/_on_doc_generation.py | 82 ++++ .../docs/evaluation.rst | 227 +++++++--- .../evaluation/plot_decoder_evaluation.py | 408 ++++++++++++++++++ .../hevc_gop30_random_access.csv | 6 + .../cross_decoder/hevc_gop30_sequential.csv | 6 + .../streampetr_training/setup_a.csv | 3 + .../streampetr_training/setup_b.csv | 3 + .../bframes_random_access.csv | 4 + .../video_config_sweep/bframes_sequential.csv | 4 + .../codec_random_access.csv | 3 + .../video_config_sweep/codec_sequential.csv | 3 + .../video_config_sweep/gop_random_access.csv | 6 + .../video_config_sweep/gop_sequential.csv | 6 + 13 files changed, 704 insertions(+), 57 deletions(-) create mode 100644 packages/on_demand_video_decoder/docs/_on_doc_generation.py create mode 100644 packages/on_demand_video_decoder/evaluation/plot_decoder_evaluation.py create mode 100644 packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_random_access.csv create mode 100644 packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_sequential.csv create mode 100644 packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_a.csv create mode 100644 packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_b.csv create mode 100644 packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_random_access.csv create mode 100644 packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_sequential.csv create mode 100644 packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_random_access.csv create mode 100644 packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_sequential.csv create mode 100644 packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_random_access.csv 
create mode 100644 packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_sequential.csv diff --git a/packages/on_demand_video_decoder/docs/_on_doc_generation.py b/packages/on_demand_video_decoder/docs/_on_doc_generation.py new file mode 100644 index 0000000..4e0c016 --- /dev/null +++ b/packages/on_demand_video_decoder/docs/_on_doc_generation.py @@ -0,0 +1,82 @@ +# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from pathlib import Path
+import sys
+from typing import Any
+
+_RESULTS_SUBDIR = Path("evaluation_results")
+_GENERATED_IMAGE_SUBDIR = Path("evaluation")
+
+# CSV inputs, relative to the results directory, that must exist before plotting starts.
+_REQUIRED_CSV_INPUTS = (
+    "cross_decoder/hevc_gop30_random_access.csv",
+    "cross_decoder/hevc_gop30_sequential.csv",
+    "video_config_sweep/gop_random_access.csv",
+    "video_config_sweep/gop_sequential.csv",
+    "video_config_sweep/bframes_random_access.csv",
+    "video_config_sweep/bframes_sequential.csv",
+    "video_config_sweep/codec_random_access.csv",
+    "video_config_sweep/codec_sequential.csv",
+    "streampetr_training/setup_a.csv",
+    "streampetr_training/setup_b.csv",
+)
+
+# Image file names that must exist in the output directory once plotting has finished.
+_REQUIRED_IMAGE_NAMES = (
+    "cross_decoder.png",
+    "video_config_gop.png",
+    "video_config_bframes.png",
+    "video_config_codec.png",
+    "streampetr_training.png",
+)
+
+
+def _validate_csv_inputs(input_dir: Path) -> None:
+    """Check that ``input_dir`` and every file in ``_REQUIRED_CSV_INPUTS`` exist.
+
+    Raises:
+        FileNotFoundError: If the directory itself or any required CSV file is missing.
+            All missing files are listed in a single message.
+    """
+    if not input_dir.exists():
+        raise FileNotFoundError(
+            "Required committed CSV input directory is missing for on_demand_video_decoder docs asset generation: "
+            f"{input_dir}."
+        )
+
+    missing = [input_dir / rel for rel in _REQUIRED_CSV_INPUTS if not (input_dir / rel).exists()]
+    if missing:
+        missing_list = "\n".join(f" - {p}" for p in missing)
+        raise FileNotFoundError(
+            "Missing required committed CSV input file(s) for on_demand_video_decoder docs asset generation:\n"
+            f"{missing_list}"
+        )
+
+
+def _validate_images(output_dir: Path) -> None:
+    """Check that every file in ``_REQUIRED_IMAGE_NAMES`` exists in ``output_dir``.
+
+    Raises:
+        FileNotFoundError: If any expected image is missing. All missing files are listed
+            in a single message.
+    """
+    missing = [output_dir / name for name in _REQUIRED_IMAGE_NAMES if not (output_dir / name).exists()]
+    if missing:
+        missing_list = "\n".join(f" - {p}" for p in missing)
+        raise FileNotFoundError(
+            "on_demand_video_decoder docs asset generation did not produce all images referenced by evaluation.rst:\n"
+            f"{missing_list}"
+        )
+
+
+def generate_docs_assets(context: Any) -> None:
+    """Render the evaluation charts from the committed CSV results.
+
+    Args:
+        context: Docs-build context object. Only ``context.package_root`` and
+            ``context.generated_dir`` are read; both are joined with ``Path`` objects via ``/``.
+
+    Raises:
+        FileNotFoundError: If a required CSV input is missing before plotting, or an expected
+            image is missing after plotting.
+    """
+    input_dir = context.package_root / _RESULTS_SUBDIR
+    output_dir = context.generated_dir / _GENERATED_IMAGE_SUBDIR
+
+    _validate_csv_inputs(input_dir)
+
+    # The plotting script is a plain file, not part of an importable package, so its
+    # directory has to be on sys.path for the import. The entry is removed again right
+    # after the import so it does not stay at the front of sys.path for the rest of the
+    # docs build; the imported module itself remains cached in sys.modules.
+    evaluation_dir = str(context.package_root / "evaluation")
+    sys.path.insert(0, evaluation_dir)
+    try:
+        import plot_decoder_evaluation
+    finally:
+        if evaluation_dir in sys.path:
+            sys.path.remove(evaluation_dir)
+
+    plot_decoder_evaluation.generate_all(input_root=input_dir, output_dir=output_dir)
+
+    _validate_images(output_dir)
diff --git a/packages/on_demand_video_decoder/docs/evaluation.rst b/packages/on_demand_video_decoder/docs/evaluation.rst index 4a4c67c..79b0304 100644 --- a/packages/on_demand_video_decoder/docs/evaluation.rst +++ b/packages/on_demand_video_decoder/docs/evaluation.rst @@ -1,23 +1,170 @@ Evaluation ========== -The on-demand video decoder was used for training a StreamPETR model on the NuScenes mini dataset and -compared to the performance to both the + +Decoder Throughput Benchmark (nuScenes) +---------------------------------------- + +This section benchmarks the standalone decoding throughput of multiple decoders across five GPU +platforms using nuScenes video clips. All results are **6-camera aggregate FPS** (random access, +one frame drawn per iteration across all six cameras), measured on a single GPU.
+ +Test Environment +~~~~~~~~~~~~~~~~ + +**Video clips** + +.. list-table:: + :header-rows: 1 + + * - Property + - Value + * - Source dataset + - nuScenes + * - Resolution + - 1600 × 900 + * - Frame rate + - 10 FPS + * - Frames per clip + - 235 + * - Cameras + - 6 (CAM_FRONT, CAM_FRONT_LEFT, CAM_FRONT_RIGHT, CAM_BACK, CAM_BACK_LEFT, CAM_BACK_RIGHT) + * - Pixel format + - YUV 4:2:0 + +**Hardware platforms** + +.. list-table:: + :header-rows: 1 + + * - GPU + - Compute Capability + - Driver + - CPU + - CPU Cores + * - NVIDIA A100 80 GB PCIe + - CC 8.0 (Ampere) + - 595.58.03 + - Intel Xeon Silver 4210R @ 2.40 GHz + - 10 physical / 20 logical + * - NVIDIA H200 NVL + - CC 9.0 (Hopper) + - 595.58.03 + - AMD EPYC 9554 + - 128 physical / 256 logical + * - NVIDIA B200 + - CC 10.0 (Blackwell) + - 610.43.02 + - Intel Xeon Platinum 8570 + - 112 physical / 224 logical + * - NVIDIA B300 + - CC 10.3 (Blackwell) + - 610.43.02 + - Intel Xeon 6776P + - 128 physical / 256 logical + * - NVIDIA RTX PRO 6000 Blackwell Server Edition + - CC 12.0 (Blackwell) + - 595.58.03 + - Intel Xeon Platinum 8480+ + - 112 physical / 224 logical + +All nodes run CUDA 12.9 inside a ``nvcr.io/nvidia/pytorch:25.05-py3`` container. + +**Decoder versions** + +.. list-table:: + :header-rows: 1 + + * - Decoder + - Library / Version + - Backend + * - ``accv_lab.on_demand_video_decoder`` + - accv_lab.on_demand_video_decoder + - NVDEC + * - ``pynvc_gpu`` + - PyNvVideoCodec 2.1.0 + - NVDEC + * - ``decord_gpu`` + - decord 0.6.0 + - NVDEC + * - ``decord_cpu`` + - decord 0.6.0 + - FFmpeg software decode + * - ``opencv_cpu`` + - OpenCV 4.11.0 + - FFmpeg software decode + +All GPU builds use FFmpeg 4.4.6 with nv-codec-headers n11.1.5.3. +CPU decoders (``decord_cpu``, ``opencv_cpu``) run on the host CPU listed in the hardware table above. + +HEVC GOP=30 — Cross-Decoder Comparison +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Random-access and sequential FPS (6-camera total) for the ``hevc_gop30_bf0`` configuration. 
+Hatched bars indicate the decoder failed on this config due to a known decord 0.6 EOF-retry bug. + +.. Data source: evaluation_results/cross_decoder/hevc_gop30_random_access.csv +.. evaluation_results/cross_decoder/hevc_gop30_sequential.csv + +.. figure:: _generated/evaluation/cross_decoder.png + :alt: Cross-decoder FPS comparison for HEVC GOP=30 (random and sequential access) + :align: center + :width: 100% + +On-demand Video Decoder - Across Video Configurations and Hardware +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +6-camera aggregate FPS for ``accvlab_gpu`` (``accv_lab.on_demand_video_decoder``). Each figure varies one encoding parameter +while the other two are held at their defaults (HEVC, GOP = 30, B-frames = 0). + +**Effect of GOP size — HEVC, B-frames = 0** + +.. Data source: evaluation_results/video_config_sweep/gop_random_access.csv +.. evaluation_results/video_config_sweep/gop_sequential.csv + +.. figure:: _generated/evaluation/video_config_gop.png + :alt: Effect of GOP size on FPS (random and sequential access) + :align: center + :width: 90% + +**Effect of B-frames — HEVC, GOP = 30** + +.. Data source: evaluation_results/video_config_sweep/bframes_random_access.csv +.. evaluation_results/video_config_sweep/bframes_sequential.csv + +.. figure:: _generated/evaluation/video_config_bframes.png + :alt: Effect of B-frames on FPS (random and sequential access) + :align: center + :width: 90% + +**Effect of Codec — GOP = 30, B-frames = 0** + +.. Data source: evaluation_results/video_config_sweep/codec_random_access.csv +.. evaluation_results/video_config_sweep/codec_sequential.csv + +.. 
figure:: _generated/evaluation/video_config_codec.png + :alt: Effect of codec choice on FPS (random and sequential access) + :align: center + :width: 90% + + +StreamPETR Training Performance +--------------------------------- + +The on-demand video decoder was used for training a StreamPETR model on the NuScenes mini dataset, and +its performance was compared to both the `original StreamPETR implementation (with image-based training) `_, and in one case to OpenCV-based video training. The results are shown below. Setup ------ +~~~~~ -Experiment Setup -~~~~~~~~~~~~~~~~ - -For the video training, the demuxer-free approach is used (see -:doc:`pytorch_integration_examples/dataloader_demuxer_free_decode` for details on this approach). Here, the +For the video training, the demuxer-free approach is used (see +:doc:`pytorch_integration_examples/dataloader_demuxer_free_decode` for details on this approach). Here, the GOP packets are extracted and stored prior to the training. -In the video training, the frames are decoded in the training process, and consequently, pre-processing is -performed in the training process on the GPU. Note that this is not a viable optimization for the image-based +In the video training, the frames are decoded in the training process, and consequently, pre-processing is +performed in the training process on the GPU. Note that this is not a viable optimization for the image-based training, as it adds significant overhead when passing the full-resolution images to the training process. @@ -29,17 +176,17 @@ The training is performed for the NuScenes mini dataset, with the following conf - No B-frames - Including both samples and sweeps (resulting in ~12 frames per second) - 1600x900 resolution (same as images) - + - Batch size of 16 per GPU .. 
note:: - We are planning to add a demo for the On-Demand Video Decoder package in the future, including the + We are planning to add a demo for the On-Demand Video Decoder package in the future, including the implementation of the experiments performed in this evaluation. Hardware Setup A -~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^ .. list-table:: System Configuration :header-rows: 1 @@ -51,7 +198,7 @@ Hardware Setup A Hardware Setup B -~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^ .. list-table:: System Configuration :header-rows: 1 @@ -63,54 +210,20 @@ Hardware Setup B Results & Discussion --------------------- +~~~~~~~~~~~~~~~~~~~~ -Results -~~~~~~~ - -Results for both hardware systems are shown in the following tables. - -.. list-table:: Runtime Comparison for Hardware Setup A - :header-rows: 1 - - * - Configuration - - Image [ms] - - Video: OpenCV [ms] - - Video: Ours [ms] - - Speedup (vs. Image) - * - 1 GPU - - 725 - - 1674 - - **751** - - × 0.97 - * - 8 GPU - - 1025 - - 2663 - - **908** - - × 1.13 - - -.. list-table:: Runtime Comparison for Hardware Setup B - :header-rows: 1 +Results for both hardware systems are shown below. - * - Configuration - - Image [ms] - - Video [ms] - - Speedup - * - 1 GPU - - 878 - - **862** - - × 1.02 - * - 8 GPU - - 1310 - - **1070** - - × 1.22 +.. Data source: evaluation_results/streampetr_training/setup_a.csv +.. evaluation_results/streampetr_training/setup_b.csv +.. figure:: _generated/evaluation/streampetr_training.png + :alt: StreamPETR training iteration time comparison across hardware setups + :align: center + :width: 100% -Discussion -~~~~~~~~~~ On both systems, the performance of the video-based training is comparable to the image-based training for the 1 GPU configuration. The video training outperforms the image training for the 8 GPU configuration, with the speedup depending on the system. However, please note that the main goal is to reduce the storage -requirements while maintaining good performance. 
\ No newline at end of file +requirements while maintaining good performance. diff --git a/packages/on_demand_video_decoder/evaluation/plot_decoder_evaluation.py b/packages/on_demand_video_decoder/evaluation/plot_decoder_evaluation.py new file mode 100644 index 0000000..c3e088b --- /dev/null +++ b/packages/on_demand_video_decoder/evaluation/plot_decoder_evaluation.py @@ -0,0 +1,408 @@ +# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Generate benchmark chart images for the on_demand_video_decoder evaluation docs."""
+
+import csv
+from pathlib import Path
+from typing import Iterable, Optional
+
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from matplotlib.ticker import NullFormatter
+import numpy as np
+
+# ---------------------------------------------------------------------------
+# Color palettes
+# ---------------------------------------------------------------------------
+
+_GPU_COLORS = {
+    "A100": "#4C72B0",
+    "H200 NVL": "#55A868",
+    "B200": "#C44E52",
+    "B300": "#8172B2",
+    "RTX PRO 6000": "#CCB974",
+}
+
+_DECODER_COLORS = {
+    "accvlab_gpu": "#55A868",
+    "pynvc_gpu": "#4C72B0",
+    "decord_gpu": "#C44E52",
+    "decord_cpu": "#8172B2",
+    "opencv_cpu": "#CCB974",
+}
+
+_DECODER_LABELS = {
+    "accvlab_gpu": "accvlab_gpu (ours)",
+    "pynvc_gpu": "pynvc_gpu",
+    "decord_gpu": "decord_gpu",
+    "decord_cpu": "decord_cpu",
+    "opencv_cpu": "opencv_cpu",
+}
+
+_NA_HATCH = "////"
+
+# Height of the hatched placeholder bar for a missing measurement, as a fraction of the
+# tallest bar in the same subplot, so the placeholder stays visible at any FPS scale.
+_NA_BAR_FRACTION = 0.05
+
+# Gap between the end of a bar and its value label, as a fraction of the tallest bar.
+_LABEL_PAD_FRACTION = 0.01
+
+# ---------------------------------------------------------------------------
+# CSV helpers
+# ---------------------------------------------------------------------------
+
+
+def _read_csv(path: Path) -> tuple[list[str], list[list[str]]]:
+    """Return ``(header_row, data_rows)`` from a CSV file.
+
+    Completely blank lines are skipped so a trailing empty line does not yield an empty row.
+
+    Raises:
+        ValueError: If the file contains no non-blank row, i.e. not even a header.
+    """
+    with open(path, newline="", encoding="utf-8") as f:
+        rows = [row for row in csv.reader(f) if row]
+    if not rows:
+        raise ValueError(f"CSV file has no header row: {path}")
+    return rows[0], rows[1:]
+
+
+def _parse_float(value: str) -> Optional[float]:
+    """Return ``value`` as a float, or ``None`` for an empty/whitespace-only cell."""
+    value = value.strip()
+    if value == "":
+        return None
+    return float(value)
+
+
+def _peak(values: Iterable[Optional[float]]) -> float:
+    """Return the largest non-missing value in ``values``, or ``0.0`` if there is none."""
+    return max((v for v in values if v is not None), default=0.0)
+
+
+def _split_at_gaps(
+    x_vals: list[Optional[float]], y_vals: list[Optional[float]]
+) -> list[tuple[list[Optional[float]], list[float]]]:
+    """Split a series into contiguous ``(xs, ys)`` runs, breaking wherever ``y`` is ``None``."""
+    segments = []
+    xs, ys = [], []
+    for xv, yv in zip(x_vals, y_vals):
+        if yv is None:
+            if xs:
+                segments.append((xs, ys))
+                xs, ys = [], []
+        else:
+            xs.append(xv)
+            ys.append(yv)
+    if xs:
+        segments.append((xs, ys))
+    return segments
+
+
+# ---------------------------------------------------------------------------
+# Figure helpers
+# ---------------------------------------------------------------------------
+
+
+def _save(fig: plt.Figure, output_path: Path) -> None:
+    """Write ``fig`` to ``output_path`` (creating parent directories) and close it."""
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    fig.savefig(output_path, dpi=150, bbox_inches="tight")
+    plt.close(fig)
+
+
+def _apply_style(ax: plt.Axes, title: str, xlabel: str, ylabel: str = "FPS (6-camera aggregate)") -> None:
+    """Apply the shared title, axis labels, horizontal grid and spine styling to ``ax``."""
+    ax.set_title(title, fontsize=11, fontweight="bold")
+    ax.set_xlabel(xlabel, fontsize=10)
+    ax.set_ylabel(ylabel, fontsize=10)
+    ax.yaxis.grid(True, linestyle="--", alpha=0.5)
+    ax.set_axisbelow(True)
+    ax.spines["top"].set_visible(False)
+    ax.spines["right"].set_visible(False)
+
+
+# ---------------------------------------------------------------------------
+# Plot: cross-decoder grouped bar chart
+# ---------------------------------------------------------------------------
+
+
+def plot_cross_decoder(
+    random_csv: Path,
+    sequential_csv: Path,
+    output_path: Path,
+) -> None:
+    """Plot grouped bars (one group per GPU, one bar per decoder) for both access patterns.
+
+    Each CSV has a header ``[gpu, decoder1, decoder2, ...]`` and one row per GPU. An empty
+    cell is a missing measurement and is drawn as a short hatched bar labelled "N/A".
+
+    Args:
+        random_csv: CSV with the random-access results (upper subplot).
+        sequential_csv: CSV with the sequential-access results (lower subplot).
+        output_path: Image file to write.
+    """
+    fig, axes = plt.subplots(2, 1, figsize=(10, 9), constrained_layout=True)
+    fig.suptitle("HEVC GOP=30 — Cross-Decoder Comparison\n(6-camera aggregate FPS, higher is better)", fontsize=12)
+
+    for ax, csv_path, access_label in zip(
+        axes,
+        [random_csv, sequential_csv],
+        ["Random Access", "Sequential (Stream) Access"],
+    ):
+        header, rows = _read_csv(csv_path)
+        # header: [gpu, decoder1, decoder2, ...]
+        gpu_names = [r[0] for r in rows]
+        decoders = header[1:]
+        n_gpus = len(gpu_names)
+        n_dec = len(decoders)
+
+        # Parse every column once so the subplot-wide peak is known before drawing.
+        columns = [[_parse_float(r[i + 1]) for r in rows] for i in range(n_dec)]
+        peak = _peak(v for column in columns for v in column)
+        label_pad = peak * _LABEL_PAD_FRACTION
+        # Fall back to a unit-height placeholder if the subplot has no valid value at all.
+        na_height = peak * _NA_BAR_FRACTION if peak > 0 else 1.0
+
+        x = np.arange(n_gpus)
+        width = 0.8 / n_dec
+        offsets = np.linspace(-(n_dec - 1) / 2, (n_dec - 1) / 2, n_dec) * width
+
+        for i, decoder in enumerate(decoders):
+            bar_x = x + offsets[i]
+            color = _DECODER_COLORS.get(decoder, "#888888")
+
+            for j, v in enumerate(columns[i]):
+                if v is None:
+                    # Missing measurement: hatched placeholder scaled to the subplot so it
+                    # is actually visible next to bars that are hundreds of FPS tall.
+                    ax.bar(
+                        bar_x[j], na_height, width=width * 0.9,
+                        color="white", edgecolor=color, linewidth=1,
+                        hatch=_NA_HATCH, alpha=0.6,
+                    )
+                    ax.text(
+                        bar_x[j], na_height + label_pad,
+                        "N/A", ha="center", va="bottom", fontsize=6.5, rotation=90,
+                    )
+                else:
+                    ax.bar(
+                        bar_x[j], v, width=width * 0.9,
+                        color=color, edgecolor="white", linewidth=0.5,
+                    )
+                    ax.text(
+                        bar_x[j], v + label_pad,
+                        str(int(v)), ha="center", va="bottom", fontsize=6.5, rotation=90,
+                    )
+
+        ax.set_xticks(x)
+        ax.set_xticklabels(gpu_names, fontsize=9)
+        _apply_style(ax, access_label, "GPU")
+
+        # legend only on first subplot; built from explicit handles so the N/A entry is included
+        if ax is axes[0]:
+            handles = [
+                mpatches.Patch(color=_DECODER_COLORS.get(d, "#888"), label=_DECODER_LABELS.get(d, d))
+                for d in decoders
+            ]
+            na_patch = mpatches.Patch(
+                facecolor="white", edgecolor="#888", hatch=_NA_HATCH, label="N/A (decoder failed)"
+            )
+            ax.legend(handles=handles + [na_patch], fontsize=8, loc="upper right")
+
+    _save(fig, output_path)
+
+
+# ---------------------------------------------------------------------------
+# Plot: line chart for a continuous config sweep (GOP, B-frames)
+# ---------------------------------------------------------------------------
+
+
+def plot_config_line(
+    random_csv: Path,
+    sequential_csv: Path,
+    output_path: Path,
+    xlabel: str,
+    title: str,
+    xscale: str = "linear",
+) -> None:
+    """Plot one line per GPU over a numeric configuration sweep, for both access patterns.
+
+    Each CSV has a header ``[parameter, gpu1, gpu2, ...]`` and one row per parameter value.
+    An empty cell breaks the line at that point.
+
+    Args:
+        random_csv: CSV with the random-access results (upper subplot).
+        sequential_csv: CSV with the sequential-access results (lower subplot).
+        output_path: Image file to write.
+        xlabel: Label of the x axis (the swept parameter).
+        title: Figure title; a fixed subtitle line is appended.
+        xscale: ``"log"`` for a logarithmic x axis; any other value keeps the linear axis.
+    """
+    fig, axes = plt.subplots(2, 1, figsize=(9, 8), constrained_layout=True)
+    fig.suptitle(title + "\n(6-camera aggregate FPS, higher is better)", fontsize=12)
+
+    for ax, csv_path, access_label in zip(
+        axes,
+        [random_csv, sequential_csv],
+        ["Random Access", "Sequential (Stream) Access"],
+    ):
+        header, rows = _read_csv(csv_path)
+        x_vals = [_parse_float(r[0]) for r in rows]
+        gpu_names = header[1:]
+
+        for gpu in gpu_names:
+            col_idx = header.index(gpu)
+            y_vals = [_parse_float(r[col_idx]) for r in rows]
+            color = _GPU_COLORS.get(gpu, "#888888")
+
+            # Draw each gap-free run separately so the line breaks at N/A. Only the first
+            # run carries the label, so every GPU with data gets exactly one legend entry
+            # no matter where its gaps are.
+            for k, (xs, ys) in enumerate(_split_at_gaps(x_vals, y_vals)):
+                ax.plot(
+                    xs, ys, marker="o", color=color, linewidth=1.8, markersize=5,
+                    label=gpu if k == 0 else "_nolegend_",
+                )
+
+        # The scale must be set before the ticks: changing it resets locators/formatters.
+        if xscale == "log":
+            ax.set_xscale("log")
+        ax.set_xticks(x_vals)
+        ax.set_xticklabels([str(int(v)) for v in x_vals], fontsize=9)
+        if xscale == "log":
+            # Hide the automatic minor-tick labels of the log axis; only data points are labelled.
+            ax.xaxis.set_minor_formatter(NullFormatter())
+
+        _apply_style(ax, access_label, xlabel)
+
+        if ax is axes[0]:
+            ax.legend(fontsize=8, loc="upper right")
+
+    _save(fig, output_path)
+
+
+# ---------------------------------------------------------------------------
+# Plot: codec grouped bar chart
+# ---------------------------------------------------------------------------
+
+
+def plot_codec_bars(
+    random_csv: Path,
+    sequential_csv: Path,
+    output_path: Path,
+) -> None:
+    """Plot grouped bars (one group per codec, one bar per GPU) for both access patterns.
+
+    Each CSV has a header ``[codec, gpu1, gpu2, ...]`` and one row per codec. An empty cell
+    is drawn as a zero-height bar without a value label.
+
+    Args:
+        random_csv: CSV with the random-access results (upper subplot).
+        sequential_csv: CSV with the sequential-access results (lower subplot).
+        output_path: Image file to write.
+    """
+    fig, axes = plt.subplots(2, 1, figsize=(9, 8), constrained_layout=True)
+    fig.suptitle("Effect of Codec — GOP=30, B-frames=0\n(6-camera aggregate FPS, higher is better)", fontsize=12)
+
+    for ax, csv_path, access_label in zip(
+        axes,
+        [random_csv, sequential_csv],
+        ["Random Access", "Sequential (Stream) Access"],
+    ):
+        header, rows = _read_csv(csv_path)
+        codec_names = [r[0] for r in rows]
+        gpu_names = header[1:]
+        n_codecs = len(codec_names)
+        n_gpus = len(gpu_names)
+
+        columns = [[_parse_float(r[header.index(gpu)]) for r in rows] for gpu in gpu_names]
+        label_pad = _peak(v for column in columns for v in column) * _LABEL_PAD_FRACTION
+
+        x = np.arange(n_codecs)
+        width = 0.8 / n_gpus
+        offsets = np.linspace(-(n_gpus - 1) / 2, (n_gpus - 1) / 2, n_gpus) * width
+
+        for i, gpu in enumerate(gpu_names):
+            vals = columns[i]
+            color = _GPU_COLORS.get(gpu, "#888888")
+            bar_x = x + offsets[i]
+
+            # Matplotlib cannot draw None; a missing value becomes an unlabelled empty slot.
+            heights = [0.0 if v is None else v for v in vals]
+            bars = ax.bar(
+                bar_x, heights, width=width * 0.9,
+                color=color, edgecolor="white", linewidth=0.5,
+                label=gpu,
+            )
+            for bar, v in zip(bars, vals):
+                if v is not None:
+                    ax.text(
+                        bar.get_x() + bar.get_width() / 2,
+                        v + label_pad,
+                        str(int(v)), ha="center", va="bottom", fontsize=8,
+                    )
+
+        ax.set_xticks(x)
+        ax.set_xticklabels(codec_names, fontsize=10)
+        _apply_style(ax, access_label, "Codec")
+
+        if ax is axes[0]:
+            ax.legend(fontsize=8, loc="upper right")
+
+    _save(fig, output_path)
+
+
+# ---------------------------------------------------------------------------
+# Plot: StreamPETR training horizontal bars
+# ---------------------------------------------------------------------------
+
+
+def plot_streampetr(
+    setup_a_csv: Path,
+    setup_b_csv: Path,
+    output_path: Path,
+) -> None:
+    """Plot StreamPETR iteration times as horizontal bars, one subplot per hardware setup.
+
+    Each CSV has one row per GPU configuration. The columns that are plotted are fixed per
+    setup (see ``setup_configs`` below); the last plotted column is treated as "ours" and
+    annotated with its speedup relative to ``image_ms``.
+
+    Args:
+        setup_a_csv: Results for hardware setup A (left subplot).
+        setup_b_csv: Results for hardware setup B (right subplot).
+        output_path: Image file to write.
+
+    Raises:
+        ValueError: If a CSV lacks one of the columns listed for its setup.
+    """
+    fig, axes = plt.subplots(1, 2, figsize=(12, 4), constrained_layout=True)
+    fig.suptitle("StreamPETR Training Performance — Iteration Time (lower is better)", fontsize=12)
+
+    # (axes, csv, subplot title, legend labels, csv column names, bar colors)
+    setup_configs = [
+        (axes[0], setup_a_csv, "Hardware Setup A\n(8× RTX 6000D, 2× EPYC 7742)",
+         ["Image", "Video: OpenCV", "Video: Ours"], ["image_ms", "video_opencv_ms", "video_ours_ms"],
+         ["#4C72B0", "#CCB974", "#55A868"]),
+        (axes[1], setup_b_csv, "Hardware Setup B\n(8× H20, 2× Xeon Platinum 8468V)",
+         ["Image", "Video: Ours"], ["image_ms", "video_ms"],
+         ["#4C72B0", "#55A868"]),
+    ]
+
+    for ax, csv_path, title, series_labels, col_names, colors in setup_configs:
+        header, rows = _read_csv(csv_path)
+        configs = [r[0] for r in rows]
+        n_configs = len(configs)
+        n_series = len(series_labels)
+
+        columns = [[_parse_float(r[header.index(col)]) for r in rows] for col in col_names]
+        label_pad = _peak(v for column in columns for v in column) * _LABEL_PAD_FRACTION
+
+        y = np.arange(n_configs)
+        height = 0.7 / n_series
+        offsets = np.linspace(-(n_series - 1) / 2, (n_series - 1) / 2, n_series) * height
+
+        for i, (label, color) in enumerate(zip(series_labels, colors)):
+            vals = columns[i]
+            bar_y = y + offsets[i]
+            # Matplotlib cannot draw None; a missing value becomes an unlabelled empty slot.
+            widths = [0.0 if v is None else v for v in vals]
+            bars = ax.barh(bar_y, widths, height=height * 0.9, color=color, label=label, edgecolor="white")
+            for bar, v in zip(bars, vals):
+                if v is not None:
+                    ax.text(
+                        v + label_pad,
+                        bar.get_y() + bar.get_height() / 2,
+                        f"{int(v)} ms", va="center", fontsize=8,
+                    )
+
+        # annotate speedup for "Video: Ours" vs Image
+        image_col = "image_ms"
+        ours_col = col_names[-1]
+        if image_col in header and ours_col in header:
+            image_idx = header.index(image_col)
+            ours_idx = header.index(ours_col)
+            for j, row in enumerate(rows):
+                img_v = _parse_float(row[image_idx])
+                our_v = _parse_float(row[ours_idx])
+                # Skips missing and zero values, which also avoids a division by zero.
+                if img_v and our_v:
+                    speedup = img_v / our_v
+                    ax.text(
+                        0.98, y[j] + offsets[-1],
+                        f"×{speedup:.2f} vs image",
+                        # x in axes fraction (right edge), y in data coordinates (bar row)
+                        transform=ax.get_yaxis_transform(),
+                        ha="right", va="center", fontsize=7.5, style="italic",
+                        color="#333333",
+                    )
+
+        ax.set_yticks(y)
+        ax.set_yticklabels(configs, fontsize=9)
+        ax.set_xlabel("Iteration time [ms]", fontsize=10)
+        ax.set_title(title, fontsize=10, fontweight="bold")
+        ax.xaxis.grid(True, linestyle="--", alpha=0.5)
+        ax.set_axisbelow(True)
+        ax.spines["top"].set_visible(False)
+        ax.spines["right"].set_visible(False)
+        ax.legend(fontsize=8, loc="lower right")
+
+    _save(fig, output_path)
+
+
+# ---------------------------------------------------------------------------
+# Public entry point
+# ---------------------------------------------------------------------------
+
+
+def generate_all(input_root: Path, output_dir: Path) -> None:
+    """Render all five evaluation charts.
+
+    Args:
+        input_root: Directory containing the ``cross_decoder``, ``video_config_sweep`` and
+            ``streampetr_training`` CSV sub-directories.
+        output_dir: Directory the PNG files are written to (created if missing).
+    """
+    cross = input_root / "cross_decoder"
+    sweep = input_root / "video_config_sweep"
+    streampetr = input_root / "streampetr_training"
+
+    plot_cross_decoder(
+        random_csv=cross / "hevc_gop30_random_access.csv",
+        sequential_csv=cross / "hevc_gop30_sequential.csv",
+        output_path=output_dir / "cross_decoder.png",
+    )
+
+    plot_config_line(
+        random_csv=sweep / "gop_random_access.csv",
+        sequential_csv=sweep / "gop_sequential.csv",
+        output_path=output_dir / "video_config_gop.png",
+        xlabel="GOP size",
+        title="Effect of GOP Size — HEVC, B-frames=0",
+        xscale="log",
+    )
+
+    plot_config_line(
+        random_csv=sweep / "bframes_random_access.csv",
+        sequential_csv=sweep / "bframes_sequential.csv",
+        output_path=output_dir / "video_config_bframes.png",
+        xlabel="Number of B-frames",
+        title="Effect of B-frames — HEVC, GOP=30",
+    )
+
+    plot_codec_bars(
+        random_csv=sweep / "codec_random_access.csv",
+        sequential_csv=sweep / "codec_sequential.csv",
+        output_path=output_dir / "video_config_codec.png",
+    )
+
+    plot_streampetr(
+        setup_a_csv=streampetr / "setup_a.csv",
+        setup_b_csv=streampetr / "setup_b.csv",
+        output_path=output_dir / "streampetr_training.png",
+    )
diff --git a/packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_random_access.csv b/packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_random_access.csv new file mode 100644 index 0000000..f0cd76c --- /dev/null +++ b/packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_random_access.csv @@ -0,0 +1,6 @@ +gpu,accvlab_gpu,pynvc_gpu,decord_gpu,decord_cpu,opencv_cpu +A100,337,189,114,44,34 +H200 NVL,402,316,170,68,52 +B200,474,345,,56,47 +B300,416,324,126,40,32 +RTX PRO 6000,420,377,,25,41 diff --git a/packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_sequential.csv b/packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_sequential.csv new file mode 100644 index 0000000..20c4b6a --- /dev/null +++ b/packages/on_demand_video_decoder/evaluation_results/cross_decoder/hevc_gop30_sequential.csv @@ -0,0 +1,6 @@ +gpu,accvlab_gpu,pynvc_gpu,decord_gpu,decord_cpu,opencv_cpu +A100,1424,583,421,235,462 +H200
NVL,1795,1245,707,351,789 +B200,2158,1182,,302,707 +B300,2167,1191,396,205,469 +RTX PRO 6000,1881,1484,,139,595 diff --git a/packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_a.csv b/packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_a.csv new file mode 100644 index 0000000..f378c32 --- /dev/null +++ b/packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_a.csv @@ -0,0 +1,3 @@ +config,image_ms,video_opencv_ms,video_ours_ms,speedup_vs_image +1 GPU,725,1674,751,0.97 +8 GPU,1025,2663,908,1.13 diff --git a/packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_b.csv b/packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_b.csv new file mode 100644 index 0000000..ba0f832 --- /dev/null +++ b/packages/on_demand_video_decoder/evaluation_results/streampetr_training/setup_b.csv @@ -0,0 +1,3 @@ +config,image_ms,video_ms,speedup +1 GPU,878,862,1.02 +8 GPU,1310,1070,1.22 diff --git a/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_random_access.csv b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_random_access.csv new file mode 100644 index 0000000..cb42f0a --- /dev/null +++ b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_random_access.csv @@ -0,0 +1,4 @@ +bframes,A100,H200 NVL,B200,B300,RTX PRO 6000 +0,337,402,474,416,420 +2,284,445,548,441,466 +4,230,469,572,458,513 diff --git a/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_sequential.csv b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_sequential.csv new file mode 100644 index 0000000..9c0acc6 --- /dev/null +++ b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/bframes_sequential.csv @@ -0,0 +1,4 @@ +bframes,A100,H200 NVL,B200,B300,RTX PRO 6000 +0,1424,1795,2158,2167,1881 +2,657,1665,2112,1749,1658 
+4,1123,1627,1977,1812,1545 diff --git a/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_random_access.csv b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_random_access.csv new file mode 100644 index 0000000..3023e23 --- /dev/null +++ b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_random_access.csv @@ -0,0 +1,3 @@ +codec,A100,H200 NVL,B200,B300,RTX PRO 6000 +H.264,186,217,484,468,486 +HEVC,337,402,474,416,420 diff --git a/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_sequential.csv b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_sequential.csv new file mode 100644 index 0000000..fa72d4a --- /dev/null +++ b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/codec_sequential.csv @@ -0,0 +1,3 @@ +codec,A100,H200 NVL,B200,B300,RTX PRO 6000 +H.264,707,1002,2002,2188,2105 +HEVC,1424,1795,2158,2167,1881 diff --git a/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_random_access.csv b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_random_access.csv new file mode 100644 index 0000000..ae22f66 --- /dev/null +++ b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_random_access.csv @@ -0,0 +1,6 @@ +gop_size,A100,H200 NVL,B200,B300,RTX PRO 6000 +1,531,1145,994,673,857 +12,641,708,842,641,778 +30,337,402,474,416,420 +60,197,240,296,298,245 +250,61,78,100,101,82 diff --git a/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_sequential.csv b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_sequential.csv new file mode 100644 index 0000000..8763bf3 --- /dev/null +++ b/packages/on_demand_video_decoder/evaluation_results/video_config_sweep/gop_sequential.csv @@ -0,0 +1,6 @@ +gop_size,A100,H200 NVL,B200,B300,RTX PRO 6000 +1,1598,2244,2286,2130,2221 +12,1397,1710,1967,1941,1691 
+30,1424,1795,2158,2167,1881 +60,1383,1779,2116,2194,1816 +250,1320,1753,2089,2102,1770