From d816d70d0cf29b100e5b3967f37b2837d55f41a0 Mon Sep 17 00:00:00 2001 From: dmoodie Date: Tue, 28 Apr 2026 11:35:57 -0400 Subject: [PATCH 01/12] don't need this return statement Signed-off-by: dmoodie --- modelopt/onnx/quantization/autotune/benchmark.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py index f931ae6c11a..df6dbc877d0 100644 --- a/modelopt/onnx/quantization/autotune/benchmark.py +++ b/modelopt/onnx/quantization/autotune/benchmark.py @@ -220,7 +220,6 @@ def __init__( "Remote autotuning requires '--skipInference' to be set. Adding it to trtexec arguments." ) self.trtexec_args.append("--skipInference") - return except ImportError: self.logger.warning( "Remote autotuning is not supported with TensorRT version < 10.15. " From 19a4961632a1644d1e3bfee2ee8fcc4f8e891d43 Mon Sep 17 00:00:00 2001 From: dmoodie Date: Thu, 30 Apr 2026 11:53:38 -0400 Subject: [PATCH 02/12] Use trtexec_safe to perform remote benchmarking with the cli when use the safe runtime Signed-off-by: dmoodie --- .../onnx/quantization/autotune/benchmark.py | 98 +++++++++++++++++-- 1 file changed, 91 insertions(+), 7 deletions(-) diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py index df6dbc877d0..4796255492c 100644 --- a/modelopt/onnx/quantization/autotune/benchmark.py +++ b/modelopt/onnx/quantization/autotune/benchmark.py @@ -37,6 +37,7 @@ from abc import ABC, abstractmethod from pathlib import Path from typing import Any +from urllib.parse import parse_qs, urlparse import numpy as np import torch @@ -145,6 +146,13 @@ def _write_log_file(self, file: Path | str | None, content: str) -> None: self.logger.warning(f"Failed to save logs to {file}: {e}") +safe_pattern = ( + r"\[\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2}\]\s+\[I\]\s+" + r"Average over \d+ runs - GPU latency:\s*([\d.]+)\s*ms" +) +std_pattern = r"\[I\]\s+Latency:.*?median\s*=\s*([\d.]+)\s*ms" + + class TrtExecBenchmark(Benchmark): """TensorRT benchmark using trtexec command-line tool. @@ -183,7 +191,6 @@ def __init__( self.temp_model_path = os.path.join(self.temp_dir, "temp_model.onnx") self.logger.debug(f"Created temporary engine directory: {self.temp_dir}") self.logger.debug(f"Temporary model path: {self.temp_model_path}") - self.latency_pattern = r"\[I\]\s+Latency:.*?median\s*=\s*([\d.]+)\s*ms" self._base_cmd = [ self.trtexec_path, @@ -204,9 +211,56 @@ def __init__( self.logger.debug(f"Added plugin library: {plugin_path}") trtexec_args = self.trtexec_args or [] - has_remote_config = any("--remoteAutoTuningConfig" in arg for arg in trtexec_args) - - if has_remote_config: + self.has_remote_config = any("--remoteAutoTuningConfig" in arg for arg in trtexec_args) + self.remote_ip: str | None = None + self.remote_port: int | None = None + self.remote_user: str | None = None + self.remote_password: str | None = None + self.remote_engine_path: str | None = "trtexec_benchmark_model.trt" + self.remote_bin_path: str = "trtexec" + + if self.has_remote_config: + remote_config = [arg for arg in trtexec_args if "--remoteAutoTuningConfig" in arg] + if len(remote_config) != 1: + raise ValueError("Exactly one --remoteAutoTuningConfig argument is required") + # Parse --remoteAutoTuningConfig argument, which may be given as: + # ('--remoteAutoTuningConfig=ssh://user:pass@host:port?...') or + # ('--remoteAutoTuningConfig', 'ssh://user:pass@host:port?...') + # + # The logic: find the arg starting with '--remoteAutoTuningConfig' + # If formatted as '--remoteAutoTuningConfig=...', split off the '=' + # Otherwise, grab the next argument. + config_arg_value: str | None = None + for i, arg in enumerate(trtexec_args): + if arg.startswith("--remoteAutoTuningConfig"): + if arg == "--remoteAutoTuningConfig": + # Value should be the next argument + if i + 1 < len(trtexec_args): + config_arg_value = trtexec_args[i + 1] + else: + raise ValueError("Missing value for --remoteAutoTuningConfig") + elif arg.startswith("--remoteAutoTuningConfig="): + config_arg_value = arg.split("=", 1)[1] + else: + raise ValueError(f"Malformed --remoteAutoTuningConfig argument: {arg}") + break + if not config_arg_value: + raise ValueError("Could not parse --remoteAutoTuningConfig argument") + remote_config_str: str = config_arg_value + + if not remote_config_str.startswith("ssh://"): + raise ValueError("Only 'ssh://' remote autotuning config URLs are supported") + parsed = urlparse(remote_config_str) + # parsed.username, parsed.password, parsed.hostname, parsed.port, parsed.query + self.remote_user = parsed.username + self.remote_password = parsed.password + self.remote_ip = parsed.hostname + self.remote_port = parsed.port + # Parse query options into a dict + self.remote_options = { + k: v[0] if len(v) == 1 else v for k, v in parse_qs(parsed.query).items() + } + self.remote_bin_path = os.path.dirname(str(self.remote_options["remote_exec_path"])) try: _check_for_trtexec(min_version="10.15") self.logger.debug("TensorRT Python API version >= 10.15 detected") @@ -215,6 +269,7 @@ def __init__( "Remote autotuning requires '--safe' to be set. Adding it to trtexec arguments." ) self.trtexec_args.append("--safe") + self.is_safe = True if "--skipInference" not in trtexec_args: self.logger.warning( "Remote autotuning requires '--skipInference' to be set. Adding it to trtexec arguments." @@ -228,9 +283,14 @@ def __init__( trtexec_args = [ arg for arg in trtexec_args if "--remoteAutoTuningConfig" not in arg ] + self.is_safe = "--safe" in trtexec_args + if self.is_safe: + self.latency_pattern = safe_pattern + else: + self.latency_pattern = std_pattern self._base_cmd.extend(trtexec_args) - self.logger.debug(f"Base command template: {' '.join(self._base_cmd)}") + self.logger.info(f"Base command template: {' '.join(self._base_cmd)}") def __del__(self): """Cleanup temporary directory.""" @@ -292,10 +352,34 @@ def run( self.logger.error(f"trtexec failed with return code {result.returncode}") self.logger.error(f"stderr: {result.stderr}") return float("inf") + if self.has_remote_config and self.is_safe: + # need to push the model to the device and use trtexec_safe to run + scp_cmd = [ + "scp", + f"-P{self.remote_port}", + self.engine_path, + f"{self.remote_user}@{self.remote_ip}:{self.remote_engine_path}", + ] + result = subprocess.run(scp_cmd) # nosec B603 + if result.returncode != 0: + self.logger.error("Failed to push engine to remote device") + return float("inf") + ld_path = ( + f"LD_LIBRARY_PATH={self.remote_options['remote_lib_path']}:$LD_LIBRARY_PATH" + ) + trt_path = f"{os.path.join(self.remote_bin_path, 'trtexec_safe')}" + trtexec_safe_cmd = [ + "ssh", + "-p", + f"{self.remote_port}", + f"{self.remote_user}:{self.remote_password}@{self.remote_ip}", + f"{ld_path} {trt_path} --loadEngine={self.remote_engine_path}", + ] + result = subprocess.run(trtexec_safe_cmd, capture_output=True, text=True) # nosec B603 if not (match := re.search(self.latency_pattern, result.stdout, re.IGNORECASE)): - self.logger.warning("Could not parse median latency from trtexec output") - self.logger.debug(f"trtexec stdout:\n{result.stdout}") + self.logger.warning(f"trtexec stdout:\n{result.stdout}") + self.logger.error("Could not parse median latency from trtexec output") return float("inf") latency = float(match.group(1)) self.logger.info(f"TrtExec benchmark (median): {latency:.2f} ms") From e45b08e2e695168628c99355e73f7a3750fe45da Mon Sep 17 00:00:00 2001 From: dmoodie Date: Thu, 30 Apr 2026 12:50:23 -0400 Subject: [PATCH 03/12] Look at gpu compute time instead of top level latency, add better config validation, default value for remote port Signed-off-by: dmoodie --- .../onnx/quantization/autotune/benchmark.py | 38 ++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py index 4796255492c..8f8600cea3e 100644 --- a/modelopt/onnx/quantization/autotune/benchmark.py +++ b/modelopt/onnx/quantization/autotune/benchmark.py @@ -150,7 +150,7 @@ def _write_log_file(self, file: Path | str | None, content: str) -> None: r"\[\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2}\]\s+\[I\]\s+" r"Average over \d+ runs - GPU latency:\s*([\d.]+)\s*ms" ) -std_pattern = r"\[I\]\s+Latency:.*?median\s*=\s*([\d.]+)\s*ms" +std_pattern = r"\[I\]\s+GPU Compute Time:.*?median\s*=\s*([\d.]+)\s*ms" class TrtExecBenchmark(Benchmark): @@ -213,7 +213,7 @@ def __init__( trtexec_args = self.trtexec_args or [] self.has_remote_config = any("--remoteAutoTuningConfig" in arg for arg in trtexec_args) self.remote_ip: str | None = None - self.remote_port: int | None = None + self.remote_port: int = 22 self.remote_user: str | None = None self.remote_password: str | None = None self.remote_engine_path: str | None = "trtexec_benchmark_model.trt" @@ -260,6 +260,12 @@ def __init__( self.remote_options = { k: v[0] if len(v) == 1 else v for k, v in parse_qs(parsed.query).items() } + required_params = ["remote_exec_path", "remote_lib_path"] + missing = [p for p in required_params if p not in self.remote_options] + if missing: + raise ValueError( + f"Missing required query parameters in --remoteAutoTuningConfig: {missing}" + ) self.remote_bin_path = os.path.dirname(str(self.remote_options["remote_exec_path"])) try: _check_for_trtexec(min_version="10.15") @@ -284,10 +290,6 @@ def __init__( arg for arg in trtexec_args if "--remoteAutoTuningConfig" not in arg ] self.is_safe = "--safe" in trtexec_args - if self.is_safe: - self.latency_pattern = safe_pattern - else: - self.latency_pattern = std_pattern self._base_cmd.extend(trtexec_args) self.logger.info(f"Base command template: {' '.join(self._base_cmd)}") @@ -352,6 +354,7 @@ def run( self.logger.error(f"trtexec failed with return code {result.returncode}") self.logger.error(f"stderr: {result.stderr}") return float("inf") + latency_pattern = std_pattern if self.has_remote_config and self.is_safe: # need to push the model to the device and use trtexec_safe to run scp_cmd = [ @@ -372,12 +375,29 @@ def run( "ssh", "-p", f"{self.remote_port}", - f"{self.remote_user}:{self.remote_password}@{self.remote_ip}", + f"{self.remote_user}@{self.remote_ip}", f"{ld_path} {trt_path} --loadEngine={self.remote_engine_path}", ] result = subprocess.run(trtexec_safe_cmd, capture_output=True, text=True) # nosec B603 - - if not (match := re.search(self.latency_pattern, result.stdout, re.IGNORECASE)): + latency_pattern = safe_pattern + if result.returncode != 0: + # fallback and try trtexec with "--safe" + trt_path = f"{os.path.join(self.remote_bin_path, 'trtexec')}" + trtexec_safe_cmd = [ + "ssh", + "-p", + f"{self.remote_port}", + f"{self.remote_user}:{self.remote_password}@{self.remote_ip}", + f"{ld_path} {trt_path} --safe --loadEngine={self.remote_engine_path}", + ] + result = subprocess.run(trtexec_safe_cmd, capture_output=True, text=True) # nosec B603 + latency_pattern = std_pattern + if result.returncode != 0: + self.logger.error( + f"Failed to run trtexec_safe or trtexec with '--safe'\n {result.stdout}" + ) + return float("inf") + if not (match := re.search(latency_pattern, result.stdout, re.IGNORECASE)): self.logger.warning(f"trtexec stdout:\n{result.stdout}") self.logger.error("Could not parse median latency from trtexec output") return float("inf") From bb0442218f62af577d9c954e9d87dcf1bbeb0d4d Mon Sep 17 00:00:00 2001 From: dmoodie Date: Thu, 30 Apr 2026 13:08:12 -0400 Subject: [PATCH 04/12] Use ssh pass for remote password Signed-off-by: dmoodie --- modelopt/onnx/quantization/autotune/benchmark.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py index 8f8600cea3e..e7f3d62ae9d 100644 --- a/modelopt/onnx/quantization/autotune/benchmark.py +++ b/modelopt/onnx/quantization/autotune/benchmark.py @@ -214,7 +214,7 @@ def __init__( self.has_remote_config = any("--remoteAutoTuningConfig" in arg for arg in trtexec_args) self.remote_ip: str | None = None self.remote_port: int = 22 - self.remote_user: str | None = None + self.remote_user: str = "root" self.remote_password: str | None = None self.remote_engine_path: str | None = "trtexec_benchmark_model.trt" self.remote_bin_path: str = "trtexec" @@ -356,6 +356,11 @@ def run( return float("inf") latency_pattern = std_pattern if self.has_remote_config and self.is_safe: + ssh_pass = [] + if self.remote_password: + ssh_pass.append("sshpass") + ssh_pass.append("-p") + ssh_pass.append(self.remote_password) # need to push the model to the device and use trtexec_safe to run scp_cmd = [ "scp", @@ -363,6 +368,7 @@ def run( self.engine_path, f"{self.remote_user}@{self.remote_ip}:{self.remote_engine_path}", ] + scp_cmd = ssh_pass + scp_cmd result = subprocess.run(scp_cmd) # nosec B603 if result.returncode != 0: self.logger.error("Failed to push engine to remote device") @@ -378,6 +384,7 @@ def run( f"{self.remote_user}@{self.remote_ip}", f"{ld_path} {trt_path} --loadEngine={self.remote_engine_path}", ] + trtexec_safe_cmd = ssh_pass + trtexec_safe_cmd result = subprocess.run(trtexec_safe_cmd, capture_output=True, text=True) # nosec B603 latency_pattern = safe_pattern if result.returncode != 0: @@ -387,9 +394,11 @@ def run( "ssh", "-p", f"{self.remote_port}", - f"{self.remote_user}:{self.remote_password}@{self.remote_ip}", + f"{self.remote_user}@{self.remote_ip}", f"{ld_path} {trt_path} --safe --loadEngine={self.remote_engine_path}", ] + trtexec_safe_cmd = ssh_pass + trtexec_safe_cmd + result = subprocess.run(trtexec_safe_cmd, capture_output=True, text=True) # nosec B603 latency_pattern = std_pattern if result.returncode != 0: From 8b779b311f20b04e8ad2352f8059eb643bd13995 Mon Sep 17 00:00:00 2001 From: dmoodie Date: Thu, 30 Apr 2026 13:10:37 -0400 Subject: [PATCH 05/12] Revert base cmd logging back to debug Signed-off-by: dmoodie --- modelopt/onnx/quantization/autotune/benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py index e7f3d62ae9d..98f885ede13 100644 --- a/modelopt/onnx/quantization/autotune/benchmark.py +++ b/modelopt/onnx/quantization/autotune/benchmark.py @@ -292,7 +292,7 @@ def __init__( self.is_safe = "--safe" in trtexec_args self._base_cmd.extend(trtexec_args) - self.logger.info(f"Base command template: {' '.join(self._base_cmd)}") + self.logger.debug(f"Base command template: {' '.join(self._base_cmd)}") def __del__(self): """Cleanup temporary directory.""" From d95b5716f57bd063b5a8544185c7936281253ec1 Mon Sep 17 00:00:00 2001 From: dmoodie Date: Thu, 30 Apr 2026 14:03:34 -0400 Subject: [PATCH 06/12] Guard against unable to parse port Signed-off-by: dmoodie --- modelopt/onnx/quantization/autotune/benchmark.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py index 98f885ede13..18d41e452dc 100644 --- a/modelopt/onnx/quantization/autotune/benchmark.py +++ b/modelopt/onnx/quantization/autotune/benchmark.py @@ -256,6 +256,8 @@ def __init__( self.remote_password = parsed.password self.remote_ip = parsed.hostname self.remote_port = parsed.port + if self.remote_port is None: + self.remote_port = 22 # Parse query options into a dict self.remote_options = { k: v[0] if len(v) == 1 else v for k, v in parse_qs(parsed.query).items() From 14dc2937b6eaa34c1578ea6f8178ed13d3ee567e Mon Sep 17 00:00:00 2001 From: dmoodie Date: Thu, 30 Apr 2026 14:11:25 -0400 Subject: [PATCH 07/12] Use shlex quote for shell strings Signed-off-by: dmoodie --- modelopt/onnx/quantization/autotune/benchmark.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py index 18d41e452dc..0f40aa42734 100644 --- a/modelopt/onnx/quantization/autotune/benchmark.py +++ b/modelopt/onnx/quantization/autotune/benchmark.py @@ -30,6 +30,7 @@ import importlib.util import os import re +import shlex import shutil import subprocess # nosec B404 import tempfile @@ -215,8 +216,8 @@ def __init__( self.remote_ip: str | None = None self.remote_port: int = 22 self.remote_user: str = "root" - self.remote_password: str | None = None - self.remote_engine_path: str | None = "trtexec_benchmark_model.trt" + self.remote_password: str = "" + self.remote_engine_path: str = "trtexec_benchmark_model.trt" self.remote_bin_path: str = "trtexec" if self.has_remote_config: @@ -269,6 +270,7 @@ def __init__( f"Missing required query parameters in --remoteAutoTuningConfig: {missing}" ) self.remote_bin_path = os.path.dirname(str(self.remote_options["remote_exec_path"])) + self.remote_lib_path = str(self.remote_options["remote_lib_path"]) try: _check_for_trtexec(min_version="10.15") self.logger.debug("TensorRT Python API version >= 10.15 detected") @@ -368,23 +370,21 @@ def run( "scp", f"-P{self.remote_port}", self.engine_path, - f"{self.remote_user}@{self.remote_ip}:{self.remote_engine_path}", + f"{self.remote_user}@{self.remote_ip}:{shlex.quote(self.remote_engine_path)}", ] scp_cmd = ssh_pass + scp_cmd result = subprocess.run(scp_cmd) # nosec B603 if result.returncode != 0: self.logger.error("Failed to push engine to remote device") return float("inf") - ld_path = ( - f"LD_LIBRARY_PATH={self.remote_options['remote_lib_path']}:$LD_LIBRARY_PATH" - ) + ld_path = f"LD_LIBRARY_PATH={shlex.quote(self.remote_lib_path)}:$LD_LIBRARY_PATH" trt_path = f"{os.path.join(self.remote_bin_path, 'trtexec_safe')}" trtexec_safe_cmd = [ "ssh", "-p", f"{self.remote_port}", f"{self.remote_user}@{self.remote_ip}", - f"{ld_path} {trt_path} --loadEngine={self.remote_engine_path}", + f"{ld_path} {shlex.quote(trt_path)} --loadEngine={shlex.quote(self.remote_engine_path)}", ] trtexec_safe_cmd = ssh_pass + trtexec_safe_cmd result = subprocess.run(trtexec_safe_cmd, capture_output=True, text=True) # nosec B603 @@ -397,7 +397,7 @@ def run( "-p", f"{self.remote_port}", f"{self.remote_user}@{self.remote_ip}", - f"{ld_path} {trt_path} --safe --loadEngine={self.remote_engine_path}", + f"{ld_path} {shlex.quote(trt_path)} --safe --loadEngine={shlex.quote(self.remote_engine_path)}", ] trtexec_safe_cmd = ssh_pass + trtexec_safe_cmd From 144df91a3968b6e2f0425c242a0722cf649b6bed Mon Sep 17 00:00:00 2001 From: dmoodie Date: Thu, 30 Apr 2026 14:18:51 -0400 Subject: [PATCH 08/12] Better logging of scp errors Signed-off-by: dmoodie --- modelopt/onnx/quantization/autotune/benchmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py index 0f40aa42734..6b1ff396217 100644 --- a/modelopt/onnx/quantization/autotune/benchmark.py +++ b/modelopt/onnx/quantization/autotune/benchmark.py @@ -373,9 +373,9 @@ def run( f"{self.remote_user}@{self.remote_ip}:{shlex.quote(self.remote_engine_path)}", ] scp_cmd = ssh_pass + scp_cmd - result = subprocess.run(scp_cmd) # nosec B603 + result = subprocess.run(scp_cmd, capture_output=True, text=True) # nosec B603 if result.returncode != 0: - self.logger.error("Failed to push engine to remote device") + self.logger.error(f"Failed to push engine to remote device: {result.stderr}") return float("inf") ld_path = f"LD_LIBRARY_PATH={shlex.quote(self.remote_lib_path)}:$LD_LIBRARY_PATH" trt_path = f"{os.path.join(self.remote_bin_path, 'trtexec_safe')}" From 28e4e8e8438e00c902e9aa2543807acc758b484f Mon Sep 17 00:00:00 2001 From: dmoodie Date: Thu, 30 Apr 2026 15:48:43 -0400 Subject: [PATCH 09/12] Add timeout for network ops Signed-off-by: dmoodie --- .../onnx/quantization/autotune/benchmark.py | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py index 6b1ff396217..545acc2831c 100644 --- a/modelopt/onnx/quantization/autotune/benchmark.py +++ b/modelopt/onnx/quantization/autotune/benchmark.py @@ -219,6 +219,7 @@ def __init__( self.remote_password: str = "" self.remote_engine_path: str = "trtexec_benchmark_model.trt" self.remote_bin_path: str = "trtexec" + self.remote_timeout_sec = 300 if self.has_remote_config: remote_config = [arg for arg in trtexec_args if "--remoteAutoTuningConfig" in arg] @@ -335,7 +336,9 @@ def run( cmd = [*self._base_cmd, f"--onnx={model_path}"] self.logger.debug(f"Running: {' '.join(cmd)}") - result = subprocess.run(cmd, capture_output=True, text=True) # nosec B603 + result = subprocess.run( + cmd, capture_output=True, text=True, timeout=self.remote_timeout_sec + ) # nosec B603 self._write_log_file( log_file, "\n".join( @@ -373,7 +376,9 @@ def run( f"{self.remote_user}@{self.remote_ip}:{shlex.quote(self.remote_engine_path)}", ] scp_cmd = ssh_pass + scp_cmd - result = subprocess.run(scp_cmd, capture_output=True, text=True) # nosec B603 + result = subprocess.run( + scp_cmd, capture_output=True, text=True, timeout=self.remote_timeout_sec + ) # nosec B603 if result.returncode != 0: self.logger.error(f"Failed to push engine to remote device: {result.stderr}") return float("inf") @@ -387,7 +392,12 @@ def run( f"{ld_path} {shlex.quote(trt_path)} --loadEngine={shlex.quote(self.remote_engine_path)}", ] trtexec_safe_cmd = ssh_pass + trtexec_safe_cmd - result = subprocess.run(trtexec_safe_cmd, capture_output=True, text=True) # nosec B603 + result = subprocess.run( + trtexec_safe_cmd, + capture_output=True, + text=True, + timeout=self.remote_timeout_sec, + ) # nosec B603 latency_pattern = safe_pattern if result.returncode != 0: # fallback and try trtexec with "--safe" @@ -401,7 +411,12 @@ def run( ] trtexec_safe_cmd = ssh_pass + trtexec_safe_cmd - result = subprocess.run(trtexec_safe_cmd, capture_output=True, text=True) # nosec B603 + result = subprocess.run( + trtexec_safe_cmd, + capture_output=True, + text=True, + timeout=self.remote_timeout_sec, + ) # nosec B603 latency_pattern = std_pattern if result.returncode != 0: self.logger.error( From 36600920454ad8e5d3f5ca10b15c4e6a3007c055 Mon Sep 17 00:00:00 2001 From: dmoodie Date: Thu, 30 Apr 2026 17:19:05 -0400 Subject: [PATCH 10/12] hard fail on remoteAutoTuning argument provided but unavailable Signed-off-by: dmoodie --- modelopt/onnx/quantization/autotune/benchmark.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py index 545acc2831c..422508e7280 100644 --- a/modelopt/onnx/quantization/autotune/benchmark.py +++ b/modelopt/onnx/quantization/autotune/benchmark.py @@ -286,14 +286,13 @@ def __init__( "Remote autotuning requires '--skipInference' to be set. Adding it to trtexec arguments." ) self.trtexec_args.append("--skipInference") - except ImportError: + except ImportError as e: self.logger.warning( "Remote autotuning is not supported with TensorRT version < 10.15. " "Removing --remoteAutoTuningConfig from trtexec arguments" ) - trtexec_args = [ - arg for arg in trtexec_args if "--remoteAutoTuningConfig" not in arg - ] + raise e + self.is_safe = "--safe" in trtexec_args self._base_cmd.extend(trtexec_args) From 2f75b5ddd96f307bf822ff07b3dd22b45d4966c5 Mon Sep 17 00:00:00 2001 From: dmoodie Date: Thu, 30 Apr 2026 17:24:25 -0400 Subject: [PATCH 11/12] raise on incorrectly parsed values from remoteAutoTuningConfig --- modelopt/onnx/quantization/autotune/benchmark.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py index 422508e7280..c33406faa83 100644 --- a/modelopt/onnx/quantization/autotune/benchmark.py +++ b/modelopt/onnx/quantization/autotune/benchmark.py @@ -253,11 +253,14 @@ def __init__( if not remote_config_str.startswith("ssh://"): raise ValueError("Only 'ssh://' remote autotuning config URLs are supported") parsed = urlparse(remote_config_str) - # parsed.username, parsed.password, parsed.hostname, parsed.port, parsed.query self.remote_user = parsed.username self.remote_password = parsed.password self.remote_ip = parsed.hostname self.remote_port = parsed.port + if self.remote_user is None: + raise ValueError("Unable to parse remote user from --remoteAutoTuningConfig") + if self.remote_ip is None: + raise ValueError("Unable to parse remote IP from --remoteAutoTuningConfig") if self.remote_port is None: self.remote_port = 22 # Parse query options into a dict From 2512076316990887a4a120d388754ce52de0958e Mon Sep 17 00:00:00 2001 From: dmoodie Date: Fri, 1 May 2026 11:56:04 -0400 Subject: [PATCH 12/12] Remove timeout, some remote operations can take hours so the user should be the one to decide on killing the process. Improve documentation, improve log messages Signed-off-by: dmoodie --- .../onnx/quantization/autotune/benchmark.py | 36 +++++++------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py index c33406faa83..db3b09606a9 100644 --- a/modelopt/onnx/quantization/autotune/benchmark.py +++ b/modelopt/onnx/quantization/autotune/benchmark.py @@ -219,7 +219,6 @@ def __init__( self.remote_password: str = "" self.remote_engine_path: str = "trtexec_benchmark_model.trt" self.remote_bin_path: str = "trtexec" - self.remote_timeout_sec = 300 if self.has_remote_config: remote_config = [arg for arg in trtexec_args if "--remoteAutoTuningConfig" in arg] @@ -291,8 +290,7 @@ def __init__( self.trtexec_args.append("--skipInference") except ImportError as e: self.logger.warning( - "Remote autotuning is not supported with TensorRT version < 10.15. " - "Removing --remoteAutoTuningConfig from trtexec arguments" + "Remote autotuning is not supported with TensorRT version < 10.15." ) raise e @@ -338,9 +336,7 @@ def run( cmd = [*self._base_cmd, f"--onnx={model_path}"] self.logger.debug(f"Running: {' '.join(cmd)}") - result = subprocess.run( - cmd, capture_output=True, text=True, timeout=self.remote_timeout_sec - ) # nosec B603 + result = subprocess.run(cmd, capture_output=True, text=True) # nosec B603 self._write_log_file( log_file, "\n".join( @@ -378,9 +374,7 @@ def run( f"{self.remote_user}@{self.remote_ip}:{shlex.quote(self.remote_engine_path)}", ] scp_cmd = ssh_pass + scp_cmd - result = subprocess.run( - scp_cmd, capture_output=True, text=True, timeout=self.remote_timeout_sec - ) # nosec B603 + result = subprocess.run(scp_cmd, capture_output=True, text=True) # nosec B603 if result.returncode != 0: self.logger.error(f"Failed to push engine to remote device: {result.stderr}") return float("inf") @@ -391,34 +385,26 @@ def run( "-p", f"{self.remote_port}", f"{self.remote_user}@{self.remote_ip}", - f"{ld_path} {shlex.quote(trt_path)} --loadEngine={shlex.quote(self.remote_engine_path)}", + f"{ld_path} {shlex.quote(trt_path)} --useCudaGraphs " + f"--loadEngine={shlex.quote(self.remote_engine_path)}", ] trtexec_safe_cmd = ssh_pass + trtexec_safe_cmd - result = subprocess.run( - trtexec_safe_cmd, - capture_output=True, - text=True, - timeout=self.remote_timeout_sec, - ) # nosec B603 + result = subprocess.run(trtexec_safe_cmd, capture_output=True, text=True) # nosec B603 latency_pattern = safe_pattern if result.returncode != 0: - # fallback and try trtexec with "--safe" + # fallback and try trtexec with "--safe" in case this is a safety proxy target trt_path = f"{os.path.join(self.remote_bin_path, 'trtexec')}" trtexec_safe_cmd = [ "ssh", "-p", f"{self.remote_port}", f"{self.remote_user}@{self.remote_ip}", - f"{ld_path} {shlex.quote(trt_path)} --safe --loadEngine={shlex.quote(self.remote_engine_path)}", + f"{ld_path} {shlex.quote(trt_path)} --safe --useCudaGraphs " + f"--loadEngine={shlex.quote(self.remote_engine_path)}", ] trtexec_safe_cmd = ssh_pass + trtexec_safe_cmd - result = subprocess.run( - trtexec_safe_cmd, - capture_output=True, - text=True, - timeout=self.remote_timeout_sec, - ) # nosec B603 + result = subprocess.run(trtexec_safe_cmd, capture_output=True, text=True) # nosec B603 latency_pattern = std_pattern if result.returncode != 0: self.logger.error( @@ -426,6 +412,8 @@ def run( ) return float("inf") if not (match := re.search(latency_pattern, result.stdout, re.IGNORECASE)): + # this could be due to creating a degenerate onnx file that can't be engine built. + # thus not a hard failure self.logger.warning(f"trtexec stdout:\n{result.stdout}") self.logger.error("Could not parse median latency from trtexec output") return float("inf")