diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py index df6dbc877d0..d6dd27ea7fb 100644 --- a/modelopt/onnx/quantization/autotune/benchmark.py +++ b/modelopt/onnx/quantization/autotune/benchmark.py @@ -31,7 +31,6 @@ import os import re import shutil -import subprocess # nosec B404 import tempfile import time from abc import ABC, abstractmethod @@ -42,7 +41,7 @@ import torch from modelopt.onnx.logging_config import logger -from modelopt.onnx.quantization.ort_utils import _check_for_trtexec +from modelopt.onnx.quantization.ort_utils import _check_for_trtexec, _run_trtexec TRT_AVAILABLE = importlib.util.find_spec("tensorrt") is not None if TRT_AVAILABLE: @@ -186,7 +185,6 @@ def __init__( self.latency_pattern = r"\[I\]\s+Latency:.*?median\s*=\s*([\d.]+)\s*ms" self._base_cmd = [ - self.trtexec_path, f"--avgRuns={self.timing_runs}", f"--iterations={self.timing_runs}", f"--warmUp={self.warmup_runs}", @@ -269,7 +267,7 @@ def run( cmd = [*self._base_cmd, f"--onnx={model_path}"] self.logger.debug(f"Running: {' '.join(cmd)}") - result = subprocess.run(cmd, capture_output=True, text=True) # nosec B603 + result = _run_trtexec(cmd) self._write_log_file( log_file, "\n".join( diff --git a/modelopt/onnx/quantization/ort_utils.py b/modelopt/onnx/quantization/ort_utils.py index 0ea465487a3..3233a2227a8 100755 --- a/modelopt/onnx/quantization/ort_utils.py +++ b/modelopt/onnx/quantization/ort_utils.py @@ -44,6 +44,22 @@ def _check_lib_in_ld_library_path(ld_library_path, lib_pattern): return False, None +def _run_trtexec( + args: list[str] | None = None, timeout: float | None = None +) -> subprocess.CompletedProcess: + """Run a 'trtexec' command via subprocess. + + Args: + args: Arguments to pass to trtexec (without the 'trtexec' command itself). + timeout: Optional subprocess timeout in seconds. + + Returns: + The completed subprocess result. + """ + cmd = ["trtexec", *(args or [])] + return subprocess.run(cmd, capture_output=True, text=True, timeout=timeout) # nosec B603 + + def _check_for_trtexec(min_version: str = "10.0") -> str: """Check if the `trtexec` CLI tool is available in PATH and is >= min_version. @@ -87,7 +103,7 @@ def _parse_version_from_string(version_str: str) -> str | None: ) try: - result = subprocess.run([trtexec_path], capture_output=True, text=True, timeout=5) # nosec B603 + result = _run_trtexec(timeout=5) banner_output = result.stdout + result.stderr parsed_version = _parse_version_from_string(banner_output) diff --git a/modelopt/torch/_deploy/_runtime/tensorrt/constants.py b/modelopt/torch/_deploy/_runtime/tensorrt/constants.py index c4f387482e9..d9ace1645a3 100644 --- a/modelopt/torch/_deploy/_runtime/tensorrt/constants.py +++ b/modelopt/torch/_deploy/_runtime/tensorrt/constants.py @@ -32,10 +32,6 @@ ONE_GIBI_IN_BYTES = 1 << 30 # TensorRT conversion tool names -TRTEXEC = "trtexec" - -# trtexec path within docker -TRTEXEC_PATH = "trtexec" DEFAULT_ARTIFACT_DIR = "modelopt_build/trt_artifacts" # Default conversion params diff --git a/modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py b/modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py index 055a1f26b27..9a4f26475ed 100644 --- a/modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py +++ b/modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py @@ -28,7 +28,6 @@ DEFAULT_NUM_INFERENCE_PER_RUN, SHA_256_HASH_LENGTH, TRT_MODE_FLAGS, - TRTEXEC_PATH, WARMUP_TIME_MS, TRTMode, ) @@ -41,25 +40,29 @@ ) -# TODO: Get rid of this function or get approval for `# nosec` usage if we want to include this -# as a non-compiled python file in the release. -def _run_command(cmd: list[str], cwd: Path | None = None) -> tuple[int, bytes]: - """Util function to execute a command. +def _run_trtexec_streamed(args: list[str], cwd: Path | None = None) -> tuple[int, bytes]: + """Run a 'trtexec' command via subprocess, streaming stdout/stderr to a temp file. - This util will not direct stdout and stderr to console if the cmd succeeds. + The 'trtexec' binary is hardcoded as the executable; only its arguments may be supplied + by the caller. This restricts the function to trtexec invocations. + + Output handling: stdout and stderr are captured to a temp file and returned as bytes. + On failure (non-zero returncode), the captured output is also logged at ERROR level; + on success, this function emits nothing to the console. Args: - cmd: the command line list - cwd: current working directory + args: Arguments to pass to trtexec (without the 'trtexec' command itself). + cwd: Optional working directory for the subprocess. Returns: - return code: 0 means successful, otherwise means failed - log_string: the stdout and stderr output as a string - + A tuple of (returncode, output) where output is the combined stdout/stderr bytes. """ + cmd = ["trtexec", *args] logging.info(" ".join(cmd)) with NamedTemporaryFile("w+b") as log: - p = subprocess.Popen(cmd, stdout=log, stderr=log, cwd=str(cwd) if cwd else None) # nosec + p = subprocess.Popen( # nosec B603 - cmd[0] is hardcoded "trtexec" + cmd, stdout=log, stderr=log, cwd=str(cwd) if cwd else None + ) p.wait() log.seek(0) output = log.read() @@ -181,7 +184,7 @@ def _build_command( calib_cache_path: Path | None = None, timing_cache_path: Path | None = None, ) -> list[str]: - cmd = [TRTEXEC_PATH, f"--onnx={onnx_path}"] + cmd = [f"--onnx={onnx_path}"] cmd.extend(TRT_MODE_FLAGS[trt_mode]) if trt_mode == TRTMode.INT8 and calib_cache and calib_cache_path: @@ -235,7 +238,7 @@ def _setup_files_and_paths( cmd = _build_command(onnx_path, engine_path, calib_cache_path, timing_cache_path) try: - ret_code, out = _run_command(cmd) + ret_code, out = _run_trtexec_streamed(cmd) if ret_code != 0: return None, out @@ -284,7 +287,7 @@ def profile_engine( """ def _build_command(engine_path: Path, profile_path: Path, layer_info_path: Path) -> list[str]: - cmd = [TRTEXEC_PATH, f"--loadEngine={engine_path}"] + cmd = [f"--loadEngine={engine_path}"] cmd += _get_profiling_params(profiling_runs) if enable_layerwise_profiling: @@ -320,7 +323,7 @@ def _setup_files_and_paths(tmp_dir_path: Path, engine_hash: str) -> tuple[Path, cmd = _build_command(engine_path, profile_path, layer_info_path) try: - ret_code, out = _run_command(cmd) + ret_code, out = _run_trtexec_streamed(cmd) if ret_code != 0: return None, out