From 7d32c0ba5d3237ceb2a770e8a70989081563d106 Mon Sep 17 00:00:00 2001 From: suharvest Date: Mon, 29 Jun 2026 07:45:03 +0800 Subject: [PATCH 1/2] Respect explicit CUDA architectures The default CMAKE_CUDA_ARCHITECTURES (80;86;89[;100a;120]) was applied whenever AARCH64_BUILD was undefined, clobbering an explicit architecture passed on the command line. This broke native Jetson Orin (sm_87) builds configured with -DCMAKE_CUDA_ARCHITECTURES=87. enable_language(CUDA) (CMP0104 NEW, the default at our cmake_minimum 3.20) initializes CMAKE_CUDA_ARCHITECTURES to a compiler default, so the variable is already set after project() even when the user passed nothing. Record whether the user supplied an explicit value before project() runs, and only apply the project default when they did not. Regular x86 builds are unchanged; explicit non-empty selections are preserved, while an empty value is treated as unset and still receives the project default. Signed-off-by: suharvest --- CMakeLists.txt | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 486402b9..49b39aad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & +# AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); you may not # use this file except in compliance with the License. You may obtain a copy of @@ -15,6 +15,17 @@ cmake_minimum_required(VERSION 3.20 FATAL_ERROR) +# Record whether the user explicitly requested CUDA architectures *before* +# project() runs. enable_language(CUDA) (CMP0104 NEW) initializes +# CMAKE_CUDA_ARCHITECTURES to a compiler default, so testing it afterwards +# cannot tell a user-provided -DCMAKE_CUDA_ARCHITECTURES from that default. 
+set(_EDGELLM_USER_CUDA_ARCHS FALSE) +if(DEFINED CMAKE_CUDA_ARCHITECTURES) + if(NOT CMAKE_CUDA_ARCHITECTURES STREQUAL "") + set(_EDGELLM_USER_CUDA_ARCHS TRUE) + endif() +endif() + project( tensorrt_edgellm_sdk VERSION 0.8.0 @@ -61,7 +72,10 @@ endif() set_ifndef(CUDA_CTK_VERSION 12.8) set_ifndef(CUDA_DIR /usr/local/cuda-${CUDA_CTK_VERSION}) -if(NOT DEFINED AARCH64_BUILD) +# Apply the project's default architecture set only when the user did not pass +# an explicit -DCMAKE_CUDA_ARCHITECTURES (e.g. a native Jetson SM87 build), and +# not for the aarch64 cross toolchain which selects its own. +if(NOT _EDGELLM_USER_CUDA_ARCHS AND NOT DEFINED AARCH64_BUILD) set(CMAKE_CUDA_ARCHITECTURES 80;86;89) if(CUDA_CTK_VERSION VERSION_GREATER_EQUAL 12.8) list(APPEND CMAKE_CUDA_ARCHITECTURES 100a 120) From 29d18b7b26330cdd7895c49b13e566d941e90f23 Mon Sep 17 00:00:00 2001 From: suharvest Date: Mon, 29 Jun 2026 07:45:03 +0800 Subject: [PATCH 2/2] Propagate CuTe DSL cudart shim and --wrap to static-lib consumers cute_dsl_setup() attached the cutedsl cudart shim only to non-static link targets (PRIVATE), and added the CUDA<12.8 -Wl,--wrap=_cudaLaunchKernelEx option PRIVATEly to every link target. STATIC_LIBRARY link targets therefore never received the shim from cute_dsl_setup(), and because a static archive performs no link step the PRIVATE link option was never applied either, so neither reached the final executable, breaking the CuTe DSL kernels on JetPack 6 / CUDA 12.6. Propagate the shim via PUBLIC and the --wrap link option via INTERFACE for static-library targets so the consuming executable inherits them; keep PRIVATE for shared/other targets. 
Signed-off-by: suharvest --- cmake/CuteDsl.cmake | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cmake/CuteDsl.cmake b/cmake/CuteDsl.cmake index a0e2b9f6..f24f5eaf 100644 --- a/cmake/CuteDsl.cmake +++ b/cmake/CuteDsl.cmake @@ -775,7 +775,8 @@ function(cute_dsl_setup) foreach(_tgt ${ARG_LINK_TARGETS}) get_target_property(_tgt_type ${_tgt} TYPE) if(_tgt_type STREQUAL "STATIC_LIBRARY") - target_link_libraries(${_tgt} PUBLIC "${_static_lib}") + target_link_libraries(${_tgt} PUBLIC "${_static_lib}" + trt_edgellm_cutedsl_cudart_shim) else() target_link_libraries(${_tgt} PRIVATE "${_static_lib}" trt_edgellm_cutedsl_cudart_shim) @@ -787,7 +788,11 @@ function(cute_dsl_setup) # JetPack 6). if(NOT _cute_dsl_cuda_ver STREQUAL "" AND _cute_dsl_cuda_ver VERSION_LESS 12.8) - target_link_options(${_tgt} PRIVATE "-Wl,--wrap=_cudaLaunchKernelEx") + if(_tgt_type STREQUAL "STATIC_LIBRARY") + target_link_options(${_tgt} INTERFACE "-Wl,--wrap=_cudaLaunchKernelEx") + else() + target_link_options(${_tgt} PRIVATE "-Wl,--wrap=_cudaLaunchKernelEx") + endif() endif() endforeach()