From 2208f7d35b1c7b0457474ab9b084706fa0ef5b2a Mon Sep 17 00:00:00 2001 From: Julian Stamm <97609514+julcst@users.noreply.github.com> Date: Sun, 14 Dec 2025 18:24:55 +0100 Subject: [PATCH 1/2] Add TCNN_HALF_PRECISION definition to kernel --- src/rtc_kernel.cu | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/rtc_kernel.cu b/src/rtc_kernel.cu index 9794304f..66aec33d 100644 --- a/src/rtc_kernel.cu +++ b/src/rtc_kernel.cu @@ -178,6 +178,8 @@ CudaRtcKernel::CudaRtcKernel(const std::string& name, const std::string& kernel_ {OPTS} */ + #define TCNN_HALF_PRECISION {TCNN_HALF_PRECISION} + // NVRTC does not come with the C++ standard library out of the box and // it would be troublesome to bundle it or require users to have it installed // in readily available paths. So we instead include a minimal custom @@ -190,7 +192,8 @@ CudaRtcKernel::CudaRtcKernel(const std::string& name, const std::string& kernel_ "KERNEL_NAME"_a = name, "PREAMBLE"_a = generate_device_code_preamble(), "OPTS"_a = join(opts, "\n"), - "KERNEL_CODE"_a = kernel_code + "KERNEL_CODE"_a = kernel_code, + "TCNN_HALF_PRECISION"_a = TCNN_HALF_PRECISION ); size_t code_hash = hash_combine(0, complete_code); From 09ee3d6cd70fc05669c81e7e7739f7c41c8a7be8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20M=C3=BCller?= Date: Sun, 14 Dec 2025 23:53:17 +0100 Subject: [PATCH 2/2] fix(rtc): move TCNN_HALF_PRECISION definition to compiler opts --- src/rtc_kernel.cu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/rtc_kernel.cu b/src/rtc_kernel.cu index 66aec33d..b7f0ab8b 100644 --- a/src/rtc_kernel.cu +++ b/src/rtc_kernel.cu @@ -132,6 +132,7 @@ CudaRtcKernel::CudaRtcKernel(const std::string& name, const std::string& kernel_ std::vector<std::string> opts = { fmt::format("--gpu-architecture=compute_{}", cc), + fmt::format("-DTCNN_HALF_PRECISION={}", TCNN_HALF_PRECISION), fmt::format("-DTCNN_MIN_GPU_ARCH={}", cc), "--std=c++14", #ifdef TCNN_RTC_USE_FAST_MATH @@ -178,8 +179,6 @@
CudaRtcKernel::CudaRtcKernel(const std::string& name, const std::string& kernel_ {OPTS} */ - #define TCNN_HALF_PRECISION {TCNN_HALF_PRECISION} - // NVRTC does not come with the C++ standard library out of the box and // it would be troublesome to bundle it or require users to have it installed // in readily available paths. So we instead include a minimal custom @@ -192,8 +191,7 @@ CudaRtcKernel::CudaRtcKernel(const std::string& name, const std::string& kernel_ "KERNEL_NAME"_a = name, "PREAMBLE"_a = generate_device_code_preamble(), "OPTS"_a = join(opts, "\n"), - "KERNEL_CODE"_a = kernel_code, - "TCNN_HALF_PRECISION"_a = TCNN_HALF_PRECISION + "KERNEL_CODE"_a = kernel_code ); size_t code_hash = hash_combine(0, complete_code);