From 0bddd1be19e94cc4ca244ddbce39fdf95cea1899 Mon Sep 17 00:00:00 2001 From: Scott White Date: Wed, 1 Jul 2026 14:35:01 -0700 Subject: [PATCH] Use <cmath> instead of <math.h> in context attention FMHA headers This replaces C-style <math.h> includes in the context attention FMHA code with the C++ <cmath> header. Including <math.h> from a public header is fragile in CUDA/TensorRT builds, especially on Jetson/Thor where CUDA include paths are active and the C math header may resolve through CUDA wrapper headers. Using <cmath> keeps the code consistent with C++ compilation and avoids leaking C math-header behavior into translation units that include fmhaParams_v2.h. No runtime behavior change intended. Co-Authored-By: Claude Opus 4.7 --- cpp/kernels/contextAttentionKernels/contextFMHARunner.cpp | 2 +- cpp/kernels/contextAttentionKernels/fmhaParams_v2.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/kernels/contextAttentionKernels/contextFMHARunner.cpp b/cpp/kernels/contextAttentionKernels/contextFMHARunner.cpp index 186e3498..6978d19c 100644 --- a/cpp/kernels/contextAttentionKernels/contextFMHARunner.cpp +++ b/cpp/kernels/contextAttentionKernels/contextFMHARunner.cpp @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/kernels/contextAttentionKernels/fmhaParams_v2.h b/cpp/kernels/contextAttentionKernels/fmhaParams_v2.h index 4252197d..72a26bdb 100644 --- a/cpp/kernels/contextAttentionKernels/fmhaParams_v2.h +++ b/cpp/kernels/contextAttentionKernels/fmhaParams_v2.h @@ -18,7 +18,7 @@ #pragma once #include -#include +#include #include //! \brief Parameters for ALiBi (Attention with Linear Biases) positional encoding